Commit 042ff9eb, authored by Guido van Rossum

AMK's latest

parent 104be4a4
......@@ -3,7 +3,7 @@
*************************************************/
#define PCRE_VERSION "1.04 22-Dec-1997"
#define PCRE_VERSION "1.07 16-Feb-1998"
/* This is a library of functions to support regular expressions whose syntax
......@@ -12,7 +12,7 @@ the file Tech.Notes for some information on the internals.
Written by: Philip Hazel <ph10@cam.ac.uk>
Copyright (c) 1997 University of Cambridge
Copyright (c) 1998 University of Cambridge
-----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any
......@@ -192,6 +192,7 @@ enum {
OP_CRMINRANGE,
OP_CLASS, /* Match a character class */
OP_NEGCLASS, /* Match a character class, specified negatively */
OP_CLASS_L, /* Match a character class */
OP_REF, /* Match a back reference */
......
......@@ -2,7 +2,7 @@
* Perl-Compatible Regular Expressions *
*************************************************/
/* Copyright (c) 1997 University of Cambridge */
/* Copyright (c) 1998 University of Cambridge */
#ifndef _PCRE_H
#define _PCRE_H
......@@ -17,6 +17,12 @@ it is needed here for malloc. */
#include <sys/types.h>
#include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options */
#define PCRE_CASELESS 0x0001
......@@ -68,4 +74,8 @@ extern int pcre_info(const pcre *, int *, int *);
extern pcre_extra *pcre_study(const pcre *, int, const char **);
extern const char *pcre_version(void);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcre.h */
......@@ -72,7 +72,7 @@ staticforward PyTypeObject Pcre_Type;
#define NOT_WORD_BOUNDARY 6
#define BEGINNING_OF_BUFFER 7
#define END_OF_BUFFER 8
#define STRING 9
static PcreObject *
newPcreObject(arg)
......@@ -191,49 +191,20 @@ PyPcre_compile(self, args)
{
PcreObject *rv;
PyObject *dictionary;
char *pattern, *newpattern;
char *pattern;
const char *error;
int num_zeros, i, j;
int patternlen, options, erroroffset;
if (!PyArg_ParseTuple(args, "s#iO!", &pattern, &patternlen, &options,
int options, erroroffset;
if (!PyArg_ParseTuple(args, "siO!", &pattern, &options,
&PyDict_Type, &dictionary))
return NULL;
rv = newPcreObject(args);
if ( rv == NULL )
return NULL;
/* PCRE doesn't like having null bytes in its pattern, so we have to replace
any zeros in the string with the characters '\000'. This increases the size
of the string by 3*num_zeros, plus 1 byte for the terminating \0. */
num_zeros=1; /* Start at 1; this will give 3 extra bytes of leeway */
for(i=0; i<patternlen; i++) {
if (pattern[i]==0) num_zeros++;
}
newpattern=malloc(patternlen + num_zeros*3 + 4);
if (newpattern==NULL) {
PyErr_SetString(PyExc_MemoryError, "can't allocate memory for new pattern");
return NULL;
}
for (i=j=0; i<patternlen; i++, j++)
{
if (pattern[i]!=0) newpattern[j]=pattern[i];
else {
newpattern[j++] ='\\';
newpattern[j++] = '0';
newpattern[j++] = '0';
newpattern[j ] = '0';
}
}
/* Keep purify happy; for pcre, one null byte is enough! */
newpattern[j++]='\0';
newpattern[j++]='\0';
newpattern[j++]='\0';
newpattern[j]='\0';
rv->regex = pcre_compile((char*)newpattern, options,
rv->regex = pcre_compile((char*)pattern, options,
&error, &erroroffset, dictionary);
free(newpattern);
if (rv->regex==NULL)
{
PyMem_DEL(rv);
......@@ -312,6 +283,10 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
*indexptr=index;
return Py_BuildValue("c", (char)8);
break;
case('\\'):
*indexptr=index;
return Py_BuildValue("c", '\\');
break;
case('x'):
{
......@@ -348,6 +323,8 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
case('g'):
{
int end, i;
int group_num = 0, is_number=0;
if (pattern_len<=index)
{
PyErr_SetString(ErrorObject, "unfinished symbolic reference");
......@@ -374,16 +351,22 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
PyErr_SetString(ErrorObject, "zero-length symbolic reference");
return NULL;
}
if (!(pcre_ctypes[pattern[index]] & ctype_word) /* First char. not alphanumeric */
|| (pcre_ctypes[pattern[index]] & ctype_digit) ) /* First char. a digit */
if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
{
/* XXX should include the text of the reference */
PyErr_SetString(ErrorObject, "first character of symbolic reference not a letter or _");
return NULL;
is_number = 1;
group_num = pattern[index] - '0';
}
for(i=index+1; i<end; i++)
{
if (is_number &&
!(pcre_ctypes[pattern[i]] & ctype_digit) )
{
/* XXX should include the text of the reference */
PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
return NULL;
}
else {group_num = group_num * 10 + pattern[i] - '0';}
if (!(pcre_ctypes[pattern[i]] & ctype_word) )
{
/* XXX should include the text of the reference */
......@@ -394,6 +377,9 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
*typeptr = MEMORY_REFERENCE;
*indexptr = end+1;
/* If it's a number, return the integer value of the group */
if (is_number) return Py_BuildValue("i", group_num);
/* Otherwise, return a string containing the group name */
return Py_BuildValue("s#", pattern+index, end-index);
}
break;
......@@ -478,8 +464,11 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
break;
default:
/* It's some unknown escape like \s, so return a string containing
\s */
*typeptr = STRING;
*indexptr = index;
return Py_BuildValue("c", c);
return Py_BuildValue("s#", pattern+index-2, 2);
break;
}
}
......@@ -571,6 +560,12 @@ PyPcre_expand(self, args)
Py_DECREF(result);
}
break;
case(STRING):
{
PyList_Append(results, value);
total_len += PyString_Size(value);
break;
}
default:
Py_DECREF(results);
PyErr_SetString(ErrorObject,
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment