Commit 042ff9eb authored by Guido van Rossum

AMK's latest

parent 104be4a4
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
*************************************************/ *************************************************/
#define PCRE_VERSION "1.04 22-Dec-1997" #define PCRE_VERSION "1.07 16-Feb-1998"
/* This is a library of functions to support regular expressions whose syntax /* This is a library of functions to support regular expressions whose syntax
...@@ -12,7 +12,7 @@ the file Tech.Notes for some information on the internals. ...@@ -12,7 +12,7 @@ the file Tech.Notes for some information on the internals.
Written by: Philip Hazel <ph10@cam.ac.uk> Written by: Philip Hazel <ph10@cam.ac.uk>
Copyright (c) 1997 University of Cambridge Copyright (c) 1998 University of Cambridge
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
Permission is granted to anyone to use this software for any purpose on any Permission is granted to anyone to use this software for any purpose on any
...@@ -192,6 +192,7 @@ enum { ...@@ -192,6 +192,7 @@ enum {
OP_CRMINRANGE, OP_CRMINRANGE,
OP_CLASS, /* Match a character class */ OP_CLASS, /* Match a character class */
OP_NEGCLASS, /* Match a character class, specified negatively */
OP_CLASS_L, /* Match a character class */ OP_CLASS_L, /* Match a character class */
OP_REF, /* Match a back reference */ OP_REF, /* Match a back reference */
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
* Perl-Compatible Regular Expressions * * Perl-Compatible Regular Expressions *
*************************************************/ *************************************************/
/* Copyright (c) 1997 University of Cambridge */ /* Copyright (c) 1998 University of Cambridge */
#ifndef _PCRE_H #ifndef _PCRE_H
#define _PCRE_H #define _PCRE_H
...@@ -17,6 +17,12 @@ it is needed here for malloc. */ ...@@ -17,6 +17,12 @@ it is needed here for malloc. */
#include <sys/types.h> #include <sys/types.h>
#include <stdlib.h> #include <stdlib.h>
/* Allow for C++ users */
#ifdef __cplusplus
extern "C" {
#endif
/* Options */ /* Options */
#define PCRE_CASELESS 0x0001 #define PCRE_CASELESS 0x0001
...@@ -68,4 +74,8 @@ extern int pcre_info(const pcre *, int *, int *); ...@@ -68,4 +74,8 @@ extern int pcre_info(const pcre *, int *, int *);
extern pcre_extra *pcre_study(const pcre *, int, const char **); extern pcre_extra *pcre_study(const pcre *, int, const char **);
extern const char *pcre_version(void); extern const char *pcre_version(void);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* End of pcre.h */ #endif /* End of pcre.h */
...@@ -72,7 +72,7 @@ staticforward PyTypeObject Pcre_Type; ...@@ -72,7 +72,7 @@ staticforward PyTypeObject Pcre_Type;
#define NOT_WORD_BOUNDARY 6 #define NOT_WORD_BOUNDARY 6
#define BEGINNING_OF_BUFFER 7 #define BEGINNING_OF_BUFFER 7
#define END_OF_BUFFER 8 #define END_OF_BUFFER 8
#define STRING 9
static PcreObject * static PcreObject *
newPcreObject(arg) newPcreObject(arg)
...@@ -191,49 +191,20 @@ PyPcre_compile(self, args) ...@@ -191,49 +191,20 @@ PyPcre_compile(self, args)
{ {
PcreObject *rv; PcreObject *rv;
PyObject *dictionary; PyObject *dictionary;
char *pattern, *newpattern; char *pattern;
const char *error; const char *error;
int num_zeros, i, j; int num_zeros, i, j;
int patternlen, options, erroroffset; int options, erroroffset;
if (!PyArg_ParseTuple(args, "s#iO!", &pattern, &patternlen, &options, if (!PyArg_ParseTuple(args, "siO!", &pattern, &options,
&PyDict_Type, &dictionary)) &PyDict_Type, &dictionary))
return NULL; return NULL;
rv = newPcreObject(args); rv = newPcreObject(args);
if ( rv == NULL ) if ( rv == NULL )
return NULL; return NULL;
/* PCRE doesn't like having null bytes in its pattern, so we have to replace rv->regex = pcre_compile((char*)pattern, options,
any zeros in the string with the characters '\000'. This increases the size
of the string by 3*num_zeros, plus 1 byte for the terminating \0. */
num_zeros=1; /* Start at 1; this will give 3 extra bytes of leeway */
for(i=0; i<patternlen; i++) {
if (pattern[i]==0) num_zeros++;
}
newpattern=malloc(patternlen + num_zeros*3 + 4);
if (newpattern==NULL) {
PyErr_SetString(PyExc_MemoryError, "can't allocate memory for new pattern");
return NULL;
}
for (i=j=0; i<patternlen; i++, j++)
{
if (pattern[i]!=0) newpattern[j]=pattern[i];
else {
newpattern[j++] ='\\';
newpattern[j++] = '0';
newpattern[j++] = '0';
newpattern[j ] = '0';
}
}
/* Keep purify happy; for pcre, one null byte is enough! */
newpattern[j++]='\0';
newpattern[j++]='\0';
newpattern[j++]='\0';
newpattern[j]='\0';
rv->regex = pcre_compile((char*)newpattern, options,
&error, &erroroffset, dictionary); &error, &erroroffset, dictionary);
free(newpattern);
if (rv->regex==NULL) if (rv->regex==NULL)
{ {
PyMem_DEL(rv); PyMem_DEL(rv);
...@@ -312,6 +283,10 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr) ...@@ -312,6 +283,10 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
*indexptr=index; *indexptr=index;
return Py_BuildValue("c", (char)8); return Py_BuildValue("c", (char)8);
break; break;
case('\\'):
*indexptr=index;
return Py_BuildValue("c", '\\');
break;
case('x'): case('x'):
{ {
...@@ -348,6 +323,8 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr) ...@@ -348,6 +323,8 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
case('g'): case('g'):
{ {
int end, i; int end, i;
int group_num = 0, is_number=0;
if (pattern_len<=index) if (pattern_len<=index)
{ {
PyErr_SetString(ErrorObject, "unfinished symbolic reference"); PyErr_SetString(ErrorObject, "unfinished symbolic reference");
...@@ -374,16 +351,22 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr) ...@@ -374,16 +351,22 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
PyErr_SetString(ErrorObject, "zero-length symbolic reference"); PyErr_SetString(ErrorObject, "zero-length symbolic reference");
return NULL; return NULL;
} }
if (!(pcre_ctypes[pattern[index]] & ctype_word) /* First char. not alphanumeric */ if ((pcre_ctypes[pattern[index]] & ctype_digit)) /* First char. a digit */
|| (pcre_ctypes[pattern[index]] & ctype_digit) ) /* First char. a digit */
{ {
/* XXX should include the text of the reference */ is_number = 1;
PyErr_SetString(ErrorObject, "first character of symbolic reference not a letter or _"); group_num = pattern[index] - '0';
return NULL;
} }
for(i=index+1; i<end; i++) for(i=index+1; i<end; i++)
{ {
if (is_number &&
!(pcre_ctypes[pattern[i]] & ctype_digit) )
{
/* XXX should include the text of the reference */
PyErr_SetString(ErrorObject, "illegal non-digit character in \\g<...> starting with digit");
return NULL;
}
else {group_num = group_num * 10 + pattern[i] - '0';}
if (!(pcre_ctypes[pattern[i]] & ctype_word) ) if (!(pcre_ctypes[pattern[i]] & ctype_word) )
{ {
/* XXX should include the text of the reference */ /* XXX should include the text of the reference */
...@@ -394,6 +377,9 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr) ...@@ -394,6 +377,9 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
*typeptr = MEMORY_REFERENCE; *typeptr = MEMORY_REFERENCE;
*indexptr = end+1; *indexptr = end+1;
/* If it's a number, return the integer value of the group */
if (is_number) return Py_BuildValue("i", group_num);
/* Otherwise, return a string containing the group name */
return Py_BuildValue("s#", pattern+index, end-index); return Py_BuildValue("s#", pattern+index, end-index);
} }
break; break;
...@@ -478,8 +464,11 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr) ...@@ -478,8 +464,11 @@ PyPcre_expand_escape(pattern, pattern_len, indexptr, typeptr)
break; break;
default: default:
/* It's some unknown escape like \s, so return a string containing
\s */
*typeptr = STRING;
*indexptr = index; *indexptr = index;
return Py_BuildValue("c", c); return Py_BuildValue("s#", pattern+index-2, 2);
break; break;
} }
} }
...@@ -571,6 +560,12 @@ PyPcre_expand(self, args) ...@@ -571,6 +560,12 @@ PyPcre_expand(self, args)
Py_DECREF(result); Py_DECREF(result);
} }
break; break;
case(STRING):
{
PyList_Append(results, value);
total_len += PyString_Size(value);
break;
}
default: default:
Py_DECREF(results); Py_DECREF(results);
PyErr_SetString(ErrorObject, PyErr_SetString(ErrorObject,
......
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment