Commit 74fb3039, authored by Guido van Rossum

Jeffrey's latest changes

Parent commit: 9e18ec7d
/*
* -*- mode: c-mode; c-file-style: python -*-
*/
/* regexpr.c /* regexpr.c
* *
* Author: Tatu Ylonen <ylo@ngs.fi> * Author: Tatu Ylonen <ylo@ngs.fi>
...@@ -472,16 +468,15 @@ static int regexp_ansi_sequences; ...@@ -472,16 +468,15 @@ static int regexp_ansi_sequences;
#define MAX_NESTING 100 /* max nesting level of operators */ #define MAX_NESTING 100 /* max nesting level of operators */
#define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)] #define SYNTAX(ch) re_syntax_table[(unsigned char)(ch)]
#define Sword 1
static char re_syntax_table[256]; char re_syntax_table[256];
static void re_compile_initialize(void) void re_compile_initialize(void)
{ {
int a; int a;
static int syntax_table_inited = 0; static int syntax_table_inited = 0;
if (!syntax_table_inited) if (!syntax_table_inited)
{ {
syntax_table_inited = 1; syntax_table_inited = 1;
...@@ -491,7 +486,11 @@ static void re_compile_initialize(void) ...@@ -491,7 +486,11 @@ static void re_compile_initialize(void)
for (a = 'A'; a <= 'Z'; a++) for (a = 'A'; a <= 'Z'; a++)
re_syntax_table[a] = Sword; re_syntax_table[a] = Sword;
for (a = '0'; a <= '9'; a++) for (a = '0'; a <= '9'; a++)
re_syntax_table[a] = Sword; re_syntax_table[a] = Sword | Sdigit;
re_syntax_table['_'] = Sword;
for (a = 9; a <= 13; a++)
re_syntax_table[a] = Swhitespace;
re_syntax_table[' '] = Swhitespace;
} }
re_compile_initialized = 1; re_compile_initialized = 1;
for (a = 0; a < 256; a++) for (a = 0; a < 256; a++)
...@@ -602,13 +601,12 @@ static void re_compile_fastmap_aux(char *code, ...@@ -602,13 +601,12 @@ static void re_compile_fastmap_aux(char *code,
return; /* we have already been here */ return; /* we have already been here */
visited[pos] = 1; visited[pos] = 1;
for (;;) for (;;)
switch (code[pos++]) switch (code[pos++]) {
{
case Cend: case Cend:
{ {
*can_be_null = 1; *can_be_null = 1;
return; return;
} }
case Cbol: case Cbol:
case Cbegbuf: case Cbegbuf:
case Cendbuf: case Cendbuf:
...@@ -1609,9 +1607,6 @@ int re_match(regexp_t bufp, ...@@ -1609,9 +1607,6 @@ int re_match(regexp_t bufp,
NEW_STATE(state, bufp->num_registers); NEW_STATE(state, bufp->num_registers);
if (!re_compile_initialized)
re_compile_initialize();
continue_matching: continue_matching:
switch (*code++) switch (*code++)
{ {
...@@ -1883,11 +1878,11 @@ int re_match(regexp_t bufp, ...@@ -1883,11 +1878,11 @@ int re_match(regexp_t bufp,
{ {
if (text == textend) if (text == textend)
goto fail; goto fail;
if (SYNTAX(*text) != Sword) if (SYNTAX(*text) & Sword)
goto fail; goto fail;
if (text == textstart) if (text == textstart)
goto continue_matching; goto continue_matching;
if (SYNTAX(text[-1]) != Sword) if (!(SYNTAX(text[-1]) & Sword))
goto continue_matching; goto continue_matching;
goto fail; goto fail;
} }
...@@ -1895,11 +1890,11 @@ int re_match(regexp_t bufp, ...@@ -1895,11 +1890,11 @@ int re_match(regexp_t bufp,
{ {
if (text == textstart) if (text == textstart)
goto fail; goto fail;
if (SYNTAX(text[-1]) != Sword) if (!(SYNTAX(text[-1]) & Sword))
goto fail; goto fail;
if (text == textend) if (text == textend)
goto continue_matching; goto continue_matching;
if (SYNTAX(*text) == Sword) if (SYNTAX(*text) & Sword)
goto fail; goto fail;
goto continue_matching; goto continue_matching;
} }
...@@ -1910,7 +1905,7 @@ int re_match(regexp_t bufp, ...@@ -1910,7 +1905,7 @@ int re_match(regexp_t bufp,
if (text == textstart || text == textend) if (text == textstart || text == textend)
goto continue_matching; goto continue_matching;
if ((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword)) if ((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword))
goto continue_matching; goto continue_matching;
goto fail; goto fail;
} }
...@@ -1920,21 +1915,21 @@ int re_match(regexp_t bufp, ...@@ -1920,21 +1915,21 @@ int re_match(regexp_t bufp,
* beginning and end of buffer. */ * beginning and end of buffer. */
if (text == textstart || text == textend) if (text == textstart || text == textend)
goto fail; goto fail;
if (!((SYNTAX(text[-1]) == Sword) ^ (SYNTAX(*text) == Sword))) if (!((SYNTAX(text[-1]) & Sword) ^ (SYNTAX(*text) & Sword)))
goto fail; goto fail;
goto continue_matching; goto continue_matching;
} }
case Csyntaxspec: case Csyntaxspec:
{ {
NEXTCHAR(ch); NEXTCHAR(ch);
if (SYNTAX(ch) != (unsigned char)*code++) if (!(SYNTAX(ch) & (unsigned char)*code++))
goto fail; goto fail;
goto continue_matching; goto continue_matching;
} }
case Cnotsyntaxspec: case Cnotsyntaxspec:
{ {
NEXTCHAR(ch); NEXTCHAR(ch);
if (SYNTAX(ch) != (unsigned char)*code++) if (SYNTAX(ch) & (unsigned char)*code++)
break; break;
goto continue_matching; goto continue_matching;
} }
...@@ -2067,3 +2062,10 @@ int re_search(regexp_t bufp, ...@@ -2067,3 +2062,10 @@ int re_search(regexp_t bufp,
} }
return -1; return -1;
} }
/*
** Local Variables:
** mode: c
** c-file-style: "python"
** End:
*/
...@@ -67,10 +67,16 @@ typedef struct re_registers ...@@ -67,10 +67,16 @@ typedef struct re_registers
#define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR) #define RE_SYNTAX_GREP (RE_BK_PLUS_QM|RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0 #define RE_SYNTAX_EMACS 0
#define Sword 1
#define Swhitespace 2
#define Sdigit 4
/* Rename all exported symbols to avoid conflicts with similarly named /* Rename all exported symbols to avoid conflicts with similarly named
symbols in some systems' standard C libraries... */ symbols in some systems' standard C libraries... */
#define re_syntax _Py_re_syntax #define re_syntax _Py_re_syntax
#define re_syntax_table _Py_re_syntax_table
#define re_compile_initialize _Py_re_compile_initialize
#define re_set_syntax _Py_re_set_syntax #define re_set_syntax _Py_re_set_syntax
#define re_compile_pattern _Py_re_compile_pattern #define re_compile_pattern _Py_re_compile_pattern
#define re_match _Py_re_match #define re_match _Py_re_match
...@@ -85,6 +91,10 @@ extern int re_syntax; ...@@ -85,6 +91,10 @@ extern int re_syntax;
/* This is the actual syntax mask. It was added so that Python could do /* This is the actual syntax mask. It was added so that Python could do
* syntax-dependent munging of patterns before compilation. */ * syntax-dependent munging of patterns before compilation. */
extern char re_syntax_table[256];
void re_compile_initialize(void);
int re_set_syntax(int syntax); int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax. The /* This sets the syntax to use and returns the previous syntax. The
* syntax is specified by a bit mask of the above defined bits. */ * syntax is specified by a bit mask of the above defined bits. */
...@@ -133,6 +143,8 @@ int re_exec(char *s); ...@@ -133,6 +143,8 @@ int re_exec(char *s);
#else /* HAVE_PROTOTYPES */ #else /* HAVE_PROTOTYPES */
extern int re_syntax; extern int re_syntax;
extern char re_syntax_table[256];
void re_compile_initialize();
int re_set_syntax(); int re_set_syntax();
char *re_compile_pattern(); char *re_compile_pattern();
int re_match(); int re_match();
......
...@@ -43,6 +43,13 @@ PERFORMANCE OF THIS SOFTWARE. ...@@ -43,6 +43,13 @@ PERFORMANCE OF THIS SOFTWARE.
static PyObject *ReopError; /* Exception */ static PyObject *ReopError; /* Exception */
#define IGNORECASE 0x01
#define MULTILINE 0x02
#define DOTALL 0x04
#define VERBOSE 0x08
static char *reop_casefold;
static PyObject * static PyObject *
makeresult(regs, num_regs) makeresult(regs, num_regs)
struct re_registers *regs; struct re_registers *regs;
...@@ -90,6 +97,10 @@ reop_match(self, args) ...@@ -90,6 +97,10 @@ reop_match(self, args)
int flags, pos, result; int flags, pos, result;
struct re_pattern_buffer bufp; struct re_pattern_buffer bufp;
struct re_registers re_regs; struct re_registers re_regs;
PyObject *modules = NULL;
PyObject *reopmodule = NULL;
PyObject *reopdict = NULL;
PyObject *casefold = NULL;
if (!PyArg_Parse(args, "(s#iiis#is#i)", if (!PyArg_Parse(args, "(s#iiis#is#i)",
&(bufp.buffer), &(bufp.allocated), &(bufp.buffer), &(bufp.allocated),
...@@ -102,20 +113,44 @@ reop_match(self, args) ...@@ -102,20 +113,44 @@ reop_match(self, args)
/* XXX sanity-check the input data */ /* XXX sanity-check the input data */
bufp.used=bufp.allocated; bufp.used=bufp.allocated;
bufp.translate=NULL; if (flags & IGNORECASE)
{
if ((modules = PyImport_GetModuleDict()) == NULL)
return NULL;
if ((reopmodule = PyDict_GetItemString(modules,
"reop")) == NULL)
return NULL;
if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
return NULL;
if ((casefold = PyDict_GetItemString(reopdict,
"casefold")) == NULL)
return NULL;
bufp.translate = PyString_AsString(casefold);
}
else
bufp.translate=NULL;
bufp.fastmap_accurate=1; bufp.fastmap_accurate=1;
bufp.can_be_null=can_be_null; bufp.can_be_null=can_be_null;
bufp.uses_registers=1; bufp.uses_registers=1;
bufp.anchor=anchor; bufp.anchor=anchor;
for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;} for(i=0; i<bufp.num_registers; i++) {
re_regs.start[i]=-1;
re_regs.end[i]=-1;
}
result = re_match(&bufp, result = re_match(&bufp,
string, stringlen, pos, string, stringlen, pos,
&re_regs); &re_regs);
if (result < -1) { if (result < -1) {
/* Failure like stack overflow */ /* Failure like stack overflow */
PyErr_SetString(ReopError, "match failure"); PyErr_SetString(ReopError, "match failure");
return NULL; return NULL;
} }
if (result == -1) { if (result == -1) {
...@@ -136,6 +171,10 @@ reop_search(self, args) ...@@ -136,6 +171,10 @@ reop_search(self, args)
int flags, pos, result; int flags, pos, result;
struct re_pattern_buffer bufp; struct re_pattern_buffer bufp;
struct re_registers re_regs; struct re_registers re_regs;
PyObject *modules = NULL;
PyObject *reopmodule = NULL;
PyObject *reopdict = NULL;
PyObject *casefold = NULL;
if (!PyArg_Parse(args, "(s#iiis#is#i)", if (!PyArg_Parse(args, "(s#iiis#is#i)",
&(bufp.buffer), &(bufp.allocated), &(bufp.buffer), &(bufp.allocated),
...@@ -148,26 +187,51 @@ reop_search(self, args) ...@@ -148,26 +187,51 @@ reop_search(self, args)
/* XXX sanity-check the input data */ /* XXX sanity-check the input data */
bufp.used=bufp.allocated; bufp.used=bufp.allocated;
bufp.translate=NULL; if (flags & IGNORECASE)
{
if ((modules = PyImport_GetModuleDict()) == NULL)
return NULL;
if ((reopmodule = PyDict_GetItemString(modules,
"reop")) == NULL)
return NULL;
if ((reopdict = PyModule_GetDict(reopmodule)) == NULL)
return NULL;
if ((casefold = PyDict_GetItemString(reopdict,
"casefold")) == NULL)
return NULL;
bufp.translate = PyString_AsString(casefold);
}
else
bufp.translate=NULL;
bufp.fastmap_accurate=1; bufp.fastmap_accurate=1;
bufp.can_be_null=can_be_null; bufp.can_be_null=can_be_null;
bufp.uses_registers=1; bufp.uses_registers=1;
bufp.anchor=anchor; bufp.anchor=anchor;
for(i=0; i<bufp.num_registers; i++) {re_regs.start[i]=-1; re_regs.end[i]=-1;} for(i = 0; i < bufp.num_registers; i++) {
re_regs.start[i] = -1;
re_regs.end[i] = -1;
}
result = re_search(&bufp, result = re_search(&bufp,
string, stringlen, pos, stringlen-pos, string, stringlen, pos, stringlen-pos,
&re_regs); &re_regs);
if (result < -1) { if (result < -1) {
/* Failure like stack overflow */ /* Failure like stack overflow */
PyErr_SetString(ReopError, "match failure"); PyErr_SetString(ReopError, "match failure");
return NULL; return NULL;
} }
if (result == -1) { if (result == -1) {
Py_INCREF(Py_None); Py_INCREF(Py_None);
return Py_None; return Py_None;
} }
return makeresult(&re_regs, bufp.num_registers); return makeresult(&re_regs, bufp.num_registers);
} }
...@@ -345,10 +409,13 @@ static struct PyMethodDef reop_global_methods[] = { ...@@ -345,10 +409,13 @@ static struct PyMethodDef reop_global_methods[] = {
void void
initreop() initreop()
{ {
PyObject *m, *d, *v; PyObject *m, *d, *k, *v, *o;
int i; int i;
char *s; char *s;
char j[2];
re_compile_initialize();
m = Py_InitModule("reop", reop_global_methods); m = Py_InitModule("reop", reop_global_methods);
d = PyModule_GetDict(m); d = PyModule_GetDict(m);
...@@ -370,12 +437,64 @@ initreop() ...@@ -370,12 +437,64 @@ initreop()
else else
s[i] = i; s[i] = i;
} }
if (PyDict_SetItemString(d, "casefold", v) < 0) if (PyDict_SetItemString(d, "casefold", v) < 0)
goto finally; goto finally;
Py_DECREF(v); Py_DECREF(v);
/* Initialize the syntax table */
o = PyDict_New();
if (o == NULL)
goto finally;
j[1] = '\0';
for (i = 0; i < 256; i++)
{
j[0] = i;
k = PyString_FromStringAndSize(j, 1);
if (k == NULL)
goto finally;
v = PyInt_FromLong(re_syntax_table[i]);
if (v == NULL)
goto finally;
if (PyDict_SetItem(o, k, v) < 0)
goto finally;
Py_DECREF(k);
Py_DECREF(v);
}
if (PyDict_SetItemString(d, "syntax_table", o) < 0)
goto finally;
Py_DECREF(o);
v = PyInt_FromLong(Sword);
if (v == NULL)
goto finally;
if (PyDict_SetItemString(d, "word", v) < 0)
goto finally;
Py_DECREF(v);
v = PyInt_FromLong(Swhitespace);
if (v == NULL)
goto finally;
if (PyDict_SetItemString(d, "whitespace", v) < 0)
goto finally;
Py_DECREF(v);
v = PyInt_FromLong(Sdigit);
if (v == NULL)
goto finally;
if (PyDict_SetItemString(d, "digit", v) < 0)
goto finally;
Py_DECREF(v);
if (!PyErr_Occurred()) if (!PyErr_Occurred())
return; return;
finally: finally:
Py_FatalError("can't initialize reop module"); Py_FatalError("can't initialize reop module");
} }
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment