Kaydet (Commit) 557dea1c authored tarafından Guido van Rossum's avatar Guido van Rossum

AMK's latest -- synchronized with PCRE 1.04.

üst 0148bbf9
......@@ -3,7 +3,7 @@
*************************************************/
#define PCRE_VERSION "1.02 12-Dec-1997"
#define PCRE_VERSION "1.04 22-Dec-1997"
/* This is a library of functions to support regular expressions whose syntax
......@@ -39,6 +39,7 @@ modules, but which are not relevant to the outside. */
define a macro for memmove() if USE_BCOPY is defined. */
#ifdef USE_BCOPY
#undef memmove /* some systems may have a macro */
#define memmove(a, b, c) bcopy(b, a, c)
#endif
......@@ -53,6 +54,13 @@ define a macro for memmove() if USE_BCOPY is defined. */
#include <string.h>
#include "pcre.h"
/* In case there is no definition of offsetof() provided - though any proper
Standard C system should have one. */
#ifndef offsetof
#define offsetof(p_type,field) ((size_t)&(((p_type *)0)->field))
#endif
/* Private options flags start at the most significant end of the two bytes.
The public options defined in pcre.h start at the least significant end. Make
sure they don't overlap! */
......
......@@ -14,6 +14,7 @@
/* Have to include stdlib.h in order to ensure that size_t is defined;
it is needed here for malloc. */
#include <sys/types.h>
#include <stdlib.h>
/* Options */
......
......@@ -231,7 +231,7 @@ PyPcre_compile(self, args)
newpattern[j++]='\0';
newpattern[j++]='\0';
newpattern[j++]='\0';
newpattern[j]='\0';
newpattern[j]='\0';
rv->regex = pcre_compile((char*)newpattern, options,
&error, &erroroffset, dictionary);
......
......@@ -571,6 +571,15 @@ restrictions:
/* #define DEBUG */
/* Use a macro for debugging printing, 'cause that eliminates the the use
of #ifdef inline, and there are *still* stupid compilers about that don't like
indented pre-processor statements. I suppose it's only been 10 years... */
#ifdef DEBUG
#define DPRINTF(p) printf p
#else
#define DPRINTF(p) /*nothing*/
#endif
/* Include the internals header, which itself includes Standard C headers plus
the external pcre header. */
......@@ -740,7 +749,8 @@ Arguments:
Returns: nothing
*/
static pchars(uschar *p, int length, BOOL is_subject, match_data *md)
static void
pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
{
int c;
if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
......@@ -1605,9 +1615,17 @@ for (;; ptr++)
if (code == previous) code += 2; else previous[1]++;
}
/* Insert an UPTO if the max is greater than the min. */
/* If the maximum is unlimited, insert an OP_STAR. */
if (repeat_max != repeat_min)
if (repeat_max < 0)
{
*code++ = c;
*code++ = OP_STAR + repeat_type;
}
/* Else insert an UPTO if the max is greater than the min. */
else if (repeat_max != repeat_min)
{
*code++ = c;
repeat_max -= repeat_min;
......@@ -1651,7 +1669,7 @@ for (;; ptr++)
else if ((int)*previous >= OP_BRA)
{
int i;
int length = code - previous;
int len = code - previous;
if (repeat_max == -1 && could_be_empty(previous))
{
......@@ -1668,8 +1686,8 @@ for (;; ptr++)
{
for (i = 1; i < repeat_min; i++)
{
memcpy(code, previous, length);
code += length;
memcpy(code, previous, len);
code += len;
}
}
......@@ -1681,22 +1699,22 @@ for (;; ptr++)
{
if (repeat_min == 0)
{
memmove(previous+1, previous, length);
memmove(previous+1, previous, len);
code++;
*previous++ = OP_BRAZERO + repeat_type;
}
for (i = 1; i < repeat_min; i++)
{
memcpy(code, previous, length);
code += length;
memcpy(code, previous, len);
code += len;
}
for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
{
*code++ = OP_BRAZERO + repeat_type;
memcpy(code, previous, length);
code += length;
memcpy(code, previous, len);
code += len;
}
}
......@@ -2240,10 +2258,8 @@ if ((options & ~PUBLIC_OPTIONS) != 0)
return NULL;
}
#ifdef DEBUG
printf("------------------------------------------------------------------\n");
printf("%s\n", pattern);
#endif
DPRINTF(("------------------------------------------------------------------\n"));
DPRINTF(("%s\n", pattern));
/* The first thing to do is to make a pass over the pattern to compute the
amount of store required to hold the compiled code. This does not have to be
......@@ -2358,9 +2374,9 @@ while ((c = *(++ptr)) != 0)
{
if (*ptr == '\\')
{
int c = check_escape(&ptr, errorptr, bracount, options, TRUE);
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE);
if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
if (-c == ESC_b) class_charcount++; else class_charcount = 10;
if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
}
else class_charcount++;
ptr++;
......@@ -2376,7 +2392,7 @@ while ((c = *(++ptr)) != 0)
/* A repeat needs either 1 or 5 bytes. */
if (ptr[1] == '{' && is_counted_repeat(ptr+2))
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2))
{
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
......@@ -2508,37 +2524,38 @@ while ((c = *(++ptr)) != 0)
continue;
/* Handle ket. Look for subsequent max/min; for certain sets of values we
have to replicate this bracket up to that many times. */
have to replicate this bracket up to that many times. If brastackptr is
0 this is an unmatched bracket which will generate an error, but take care
not to try to access brastack[-1]. */
case ')':
length += 3;
{
int min = 1;
int max = 1;
int duplength = length - brastack[--brastackptr];
int minval = 1;
int maxval = 1;
int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;
/* Leave ptr at the final char; for read_repeat_counts this happens
automatically; for the others we need an increment. */
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))
{
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr);
if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
}
else if (c == '*') { min = 0; max = -1; ptr++; }
else if (c == '+') { max = -1; ptr++; }
else if (c == '?') { min = 0; ptr++; }
/* If there is a minimum > 1 we have to replicate up to min-1 times; if
there is a limited maximum we have to replicate up to max-1 times and
allow for a BRAZERO item before each optional copy, as we also have to
do before the first copy if the minimum is zero. */
if (min == 0) length++;
else if (min > 1) length += (min - 1) * duplength;
if (max > min) length += (max - min) * (duplength + 1);
else if (c == '*') { minval = 0; maxval = -1; ptr++; }
else if (c == '+') { maxval = -1; ptr++; }
else if (c == '?') { minval = 0; ptr++; }
/* If there is a minimum > 1 we have to replicate up to minval-1 times;
if there is a limited maximum we have to replicate up to maxval-1 times
and allow for a BRAZERO item before each optional copy, as we also have
to do before the first copy if the minimum is zero. */
if (minval == 0) length++;
else if (minval > 1) length += (minval - 1) * duplength;
if (maxval > minval) length += (maxval - minval) * (duplength + 1);
}
continue;
/* Non-special character. For a run of such characters the length required
......@@ -2599,9 +2616,12 @@ if (length > 65539)
}
/* Compute the size of data block needed and get it, either from malloc or
externally provided function. Put in the magic number and the options. */
externally provided function. We specify "code[0]" in the offsetof() expression
rather than just "code", because it has been reported that one broken compiler
fails on "code" because it is also an independent variable. It should make no
difference to the value of the offsetof(). */
size = length + offsetof(real_pcre, code);
size = length + offsetof(real_pcre, code[0]);
re = (real_pcre *)(pcre_malloc)(size+50);
if (re == NULL)
......@@ -2610,6 +2630,8 @@ if (re == NULL)
return NULL;
}
/* Put in the magic number and the options. */
re->magic_number = MAGIC_NUMBER;
re->options = options;
......@@ -2661,10 +2683,10 @@ if ((options & PCRE_ANCHORED) == 0)
re->options |= PCRE_ANCHORED;
else
{
int c = find_firstchar(re->code);
if (c >= 0)
int ch = find_firstchar(re->code);
if (ch >= 0)
{
re->first_char = c;
re->first_char = ch;
re->options |= PCRE_FIRSTSET;
}
else if (is_startline(re->code))
......@@ -2756,7 +2778,7 @@ while (code < code_end)
case OP_MINUPTO:
if (isprint(c = code[3])) printf(" %c{", c);
else printf(" \\x%02x{", c);
if (*code != OP_EXACT) printf(",");
if (*code != OP_EXACT) printf("0,");
printf("%d}", (code[1] << 8) + code[2]);
if (*code == OP_MINUPTO) printf("?");
code += 3;
......@@ -2801,7 +2823,8 @@ while (code < code_end)
case OP_REF:
printf(" \\%d", *(++code));
break;
code ++;
goto CLASS_REF_REPEAT;
case OP_CLASS:
case OP_CLASS_L:
......@@ -2840,6 +2863,8 @@ while (code < code_end)
code += 32;
/* code ++;*/
CLASS_REF_REPEAT:
switch(*code)
{
case OP_CRSTAR:
......@@ -3071,9 +3096,7 @@ for (;;)
int number = (*ecode - OP_BRA) << 1;
int save_offset1 = 0, save_offset2 = 0;
#ifdef DEBUG
printf("start bracket %d\n", number/2);
#endif
DPRINTF(("start bracket %d\n", number/2));
if (number > 0 && number < md->offset_end)
{
......@@ -3081,9 +3104,7 @@ for (;;)
save_offset2 = md->offset_vector[number+1];
md->offset_vector[number] = eptr - md->start_subject;
#ifdef DEBUG
printf("saving %d %d\n", save_offset1, save_offset2);
#endif
DPRINTF(("saving %d %d\n", save_offset1, save_offset2));
}
/* Recurse for all the alternatives. */
......@@ -3095,9 +3116,7 @@ for (;;)
}
while (*ecode == OP_ALT);
#ifdef DEBUG
printf("bracket %d failed\n", number/2);
#endif
DPRINTF(("bracket %d failed\n", number/2));
if (number > 0 && number < md->offset_end)
{
......@@ -3170,7 +3189,7 @@ for (;;)
ecode += (ecode[1] << 8) + ecode[2];
}
while (*ecode == OP_ALT);
if (*ecode == OP_KET) return FALSE;
if (*ecode == OP_KET) FAIL;
/* Continue as from after the assertion, updating the offsets high water
mark, since extracts may have been taken. */
......@@ -3236,9 +3255,7 @@ for (;;)
number = (*prev - OP_BRA) << 1;
#ifdef DEBUG
printf("end bracket %d\n", number/2);
#endif
DPRINTF(("end bracket %d\n", number/2));
if (number > 0)
{
......@@ -3457,14 +3474,14 @@ for (;;)
case OP_NOT_WORDCHAR_L:
if (eptr >= md->end_subject || (*eptr=='_' || isalnum(*eptr) ))
return FALSE;
FAIL;
eptr++;
ecode++;
break;
case OP_WORDCHAR_L:
if (eptr >= md->end_subject || (*eptr!='_' && !isalnum(*eptr) ))
return FALSE;
FAIL;
eptr++;
ecode++;
break;
......@@ -3833,7 +3850,7 @@ for (;;)
register int length = ecode[1];
ecode += 2;
#ifdef DEBUG
#ifdef DEBUG /* Sigh. Some compilers never learn. */
if (eptr >= md->end_subject)
printf("matching subject <null> against pattern ");
else
......@@ -3901,10 +3918,8 @@ for (;;)
maximum. Alternatively, if maximizing, find the maximum number of
characters and work backwards. */
#ifdef DEBUG
printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,
max, eptr);
#endif
DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
max, eptr));
if (md->caseless)
{
......@@ -3969,7 +3984,7 @@ for (;;)
/* Match a negated single character */
case OP_NOT:
if (eptr > md->end_subject) FAIL;
if (eptr >= md->end_subject) FAIL;
ecode++;
if (md->caseless)
{
......@@ -4028,10 +4043,8 @@ for (;;)
maximum. Alternatively, if maximizing, find the maximum number of
characters and work backwards. */
#ifdef DEBUG
printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
max, eptr);
#endif
DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
max, eptr));
if (md->caseless)
{
......@@ -4174,12 +4187,12 @@ for (;;)
case OP_NOT_WORDCHAR_L:
for (i = 1; i <= min; i++, eptr++) if (*eptr=='_' || isalnum(*eptr))
return FALSE;
FAIL;
break;
case OP_WORDCHAR_L:
for (i = 1; i <= min; i++, eptr++) if (*eptr!='_' && !isalnum(*eptr))
return FALSE;
FAIL;
break;
}
......@@ -4308,9 +4321,7 @@ for (;;)
/* There's been some horrible disaster. */
default:
#ifdef DEBUG
printf("Unknown opcode %d\n", *ecode);
#endif
DPRINTF(("Unknown opcode %d\n", *ecode));
md->errorcode = PCRE_ERROR_UNKNOWN_NODE;
FAIL;
}
......@@ -4354,6 +4365,35 @@ succeed:
/*************************************************
* Segregate setjmp() *
*************************************************/
/* The -Wall option of gcc gives warnings for all local variables when setjmp()
is used, even if the coding conforms to the rules of ANSI C. To avoid this, we
hide it in a separate function. This is called only when PCRE_EXTRA is set,
since it's needed only for the extension \X option, and with any luck, a good
compiler will spot the tail recursion and compile it efficiently.
Arguments:
eptr pointer in subject
ecode position in code
offset_top current top pointer
md pointer to "static" info for the match
Returns: TRUE if matched
*/
static BOOL
match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
match_data *match_block)
{
return setjmp(match_block->fail_env) == 0 &&
match(eptr, ecode, offset_top, match_block);
}
/*************************************************
* Execute a Regular Expression *
*************************************************/
......@@ -4384,17 +4424,17 @@ pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
/* The "volatile" directives are to make gcc -Wall stop complaining
that these variables can be clobbered by the longjmp. Hopefully
they won't cost too much performance. */
volatile int resetcount;
volatile int ocount = offsetcount;
volatile int first_char = -1;
int resetcount, ocount;
int first_char = -1;
match_data match_block;
volatile const uschar *start_bits = NULL;
const uschar *start_match = (uschar *)subject;
const uschar *start_bits = NULL;
const uschar *start_match = (const uschar *)subject;
const uschar *end_subject;
const real_pcre *re = (const real_pcre *)external_re;
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
volatile BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
volatile BOOL startline = (re->options & PCRE_STARTLINE) != 0;
BOOL using_temporary_offsets = FALSE;
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
BOOL startline = (re->options & PCRE_STARTLINE) != 0;
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION;
......@@ -4427,18 +4467,17 @@ match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */
/* If the expression has got more back references than the offsets supplied can
hold, we get a temporary bit of working store to use during the matching.
Otherwise, we can use the vector supplied, rounding down the size of it to a
multiple of 2. */
Otherwise, we can use the vector supplied, rounding down its size to a multiple
of 2. */
ocount &= (-2);
if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)
ocount = offsetcount & (-2);
if (re->top_backref > 0 && re->top_backref >= ocount/2)
{
ocount = re->top_backref * 2 + 2;
match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
#ifdef DEBUG
printf("Got memory to hold back references\n");
#endif
using_temporary_offsets = TRUE;
DPRINTF(("Got memory to hold back references\n"));
}
else match_block.offset_vector = offsets;
......@@ -4459,7 +4498,7 @@ the right check, because multiline is now set. If it now yields FALSE, the
expression must have had ^ starting some of its branches. Check to see if
that is true for *all* branches, and if so, set the startline flag. */
if (match_block. multiline && anchored && (re->options & PCRE_MULTILINE) == 0 &&
if (match_block.multiline && anchored && (re->options & PCRE_MULTILINE) == 0 &&
!is_anchored(re->code, match_block.multiline))
{
anchored = FALSE;
......@@ -4491,6 +4530,7 @@ if (!anchored)
do
{
int rc;
register int *iptr = match_block.offset_vector;
register int *iend = iptr + resetcount;
......@@ -4532,7 +4572,7 @@ do
}
}
#ifdef DEBUG
#ifdef DEBUG /* Sigh. Some compilers never learn. */
printf(">>>> Match against: ");
pchars(start_match, end_subject - start_match, TRUE, &match_block);
printf("\n");
......@@ -4546,7 +4586,10 @@ do
if certain parts of the pattern were not used.
Before starting the match, we have to set up a longjmp() target to enable
the "cut" operation to fail a match completely without backtracking. */
the "cut" operation to fail a match completely without backtracking. This
is done in a separate function to avoid compiler warnings. We need not do
it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
enabled. */
/* To handle errors such as running out of memory for the failure
stack, we need to save this location via setjmp(), so
......@@ -4554,45 +4597,41 @@ do
if (setjmp(match_block.error_env)==0)
{
if (setjmp(match_block.fail_env) == 0 &&
match(start_match, re->code, 2, &match_block))
if ((re->options & PCRE_EXTRA) != 0)
{
int rc;
if (ocount != offsetcount)
{
if (offsetcount >= 4)
{
memcpy(offsets + 2, match_block.offset_vector + 2,
(offsetcount - 2) * sizeof(int));
#ifdef DEBUG
printf("Copied offsets; freeing temporary memory\n");
#endif
}
if (match_block.end_offset_top > offsetcount)
match_block.offset_overflow = TRUE;
#ifdef DEBUG
printf("Freeing temporary memory\n");
#endif
if (!match_with_setjmp(start_match, re->code, 2, &match_block))
continue;
}
else if (!match(start_match, re->code, 2, &match_block)) continue;
(pcre_free)(match_block.offset_vector);
}
rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
/* Copy the offset information from temporary store if necessary */
if (match_block.offset_end < 2) rc = 0; else
if (using_temporary_offsets)
{
if (offsetcount >= 4)
{
offsets[0] = start_match - match_block.start_subject;
offsets[1] = match_block.end_match_ptr - match_block.start_subject;
memcpy(offsets + 2, match_block.offset_vector + 2,
(offsetcount - 2) * sizeof(int));
DPRINTF(("Copied offsets from temporary memory\n"));
}
if (match_block.end_offset_top > offsetcount)
match_block.offset_overflow = TRUE;
#ifdef DEBUG
printf(">>>> returning %d\n", rc);
#endif
free_stack(&match_block);
return rc;
DPRINTF(("Freeing temporary memory\n"));
(pcre_free)(match_block.offset_vector);
}
rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2;
if (match_block.offset_end < 2) rc = 0; else
{
offsets[0] = start_match - match_block.start_subject;
offsets[1] = match_block.end_match_ptr - match_block.start_subject;
}
DPRINTF((">>>> returning %d\n", rc));
free_stack(&match_block);
return rc;
} /* End of (if setjmp(match_block.error_env)...) */
/* Return an error code; pcremodule.c will preserve the exception */
if (PyErr_Occurred()) return PCRE_ERROR_NOMEMORY;
......@@ -4603,11 +4642,17 @@ while (!anchored &&
match_block.errorcode == PCRE_ERROR_NOMATCH &&
start_match++ < end_subject);
if (using_temporary_offsets)
{
DPRINTF(("Freeing temporary memory\n"));
(pcre_free)(match_block.offset_vector);
}
#ifdef DEBUG
printf(">>>> returning %d\n", match_block.errorcode);
#endif
return match_block.errorcode;
return match_block.errorcode;
}
/* End of pcre.c */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment