Commit 01016fe9, authored by Fredrik Lundh

- fixed split behaviour on empty matches

- fixed compiler problems when using locale/unicode flags

- fixed group/octal code parsing in sub/subn templates
parent 5d6ae76c
...@@ -109,16 +109,13 @@ def _subn(pattern, template, string, count=0): ...@@ -109,16 +109,13 @@ def _subn(pattern, template, string, count=0):
m = c.search() m = c.search()
if not m: if not m:
break break
j = m.start() b, e = m.span()
if j > i: if i < b:
append(string[i:j]) append(string[i:b])
append(filter(m)) append(filter(m))
i = m.end() i = e
if i <= j:
break
n = n + 1 n = n + 1
if i < len(string): append(string[i:])
append(string[i:])
return string[:0].join(s), n return string[:0].join(s), n
def _split(pattern, string, maxsplit=0): def _split(pattern, string, maxsplit=0):
...@@ -128,7 +125,7 @@ def _split(pattern, string, maxsplit=0): ...@@ -128,7 +125,7 @@ def _split(pattern, string, maxsplit=0):
append = s.append append = s.append
extend = s.extend extend = s.extend
c = pattern.scanner(string) c = pattern.scanner(string)
g = c.groups g = pattern.groups
while not maxsplit or n < maxsplit: while not maxsplit or n < maxsplit:
m = c.search() m = c.search()
if not m: if not m:
......
...@@ -61,9 +61,9 @@ def _compile(code, pattern, flags): ...@@ -61,9 +61,9 @@ def _compile(code, pattern, flags):
elif op is CATEGORY: elif op is CATEGORY:
emit(OPCODES[op]) emit(OPCODES[op])
if flags & SRE_FLAG_LOCALE: if flags & SRE_FLAG_LOCALE:
emit(CH_LOCALE[CHCODES[av]]) emit(CHCODES[CH_LOCALE[av]])
elif flags & SRE_FLAG_UNICODE: elif flags & SRE_FLAG_UNICODE:
emit(CH_UNICODE[CHCODES[av]]) emit(CHCODES[CH_UNICODE[av]])
else: else:
emit(CHCODES[av]) emit(CHCODES[av])
elif op is GROUP: elif op is GROUP:
...@@ -92,9 +92,9 @@ def _compile(code, pattern, flags): ...@@ -92,9 +92,9 @@ def _compile(code, pattern, flags):
emit(fixup(av[1])) emit(fixup(av[1]))
elif op is CATEGORY: elif op is CATEGORY:
if flags & SRE_FLAG_LOCALE: if flags & SRE_FLAG_LOCALE:
emit(CH_LOCALE[CHCODES[av]]) emit(CHCODES[CH_LOCALE[av]])
elif flags & SRE_FLAG_UNICODE: elif flags & SRE_FLAG_UNICODE:
emit(CH_UNICODE[CHCODES[av]]) emit(CHCODES[CH_UNICODE[av]])
else: else:
emit(CHCODES[av]) emit(CHCODES[av])
else: else:
......
...@@ -30,26 +30,27 @@ HEXDIGITS = tuple("0123456789abcdefABCDEF") ...@@ -30,26 +30,27 @@ HEXDIGITS = tuple("0123456789abcdefABCDEF")
WHITESPACE = string.whitespace WHITESPACE = string.whitespace
ESCAPES = { ESCAPES = {
"\\a": (LITERAL, chr(7)), r"\a": (LITERAL, chr(7)),
"\\b": (LITERAL, chr(8)), r"\b": (LITERAL, chr(8)),
"\\f": (LITERAL, chr(12)), r"\f": (LITERAL, chr(12)),
"\\n": (LITERAL, chr(10)), r"\n": (LITERAL, chr(10)),
"\\r": (LITERAL, chr(13)), r"\r": (LITERAL, chr(13)),
"\\t": (LITERAL, chr(9)), r"\t": (LITERAL, chr(9)),
"\\v": (LITERAL, chr(11)) r"\v": (LITERAL, chr(11)),
r"\\": (LITERAL, "\\")
} }
CATEGORIES = { CATEGORIES = {
"\\A": (AT, AT_BEGINNING), # start of string r"\A": (AT, AT_BEGINNING), # start of string
"\\b": (AT, AT_BOUNDARY), r"\b": (AT, AT_BOUNDARY),
"\\B": (AT, AT_NON_BOUNDARY), r"\B": (AT, AT_NON_BOUNDARY),
"\\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]), r"\d": (IN, [(CATEGORY, CATEGORY_DIGIT)]),
"\\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]), r"\D": (IN, [(CATEGORY, CATEGORY_NOT_DIGIT)]),
"\\s": (IN, [(CATEGORY, CATEGORY_SPACE)]), r"\s": (IN, [(CATEGORY, CATEGORY_SPACE)]),
"\\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]), r"\S": (IN, [(CATEGORY, CATEGORY_NOT_SPACE)]),
"\\w": (IN, [(CATEGORY, CATEGORY_WORD)]), r"\w": (IN, [(CATEGORY, CATEGORY_WORD)]),
"\\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]), r"\W": (IN, [(CATEGORY, CATEGORY_NOT_WORD)]),
"\\Z": (AT, AT_END), # end of string r"\Z": (AT, AT_END), # end of string
} }
FLAGS = { FLAGS = {
...@@ -185,11 +186,11 @@ def isname(name): ...@@ -185,11 +186,11 @@ def isname(name):
return 0 return 0
return 1 return 1
def _group(escape, state): def _group(escape, groups):
# check if the escape string represents a valid group # check if the escape string represents a valid group
try: try:
group = int(escape[1:]) group = int(escape[1:])
if group and group < state.groups: if group and group < groups:
return group return group
except ValueError: except ValueError:
pass pass
...@@ -239,10 +240,10 @@ def _escape(source, escape, state): ...@@ -239,10 +240,10 @@ def _escape(source, escape, state):
return LITERAL, chr(int(escape[-4:], 16) & 0xff) return LITERAL, chr(int(escape[-4:], 16) & 0xff)
elif escape[1:2] in DIGITS: elif escape[1:2] in DIGITS:
while 1: while 1:
group = _group(escape, state) group = _group(escape, state.groups)
if group: if group:
if (not source.next or if (not source.next or
not _group(escape + source.next, state)): not _group(escape + source.next, state.groups)):
return GROUP, group return GROUP, group
escape = escape + source.get() escape = escape + source.get()
elif source.next in OCTDIGITS: elif source.next in OCTDIGITS:
...@@ -534,6 +535,7 @@ def parse_template(source, pattern): ...@@ -534,6 +535,7 @@ def parse_template(source, pattern):
if this is None: if this is None:
break # end of replacement string break # end of replacement string
if this and this[0] == "\\": if this and this[0] == "\\":
# group
if this == "\\g": if this == "\\g":
name = "" name = ""
if s.match("<"): if s.match("<"):
...@@ -557,15 +559,29 @@ def parse_template(source, pattern): ...@@ -557,15 +559,29 @@ def parse_template(source, pattern):
raise IndexError, "unknown group name" raise IndexError, "unknown group name"
a((MARK, index)) a((MARK, index))
elif len(this) > 1 and this[1] in DIGITS: elif len(this) > 1 and this[1] in DIGITS:
while s.next in DIGITS: code = None
this = this + s.get() while 1:
a((MARK, int(this[1:]))) group = _group(this, pattern.groups+1)
if group:
if (not s.next or
not _group(this + s.next, pattern.groups+1)):
code = MARK, int(group)
break
elif s.next in OCTDIGITS:
this = this + s.get()
else:
break
if not code:
this = this[1:]
# FIXME: support unicode characters!
code = LITERAL, chr(int(this[-6:], 8) & 0xff)
a(code)
else: else:
try: try:
a(ESCAPES[this]) a(ESCAPES[this])
except KeyError: except KeyError:
for char in this: for c in this:
a((LITERAL, char)) a((LITERAL, c))
else: else:
a((LITERAL, this)) a((LITERAL, this))
return p return p
......
...@@ -1534,6 +1534,9 @@ pattern_getattr(PatternObject* self, char* name) ...@@ -1534,6 +1534,9 @@ pattern_getattr(PatternObject* self, char* name)
if (!strcmp(name, "flags")) if (!strcmp(name, "flags"))
return Py_BuildValue("i", self->flags); return Py_BuildValue("i", self->flags);
if (!strcmp(name, "groups"))
return Py_BuildValue("i", self->groups);
if (!strcmp(name, "groupindex") && self->groupindex) { if (!strcmp(name, "groupindex") && self->groupindex) {
Py_INCREF(self->groupindex); Py_INCREF(self->groupindex);
return self->groupindex; return self->groupindex;
...@@ -1939,9 +1942,6 @@ scanner_getattr(ScannerObject* self, char* name) ...@@ -1939,9 +1942,6 @@ scanner_getattr(ScannerObject* self, char* name)
return self->pattern; return self->pattern;
} }
if (!strcmp(name, "groups"))
return Py_BuildValue("i", ((PatternObject*) self->pattern)->groups);
PyErr_SetString(PyExc_AttributeError, name); PyErr_SetString(PyExc_AttributeError, name);
return NULL; return NULL;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment