- pedantic: make sure "python -t" doesn't complain...

90a07913 · Fredrik Lundh · df02d0b3 · 90a07913 · 90a07913 · 90a07913
Commit 90a07913 authored Jun 30, 2000 by Fredrik Lundh
Hide whitespace changes
Inline Side-by-side

Showing with 587 additions and 587 deletions

sre.py Lib/sre.py +11 -11

sre_compile.py Lib/sre_compile.py +153 -153

sre_parse.py Lib/sre_parse.py +418 -418

test_sre.py Lib/test/test_sre.py +5 -5

No files found.
--- a/Lib/sre.py
+++ b/Lib/sre.py
@@ -98,7 +98,7 @@ def _subn(pattern, template, string, count=0):
    if callable(template):
        filter = template
    else:
-	template = sre_parse.parse_template(template, pattern)
+        template = sre_parse.parse_template(template, pattern)
        def filter(match, template=template):
            return sre_parse.expand_template(template, match)
    n = i = 0
@@ -109,11 +109,11 @@ def _subn(pattern, template, string, count=0):
        m = c.search()
        if not m:
            break
-	b, e = m.span()
+        b, e = m.span()
        if i < b:
            append(string[i:b])
        append(filter(m))
-	i = e
+        i = e
        n = n + 1
    append(string[i:])
    return string[:0].join(s), n
@@ -130,15 +130,15 @@ def _split(pattern, string, maxsplit=0):
        m = c.search()
        if not m:
            break
-	b, e = m.span()
-	if b == e:
-	    if i >= len(string):
-		break
-	    continue
+        b, e = m.span()
+        if b == e:
+            if i >= len(string):
+                break
+            continue
        append(string[i:b])
-	if g and b != e:
-	    extend(m.groups())
-	i = e
+        if g and b != e:
+            extend(m.groups())
+        i = e
        n = n + 1
    append(string[i:])
    return s
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -18,7 +18,7 @@ from sre_constants import *
 # find an array type code that matches the engine's code size
 for WORDSIZE in "BHil":
    if len(array.array(WORDSIZE, [0]).tostring()) == _sre.getcodesize():
-	break
+        break
 else:
    raise RuntimeError, "cannot find a useable array type"

@@ -26,132 +26,132 @@ def _compile(code, pattern, flags):
    # internal: compile a (sub)pattern
    emit = code.append
    for op, av in pattern:
-	if op is ANY:
-	    if flags & SRE_FLAG_DOTALL:
-		emit(OPCODES[op])
-	    else:
-		emit(OPCODES[CATEGORY])
-		emit(CHCODES[CATEGORY_NOT_LINEBREAK])
-	elif op in (SUCCESS, FAILURE):
-	    emit(OPCODES[op])
-	elif op is AT:
-	    emit(OPCODES[op])
-	    if flags & SRE_FLAG_MULTILINE:
-		emit(ATCODES[AT_MULTILINE[av]])
-	    else:
-		emit(ATCODES[av])
-	elif op is BRANCH:
-	    emit(OPCODES[op])
-	    tail = []
-	    for av in av[1]:
-		skip = len(code); emit(0)
-		_compile(code, av, flags)
- 		emit(OPCODES[JUMP])
- 		tail.append(len(code)); emit(0)
-		code[skip] = len(code) - skip
-	    emit(0) # end of branch
- 	    for tail in tail:
-		code[tail] = len(code) - tail
-	elif op is CALL:
-	    emit(OPCODES[op])
-	    skip = len(code); emit(0)
-	    _compile(code, av, flags)
-	    emit(OPCODES[SUCCESS])
-	    code[skip] = len(code) - skip
-	elif op is CATEGORY:
-	    emit(OPCODES[op])
-	    if flags & SRE_FLAG_LOCALE:
-		emit(CHCODES[CH_LOCALE[av]])
-	    elif flags & SRE_FLAG_UNICODE:
-		emit(CHCODES[CH_UNICODE[av]])
-	    else:
-		emit(CHCODES[av])
-	elif op is GROUP:
-	    if flags & SRE_FLAG_IGNORECASE:
-		emit(OPCODES[OP_IGNORE[op]])
-	    else:
-		emit(OPCODES[op])
-	    emit(av-1)
-	elif op is IN:
-	    if flags & SRE_FLAG_IGNORECASE:
-		emit(OPCODES[OP_IGNORE[op]])
-		def fixup(literal, flags=flags):
-		    return _sre.getlower(ord(literal), flags)
-	    else:
-		emit(OPCODES[op])
-		fixup = ord
-	    skip = len(code); emit(0)
-	    for op, av in av:
-		emit(OPCODES[op])
-		if op is NEGATE:
-		    pass
-		elif op is LITERAL:
-		    emit(fixup(av))
-		elif op is RANGE:
-		    emit(fixup(av[0]))
-		    emit(fixup(av[1]))
-		elif op is CATEGORY:
-		    if flags & SRE_FLAG_LOCALE:
-			emit(CHCODES[CH_LOCALE[av]])
-		    elif flags & SRE_FLAG_UNICODE:
-			emit(CHCODES[CH_UNICODE[av]])
-		    else:
-			emit(CHCODES[av])
-		else:
-		    raise error, "internal: unsupported set operator"
-	    emit(OPCODES[FAILURE])
-	    code[skip] = len(code) - skip
-	elif op in (LITERAL, NOT_LITERAL):
-	    if flags & SRE_FLAG_IGNORECASE:
-		emit(OPCODES[OP_IGNORE[op]])
-	    else:
-		emit(OPCODES[op])
-	    emit(ord(av))
-	elif op is MARK:
-	    emit(OPCODES[op])
-	    emit(av)
- 	elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
-	    if flags & SRE_FLAG_TEMPLATE:
-		emit(OPCODES[REPEAT])
-		skip = len(code); emit(0)
-		emit(av[0])
-		emit(av[1])
-		_compile(code, av[2], flags)
-		emit(OPCODES[SUCCESS])
-		code[skip] = len(code) - skip
-	    else:
-		lo, hi = av[2].getwidth()
-		if lo == 0:
-		    raise error, "nothing to repeat"
-		if 0 and lo == hi == 1 and op is MAX_REPEAT:
-		    # FIXME: <fl> need a better way to figure out when
-		    # it's safe to use this one (in the parser, probably)
-		    emit(OPCODES[MAX_REPEAT_ONE])
-		    skip = len(code); emit(0)
-		    emit(av[0])
-		    emit(av[1])
-		    _compile(code, av[2], flags)
-		    emit(OPCODES[SUCCESS])
-		    code[skip] = len(code) - skip
-		else:
-		    emit(OPCODES[op])
-		    skip = len(code); emit(0)
-		    emit(av[0])
-		    emit(av[1])
-		    _compile(code, av[2], flags)
-		    emit(OPCODES[SUCCESS])
-		    code[skip] = len(code) - skip
-	elif op is SUBPATTERN:
- 	    group = av[0]
- 	    if group:
- 		emit(OPCODES[MARK])
- 		emit((group-1)*2)
-	    _compile(code, av[1], flags)
- 	    if group:
- 		emit(OPCODES[MARK])
- 		emit((group-1)*2+1)
-	else:
-	    raise ValueError, ("unsupported operand type", op)
+        if op is ANY:
+            if flags & SRE_FLAG_DOTALL:
+                emit(OPCODES[op])
+            else:
+                emit(OPCODES[CATEGORY])
+                emit(CHCODES[CATEGORY_NOT_LINEBREAK])
+        elif op in (SUCCESS, FAILURE):
+            emit(OPCODES[op])
+        elif op is AT:
+            emit(OPCODES[op])
+            if flags & SRE_FLAG_MULTILINE:
+                emit(ATCODES[AT_MULTILINE[av]])
+            else:
+                emit(ATCODES[av])
+        elif op is BRANCH:
+            emit(OPCODES[op])
+            tail = []
+            for av in av[1]:
+                skip = len(code); emit(0)
+                _compile(code, av, flags)
+                emit(OPCODES[JUMP])
+                tail.append(len(code)); emit(0)
+                code[skip] = len(code) - skip
+            emit(0) # end of branch
+            for tail in tail:
+                code[tail] = len(code) - tail
+        elif op is CALL:
+            emit(OPCODES[op])
+            skip = len(code); emit(0)
+            _compile(code, av, flags)
+            emit(OPCODES[SUCCESS])
+            code[skip] = len(code) - skip
+        elif op is CATEGORY:
+            emit(OPCODES[op])
+            if flags & SRE_FLAG_LOCALE:
+                emit(CHCODES[CH_LOCALE[av]])
+            elif flags & SRE_FLAG_UNICODE:
+                emit(CHCODES[CH_UNICODE[av]])
+            else:
+                emit(CHCODES[av])
+        elif op is GROUP:
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+            else:
+                emit(OPCODES[op])
+            emit(av-1)
+        elif op is IN:
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+                def fixup(literal, flags=flags):
+                    return _sre.getlower(ord(literal), flags)
+            else:
+                emit(OPCODES[op])
+                fixup = ord
+            skip = len(code); emit(0)
+            for op, av in av:
+                emit(OPCODES[op])
+                if op is NEGATE:
+                    pass
+                elif op is LITERAL:
+                    emit(fixup(av))
+                elif op is RANGE:
+                    emit(fixup(av[0]))
+                    emit(fixup(av[1]))
+                elif op is CATEGORY:
+                    if flags & SRE_FLAG_LOCALE:
+                        emit(CHCODES[CH_LOCALE[av]])
+                    elif flags & SRE_FLAG_UNICODE:
+                        emit(CHCODES[CH_UNICODE[av]])
+                    else:
+                        emit(CHCODES[av])
+                else:
+                    raise error, "internal: unsupported set operator"
+            emit(OPCODES[FAILURE])
+            code[skip] = len(code) - skip
+        elif op in (LITERAL, NOT_LITERAL):
+            if flags & SRE_FLAG_IGNORECASE:
+                emit(OPCODES[OP_IGNORE[op]])
+            else:
+                emit(OPCODES[op])
+            emit(ord(av))
+        elif op is MARK:
+            emit(OPCODES[op])
+            emit(av)
+        elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
+            if flags & SRE_FLAG_TEMPLATE:
+                emit(OPCODES[REPEAT])
+                skip = len(code); emit(0)
+                emit(av[0])
+                emit(av[1])
+                _compile(code, av[2], flags)
+                emit(OPCODES[SUCCESS])
+                code[skip] = len(code) - skip
+            else:
+                lo, hi = av[2].getwidth()
+                if lo == 0:
+                    raise error, "nothing to repeat"
+                if 0 and lo == hi == 1 and op is MAX_REPEAT:
+                    # FIXME: <fl> need a better way to figure out when
+                    # it's safe to use this one (in the parser, probably)
+                    emit(OPCODES[MAX_REPEAT_ONE])
+                    skip = len(code); emit(0)
+                    emit(av[0])
+                    emit(av[1])
+                    _compile(code, av[2], flags)
+                    emit(OPCODES[SUCCESS])
+                    code[skip] = len(code) - skip
+                else:
+                    emit(OPCODES[op])
+                    skip = len(code); emit(0)
+                    emit(av[0])
+                    emit(av[1])
+                    _compile(code, av[2], flags)
+                    emit(OPCODES[SUCCESS])
+                    code[skip] = len(code) - skip
+        elif op is SUBPATTERN:
+            group = av[0]
+            if group:
+                emit(OPCODES[MARK])
+                emit((group-1)*2)
+            _compile(code, av[1], flags)
+            if group:
+                emit(OPCODES[MARK])
+                emit((group-1)*2+1)
+        else:
+            raise ValueError, ("unsupported operand type", op)

 def _compile_info(code, pattern, flags):
    # internal: compile an info block.  in the current version,
@@ -159,15 +159,15 @@ def _compile_info(code, pattern, flags):
    # if any
    lo, hi = pattern.getwidth()
    if lo == 0:
-	return # not worth it
+        return # not worth it
    # look for a literal prefix
    prefix = []
    if not (flags & SRE_FLAG_IGNORECASE):
-	for op, av in pattern.data:
-	    if op is LITERAL:
-		prefix.append(ord(av))
-	    else:
-		break
+        for op, av in pattern.data:
+            if op is LITERAL:
+                prefix.append(ord(av))
+            else:
+                break
    # add an info block
    emit = code.append
    emit(OPCODES[INFO])
@@ -175,25 +175,25 @@ def _compile_info(code, pattern, flags):
    # literal flag
    mask = 0
    if len(prefix) == len(pattern.data):
-	mask = 1
+        mask = 1
    emit(mask)
    # pattern length
    emit(lo)
    if hi < 32768:
-	emit(hi)
+        emit(hi)
    else:
-	emit(0)
+        emit(0)
    # add literal prefix
    emit(len(prefix))
    if prefix:
-	code.extend(prefix)
-	# generate overlap table
-	table = [-1] + ([0]*len(prefix))
-	for i in range(len(prefix)):
-	    table[i+1] = table[i]+1
-	    while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
-		table[i+1] = table[table[i+1]-1]+1
-	code.extend(table[1:]) # don't store first entry
+        code.extend(prefix)
+        # generate overlap table
+        table = [-1] + ([0]*len(prefix))
+        for i in range(len(prefix)):
+            table[i+1] = table[i]+1
+            while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
+                table[i+1] = table[table[i+1]-1]+1
+        code.extend(table[1:]) # don't store first entry
    code[skip] = len(code) - skip

 def compile(p, flags=0):
@@ -201,11 +201,11 @@ def compile(p, flags=0):

    # compile, as necessary
    if type(p) in (type(""), type(u"")):
-	import sre_parse
-	pattern = p
-	p = sre_parse.parse(p)
+        import sre_parse
+        pattern = p
+        p = sre_parse.parse(p)
    else:
-	pattern = None
+        pattern = None

    flags = p.pattern.flags | flags
    code = []
@@ -220,10 +220,10 @@ def compile(p, flags=0):

    # FIXME: <fl> get rid of this limitation!
    assert p.pattern.groups <= 100,\
-	   "sorry, but this version only supports 100 named groups"
+           "sorry, but this version only supports 100 named groups"

    return _sre.compile(
-	pattern, flags,
-	array.array(WORDSIZE, code).tostring(),
-	p.pattern.groups-1, p.pattern.groupdict
-	)
+        pattern, flags,
+        array.array(WORDSIZE, code).tostring(),
+        p.pattern.groups-1, p.pattern.groupdict
+        )
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -67,106 +67,106 @@ FLAGS = {

 class State:
    def __init__(self):
-	self.flags = 0
-	self.groups = 1
-	self.groupdict = {}
+        self.flags = 0
+        self.groups = 1
+        self.groupdict = {}
    def getgroup(self, name=None):
-	gid = self.groups
-	self.groups = gid + 1
-	if name:
-	    self.groupdict[name] = gid
-	return gid
+        gid = self.groups
+        self.groups = gid + 1
+        if name:
+            self.groupdict[name] = gid
+        return gid

 class SubPattern:
    # a subpattern, in intermediate form
    def __init__(self, pattern, data=None):
-	self.pattern = pattern
-	if not data:
-	    data = []
-	self.data = data
-	self.width = None
+        self.pattern = pattern
+        if not data:
+            data = []
+        self.data = data
+        self.width = None
    def __repr__(self):
-	return repr(self.data)
+        return repr(self.data)
    def __len__(self):
-	return len(self.data)
+        return len(self.data)
    def __delitem__(self, index):
-	del self.data[index]
+        del self.data[index]
    def __getitem__(self, index):
-	return self.data[index]
+        return self.data[index]
    def __setitem__(self, index, code):
-	self.data[index] = code
+        self.data[index] = code
    def __getslice__(self, start, stop):
-	return SubPattern(self.pattern, self.data[start:stop])
+        return SubPattern(self.pattern, self.data[start:stop])
    def insert(self, index, code):
-	self.data.insert(index, code)
+        self.data.insert(index, code)
    def append(self, code):
-	self.data.append(code)
+        self.data.append(code)
    def getwidth(self):
-	# determine the width (min, max) for this subpattern
-	if self.width:
-	    return self.width
-	lo = hi = 0L
-	for op, av in self.data:
-	    if op is BRANCH:
-		l = sys.maxint
-		h = 0
-		for av in av[1]:
-		    i, j = av.getwidth()
-		    l = min(l, i)
-		    h = min(h, j)
-		lo = lo + i
-		hi = hi + j
-	    elif op is CALL:
-		i, j = av.getwidth()
-		lo = lo + i
-		hi = hi + j
-	    elif op is SUBPATTERN:
-		i, j = av[1].getwidth()
-		lo = lo + i
-		hi = hi + j
-	    elif op in (MIN_REPEAT, MAX_REPEAT):
-		i, j = av[2].getwidth()
-		lo = lo + long(i) * av[0]
-		hi = hi + long(j) * av[1]
-	    elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
-		lo = lo + 1
-		hi = hi + 1
-	    elif op == SUCCESS:
-		break
-	self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
-	return self.width
+        # determine the width (min, max) for this subpattern
+        if self.width:
+            return self.width
+        lo = hi = 0L
+        for op, av in self.data:
+            if op is BRANCH:
+                l = sys.maxint
+                h = 0
+                for av in av[1]:
+                    i, j = av.getwidth()
+                    l = min(l, i)
+                    h = min(h, j)
+                lo = lo + i
+                hi = hi + j
+            elif op is CALL:
+                i, j = av.getwidth()
+                lo = lo + i
+                hi = hi + j
+            elif op is SUBPATTERN:
+                i, j = av[1].getwidth()
+                lo = lo + i
+                hi = hi + j
+            elif op in (MIN_REPEAT, MAX_REPEAT):
+                i, j = av[2].getwidth()
+                lo = lo + long(i) * av[0]
+                hi = hi + long(j) * av[1]
+            elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
+                lo = lo + 1
+                hi = hi + 1
+            elif op == SUCCESS:
+                break
+        self.width = int(min(lo, sys.maxint)), int(min(hi, sys.maxint))
+        return self.width

 class Tokenizer:
    def __init__(self, string):
-	self.index = 0
-	self.string = string
-	self.next = self.__next()
+        self.index = 0
+        self.string = string
+        self.next = self.__next()
    def __next(self):
-	if self.index >= len(self.string):
-	    return None
-	char = self.string[self.index]
-	if char[0] == "\\":
-	    try:
-		c = self.string[self.index + 1]
-	    except IndexError:
-		raise error, "bogus escape"
-	    char = char + c
-	self.index = self.index + len(char)
-	return char
+        if self.index >= len(self.string):
+            return None
+        char = self.string[self.index]
+        if char[0] == "\\":
+            try:
+                c = self.string[self.index + 1]
+            except IndexError:
+                raise error, "bogus escape"
+            char = char + c
+        self.index = self.index + len(char)
+        return char
    def match(self, char):
-	if char == self.next:
-	    self.next = self.__next()
-	    return 1
-	return 0
+        if char == self.next:
+            self.next = self.__next()
+            return 1
+        return 0
    def match_set(self, set):
-	if self.next and self.next in set:
-	    self.next = self.__next()
-	    return 1
-	return 0
+        if self.next and self.next in set:
+            self.next = self.__next()
+            return 1
+        return 0
    def get(self):
-	this = self.next
-	self.next = self.__next()
-	return this
+        this = self.next
+        self.next = self.__next()
+        return this

 def isident(char):
    return "a" <= char <= "z" or "A" <= char <= "Z" or char == "_"
@@ -180,83 +180,83 @@ def isname(name):
    # expression instead, but I seem to have certain bootstrapping
    # problems here ;-)
    if not isident(name[0]):
-	return 0
+        return 0
    for char in name:
-	if not isident(char) and not isdigit(char):
-	    return 0
+        if not isident(char) and not isdigit(char):
+            return 0
    return 1

 def _group(escape, groups):
    # check if the escape string represents a valid group
    try:
-	group = int(escape[1:])
-	if group and group < groups:
-	    return group
+        group = int(escape[1:])
+        if group and group < groups:
+            return group
    except ValueError:
-	pass
+        pass
    return None # not a valid group

 def _class_escape(source, escape):
    # handle escape code inside character class
    code = ESCAPES.get(escape)
    if code:
-	return code
+        return code
    code = CATEGORIES.get(escape)
    if code:
-	return code
+        return code
    try:
-	if escape[1:2] == "x":
-	    while source.next in HEXDIGITS:
-		escape = escape + source.get()
-	    escape = escape[2:]
-	    # FIXME: support unicode characters!
-	    return LITERAL, chr(int(escape[-4:], 16) & 0xff)
-	elif str(escape[1:2]) in OCTDIGITS:
-	    while source.next in OCTDIGITS:
-		escape = escape + source.get()
-	    escape = escape[1:]
-	    # FIXME: support unicode characters!
-	    return LITERAL, chr(int(escape[-6:], 8) & 0xff)
-	if len(escape) == 2:
-	    return LITERAL, escape[1]
+        if escape[1:2] == "x":
+            while source.next in HEXDIGITS:
+                escape = escape + source.get()
+            escape = escape[2:]
+            # FIXME: support unicode characters!
+            return LITERAL, chr(int(escape[-4:], 16) & 0xff)
+        elif str(escape[1:2]) in OCTDIGITS:
+            while source.next in OCTDIGITS:
+                escape = escape + source.get()
+            escape = escape[1:]
+            # FIXME: support unicode characters!
+            return LITERAL, chr(int(escape[-6:], 8) & 0xff)
+        if len(escape) == 2:
+            return LITERAL, escape[1]
    except ValueError:
-	pass
+        pass
    raise error, "bogus escape: %s" % repr(escape)

 def _escape(source, escape, state):
    # handle escape code in expression
    code = CATEGORIES.get(escape)
    if code:
-	return code
+        return code
    code = ESCAPES.get(escape)
    if code:
-	return code
+        return code
    try:
-	if escape[1:2] == "x":
-	    while source.next in HEXDIGITS:
-		escape = escape + source.get()
-	    escape = escape[2:]
-	    # FIXME: support unicode characters!
-	    return LITERAL, chr(int(escape[-4:], 16) & 0xff)
-	elif escape[1:2] in DIGITS:
-	    while 1:
-		group = _group(escape, state.groups)
-		if group:
-		    if (not source.next or
-			not _group(escape + source.next, state.groups)):
-		        return GROUP, group
-		    escape = escape + source.get()
-		elif source.next in OCTDIGITS:
-		    escape = escape + source.get()
-		else:
-		    break
-	    escape = escape[1:]
-	    # FIXME: support unicode characters!
-	    return LITERAL, chr(int(escape[-6:], 8) & 0xff)
-	if len(escape) == 2:
-	    return LITERAL, escape[1]
+        if escape[1:2] == "x":
+            while source.next in HEXDIGITS:
+                escape = escape + source.get()
+            escape = escape[2:]
+            # FIXME: support unicode characters!
+            return LITERAL, chr(int(escape[-4:], 16) & 0xff)
+        elif escape[1:2] in DIGITS:
+            while 1:
+                group = _group(escape, state.groups)
+                if group:
+                    if (not source.next or
+                        not _group(escape + source.next, state.groups)):
+                        return GROUP, group
+                    escape = escape + source.get()
+                elif source.next in OCTDIGITS:
+                    escape = escape + source.get()
+                else:
+                    break
+            escape = escape[1:]
+            # FIXME: support unicode characters!
+            return LITERAL, chr(int(escape[-6:], 8) & 0xff)
+        if len(escape) == 2:
+            return LITERAL, escape[1]
    except ValueError:
-	pass
+        pass
    raise error, "bogus escape: %s" % repr(escape)


@@ -268,35 +268,35 @@ def _branch(pattern, items):

    # check if all items share a common prefix
    while 1:
-	prefix = None
-	for item in items:
-	    if not item:
-		break
-	    if prefix is None:
-		prefix = item[0]
-	    elif item[0] != prefix:
-		break
-	else:
-	    # all subitems start with a common "prefix".
-	    # move it out of the branch
-	    for item in items:
-		del item[0]
-	    subpattern.append(prefix)
-	    continue # check next one
-	break
+        prefix = None
+        for item in items:
+            if not item:
+                break
+            if prefix is None:
+                prefix = item[0]
+            elif item[0] != prefix:
+                break
+        else:
+            # all subitems start with a common "prefix".
+            # move it out of the branch
+            for item in items:
+                del item[0]
+            subpattern.append(prefix)
+            continue # check next one
+        break

    # check if the branch can be replaced by a character set
    for item in items:
-	if len(item) != 1 or item[0][0] != LITERAL:
-	    break
+        if len(item) != 1 or item[0][0] != LITERAL:
+            break
    else:
-	# we can store this as a character set instead of a
-	# branch (FIXME: use a range if possible)
-	set = []
-	for item in items:
-	    set.append(item[0])
-	subpattern.append((IN, set))
-	return subpattern
+        # we can store this as a character set instead of a
+        # branch (FIXME: use a range if possible)
+        set = []
+        for item in items:
+            set.append(item[0])
+        subpattern.append((IN, set))
+        return subpattern

    subpattern.append((BRANCH, (None, items)))
    return subpattern
@@ -309,197 +309,197 @@ def _parse(source, state, flags=0):

    while 1:

-	if source.next in ("|", ")"):
-	    break # end of subpattern
-	this = source.get()
-	if this is None:
-	    break # end of pattern
-
-	if state.flags & SRE_FLAG_VERBOSE:
-	    # skip whitespace and comments
-	    if this in WHITESPACE:
-		continue
-	    if this == "#":
-		while 1:
-		    this = source.get()
-		    if this in (None, "\n"):
-			break
-		continue
-
-	if this and this[0] not in SPECIAL_CHARS:
-	    subpattern.append((LITERAL, this))
-
-	elif this == "[":
-	    # character set
-	    set = []
-## 	    if source.match(":"):
-## 		pass # handle character classes
-	    if source.match("^"):
-		set.append((NEGATE, None))
-	    # check remaining characters
-	    start = set[:]
-	    while 1:
-		this = source.get()
-		if this == "]" and set != start:
-		    break
-		elif this and this[0] == "\\":
-		    code1 = _class_escape(source, this)
-		elif this:
-		    code1 = LITERAL, this
-		else:
-		    raise error, "unexpected end of regular expression"
-		if source.match("-"):
-		    # potential range
-		    this = source.get()
-		    if this == "]":
-			set.append(code1)
-			set.append((LITERAL, "-"))
-			break
-		    else:
-			if this[0] == "\\":
-			    code2 = _class_escape(source, this)
-			else:
-			    code2 = LITERAL, this
-			if code1[0] != LITERAL or code2[0] != LITERAL:
-			    raise error, "illegal range"
-			if len(code1[1]) != 1 or len(code2[1]) != 1:
-			    raise error, "illegal range"
-			set.append((RANGE, (code1[1], code2[1])))
-		else:
-		    if code1[0] is IN:
-			code1 = code1[1][0]
-		    set.append(code1)
-
-	    # FIXME: <fl> move set optimization to compiler!
-	    if len(set)==1 and set[0][0] is LITERAL:
-		subpattern.append(set[0]) # optimization
-	    elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
-		subpattern.append((NOT_LITERAL, set[1][1])) # optimization
-	    else:
-		# FIXME: <fl> add charmap optimization
-		subpattern.append((IN, set))
-
-	elif this and this[0] in REPEAT_CHARS:
-	    # repeat previous item
-	    if this == "?":
-		min, max = 0, 1
-	    elif this == "*":
-		min, max = 0, MAXREPEAT
-	    elif this == "+":
-		min, max = 1, MAXREPEAT
-	    elif this == "{":
-		min, max = 0, MAXREPEAT
-		lo = hi = ""
-		while source.next in DIGITS:
-		    lo = lo + source.get()
-		if source.match(","):
-		    while source.next in DIGITS:
-			hi = hi + source.get()
-		else:
-		    hi = lo
-		if not source.match("}"):
-		    raise error, "bogus range"
-		if lo:
-		    min = int(lo)
-		if hi:
-		    max = int(hi)
-		# FIXME: <fl> check that hi >= lo!
-	    else:
-		raise error, "not supported"
-	    # figure out which item to repeat
-	    if subpattern:
-		item = subpattern[-1:]
-	    else:
-		raise error, "nothing to repeat"
-	    if source.match("?"):
-		subpattern[-1] = (MIN_REPEAT, (min, max, item))
-	    else:
-		subpattern[-1] = (MAX_REPEAT, (min, max, item))
-
-	elif this == ".":
-	    subpattern.append((ANY, None))
-
-	elif this == "(":
-	    group = 1
-	    name = None
-	    if source.match("?"):
-		group = 0
-		# options
-		if source.match("P"):
-		    # python extensions
-		    if source.match("<"):
-			# named group: skip forward to end of name
-			name = ""
-			while 1:
-			    char = source.get()
-			    if char is None:
-				raise error, "unterminated name"
-			    if char == ">":
-				break
-			    name = name + char
-			group = 1
-			if not isname(name):
-			    raise error, "illegal character in group name"
-		    elif source.match("="):
-			# named backreference
-			raise error, "not yet implemented"
-		    else:
-			char = source.get()
-			if char is None:
-			    raise error, "unexpected end of pattern"
-			raise error, "unknown specifier: ?P%s" % char
-		elif source.match(":"):
-		    # non-capturing group
-		    group = 2
-		elif source.match("#"):
-		    # comment
-		    while 1:
-			if source.next is None or source.next == ")":
-			    break
-			source.get()
-		else:
-		    # flags
-		    while FLAGS.has_key(source.next):
-			state.flags = state.flags | FLAGS[source.get()]
-	    if group:
-		# parse group contents
-		b = []
-		if group == 2:
-		    # anonymous group
-		    group = None
-		else:
-		    group = state.getgroup(name)
-		while 1:
-		    p = _parse(source, state, flags)
-		    if source.match(")"):
-			if b:
-			    b.append(p)
-			    p = _branch(state, b)
-			subpattern.append((SUBPATTERN, (group, p)))
-			break
-		    elif source.match("|"):
-			b.append(p)
-		    else:
-			raise error, "group not properly closed"
-	    else:
-		while 1:
-		    char = source.get()
-		    if char is None or char == ")":
-			break
-		    raise error, "unknown extension"
-
-	elif this == "^":
-	    subpattern.append((AT, AT_BEGINNING))
-
-	elif this == "$":
-	    subpattern.append((AT, AT_END))
-
-	elif this and this[0] == "\\":
-	    code = _escape(source, this, state)
-	    subpattern.append(code)
-
-	else:
-	    raise error, "parser error"
+        if source.next in ("|", ")"):
+            break # end of subpattern
+        this = source.get()
+        if this is None:
+            break # end of pattern
+
+        if state.flags & SRE_FLAG_VERBOSE:
+            # skip whitespace and comments
+            if this in WHITESPACE:
+                continue
+            if this == "#":
+                while 1:
+                    this = source.get()
+                    if this in (None, "\n"):
+                        break
+                continue
+
+        if this and this[0] not in SPECIAL_CHARS:
+            subpattern.append((LITERAL, this))
+
+        elif this == "[":
+            # character set
+            set = []
+##          if source.match(":"):
+##              pass # handle character classes
+            if source.match("^"):
+                set.append((NEGATE, None))
+            # check remaining characters
+            start = set[:]
+            while 1:
+                this = source.get()
+                if this == "]" and set != start:
+                    break
+                elif this and this[0] == "\\":
+                    code1 = _class_escape(source, this)
+                elif this:
+                    code1 = LITERAL, this
+                else:
+                    raise error, "unexpected end of regular expression"
+                if source.match("-"):
+                    # potential range
+                    this = source.get()
+                    if this == "]":
+                        set.append(code1)
+                        set.append((LITERAL, "-"))
+                        break
+                    else:
+                        if this[0] == "\\":
+                            code2 = _class_escape(source, this)
+                        else:
+                            code2 = LITERAL, this
+                        if code1[0] != LITERAL or code2[0] != LITERAL:
+                            raise error, "illegal range"
+                        if len(code1[1]) != 1 or len(code2[1]) != 1:
+                            raise error, "illegal range"
+                        set.append((RANGE, (code1[1], code2[1])))
+                else:
+                    if code1[0] is IN:
+                        code1 = code1[1][0]
+                    set.append(code1)
+
+            # FIXME: <fl> move set optimization to compiler!
+            if len(set)==1 and set[0][0] is LITERAL:
+                subpattern.append(set[0]) # optimization
+            elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
+                subpattern.append((NOT_LITERAL, set[1][1])) # optimization
+            else:
+                # FIXME: <fl> add charmap optimization
+                subpattern.append((IN, set))
+
+        elif this and this[0] in REPEAT_CHARS:
+            # repeat previous item
+            if this == "?":
+                min, max = 0, 1
+            elif this == "*":
+                min, max = 0, MAXREPEAT
+            elif this == "+":
+                min, max = 1, MAXREPEAT
+            elif this == "{":
+                min, max = 0, MAXREPEAT
+                lo = hi = ""
+                while source.next in DIGITS:
+                    lo = lo + source.get()
+                if source.match(","):
+                    while source.next in DIGITS:
+                        hi = hi + source.get()
+                else:
+                    hi = lo
+                if not source.match("}"):
+                    raise error, "bogus range"
+                if lo:
+                    min = int(lo)
+                if hi:
+                    max = int(hi)
+                # FIXME: <fl> check that hi >= lo!
+            else:
+                raise error, "not supported"
+            # figure out which item to repeat
+            if subpattern:
+                item = subpattern[-1:]
+            else:
+                raise error, "nothing to repeat"
+            if source.match("?"):
+                subpattern[-1] = (MIN_REPEAT, (min, max, item))
+            else:
+                subpattern[-1] = (MAX_REPEAT, (min, max, item))
+
+        elif this == ".":
+            subpattern.append((ANY, None))
+
+        elif this == "(":
+            group = 1
+            name = None
+            if source.match("?"):
+                group = 0
+                # options
+                if source.match("P"):
+                    # python extensions
+                    if source.match("<"):
+                        # named group: skip forward to end of name
+                        name = ""
+                        while 1:
+                            char = source.get()
+                            if char is None:
+                                raise error, "unterminated name"
+                            if char == ">":
+                                break
+                            name = name + char
+                        group = 1
+                        if not isname(name):
+                            raise error, "illegal character in group name"
+                    elif source.match("="):
+                        # named backreference
+                        raise error, "not yet implemented"
+                    else:
+                        char = source.get()
+                        if char is None:
+                            raise error, "unexpected end of pattern"
+                        raise error, "unknown specifier: ?P%s" % char
+                elif source.match(":"):
+                    # non-capturing group
+                    group = 2
+                elif source.match("#"):
+                    # comment
+                    while 1:
+                        if source.next is None or source.next == ")":
+                            break
+                        source.get()
+                else:
+                    # flags
+                    while FLAGS.has_key(source.next):
+                        state.flags = state.flags | FLAGS[source.get()]
+            if group:
+                # parse group contents
+                b = []
+                if group == 2:
+                    # anonymous group
+                    group = None
+                else:
+                    group = state.getgroup(name)
+                while 1:
+                    p = _parse(source, state, flags)
+                    if source.match(")"):
+                        if b:
+                            b.append(p)
+                            p = _branch(state, b)
+                        subpattern.append((SUBPATTERN, (group, p)))
+                        break
+                    elif source.match("|"):
+                        b.append(p)
+                    else:
+                        raise error, "group not properly closed"
+            else:
+                while 1:
+                    char = source.get()
+                    if char is None or char == ")":
+                        break
+                    raise error, "unknown extension"
+
+        elif this == "^":
+            subpattern.append((AT, AT_BEGINNING))
+
+        elif this == "$":
+            subpattern.append((AT, AT_END))
+
+        elif this and this[0] == "\\":
+            code = _escape(source, this, state)
+            subpattern.append(code)
+
+        else:
+            raise error, "parser error"

    return subpattern

@@ -509,19 +509,19 @@ def parse(pattern, flags=0):
    state = State()
    b = []
    while 1:
-	p = _parse(source, state, flags)
-	tail = source.get()
-	if tail == "|":
-	    b.append(p)
-	elif tail == ")":
-	    raise error, "unbalanced parenthesis"
-	elif tail is None:
-	    if b:
-		b.append(p)
-		p = _branch(state, b)
-	    break
-	else:
-	    raise error, "bogus characters at end of regular expression"
+        p = _parse(source, state, flags)
+        tail = source.get()
+        if tail == "|":
+            b.append(p)
+        elif tail == ")":
+            raise error, "unbalanced parenthesis"
+        elif tail is None:
+            if b:
+                b.append(p)
+                p = _branch(state, b)
+            break
+        else:
+            raise error, "bogus characters at end of regular expression"
    return p

 def parse_template(source, pattern):
@@ -531,59 +531,59 @@ def parse_template(source, pattern):
    p = []
    a = p.append
    while 1:
-	this = s.get()
-	if this is None:
-	    break # end of replacement string
-	if this and this[0] == "\\":
-	    # group
-	    if this == "\\g":
-		name = ""
-		if s.match("<"):
-		    while 1:
-			char = s.get()
-			if char is None:
-			    raise error, "unterminated group name"
-			if char == ">":
-			    break
-			name = name + char
-		if not name:
-		    raise error, "bad group name"
-		try:
-		    index = int(name)
-		except ValueError:
-		    if not isname(name):
-			raise error, "illegal character in group name"
-		    try:
-			index = pattern.groupindex[name]
-		    except KeyError:
-			raise IndexError, "unknown group name"
-		a((MARK, index))
-	    elif len(this) > 1 and this[1] in DIGITS:
-		code = None
-		while 1:
-		    group = _group(this, pattern.groups+1)
-		    if group:
-			if (not s.next or
-			    not _group(this + s.next, pattern.groups+1)):
-		            code = MARK, int(group)
-			    break
-		    elif s.next in OCTDIGITS:
-			this = this + s.get()
-		    else:
-			break
-		if not code:
-		    this = this[1:]
-		    # FIXME: support unicode characters!
-		    code = LITERAL, chr(int(this[-6:], 8) & 0xff)
-		a(code)
-	    else:
-		try:
-		    a(ESCAPES[this])
-		except KeyError:
-		    for c in this:
-			a((LITERAL, c))
-	else:
-	    a((LITERAL, this))
+        this = s.get()
+        if this is None:
+            break # end of replacement string
+        if this and this[0] == "\\":
+            # group
+            if this == "\\g":
+                name = ""
+                if s.match("<"):
+                    while 1:
+                        char = s.get()
+                        if char is None:
+                            raise error, "unterminated group name"
+                        if char == ">":
+                            break
+                        name = name + char
+                if not name:
+                    raise error, "bad group name"
+                try:
+                    index = int(name)
+                except ValueError:
+                    if not isname(name):
+                        raise error, "illegal character in group name"
+                    try:
+                        index = pattern.groupindex[name]
+                    except KeyError:
+                        raise IndexError, "unknown group name"
+                a((MARK, index))
+            elif len(this) > 1 and this[1] in DIGITS:
+                code = None
+                while 1:
+                    group = _group(this, pattern.groups+1)
+                    if group:
+                        if (not s.next or
+                            not _group(this + s.next, pattern.groups+1)):
+                            code = MARK, int(group)
+                            break
+                    elif s.next in OCTDIGITS:
+                        this = this + s.get()
+                    else:
+                        break
+                if not code:
+                    this = this[1:]
+                    # FIXME: support unicode characters!
+                    code = LITERAL, chr(int(this[-6:], 8) & 0xff)
+                a(code)
+            else:
+                try:
+                    a(ESCAPES[this])
+                except KeyError:
+                    for c in this:
+                        a((LITERAL, c))
+        else:
+            a((LITERAL, this))
    return p

 def expand_template(template, match):
@@ -592,11 +592,11 @@ def expand_template(template, match):
    p = []
    a = p.append
    for c, s in template:
-	if c is LITERAL:
-	    a(s)
-	elif c is MARK:
-	    s = match.group(s)
-	    if s is None:
-		raise error, "empty group"
-	    a(s)
+        if c is LITERAL:
+            a(s)
+        elif c is MARK:
+            s = match.group(s)
+            if s is None:
+                raise error, "empty group"
+            a(s)
    return match.string[:0].join(p)
--- a/Lib/test/test_sre.py
+++ b/Lib/test/test_sre.py
-# FIXME: this is basically test_re.py, with a few 
+# FIXME: this is basically test_re.py, with a few minor changes

 import sys
 sys.path=['.']+sys.path
@@ -337,7 +337,7 @@ for t in tests:
                    print repr(repl)+' should be '+repr(expected)
            else:
                print '=== Failed incorrectly', t
-		continue
+                continue

            # Try the match on a unicode string, and check that it
            # still succeeds.
@@ -359,9 +359,9 @@ for t in tests:
            
            if pattern[:2]!='\\B' and pattern[-2:]!='\\B':
                obj=sre.compile(pattern)
-		result=obj.search(s, result.start(0), result.end(0)+1)
-		if result==None:
-		    print '=== Failed on range-limited match', t
+                result=obj.search(s, result.start(0), result.end(0)+1)
+                if result==None:
+                    print '=== Failed on range-limited match', t

            # Try the match with IGNORECASE enabled, and check that it
            # still succeeds.