Commit 3f767795 authored by Martin v. Löwis

Patch #1359618: Speed-up charmap encoder.

parent 67966bed
......@@ -650,6 +650,11 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
const char *errors /* error handling */
);
/* Build an encoding map object from a character map string.  The generated
   codecs pass the result to the charmap encoder as their encoding table.
   NOTE(review): the implementation is not visible here; confirm the exact
   result type and the error behaviour against the definition. */
PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
PyObject* string /* 256 character map */
);
/* --- UTF-7 Codecs ------------------------------------------------------- */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
......@@ -104,6 +104,9 @@ Extension Modules
Library
-------
- Patch #1359618: Speed up charmap encoder by using a trie structure
for lookup.
- The functions in the ``pprint`` module now sort dictionaries by key
before computing the display. Before 2.5, ``pprint`` sorted a dictionary
if and only if its display required more than one line, although that
......
......@@ -792,6 +792,15 @@ charmap_encode(PyObject *self,
return v;
}
/* _codecs.charmap_build(map): module-level wrapper around
   PyUnicode_BuildEncodingMap().

   Parses exactly one argument with the "U" format unit and forwards it
   unchanged.  Returns NULL when argument parsing fails; otherwise returns
   whatever PyUnicode_BuildEncodingMap() returns.  `self` is unused. */
static PyObject*
charmap_build(PyObject *self, PyObject *args)
{
    PyObject *decoding_string;

    if (PyArg_ParseTuple(args, "U:charmap_build", &decoding_string))
        return PyUnicode_BuildEncodingMap(decoding_string);

    /* Argument parsing failed. */
    return NULL;
}
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
static PyObject *
......@@ -897,6 +906,7 @@ static PyMethodDef _codecs_functions[] = {
{"ascii_decode", ascii_decode, METH_VARARGS},
{"charmap_encode", charmap_encode, METH_VARARGS},
{"charmap_decode", charmap_decode, METH_VARARGS},
{"charmap_build", charmap_build, METH_VARARGS},
{"readbuffer_encode", readbuffer_encode, METH_VARARGS},
{"charbuffer_encode", charbuffer_encode, METH_VARARGS},
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
......
This diff is collapsed.
......@@ -78,7 +78,7 @@ cjk: build/
### Cleanup
clean:
$(RM) build/*
$(RM) -f build/*
distclean: clean
$(RM) -rf MAPPINGS/
......@@ -270,6 +270,11 @@ def codegen(name, map, encodingname, comments=1):
comments=comments,
precisions=(4, 2))
if decoding_table_code:
suffix = 'table'
else:
suffix = 'map'
l = [
'''\
""" Python Character Mapping Codec %s generated from '%s' with gencodec.py.
......@@ -283,30 +288,20 @@ import codecs
class Codec(codecs.Codec):
def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map)
return codecs.charmap_encode(input,errors,encoding_%s)
def decode(self,input,errors='strict'):''' % (encodingname, name)
]
if decoding_table_code:
l.append('''\
return codecs.charmap_decode(input,errors,decoding_table)''')
else:
def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_%s)
''' % (encodingname, name, suffix, suffix)]
l.append('''\
return codecs.charmap_decode(input,errors,decoding_map)''')
l.append('''
class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0]
return codecs.charmap_encode(input,self.errors,encoding_%s)[0]
class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):''')
if decoding_table_code:
l.append('''\
return codecs.charmap_decode(input,self.errors,decoding_table)[0]''')
else:
l.append('''\
return codecs.charmap_decode(input,self.errors,decoding_map)[0]''')
def decode(self, input, final=False):
return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' %
(suffix, suffix))
l.append('''
class StreamWriter(Codec,codecs.StreamWriter):
......@@ -319,13 +314,13 @@ class StreamReader(Codec,codecs.StreamReader):
def getregentry():
return codecs.CodecInfo(
Codec().encode,
Codec().decode,
name=%r,
streamwriter=StreamWriter,
streamreader=StreamReader,
encode=Codec().encode,
decode=Codec().decode,
incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
)
''' % encodingname.replace('_', '-'))
......@@ -342,6 +337,12 @@ def getregentry():
l.extend(decoding_table_code)
# Add encoding map
if decoding_table_code:
l.append('''
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)
''')
else:
l.append('''
### Encoding Map
''')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment