Commit 3f767795, authored by Martin v. Löwis

Patch #1359618: Speed-up charmap encoder.

parent 67966bed
...@@ -650,6 +650,11 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString( ...@@ -650,6 +650,11 @@ PyAPI_FUNC(PyObject*) PyUnicode_AsEncodedString(
const char *errors /* error handling */ const char *errors /* error handling */
); );
/* Build an encoding map object from a character map string.

   NOTE(review): presumably `string` is the 256-character decoding table of
   a charmap codec and the result is the lookup structure consumed by the
   charmap encoder -- confirm against the implementation, which is not
   visible here. */
PyAPI_FUNC(PyObject*) PyUnicode_BuildEncodingMap(
PyObject* string /* 256 character map */
);
/* --- UTF-7 Codecs ------------------------------------------------------- */ /* --- UTF-7 Codecs ------------------------------------------------------- */
PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7( PyAPI_FUNC(PyObject*) PyUnicode_DecodeUTF7(
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
...@@ -104,6 +104,9 @@ Extension Modules ...@@ -104,6 +104,9 @@ Extension Modules
Library Library
------- -------
- Patch #1359618: Speed up charmap encoder by using a trie structure
for lookup.
- The functions in the ``pprint`` module now sort dictionaries by key - The functions in the ``pprint`` module now sort dictionaries by key
before computing the display. Before 2.5, ``pprint`` sorted a dictionary before computing the display. Before 2.5, ``pprint`` sorted a dictionary
if and only if its display required more than one line, although that if and only if its display required more than one line, although that
......
...@@ -792,6 +792,15 @@ charmap_encode(PyObject *self, ...@@ -792,6 +792,15 @@ charmap_encode(PyObject *self,
return v; return v;
} }
/* Module-level "charmap_build" entry point.

   Accepts exactly one argument, which must be a Unicode object ("U" format
   unit), and forwards it unchanged to PyUnicode_BuildEncodingMap().  The
   result of that call (including NULL on failure) is returned as-is. */
static PyObject *
charmap_build(PyObject *self, PyObject *args)
{
    PyObject *decoding_string = NULL;

    if (PyArg_ParseTuple(args, "U:charmap_build", &decoding_string)) {
        return PyUnicode_BuildEncodingMap(decoding_string);
    }

    /* Argument parsing failed; the error has already been set by
       PyArg_ParseTuple, so just propagate it. */
    return NULL;
}
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
static PyObject * static PyObject *
...@@ -897,6 +906,7 @@ static PyMethodDef _codecs_functions[] = { ...@@ -897,6 +906,7 @@ static PyMethodDef _codecs_functions[] = {
{"ascii_decode", ascii_decode, METH_VARARGS}, {"ascii_decode", ascii_decode, METH_VARARGS},
{"charmap_encode", charmap_encode, METH_VARARGS}, {"charmap_encode", charmap_encode, METH_VARARGS},
{"charmap_decode", charmap_decode, METH_VARARGS}, {"charmap_decode", charmap_decode, METH_VARARGS},
{"charmap_build", charmap_build, METH_VARARGS},
{"readbuffer_encode", readbuffer_encode, METH_VARARGS}, {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
{"charbuffer_encode", charbuffer_encode, METH_VARARGS}, {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T) #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
......
This diff is collapsed.
...@@ -78,7 +78,7 @@ cjk: build/ ...@@ -78,7 +78,7 @@ cjk: build/
### Cleanup ### Cleanup
clean: clean:
$(RM) build/* $(RM) -f build/*
distclean: clean distclean: clean
$(RM) -rf MAPPINGS/ $(RM) -rf MAPPINGS/
...@@ -270,6 +270,11 @@ def codegen(name, map, encodingname, comments=1): ...@@ -270,6 +270,11 @@ def codegen(name, map, encodingname, comments=1):
comments=comments, comments=comments,
precisions=(4, 2)) precisions=(4, 2))
if decoding_table_code:
suffix = 'table'
else:
suffix = 'map'
l = [ l = [
'''\ '''\
""" Python Character Mapping Codec %s generated from '%s' with gencodec.py. """ Python Character Mapping Codec %s generated from '%s' with gencodec.py.
...@@ -283,30 +288,20 @@ import codecs ...@@ -283,30 +288,20 @@ import codecs
class Codec(codecs.Codec): class Codec(codecs.Codec):
def encode(self,input,errors='strict'): def encode(self,input,errors='strict'):
return codecs.charmap_encode(input,errors,encoding_map) return codecs.charmap_encode(input,errors,encoding_%s)
def decode(self,input,errors='strict'):''' % (encodingname, name)
]
if decoding_table_code:
l.append('''\
return codecs.charmap_decode(input,errors,decoding_table)''')
else:
l.append('''\
return codecs.charmap_decode(input,errors,decoding_map)''')
l.append(''' def decode(self,input,errors='strict'):
return codecs.charmap_decode(input,errors,decoding_%s)
''' % (encodingname, name, suffix, suffix)]
l.append('''\
class IncrementalEncoder(codecs.IncrementalEncoder): class IncrementalEncoder(codecs.IncrementalEncoder):
def encode(self, input, final=False): def encode(self, input, final=False):
return codecs.charmap_encode(input,self.errors,encoding_map)[0] return codecs.charmap_encode(input,self.errors,encoding_%s)[0]
class IncrementalDecoder(codecs.IncrementalDecoder): class IncrementalDecoder(codecs.IncrementalDecoder):
def decode(self, input, final=False):''') def decode(self, input, final=False):
if decoding_table_code: return codecs.charmap_decode(input,self.errors,decoding_%s)[0]''' %
l.append('''\ (suffix, suffix))
return codecs.charmap_decode(input,self.errors,decoding_table)[0]''')
else:
l.append('''\
return codecs.charmap_decode(input,self.errors,decoding_map)[0]''')
l.append(''' l.append('''
class StreamWriter(Codec,codecs.StreamWriter): class StreamWriter(Codec,codecs.StreamWriter):
...@@ -319,13 +314,13 @@ class StreamReader(Codec,codecs.StreamReader): ...@@ -319,13 +314,13 @@ class StreamReader(Codec,codecs.StreamReader):
def getregentry(): def getregentry():
return codecs.CodecInfo( return codecs.CodecInfo(
Codec().encode,
Codec().decode,
name=%r, name=%r,
streamwriter=StreamWriter, encode=Codec().encode,
streamreader=StreamReader, decode=Codec().decode,
incrementalencoder=IncrementalEncoder, incrementalencoder=IncrementalEncoder,
incrementaldecoder=IncrementalDecoder, incrementaldecoder=IncrementalDecoder,
streamreader=StreamReader,
streamwriter=StreamWriter,
) )
''' % encodingname.replace('_', '-')) ''' % encodingname.replace('_', '-'))
...@@ -342,10 +337,16 @@ def getregentry(): ...@@ -342,10 +337,16 @@ def getregentry():
l.extend(decoding_table_code) l.extend(decoding_table_code)
# Add encoding map # Add encoding map
l.append(''' if decoding_table_code:
l.append('''
### Encoding table
encoding_table=codecs.charmap_build(decoding_table)
''')
else:
l.append('''
### Encoding Map ### Encoding Map
''') ''')
l.extend(encoding_map_code) l.extend(encoding_map_code)
# Final new-line # Final new-line
l.append('') l.append('')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment