Commit 24bdb047, authored by Guido van Rossum

Marc-Andre Lemburg:

The attached patch set includes a workaround to get Python with
Unicode to compile on BSDI 4.x (courtesy of Thomas Wouters; the cause
is a bug in the BSDI wchar.h header file) and Python interfaces
for the MBCS codec donated by Mark Hammond.

Also included are some minor corrections to the docs of
the new "es" and "es#" parser markers (use PyMem_Free() instead
of free(); thanks to Mark Hammond for finding these).

The unicodedata tests are now in a separate file
(test_unicodedata.py) to avoid problems if the module cannot
be found.
parent 66d45139
...@@ -82,6 +82,10 @@ Unicode Integration Proposal (see file Misc/unicode.txt). ...@@ -82,6 +82,10 @@ Unicode Integration Proposal (see file Misc/unicode.txt).
#endif #endif
#ifdef HAVE_WCHAR_H #ifdef HAVE_WCHAR_H
/* Work around a cosmetic bug in BSDI 4.x wchar.h; thanks to Thomas Wouters */
# ifdef _HAVE_BSDI
# include <time.h>
# endif
# include "wchar.h" # include "wchar.h"
#endif #endif
...@@ -562,7 +566,9 @@ extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap( ...@@ -562,7 +566,9 @@ extern DL_IMPORT(PyObject *) PyUnicode_TranslateCharmap(
); );
#ifdef MS_WIN32 #ifdef MS_WIN32
/* --- MBCS codecs for Windows -------------------------------------------- */ /* --- MBCS codecs for Windows -------------------------------------------- */
extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS( extern DL_IMPORT(PyObject*) PyUnicode_DecodeMBCS(
const char *string, /* MBCS encoded string */ const char *string, /* MBCS encoded string */
int length, /* size of string */ int length, /* size of string */
...@@ -579,8 +585,8 @@ extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS( ...@@ -579,8 +585,8 @@ extern DL_IMPORT(PyObject*) PyUnicode_EncodeMBCS(
const char *errors /* error handling */ const char *errors /* error handling */
); );
#endif /* MS_WIN32 */ #endif /* MS_WIN32 */
/* --- Methods & Slots ---------------------------------------------------- /* --- Methods & Slots ----------------------------------------------------
These are capable of handling Unicode objects and strings on input These are capable of handling Unicode objects and strings on input
......
...@@ -34,4 +34,3 @@ class StreamConverter(StreamWriter,StreamReader): ...@@ -34,4 +34,3 @@ class StreamConverter(StreamWriter,StreamReader):
def getregentry(): def getregentry():
return (Codec.encode,Codec.decode,StreamReader,StreamWriter) return (Codec.encode,Codec.decode,StreamReader,StreamWriter)
test_unicode test_unicode
Testing Unicode comparisons... done. Testing Unicode comparisons... done.
Testing Unicode contains method... done.
Testing Unicode formatting strings... done. Testing Unicode formatting strings... done.
Testing unicodedata module... done. Testing builtin codecs... done.
test_unicodedata
Testing unicodedata module... done.
""" Test script for the Unicode implementation. """ Test script for the Unicode implementation.
Written by Marc-Andre Lemburg (mal@lemburg.com). Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY. (c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
...@@ -250,50 +249,6 @@ assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def' ...@@ -250,50 +249,6 @@ assert u"%(x)s, %(y)s" % {'x':u"abc", 'y':"def"} == u'abc, def'
assert u"%(x)s, %()s" % {'x':u"abc", u''.encode('utf-8'):"def"} == u'abc, def' assert u"%(x)s, %()s" % {'x':u"abc", u''.encode('utf-8'):"def"} == u'abc, def'
print 'done.' print 'done.'
# Test Unicode database APIs
try:
import unicodedata
except ImportError:
pass
else:
print 'Testing unicodedata module...',
assert unicodedata.digit(u'A',None) is None
assert unicodedata.digit(u'9') == 9
assert unicodedata.digit(u'\u215b',None) is None
assert unicodedata.digit(u'\u2468') == 9
assert unicodedata.numeric(u'A',None) is None
assert unicodedata.numeric(u'9') == 9
assert unicodedata.numeric(u'\u215b') == 0.125
assert unicodedata.numeric(u'\u2468') == 9.0
assert unicodedata.decimal(u'A',None) is None
assert unicodedata.decimal(u'9') == 9
assert unicodedata.decimal(u'\u215b',None) is None
assert unicodedata.decimal(u'\u2468',None) is None
assert unicodedata.category(u'\uFFFE') == 'Cn'
assert unicodedata.category(u'a') == 'Ll'
assert unicodedata.category(u'A') == 'Lu'
assert unicodedata.bidirectional(u'\uFFFE') == ''
assert unicodedata.bidirectional(u' ') == 'WS'
assert unicodedata.bidirectional(u'A') == 'L'
assert unicodedata.decomposition(u'\uFFFE') == ''
assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
assert unicodedata.mirrored(u'\uFFFE') == 0
assert unicodedata.mirrored(u'a') == 0
assert unicodedata.mirrored(u'\u2201') == 1
assert unicodedata.combining(u'\uFFFE') == 0
assert unicodedata.combining(u'a') == 0
assert unicodedata.combining(u'\u20e1') == 230
print 'done.'
# Test builtin codecs # Test builtin codecs
print 'Testing builtin codecs...', print 'Testing builtin codecs...',
......
""" Test script for the unicodedata module.
Written by Marc-Andre Lemburg (mal@lemburg.com).
(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.
"""#"
# NOTE(review): neither `verbose` nor `sys` is referenced in the visible
# part of this script -- confirm before relying on or removing them.
from test_support import verbose
import sys
# Test Unicode database APIs
# The import is not wrapped in try/except, so a missing unicodedata module
# raises ImportError from this script instead of being silently skipped.
import unicodedata
# The trailing comma keeps the cursor on the same line so that the final
# 'done.' is printed right after the banner.
print 'Testing unicodedata module...',
# digit(): returns the supplied default (None) for characters without a
# digit value, and the integer digit value otherwise.
assert unicodedata.digit(u'A',None) is None
assert unicodedata.digit(u'9') == 9
assert unicodedata.digit(u'\u215b',None) is None
assert unicodedata.digit(u'\u2468') == 9
# numeric(): also covers non-integer values such as u'\u215b' (0.125).
assert unicodedata.numeric(u'A',None) is None
assert unicodedata.numeric(u'9') == 9
assert unicodedata.numeric(u'\u215b') == 0.125
assert unicodedata.numeric(u'\u2468') == 9.0
# decimal(): u'\u2468' has a digit value (see above) but is expected to
# have no decimal value, so the default is returned.
assert unicodedata.decimal(u'A',None) is None
assert unicodedata.decimal(u'9') == 9
assert unicodedata.decimal(u'\u215b',None) is None
assert unicodedata.decimal(u'\u2468',None) is None
# category(): general category abbreviations as strings.
assert unicodedata.category(u'\uFFFE') == 'Cn'
assert unicodedata.category(u'a') == 'Ll'
assert unicodedata.category(u'A') == 'Lu'
# bidirectional(): an empty string is expected for u'\uFFFE'.
assert unicodedata.bidirectional(u'\uFFFE') == ''
assert unicodedata.bidirectional(u' ') == 'WS'
assert unicodedata.bidirectional(u'A') == 'L'
# decomposition(): returned as a string of a tag plus hex code points;
# empty when no mapping is expected.
assert unicodedata.decomposition(u'\uFFFE') == ''
assert unicodedata.decomposition(u'\u00bc') == '<fraction> 0031 2044 0034'
# mirrored(): integer flag, 1 for mirrored characters and 0 otherwise.
assert unicodedata.mirrored(u'\uFFFE') == 0
assert unicodedata.mirrored(u'a') == 0
assert unicodedata.mirrored(u'\u2201') == 1
# combining(): integer combining class, 0 when the character has none.
assert unicodedata.combining(u'\uFFFE') == 0
assert unicodedata.combining(u'a') == 0
assert unicodedata.combining(u'\u20e1') == 230
print 'done.'
...@@ -740,8 +740,8 @@ These markers are used by the PyArg_ParseTuple() APIs: ...@@ -740,8 +740,8 @@ These markers are used by the PyArg_ParseTuple() APIs:
On output, a buffer of the needed size is allocated and On output, a buffer of the needed size is allocated and
returned through *buffer as NULL-terminated string. returned through *buffer as NULL-terminated string.
The encoded may not contain embedded NULL characters. The encoded may not contain embedded NULL characters.
The caller is responsible for free()ing the allocated *buffer The caller is responsible for calling PyMem_Free()
after usage. to free the allocated *buffer after usage.
"es#": "es#":
Takes three parameters: encoding (const char *), Takes three parameters: encoding (const char *),
...@@ -755,8 +755,9 @@ These markers are used by the PyArg_ParseTuple() APIs: ...@@ -755,8 +755,9 @@ These markers are used by the PyArg_ParseTuple() APIs:
If *buffer is NULL, a buffer of the needed size is If *buffer is NULL, a buffer of the needed size is
allocated and output copied into it. *buffer is then allocated and output copied into it. *buffer is then
updated to point to the allocated memory area. The caller updated to point to the allocated memory area.
is responsible for free()ing *buffer after usage. The caller is responsible for calling PyMem_Free()
to free the allocated *buffer after usage.
In both cases *buffer_len is updated to the number of In both cases *buffer_len is updated to the number of
characters written (excluding the trailing NULL-byte). characters written (excluding the trailing NULL-byte).
...@@ -784,7 +785,7 @@ Using "es#" with auto-allocation: ...@@ -784,7 +785,7 @@ Using "es#" with auto-allocation:
return NULL; return NULL;
} }
str = PyString_FromStringAndSize(buffer, buffer_len); str = PyString_FromStringAndSize(buffer, buffer_len);
free(buffer); PyMem_Free(buffer);
return str; return str;
} }
...@@ -807,7 +808,7 @@ Using "es" with auto-allocation returning a NULL-terminated string: ...@@ -807,7 +808,7 @@ Using "es" with auto-allocation returning a NULL-terminated string:
return NULL; return NULL;
} }
str = PyString_FromString(buffer); str = PyString_FromString(buffer);
free(buffer); PyMem_Free(buffer);
return str; return str;
} }
......
...@@ -286,6 +286,26 @@ charmap_decode(PyObject *self, ...@@ -286,6 +286,26 @@ charmap_decode(PyObject *self,
size); size);
} }
#ifdef MS_WIN32
/* Module-level wrapper exposing the MBCS decoder to Python code.

   Arguments (parsed from `args`):
     data   -- character buffer plus its length (format "t#")
     errors -- optional error-handling name (format "z"); stays NULL
               when the argument is omitted

   Returns NULL when argument parsing fails.  Otherwise the object
   produced by PyUnicode_DecodeMBCS() is handed, together with the
   input size, to codec_tuple(), whose result is returned unchanged.
   NOTE(review): codec_tuple() is defined outside this excerpt;
   presumably it builds the (object, length) result tuple -- confirm.

   `self` is not used. */
static PyObject *
mbcs_decode(PyObject *self,
            PyObject *args)
{
    const char *errors = NULL;
    const char *data;
    int size;
    PyObject *decoded;

    if (!PyArg_ParseTuple(args, "t#|z:mbcs_decode",
                          &data, &size, &errors))
        return NULL;

    decoded = PyUnicode_DecodeMBCS(data, size, errors);
    return codec_tuple(decoded, size);
}
#endif /* MS_WIN32 */
/* --- Encoder ------------------------------------------------------------ */ /* --- Encoder ------------------------------------------------------------ */
static PyObject * static PyObject *
...@@ -491,6 +511,28 @@ charmap_encode(PyObject *self, ...@@ -491,6 +511,28 @@ charmap_encode(PyObject *self,
PyUnicode_GET_SIZE(str)); PyUnicode_GET_SIZE(str));
} }
#ifdef MS_WIN32
/* Module-level wrapper exposing the MBCS encoder to Python code.

   Arguments (parsed from `args`):
     str    -- object accepted by the "U" format unit
               (a Unicode object per the PyArg_ParseTuple() docs)
     errors -- optional error-handling name (format "z"); stays NULL
               when the argument is omitted

   Returns NULL when argument parsing fails.  Otherwise the object
   produced by PyUnicode_EncodeMBCS() is handed, together with the
   size of the Unicode input, to codec_tuple(), whose result is
   returned unchanged.
   NOTE(review): codec_tuple() is defined outside this excerpt;
   presumably it builds the (object, length) result tuple -- confirm.

   `self` is not used. */
static PyObject *
mbcs_encode(PyObject *self,
            PyObject *args)
{
    const char *errors = NULL;
    PyObject *str;
    PyObject *encoded;

    if (!PyArg_ParseTuple(args, "U|z:mbcs_encode",
                          &str, &errors))
        return NULL;

    encoded = PyUnicode_EncodeMBCS(PyUnicode_AS_UNICODE(str),
                                   PyUnicode_GET_SIZE(str),
                                   errors);
    return codec_tuple(encoded, PyUnicode_GET_SIZE(str));
}
#endif /* MS_WIN32 */
/* --- Module API --------------------------------------------------------- */ /* --- Module API --------------------------------------------------------- */
static PyMethodDef _codecs_functions[] = { static PyMethodDef _codecs_functions[] = {
...@@ -519,6 +561,10 @@ static PyMethodDef _codecs_functions[] = { ...@@ -519,6 +561,10 @@ static PyMethodDef _codecs_functions[] = {
{"charmap_decode", charmap_decode, 1}, {"charmap_decode", charmap_decode, 1},
{"readbuffer_encode", readbuffer_encode, 1}, {"readbuffer_encode", readbuffer_encode, 1},
{"charbuffer_encode", charbuffer_encode, 1}, {"charbuffer_encode", charbuffer_encode, 1},
#ifdef MS_WIN32
{"mbcs_encode", mbcs_encode, 1},
{"mbcs_decode", mbcs_decode, 1},
#endif
{NULL, NULL} /* sentinel */ {NULL, NULL} /* sentinel */
}; };
......
...@@ -704,7 +704,7 @@ convertsimple1(arg, p_format, p_va) ...@@ -704,7 +704,7 @@ convertsimple1(arg, p_format, p_va)
the data copied into it; *buffer is the data copied into it; *buffer is
updated to point to the new buffer; updated to point to the new buffer;
the caller is responsible for the caller is responsible for
free()ing it after usage PyMem_Free()ing it after usage
- if *buffer is not NULL, the data - if *buffer is not NULL, the data
is copied to *buffer; *buffer_len is copied to *buffer; *buffer_len
...@@ -752,7 +752,7 @@ convertsimple1(arg, p_format, p_va) ...@@ -752,7 +752,7 @@ convertsimple1(arg, p_format, p_va)
is allocated and the data copied is allocated and the data copied
into it; *buffer is updated to into it; *buffer is updated to
point to the new buffer; the caller point to the new buffer; the caller
is responsible for free()ing it is responsible for PyMem_Free()ing it
after usage after usage
*/ */
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment