Commit a2a89a87, authored by Walter Dörwald

Change all functions that expect one unicode character to accept a pair of surrogates in narrow builds. Fixes issue #1706460.
parent 261b1363
...@@ -103,6 +103,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): ...@@ -103,6 +103,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertEqual(self.db.digit(u'9'), 9) self.assertEqual(self.db.digit(u'9'), 9)
self.assertEqual(self.db.digit(u'\u215b', None), None) self.assertEqual(self.db.digit(u'\u215b', None), None)
self.assertEqual(self.db.digit(u'\u2468'), 9) self.assertEqual(self.db.digit(u'\u2468'), 9)
self.assertEqual(self.db.digit(u'\U00020000', None), None)
self.assertRaises(TypeError, self.db.digit) self.assertRaises(TypeError, self.db.digit)
self.assertRaises(TypeError, self.db.digit, u'xx') self.assertRaises(TypeError, self.db.digit, u'xx')
...@@ -113,6 +114,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): ...@@ -113,6 +114,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertEqual(self.db.numeric(u'9'), 9) self.assertEqual(self.db.numeric(u'9'), 9)
self.assertEqual(self.db.numeric(u'\u215b'), 0.125) self.assertEqual(self.db.numeric(u'\u215b'), 0.125)
self.assertEqual(self.db.numeric(u'\u2468'), 9.0) self.assertEqual(self.db.numeric(u'\u2468'), 9.0)
self.assertEqual(self.db.numeric(u'\U00020000', None), None)
self.assertRaises(TypeError, self.db.numeric) self.assertRaises(TypeError, self.db.numeric)
self.assertRaises(TypeError, self.db.numeric, u'xx') self.assertRaises(TypeError, self.db.numeric, u'xx')
...@@ -123,6 +125,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): ...@@ -123,6 +125,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertEqual(self.db.decimal(u'9'), 9) self.assertEqual(self.db.decimal(u'9'), 9)
self.assertEqual(self.db.decimal(u'\u215b', None), None) self.assertEqual(self.db.decimal(u'\u215b', None), None)
self.assertEqual(self.db.decimal(u'\u2468', None), None) self.assertEqual(self.db.decimal(u'\u2468', None), None)
self.assertEqual(self.db.decimal(u'\U00020000', None), None)
self.assertRaises(TypeError, self.db.decimal) self.assertRaises(TypeError, self.db.decimal)
self.assertRaises(TypeError, self.db.decimal, u'xx') self.assertRaises(TypeError, self.db.decimal, u'xx')
...@@ -132,6 +135,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): ...@@ -132,6 +135,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertEqual(self.db.category(u'\uFFFE'), 'Cn') self.assertEqual(self.db.category(u'\uFFFE'), 'Cn')
self.assertEqual(self.db.category(u'a'), 'Ll') self.assertEqual(self.db.category(u'a'), 'Ll')
self.assertEqual(self.db.category(u'A'), 'Lu') self.assertEqual(self.db.category(u'A'), 'Lu')
self.assertEqual(self.db.category(u'\U00020000'), 'Lo')
self.assertRaises(TypeError, self.db.category) self.assertRaises(TypeError, self.db.category)
self.assertRaises(TypeError, self.db.category, u'xx') self.assertRaises(TypeError, self.db.category, u'xx')
...@@ -140,6 +144,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): ...@@ -140,6 +144,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertEqual(self.db.bidirectional(u'\uFFFE'), '') self.assertEqual(self.db.bidirectional(u'\uFFFE'), '')
self.assertEqual(self.db.bidirectional(u' '), 'WS') self.assertEqual(self.db.bidirectional(u' '), 'WS')
self.assertEqual(self.db.bidirectional(u'A'), 'L') self.assertEqual(self.db.bidirectional(u'A'), 'L')
self.assertEqual(self.db.bidirectional(u'\U00020000'), 'L')
self.assertRaises(TypeError, self.db.bidirectional) self.assertRaises(TypeError, self.db.bidirectional)
self.assertRaises(TypeError, self.db.bidirectional, u'xx') self.assertRaises(TypeError, self.db.bidirectional, u'xx')
...@@ -155,6 +160,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): ...@@ -155,6 +160,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertEqual(self.db.mirrored(u'\uFFFE'), 0) self.assertEqual(self.db.mirrored(u'\uFFFE'), 0)
self.assertEqual(self.db.mirrored(u'a'), 0) self.assertEqual(self.db.mirrored(u'a'), 0)
self.assertEqual(self.db.mirrored(u'\u2201'), 1) self.assertEqual(self.db.mirrored(u'\u2201'), 1)
self.assertEqual(self.db.mirrored(u'\U00020000'), 0)
self.assertRaises(TypeError, self.db.mirrored) self.assertRaises(TypeError, self.db.mirrored)
self.assertRaises(TypeError, self.db.mirrored, u'xx') self.assertRaises(TypeError, self.db.mirrored, u'xx')
...@@ -163,6 +169,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): ...@@ -163,6 +169,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertEqual(self.db.combining(u'\uFFFE'), 0) self.assertEqual(self.db.combining(u'\uFFFE'), 0)
self.assertEqual(self.db.combining(u'a'), 0) self.assertEqual(self.db.combining(u'a'), 0)
self.assertEqual(self.db.combining(u'\u20e1'), 230) self.assertEqual(self.db.combining(u'\u20e1'), 230)
self.assertEqual(self.db.combining(u'\U00020000'), 0)
self.assertRaises(TypeError, self.db.combining) self.assertRaises(TypeError, self.db.combining)
self.assertRaises(TypeError, self.db.combining, u'xx') self.assertRaises(TypeError, self.db.combining, u'xx')
...@@ -185,6 +192,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest): ...@@ -185,6 +192,7 @@ class UnicodeFunctionsTest(UnicodeDatabaseTest):
self.assertEqual(eaw(u'\uFF66'), 'H') self.assertEqual(eaw(u'\uFF66'), 'H')
self.assertEqual(eaw(u'\uFF1F'), 'F') self.assertEqual(eaw(u'\uFF1F'), 'F')
self.assertEqual(eaw(u'\u2010'), 'A') self.assertEqual(eaw(u'\u2010'), 'A')
self.assertEqual(eaw(u'\U00020000'), 'W')
class UnicodeMiscTest(UnicodeDatabaseTest): class UnicodeMiscTest(UnicodeDatabaseTest):
......
...@@ -54,12 +54,6 @@ _getrecord_ex(Py_UCS4 code) ...@@ -54,12 +54,6 @@ _getrecord_ex(Py_UCS4 code)
return &_PyUnicode_Database_Records[index]; return &_PyUnicode_Database_Records[index];
} }
/* Return the database record for the first code unit stored in v.
   Only that first Py_UNICODE element is read; the length of v is not
   checked here, and any further code units are ignored. */
static const _PyUnicode_DatabaseRecord*
_getrecord(PyUnicodeObject* v)
{
    Py_UNICODE first = PyUnicode_AS_UNICODE(v)[0];

    return _getrecord_ex(first);
}
/* ------------- Previous-version API ------------------------------------- */ /* ------------- Previous-version API ------------------------------------- */
typedef struct previous_version { typedef struct previous_version {
PyObject_HEAD PyObject_HEAD
...@@ -92,6 +86,24 @@ new_previous_version(const char*name, const change_record* (*getrecord)(Py_UCS4) ...@@ -92,6 +86,24 @@ new_previous_version(const char*name, const change_record* (*getrecord)(Py_UCS4)
return (PyObject*)self; return (PyObject*)self;
} }
/* Extract the single character held by a unicode object.

   A string of length 1 yields its only code unit. When Py_UNICODE_WIDE
   is not defined, a string of length 2 made of a high surrogate
   (0xD800-0xDBFF) followed by a low surrogate (0xDC00-0xDFFF) is also
   accepted and combined into one code point at or above 0x10000.

   For any other input a TypeError is set and (Py_UCS4)-1 is returned,
   so callers must compare the result against (Py_UCS4)-1. */
static Py_UCS4 getuchar(PyUnicodeObject *obj)
{
    Py_UNICODE *units = PyUnicode_AS_UNICODE(obj);

    if (PyUnicode_GET_SIZE(obj) == 1)
        return units[0];

#ifndef Py_UNICODE_WIDE
    if (PyUnicode_GET_SIZE(obj) == 2) {
        Py_UNICODE high = units[0];
        Py_UNICODE low = units[1];

        if (high >= 0xD800 && high <= 0xDBFF &&
            low >= 0xDC00 && low <= 0xDFFF) {
            /* Each surrogate carries 10 payload bits; the pair encodes
               the code point's offset from 0x10000. */
            return (((high & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000;
        }
    }
#endif

    PyErr_SetString(PyExc_TypeError,
                    "need a single Unicode character as parameter");
    return (Py_UCS4)-1;
}
/* --- Module API --------------------------------------------------------- */ /* --- Module API --------------------------------------------------------- */
PyDoc_STRVAR(unicodedata_decimal__doc__, PyDoc_STRVAR(unicodedata_decimal__doc__,
...@@ -108,17 +120,16 @@ unicodedata_decimal(PyObject *self, PyObject *args) ...@@ -108,17 +120,16 @@ unicodedata_decimal(PyObject *self, PyObject *args)
PyObject *defobj = NULL; PyObject *defobj = NULL;
int have_old = 0; int have_old = 0;
long rc; long rc;
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!|O:decimal", &PyUnicode_Type, &v, &defobj)) if (!PyArg_ParseTuple(args, "O!|O:decimal", &PyUnicode_Type, &v, &defobj))
return NULL; return NULL;
if (PyUnicode_GET_SIZE(v) != 1) { c = getuchar(v);
PyErr_SetString(PyExc_TypeError, if (c == (Py_UCS4)-1)
"need a single Unicode character as parameter");
return NULL; return NULL;
}
if (self) { if (self) {
const change_record *old = get_old_record(self, *PyUnicode_AS_UNICODE(v)); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) { if (old->category_changed == 0) {
/* unassigned */ /* unassigned */
have_old = 1; have_old = 1;
...@@ -131,7 +142,7 @@ unicodedata_decimal(PyObject *self, PyObject *args) ...@@ -131,7 +142,7 @@ unicodedata_decimal(PyObject *self, PyObject *args)
} }
if (!have_old) if (!have_old)
rc = Py_UNICODE_TODECIMAL(*PyUnicode_AS_UNICODE(v)); rc = Py_UNICODE_TODECIMAL(c);
if (rc < 0) { if (rc < 0) {
if (defobj == NULL) { if (defobj == NULL) {
PyErr_SetString(PyExc_ValueError, PyErr_SetString(PyExc_ValueError,
...@@ -159,15 +170,14 @@ unicodedata_digit(PyObject *self, PyObject *args) ...@@ -159,15 +170,14 @@ unicodedata_digit(PyObject *self, PyObject *args)
PyUnicodeObject *v; PyUnicodeObject *v;
PyObject *defobj = NULL; PyObject *defobj = NULL;
long rc; long rc;
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!|O:digit", &PyUnicode_Type, &v, &defobj)) if (!PyArg_ParseTuple(args, "O!|O:digit", &PyUnicode_Type, &v, &defobj))
return NULL; return NULL;
if (PyUnicode_GET_SIZE(v) != 1) { c = getuchar(v);
PyErr_SetString(PyExc_TypeError, if (c == (Py_UCS4)-1)
"need a single Unicode character as parameter");
return NULL; return NULL;
} rc = Py_UNICODE_TODIGIT(c);
rc = Py_UNICODE_TODIGIT(*PyUnicode_AS_UNICODE(v));
if (rc < 0) { if (rc < 0) {
if (defobj == NULL) { if (defobj == NULL) {
PyErr_SetString(PyExc_ValueError, "not a digit"); PyErr_SetString(PyExc_ValueError, "not a digit");
...@@ -195,17 +205,16 @@ unicodedata_numeric(PyObject *self, PyObject *args) ...@@ -195,17 +205,16 @@ unicodedata_numeric(PyObject *self, PyObject *args)
PyObject *defobj = NULL; PyObject *defobj = NULL;
int have_old = 0; int have_old = 0;
double rc; double rc;
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!|O:numeric", &PyUnicode_Type, &v, &defobj)) if (!PyArg_ParseTuple(args, "O!|O:numeric", &PyUnicode_Type, &v, &defobj))
return NULL; return NULL;
if (PyUnicode_GET_SIZE(v) != 1) { c = getuchar(v);
PyErr_SetString(PyExc_TypeError, if (c == (Py_UCS4)-1)
"need a single Unicode character as parameter"); return NULL;
return NULL;
}
if (self) { if (self) {
const change_record *old = get_old_record(self, *PyUnicode_AS_UNICODE(v)); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) { if (old->category_changed == 0) {
/* unassigned */ /* unassigned */
have_old = 1; have_old = 1;
...@@ -218,7 +227,7 @@ unicodedata_numeric(PyObject *self, PyObject *args) ...@@ -218,7 +227,7 @@ unicodedata_numeric(PyObject *self, PyObject *args)
} }
if (!have_old) if (!have_old)
rc = Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v)); rc = Py_UNICODE_TONUMERIC(c);
if (rc == -1.0) { if (rc == -1.0) {
if (defobj == NULL) { if (defobj == NULL) {
PyErr_SetString(PyExc_ValueError, "not a numeric character"); PyErr_SetString(PyExc_ValueError, "not a numeric character");
...@@ -243,18 +252,17 @@ unicodedata_category(PyObject *self, PyObject *args) ...@@ -243,18 +252,17 @@ unicodedata_category(PyObject *self, PyObject *args)
{ {
PyUnicodeObject *v; PyUnicodeObject *v;
int index; int index;
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:category", if (!PyArg_ParseTuple(args, "O!:category",
&PyUnicode_Type, &v)) &PyUnicode_Type, &v))
return NULL; return NULL;
if (PyUnicode_GET_SIZE(v) != 1) { c = getuchar(v);
PyErr_SetString(PyExc_TypeError, if (c == (Py_UCS4)-1)
"need a single Unicode character as parameter"); return NULL;
return NULL; index = (int) _getrecord_ex(c)->category;
}
index = (int) _getrecord(v)->category;
if (self) { if (self) {
const change_record *old = get_old_record(self, *PyUnicode_AS_UNICODE(v)); const change_record *old = get_old_record(self, c);
if (old->category_changed != 0xFF) if (old->category_changed != 0xFF)
index = old->category_changed; index = old->category_changed;
} }
...@@ -273,18 +281,17 @@ unicodedata_bidirectional(PyObject *self, PyObject *args) ...@@ -273,18 +281,17 @@ unicodedata_bidirectional(PyObject *self, PyObject *args)
{ {
PyUnicodeObject *v; PyUnicodeObject *v;
int index; int index;
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:bidirectional", if (!PyArg_ParseTuple(args, "O!:bidirectional",
&PyUnicode_Type, &v)) &PyUnicode_Type, &v))
return NULL; return NULL;
if (PyUnicode_GET_SIZE(v) != 1) { c = getuchar(v);
PyErr_SetString(PyExc_TypeError, if (c == (Py_UCS4)-1)
"need a single Unicode character as parameter"); return NULL;
return NULL; index = (int) _getrecord_ex(c)->bidirectional;
}
index = (int) _getrecord(v)->bidirectional;
if (self) { if (self) {
const change_record *old = get_old_record(self, *PyUnicode_AS_UNICODE(v)); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) if (old->category_changed == 0)
index = 0; /* unassigned */ index = 0; /* unassigned */
else if (old->bidir_changed != 0xFF) else if (old->bidir_changed != 0xFF)
...@@ -305,18 +312,17 @@ unicodedata_combining(PyObject *self, PyObject *args) ...@@ -305,18 +312,17 @@ unicodedata_combining(PyObject *self, PyObject *args)
{ {
PyUnicodeObject *v; PyUnicodeObject *v;
int index; int index;
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:combining", if (!PyArg_ParseTuple(args, "O!:combining",
&PyUnicode_Type, &v)) &PyUnicode_Type, &v))
return NULL; return NULL;
if (PyUnicode_GET_SIZE(v) != 1) { c = getuchar(v);
PyErr_SetString(PyExc_TypeError, if (c == (Py_UCS4)-1)
"need a single Unicode character as parameter"); return NULL;
return NULL; index = (int) _getrecord_ex(c)->combining;
}
index = (int) _getrecord(v)->combining;
if (self) { if (self) {
const change_record *old = get_old_record(self, *PyUnicode_AS_UNICODE(v)); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) if (old->category_changed == 0)
index = 0; /* unassigned */ index = 0; /* unassigned */
} }
...@@ -335,18 +341,17 @@ unicodedata_mirrored(PyObject *self, PyObject *args) ...@@ -335,18 +341,17 @@ unicodedata_mirrored(PyObject *self, PyObject *args)
{ {
PyUnicodeObject *v; PyUnicodeObject *v;
int index; int index;
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:mirrored", if (!PyArg_ParseTuple(args, "O!:mirrored",
&PyUnicode_Type, &v)) &PyUnicode_Type, &v))
return NULL; return NULL;
if (PyUnicode_GET_SIZE(v) != 1) { c = getuchar(v);
PyErr_SetString(PyExc_TypeError, if (c == (Py_UCS4)-1)
"need a single Unicode character as parameter"); return NULL;
return NULL; index = (int) _getrecord_ex(c)->mirrored;
}
index = (int) _getrecord(v)->mirrored;
if (self) { if (self) {
const change_record *old = get_old_record(self, *PyUnicode_AS_UNICODE(v)); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) if (old->category_changed == 0)
index = 0; /* unassigned */ index = 0; /* unassigned */
} }
...@@ -364,18 +369,17 @@ unicodedata_east_asian_width(PyObject *self, PyObject *args) ...@@ -364,18 +369,17 @@ unicodedata_east_asian_width(PyObject *self, PyObject *args)
{ {
PyUnicodeObject *v; PyUnicodeObject *v;
int index; int index;
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:east_asian_width", if (!PyArg_ParseTuple(args, "O!:east_asian_width",
&PyUnicode_Type, &v)) &PyUnicode_Type, &v))
return NULL; return NULL;
if (PyUnicode_GET_SIZE(v) != 1) { c = getuchar(v);
PyErr_SetString(PyExc_TypeError, if (c == (Py_UCS4)-1)
"need a single Unicode character as parameter"); return NULL;
return NULL; index = (int) _getrecord_ex(c)->east_asian_width;
}
index = (int) _getrecord(v)->east_asian_width;
if (self) { if (self) {
const change_record *old = get_old_record(self, *PyUnicode_AS_UNICODE(v)); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) if (old->category_changed == 0)
index = 0; /* unassigned */ index = 0; /* unassigned */
} }
...@@ -396,20 +400,19 @@ unicodedata_decomposition(PyObject *self, PyObject *args) ...@@ -396,20 +400,19 @@ unicodedata_decomposition(PyObject *self, PyObject *args)
char decomp[256]; char decomp[256];
int code, index, count, i; int code, index, count, i;
unsigned int prefix_index; unsigned int prefix_index;
Py_UCS4 c;
if (!PyArg_ParseTuple(args, "O!:decomposition", if (!PyArg_ParseTuple(args, "O!:decomposition",
&PyUnicode_Type, &v)) &PyUnicode_Type, &v))
return NULL; return NULL;
if (PyUnicode_GET_SIZE(v) != 1) { c = getuchar(v);
PyErr_SetString(PyExc_TypeError, if (c == (Py_UCS4)-1)
"need a single Unicode character as parameter"); return NULL;
return NULL;
}
code = (int) *PyUnicode_AS_UNICODE(v); code = (int)c;
if (self) { if (self) {
const change_record *old = get_old_record(self, *PyUnicode_AS_UNICODE(v)); const change_record *old = get_old_record(self, c);
if (old->category_changed == 0) if (old->category_changed == 0)
return PyBytes_FromString(""); /* unassigned */ return PyBytes_FromString(""); /* unassigned */
} }
...@@ -1039,20 +1042,18 @@ static PyObject * ...@@ -1039,20 +1042,18 @@ static PyObject *
unicodedata_name(PyObject* self, PyObject* args) unicodedata_name(PyObject* self, PyObject* args)
{ {
char name[NAME_MAXLEN]; char name[NAME_MAXLEN];
Py_UCS4 c;
PyUnicodeObject* v; PyUnicodeObject* v;
PyObject* defobj = NULL; PyObject* defobj = NULL;
if (!PyArg_ParseTuple(args, "O!|O:name", &PyUnicode_Type, &v, &defobj)) if (!PyArg_ParseTuple(args, "O!|O:name", &PyUnicode_Type, &v, &defobj))
return NULL; return NULL;
if (PyUnicode_GET_SIZE(v) != 1) { c = getuchar(v);
PyErr_SetString(PyExc_TypeError, if (c == (Py_UCS4)-1)
"need a single Unicode character as parameter"); return NULL;
return NULL;
}
if (!_getucname(self, (Py_UCS4) *PyUnicode_AS_UNICODE(v), if (!_getucname(self, c, name, sizeof(name))) {
name, sizeof(name))) {
if (defobj == NULL) { if (defobj == NULL) {
PyErr_SetString(PyExc_ValueError, "no such name"); PyErr_SetString(PyExc_ValueError, "no such name");
return NULL; return NULL;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment