Commit f4fe46d7, authored by Hye-Shik Chang

Backport from trunk r52223:

Bug #1572832: fix a bug in ISO-2022 codecs which may cause segfault
when encoding non-BMP unicode characters.  (Submitted by Ray Chason)
Parent commit: a5fe3ef8
...@@ -208,6 +208,16 @@ class Test_ISO2022(unittest.TestCase): ...@@ -208,6 +208,16 @@ class Test_ISO2022(unittest.TestCase):
e = u'\u3406'.encode(encoding) e = u'\u3406'.encode(encoding)
self.failIf(filter(lambda x: x >= '\x80', e)) self.failIf(filter(lambda x: x >= '\x80', e))
def test_bug1572832(self):
# Regression test for bug #1572832: encoding non-BMP characters with an
# ISO-2022 codec must complete without crashing the interpreter.
if sys.maxunicode >= 0x10000:
# unichr() can return code points above U+FFFF directly.
myunichr = unichr
else:
# unichr() cannot go past U+FFFF here, so build the equivalent UTF-16
# surrogate pair instead. 0xD7C0 + (x >> 10) is the same value as
# 0xD800 + ((x - 0x10000) >> 10), i.e. the high surrogate; the low
# surrogate is 0xDC00 plus the low 10 bits of x.
myunichr = lambda x: unichr(0xD7C0+(x>>10)) + unichr(0xDC00+(x&0x3FF))
# Walk every code point from U+10000 through U+10FFFF.
for x in xrange(0x10000, 0x110000):
# Any ISO 2022 codec will cause the segfault, so testing one is enough.
# The result is discarded; the test passes if no call crashes or raises.
myunichr(x).encode('iso_2022_jp', 'ignore')
def test_main(): def test_main():
suite = unittest.TestSuite() suite = unittest.TestSuite()
suite.addTest(unittest.makeSuite(Test_MultibyteCodec)) suite.addTest(unittest.makeSuite(Test_MultibyteCodec))
......
...@@ -49,6 +49,9 @@ Core and builtins ...@@ -49,6 +49,9 @@ Core and builtins
Extension Modules Extension Modules
----------------- -----------------
- Bug #1572832: fix a bug in ISO-2022 codecs which may cause segfault
when encoding non-BMP unicode characters.
- Bug #1556784: allow format strings longer than 127 characters in - Bug #1556784: allow format strings longer than 127 characters in
datetime's strftime function. datetime's strftime function.
......
...@@ -592,9 +592,11 @@ ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length) ...@@ -592,9 +592,11 @@ ksx1001_encoder(const ucs4_t *data, Py_ssize_t *length)
{ {
DBCHAR coded; DBCHAR coded;
assert(*length == 1); assert(*length == 1);
TRYMAP_ENC(cp949, coded, *data) if (*data < 0x10000) {
if (!(coded & 0x8000)) TRYMAP_ENC(cp949, coded, *data)
return coded; if (!(coded & 0x8000))
return coded;
}
return MAP_UNMAPPABLE; return MAP_UNMAPPABLE;
} }
...@@ -628,11 +630,13 @@ jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length) ...@@ -628,11 +630,13 @@ jisx0208_encoder(const ucs4_t *data, Py_ssize_t *length)
{ {
DBCHAR coded; DBCHAR coded;
assert(*length == 1); assert(*length == 1);
if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */ if (*data < 0x10000) {
return 0x2140; if (*data == 0xff3c) /* F/W REVERSE SOLIDUS */
else TRYMAP_ENC(jisxcommon, coded, *data) { return 0x2140;
if (!(coded & 0x8000)) else TRYMAP_ENC(jisxcommon, coded, *data) {
return coded; if (!(coded & 0x8000))
return coded;
}
} }
return MAP_UNMAPPABLE; return MAP_UNMAPPABLE;
} }
...@@ -665,9 +669,11 @@ jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length) ...@@ -665,9 +669,11 @@ jisx0212_encoder(const ucs4_t *data, Py_ssize_t *length)
{ {
DBCHAR coded; DBCHAR coded;
assert(*length == 1); assert(*length == 1);
TRYMAP_ENC(jisxcommon, coded, *data) { if (*data < 0x10000) {
if (coded & 0x8000) TRYMAP_ENC(jisxcommon, coded, *data) {
return coded & 0x7fff; if (coded & 0x8000)
return coded & 0x7fff;
}
} }
return MAP_UNMAPPABLE; return MAP_UNMAPPABLE;
} }
...@@ -970,9 +976,11 @@ gb2312_encoder(const ucs4_t *data, Py_ssize_t *length) ...@@ -970,9 +976,11 @@ gb2312_encoder(const ucs4_t *data, Py_ssize_t *length)
{ {
DBCHAR coded; DBCHAR coded;
assert(*length == 1); assert(*length == 1);
TRYMAP_ENC(gbcommon, coded, *data) { if (*data < 0x10000) {
if (!(coded & 0x8000)) TRYMAP_ENC(gbcommon, coded, *data) {
return coded; if (!(coded & 0x8000))
return coded;
}
} }
return MAP_UNMAPPABLE; return MAP_UNMAPPABLE;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment