Commit 047c05eb, authored by Martin v. Löwis

Do not insert characters for unicode-escape decoders if the error mode is "ignore". Fixes #529104.
Parent: bdf1f19f
...@@ -541,6 +541,14 @@ else: ...@@ -541,6 +541,14 @@ else:
verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x") verify(unicode('Andr\202 x','ascii','ignore') == u"Andr x")
verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x') verify(unicode('Andr\202 x','ascii','replace') == u'Andr\uFFFD x')
verify("\\N{foo}xx".decode("unicode-escape", "ignore") == u"xx")
try:
"\\".decode("unicode-escape")
except ValueError:
pass
else:
raise TestFailed, '"\\".decode("unicode-escape") should fail'
verify(u'hello'.encode('ascii') == 'hello') verify(u'hello'.encode('ascii') == 'hello')
verify(u'hello'.encode('utf-7') == 'hello') verify(u'hello'.encode('utf-7') == 'hello')
verify(u'hello'.encode('utf-8') == 'hello') verify(u'hello'.encode('utf-8') == 'hello')
......
...@@ -1514,8 +1514,7 @@ PyObject *PyUnicode_AsUTF16String(PyObject *unicode) ...@@ -1514,8 +1514,7 @@ PyObject *PyUnicode_AsUTF16String(PyObject *unicode)
/* --- Unicode Escape Codec ----------------------------------------------- */ /* --- Unicode Escape Codec ----------------------------------------------- */
static static
int unicodeescape_decoding_error(const char **source, int unicodeescape_decoding_error(Py_UNICODE **x,
Py_UNICODE *x,
const char *errors, const char *errors,
const char *details) const char *details)
{ {
...@@ -1530,7 +1529,8 @@ int unicodeescape_decoding_error(const char **source, ...@@ -1530,7 +1529,8 @@ int unicodeescape_decoding_error(const char **source,
return 0; return 0;
} }
else if (strcmp(errors,"replace") == 0) { else if (strcmp(errors,"replace") == 0) {
*x = Py_UNICODE_REPLACEMENT_CHARACTER; **x = Py_UNICODE_REPLACEMENT_CHARACTER;
(*x)++;
return 0; return 0;
} }
else { else {
...@@ -1628,9 +1628,9 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, ...@@ -1628,9 +1628,9 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
for (i = 0; i < digits; i++) { for (i = 0; i < digits; i++) {
c = (unsigned char) s[i]; c = (unsigned char) s[i];
if (!isxdigit(c)) { if (!isxdigit(c)) {
if (unicodeescape_decoding_error(&s, &x, errors, message)) if (unicodeescape_decoding_error(&p, errors, message))
goto onError; goto onError;
chr = x; chr = 0xffffffff;
i++; i++;
break; break;
} }
...@@ -1643,6 +1643,10 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, ...@@ -1643,6 +1643,10 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
chr += 10 + c - 'A'; chr += 10 + c - 'A';
} }
s += i; s += i;
if (chr == 0xffffffff)
/* _decoding_error will have already written into the
target buffer. */
break;
store: store:
/* when we get here, chr is a 32-bit unicode character */ /* when we get here, chr is a 32-bit unicode character */
if (chr <= 0xffff) if (chr <= 0xffff)
...@@ -1660,11 +1664,10 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, ...@@ -1660,11 +1664,10 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
#endif #endif
} else { } else {
if (unicodeescape_decoding_error( if (unicodeescape_decoding_error(
&s, &x, errors, &p, errors,
"illegal Unicode character") "illegal Unicode character")
) )
goto onError; goto onError;
*p++ = x; /* store replacement character */
} }
break; break;
...@@ -1699,14 +1702,19 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s, ...@@ -1699,14 +1702,19 @@ PyObject *PyUnicode_DecodeUnicodeEscape(const char *s,
goto store; goto store;
} }
} }
if (unicodeescape_decoding_error(&s, &x, errors, message)) if (unicodeescape_decoding_error(&p, errors, message))
goto onError; goto onError;
*p++ = x;
break; break;
default: default:
*p++ = '\\'; if (s > end) {
*p++ = (unsigned char)s[-1]; if (unicodeescape_decoding_error(&p, errors, "\\ at end of string"))
goto onError;
}
else {
*p++ = '\\';
*p++ = (unsigned char)s[-1];
}
break; break;
} }
} }
...@@ -1909,7 +1917,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, ...@@ -1909,7 +1917,7 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
end = s + size; end = s + size;
while (s < end) { while (s < end) {
unsigned char c; unsigned char c;
Py_UNICODE x; Py_UCS4 x;
int i; int i;
/* Non-escape characters are interpreted as Unicode ordinals */ /* Non-escape characters are interpreted as Unicode ordinals */
...@@ -1938,9 +1946,10 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, ...@@ -1938,9 +1946,10 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
for (x = 0, i = 0; i < 4; i++) { for (x = 0, i = 0; i < 4; i++) {
c = (unsigned char)s[i]; c = (unsigned char)s[i];
if (!isxdigit(c)) { if (!isxdigit(c)) {
if (unicodeescape_decoding_error(&s, &x, errors, if (unicodeescape_decoding_error(&p, errors,
"truncated \\uXXXX")) "truncated \\uXXXX"))
goto onError; goto onError;
x = 0xffffffff;
i++; i++;
break; break;
} }
...@@ -1953,7 +1962,8 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s, ...@@ -1953,7 +1962,8 @@ PyObject *PyUnicode_DecodeRawUnicodeEscape(const char *s,
x += 10 + c - 'A'; x += 10 + c - 'A';
} }
s += i; s += i;
*p++ = x; if (x != 0xffffffff)
*p++ = x;
} }
if (_PyUnicode_Resize(&v, (int)(p - buf))) if (_PyUnicode_Resize(&v, (int)(p - buf)))
goto onError; goto onError;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment