Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
3d325191
Kaydet (Commit)
3d325191
authored
Kas 04, 2011
tarafından
Martin v. Löwis
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Port code page codec to Unicode API.
üst
8ba79306
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
73 additions
and
62 deletions
+73
-62
test_codeccallbacks.py
Lib/test/test_codeccallbacks.py
+4
-8
unicodeobject.c
Objects/unicodeobject.c
+69
-54
No files found.
Lib/test/test_codeccallbacks.py
Dosyayı görüntüle @
3d325191
...
@@ -577,22 +577,18 @@ class CodecCallbackTest(unittest.TestCase):
...
@@ -577,22 +577,18 @@ class CodecCallbackTest(unittest.TestCase):
UnicodeEncodeError
(
"ascii"
,
"
\uffff
"
,
0
,
1
,
"ouch"
)),
UnicodeEncodeError
(
"ascii"
,
"
\uffff
"
,
0
,
1
,
"ouch"
)),
(
"
\\
uffff"
,
1
)
(
"
\\
uffff"
,
1
)
)
)
if
SIZEOF_WCHAR_T
==
2
:
len_wide
=
2
else
:
len_wide
=
1
if
SIZEOF_WCHAR_T
>
0
:
if
SIZEOF_WCHAR_T
>
0
:
self
.
assertEqual
(
self
.
assertEqual
(
codecs
.
backslashreplace_errors
(
codecs
.
backslashreplace_errors
(
UnicodeEncodeError
(
"ascii"
,
"
\U00010000
"
,
UnicodeEncodeError
(
"ascii"
,
"
\U00010000
"
,
0
,
len_wide
,
"ouch"
)),
0
,
1
,
"ouch"
)),
(
"
\\
U00010000"
,
len_wide
)
(
"
\\
U00010000"
,
1
)
)
)
self
.
assertEqual
(
self
.
assertEqual
(
codecs
.
backslashreplace_errors
(
codecs
.
backslashreplace_errors
(
UnicodeEncodeError
(
"ascii"
,
"
\U0010ffff
"
,
UnicodeEncodeError
(
"ascii"
,
"
\U0010ffff
"
,
0
,
len_wide
,
"ouch"
)),
0
,
1
,
"ouch"
)),
(
"
\\
U0010ffff"
,
len_wide
)
(
"
\\
U0010ffff"
,
1
)
)
)
# Lone surrogates (regardless of unicode width)
# Lone surrogates (regardless of unicode width)
self
.
assertEqual
(
self
.
assertEqual
(
...
...
Objects/unicodeobject.c
Dosyayı görüntüle @
3d325191
...
@@ -4680,9 +4680,6 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
...
@@ -4680,9 +4680,6 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
int kind;
int kind;
void *data;
void *data;
Py_ssize_t size;
Py_ssize_t size;
#if SIZEOF_WCHAR_T == 2
Py_ssize_t wchar_offset = 0;
#endif
if (!PyUnicode_Check(unicode)) {
if (!PyUnicode_Check(unicode)) {
PyErr_BadArgument();
PyErr_BadArgument();
...
@@ -4738,9 +4735,6 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
...
@@ -4738,9 +4735,6 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
PyObject *rep;
PyObject *rep;
Py_ssize_t repsize, k, startpos;
Py_ssize_t repsize, k, startpos;
startpos = i-1;
startpos = i-1;
#if SIZEOF_WCHAR_T == 2
startpos += wchar_offset;
#endif
rep = unicode_encode_call_errorhandler(
rep = unicode_encode_call_errorhandler(
errors, &errorHandler, "utf-8", "surrogates not allowed",
errors, &errorHandler, "utf-8", "surrogates not allowed",
unicode, &exc, startpos, startpos+1, &newpos);
unicode, &exc, startpos, startpos+1, &newpos);
...
@@ -4809,9 +4803,6 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
...
@@ -4809,9 +4803,6 @@ _PyUnicode_AsUTF8String(PyObject *unicode, const char *errors)
*p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
*p++ = (char)(0x80 | ((ch >> 12) & 0x3f));
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*p++ = (char)(0x80 | ((ch >> 6) & 0x3f));
*p++ = (char)(0x80 | (ch & 0x3f));
*p++ = (char)(0x80 | (ch & 0x3f));
#if SIZEOF_WCHAR_T == 2
wchar_offset++;
#endif
}
}
}
}
...
@@ -7315,23 +7306,37 @@ encode_code_page_flags(UINT code_page, const char *errors)
...
@@ -7315,23 +7306,37 @@ encode_code_page_flags(UINT code_page, const char *errors)
*/
*/
static int
static int
encode_code_page_strict(UINT code_page, PyObject **outbytes,
encode_code_page_strict(UINT code_page, PyObject **outbytes,
const Py_UNICODE *p, const int size
,
PyObject *unicode, Py_ssize_t offset, int len
,
const char* errors)
const char* errors)
{
{
BOOL usedDefaultChar = FALSE;
BOOL usedDefaultChar = FALSE;
BOOL *pusedDefaultChar = &usedDefaultChar;
BOOL *pusedDefaultChar = &usedDefaultChar;
int outsize;
int outsize;
PyObject *exc = NULL;
PyObject *exc = NULL;
Py_UNICODE *p;
Py_ssize_t size;
const DWORD flags = encode_code_page_flags(code_page, NULL);
const DWORD flags = encode_code_page_flags(code_page, NULL);
char *out;
char *out;
/* Create a substring so that we can get the UTF-16 representation
of just the slice under consideration. */
PyObject *substring;
assert(
size
> 0);
assert(
len
> 0);
if (code_page != CP_UTF8 && code_page != CP_UTF7)
if (code_page != CP_UTF8 && code_page != CP_UTF7)
pusedDefaultChar = &usedDefaultChar;
pusedDefaultChar = &usedDefaultChar;
else
else
pusedDefaultChar = NULL;
pusedDefaultChar = NULL;
substring = PyUnicode_Substring(unicode, offset, offset+len);
if (substring == NULL)
return -1;
p = PyUnicode_AsUnicodeAndSize(substring, &size);
if (p == NULL) {
Py_DECREF(substring);
return -1;
}
/* First get the size of the result */
/* First get the size of the result */
outsize = WideCharToMultiByte(code_page, flags,
outsize = WideCharToMultiByte(code_page, flags,
p, size,
p, size,
...
@@ -7340,14 +7345,18 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
...
@@ -7340,14 +7345,18 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
if (outsize <= 0)
if (outsize <= 0)
goto error;
goto error;
/* If we used a default char, then we failed! */
/* If we used a default char, then we failed! */
if (pusedDefaultChar && *pusedDefaultChar)
if (pusedDefaultChar && *pusedDefaultChar) {
Py_DECREF(substring);
return -2;
return -2;
}
if (*outbytes == NULL) {
if (*outbytes == NULL) {
/* Create string object */
/* Create string object */
*outbytes = PyBytes_FromStringAndSize(NULL, outsize);
*outbytes = PyBytes_FromStringAndSize(NULL, outsize);
if (*outbytes == NULL)
if (*outbytes == NULL) {
Py_DECREF(substring);
return -1;
return -1;
}
out = PyBytes_AS_STRING(*outbytes);
out = PyBytes_AS_STRING(*outbytes);
}
}
else {
else {
...
@@ -7355,10 +7364,13 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
...
@@ -7355,10 +7364,13 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
const Py_ssize_t n = PyBytes_Size(*outbytes);
const Py_ssize_t n = PyBytes_Size(*outbytes);
if (outsize > PY_SSIZE_T_MAX - n) {
if (outsize > PY_SSIZE_T_MAX - n) {
PyErr_NoMemory();
PyErr_NoMemory();
Py_DECREF(substring);
return -1;
return -1;
}
}
if (_PyBytes_Resize(outbytes, n + outsize) < 0)
if (_PyBytes_Resize(outbytes, n + outsize) < 0) {
Py_DECREF(substring);
return -1;
return -1;
}
out = PyBytes_AS_STRING(*outbytes) + n;
out = PyBytes_AS_STRING(*outbytes) + n;
}
}
...
@@ -7367,6 +7379,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
...
@@ -7367,6 +7379,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
p, size,
p, size,
out, outsize,
out, outsize,
NULL, pusedDefaultChar);
NULL, pusedDefaultChar);
Py_CLEAR(substring);
if (outsize <= 0)
if (outsize <= 0)
goto error;
goto error;
if (pusedDefaultChar && *pusedDefaultChar)
if (pusedDefaultChar && *pusedDefaultChar)
...
@@ -7374,6 +7387,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
...
@@ -7374,6 +7387,7 @@ encode_code_page_strict(UINT code_page, PyObject **outbytes,
return 0;
return 0;
error:
error:
Py_XDECREF(substring);
if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
if (GetLastError() == ERROR_NO_UNICODE_TRANSLATION)
return -2;
return -2;
PyErr_SetFromWindowsErr(0);
PyErr_SetFromWindowsErr(0);
...
@@ -7390,12 +7404,11 @@ error:
...
@@ -7390,12 +7404,11 @@ error:
static int
static int
encode_code_page_errors(UINT code_page, PyObject **outbytes,
encode_code_page_errors(UINT code_page, PyObject **outbytes,
PyObject *unicode, Py_ssize_t unicode_offset,
PyObject *unicode, Py_ssize_t unicode_offset,
const Py_UNICODE *in, const int insize,
Py_ssize_t insize, const char* errors)
const char* errors)
{
{
const DWORD flags = encode_code_page_flags(code_page, errors);
const DWORD flags = encode_code_page_flags(code_page, errors);
const Py_UNICODE *startin = in
;
Py_ssize_t pos = unicode_offset
;
const Py_UNICODE *endin = in
+ insize;
Py_ssize_t endin = unicode_offset
+ insize;
/* Ideally, we should get reason from FormatMessage. This is the Windows
/* Ideally, we should get reason from FormatMessage. This is the Windows
2000 English version of the message. */
2000 English version of the message. */
const char *reason = "invalid character";
const char *reason = "invalid character";
...
@@ -7404,12 +7417,11 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
...
@@ -7404,12 +7417,11 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
BOOL usedDefaultChar = FALSE, *pusedDefaultChar;
BOOL usedDefaultChar = FALSE, *pusedDefaultChar;
Py_ssize_t outsize;
Py_ssize_t outsize;
char *out;
char *out;
int charsize;
PyObject *errorHandler = NULL;
PyObject *errorHandler = NULL;
PyObject *exc = NULL;
PyObject *exc = NULL;
PyObject *encoding_obj = NULL;
PyObject *encoding_obj = NULL;
char *encoding;
char *encoding;
Py_ssize_t
startpos,
newpos, newoutsize;
Py_ssize_t newpos, newoutsize;
PyObject *rep;
PyObject *rep;
int ret = -1;
int ret = -1;
...
@@ -7422,7 +7434,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
...
@@ -7422,7 +7434,7 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
if (errors == NULL || strcmp(errors, "strict") == 0) {
if (errors == NULL || strcmp(errors, "strict") == 0) {
/* The last error was ERROR_NO_UNICODE_TRANSLATION,
/* The last error was ERROR_NO_UNICODE_TRANSLATION,
then we raise a UnicodeEncodeError. */
then we raise a UnicodeEncodeError. */
make_encode_exception
(&exc, encoding, in, insiz
e, 0, 0, reason);
make_encode_exception
_obj(&exc, encoding, unicod
e, 0, 0, reason);
if (exc != NULL) {
if (exc != NULL) {
PyCodec_StrictErrors(exc);
PyCodec_StrictErrors(exc);
Py_DECREF(exc);
Py_DECREF(exc);
...
@@ -7462,23 +7474,30 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
...
@@ -7462,23 +7474,30 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
}
}
/* Encode the string character per character */
/* Encode the string character per character */
while (
in
< endin)
while (
pos
< endin)
{
{
if ((in + 2) <= endin
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, pos);
&& 0xD800 <= in[0] && in[0] <= 0xDBFF
wchar_t chars[2];
&& 0xDC00 <= in[1] && in[1] <= 0xDFFF)
int charsize;
charsize = 2;
if (ch < 0x10000) {
else
chars[0] = (wchar_t)ch;
charsize = 1;
charsize = 1;
}
else {
ch -= 0x10000;
chars[0] = 0xd800 + (ch >> 10);
chars[1] = 0xdc00 + (ch & 0x3ff);
charsize = 2;
}
outsize = WideCharToMultiByte(code_page, flags,
outsize = WideCharToMultiByte(code_page, flags,
in
, charsize,
chars
, charsize,
buffer, Py_ARRAY_LENGTH(buffer),
buffer, Py_ARRAY_LENGTH(buffer),
NULL, pusedDefaultChar);
NULL, pusedDefaultChar);
if (outsize > 0) {
if (outsize > 0) {
if (pusedDefaultChar == NULL || !(*pusedDefaultChar))
if (pusedDefaultChar == NULL || !(*pusedDefaultChar))
{
{
in += charsize
;
pos++
;
memcpy(out, buffer, outsize);
memcpy(out, buffer, outsize);
out += outsize;
out += outsize;
continue;
continue;
...
@@ -7489,15 +7508,13 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
...
@@ -7489,15 +7508,13 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
goto error;
goto error;
}
}
charsize = Py_MAX(charsize - 1, 1);
startpos = unicode_offset + in - startin;
rep = unicode_encode_call_errorhandler(
rep = unicode_encode_call_errorhandler(
errors, &errorHandler, encoding, reason,
errors, &errorHandler, encoding, reason,
unicode, &exc,
unicode, &exc,
startpos, startpos + charsize
, &newpos);
pos, pos + 1
, &newpos);
if (rep == NULL)
if (rep == NULL)
goto error;
goto error;
in += (newpos - startpos)
;
pos = newpos
;
if (PyBytes_Check(rep)) {
if (PyBytes_Check(rep)) {
outsize = PyBytes_GET_SIZE(rep);
outsize = PyBytes_GET_SIZE(rep);
...
@@ -7538,10 +7555,9 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
...
@@ -7538,10 +7555,9 @@ encode_code_page_errors(UINT code_page, PyObject **outbytes,
for (i=0; i < outsize; i++) {
for (i=0; i < outsize; i++) {
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
Py_UCS4 ch = PyUnicode_READ(kind, data, i);
if (ch > 127) {
if (ch > 127) {
raise_encode_exception(&exc,
raise_encode_exception_obj(&exc,
encoding,
encoding, unicode,
startin, insize,
pos, pos + 1,
startpos, startpos + charsize,
"unable to encode error handler result to ASCII");
"unable to encode error handler result to ASCII");
Py_DECREF(rep);
Py_DECREF(rep);
goto error;
goto error;
...
@@ -7572,55 +7588,54 @@ encode_code_page(int code_page,
...
@@ -7572,55 +7588,54 @@ encode_code_page(int code_page,
PyObject *unicode,
PyObject *unicode,
const char *errors)
const char *errors)
{
{
const Py_UNICODE *p;
Py_ssize_t len;
Py_ssize_t size;
PyObject *outbytes = NULL;
PyObject *outbytes = NULL;
Py_ssize_t offset;
Py_ssize_t offset;
int chunk_len, ret, done;
int chunk_len, ret, done;
p = PyUnicode_AsUnicodeAndSize(unicode, &size);
if (PyUnicode_READY(unicode) < 0)
if (p == NULL)
return NULL;
return NULL
;
len = PyUnicode_GET_LENGTH(unicode)
;
if (code_page < 0) {
if (code_page < 0) {
PyErr_SetString(PyExc_ValueError, "invalid code page number");
PyErr_SetString(PyExc_ValueError, "invalid code page number");
return NULL;
return NULL;
}
}
if (
size
== 0)
if (
len
== 0)
return PyBytes_FromStringAndSize(NULL, 0);
return PyBytes_FromStringAndSize(NULL, 0);
offset = 0;
offset = 0;
do
do
{
{
#ifdef NEED_RETRY
#ifdef NEED_RETRY
if (size > INT_MAX) {
/* UTF-16 encoding may double the size, so use only INT_MAX/2
chunk_len = INT_MAX;
chunks. */
if (len > INT_MAX/2) {
chunk_len = INT_MAX/2;
done = 0;
done = 0;
}
}
else
else
#endif
#endif
{
{
chunk_len = (int)
size
;
chunk_len = (int)
len
;
done = 1;
done = 1;
}
}
ret = encode_code_page_strict(code_page, &outbytes,
ret = encode_code_page_strict(code_page, &outbytes,
p
, chunk_len,
unicode, offset
, chunk_len,
errors);
errors);
if (ret == -2)
if (ret == -2)
ret = encode_code_page_errors(code_page, &outbytes,
ret = encode_code_page_errors(code_page, &outbytes,
unicode, offset,
unicode, offset,
p, chunk_len,
chunk_len, errors);
errors);
if (ret < 0) {
if (ret < 0) {
Py_XDECREF(outbytes);
Py_XDECREF(outbytes);
return NULL;
return NULL;
}
}
p += chunk_len;
offset += chunk_len;
offset += chunk_len;
size
-= chunk_len;
len
-= chunk_len;
} while (!done);
} while (!done);
return outbytes;
return outbytes;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment