Commit d9491269 authored by Victor Stinner

Issue #17693: CJK encoders now use the new Unicode API (PEP 393)

parent 71557596
...@@ -42,16 +42,18 @@ ...@@ -42,16 +42,18 @@
ENCODER(gb2312) ENCODER(gb2312)
{ {
while (inleft > 0) { while (*inpos < inlen) {
Py_UCS4 c = IN1; Py_UCS4 c = INCHAR1;
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
WRITE1((unsigned char)c) WRITEBYTE1((unsigned char)c)
NEXT(1, 1) NEXT(1, 1);
continue; continue;
} }
UCS4INVALID(c)
if (c > 0xFFFF)
return 1;
REQUIRE_OUTBUF(2) REQUIRE_OUTBUF(2)
TRYMAP_ENC(gbcommon, code, c); TRYMAP_ENC(gbcommon, code, c);
...@@ -60,9 +62,9 @@ ENCODER(gb2312) ...@@ -60,9 +62,9 @@ ENCODER(gb2312)
if (code & 0x8000) /* MSB set: GBK */ if (code & 0x8000) /* MSB set: GBK */
return 1; return 1;
OUT1((code >> 8) | 0x80) OUTBYTE1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80) OUTBYTE2((code & 0xFF) | 0x80)
NEXT(1, 2) NEXT(1, 2);
} }
return 0; return 0;
...@@ -80,7 +82,7 @@ DECODER(gb2312) ...@@ -80,7 +82,7 @@ DECODER(gb2312)
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
TRYMAP_DEC(gb2312, writer, c ^ 0x80, IN2 ^ 0x80) { TRYMAP_DEC(gb2312, writer, c ^ 0x80, INBYTE2 ^ 0x80) {
NEXT_IN(2); NEXT_IN(2);
} }
else return 1; else return 1;
...@@ -96,28 +98,30 @@ DECODER(gb2312) ...@@ -96,28 +98,30 @@ DECODER(gb2312)
ENCODER(gbk) ENCODER(gbk)
{ {
while (inleft > 0) { while (*inpos < inlen) {
Py_UCS4 c = IN1; Py_UCS4 c = INCHAR1;
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
WRITE1((unsigned char)c) WRITEBYTE1((unsigned char)c)
NEXT(1, 1) NEXT(1, 1);
continue; continue;
} }
UCS4INVALID(c)
if (c > 0xFFFF)
return 1;
REQUIRE_OUTBUF(2) REQUIRE_OUTBUF(2)
GBK_ENCODE(c, code) GBK_ENCODE(c, code)
else return 1; else return 1;
OUT1((code >> 8) | 0x80) OUTBYTE1((code >> 8) | 0x80)
if (code & 0x8000) if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK */ OUTBYTE2((code & 0xFF)) /* MSB set: GBK */
else else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2) NEXT(1, 2);
} }
return 0; return 0;
...@@ -126,7 +130,7 @@ ENCODER(gbk) ...@@ -126,7 +130,7 @@ ENCODER(gbk)
DECODER(gbk) DECODER(gbk)
{ {
while (inleft > 0) { while (inleft > 0) {
unsigned char c = IN1; unsigned char c = INBYTE1;
if (c < 0x80) { if (c < 0x80) {
OUTCHAR(c); OUTCHAR(c);
...@@ -136,7 +140,7 @@ DECODER(gbk) ...@@ -136,7 +140,7 @@ DECODER(gbk)
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
GBK_DECODE(c, IN2, writer) GBK_DECODE(c, INBYTE2, writer)
else return 1; else return 1;
NEXT_IN(2); NEXT_IN(2);
...@@ -152,41 +156,31 @@ DECODER(gbk) ...@@ -152,41 +156,31 @@ DECODER(gbk)
ENCODER(gb18030) ENCODER(gb18030)
{ {
while (inleft > 0) { while (*inpos < inlen) {
Py_UCS4 c = IN1; Py_UCS4 c = INCHAR1;
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
WRITE1(c) WRITEBYTE1(c)
NEXT(1, 1) NEXT(1, 1);
continue; continue;
} }
DECODE_SURROGATE(c) if (c >= 0x10000) {
if (c > 0x10FFFF)
#if Py_UNICODE_SIZE == 2
return 2; /* surrogates pair */
#else
return 1;
#endif
else if (c >= 0x10000) {
Py_UCS4 tc = c - 0x10000; Py_UCS4 tc = c - 0x10000;
assert (c <= 0x10FFFF);
REQUIRE_OUTBUF(4) REQUIRE_OUTBUF(4)
OUT4((unsigned char)(tc % 10) + 0x30) OUTBYTE4((unsigned char)(tc % 10) + 0x30)
tc /= 10; tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81) OUTBYTE3((unsigned char)(tc % 126) + 0x81)
tc /= 126; tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30) OUTBYTE2((unsigned char)(tc % 10) + 0x30)
tc /= 10; tc /= 10;
OUT1((unsigned char)(tc + 0x90)) OUTBYTE1((unsigned char)(tc + 0x90))
#if Py_UNICODE_SIZE == 2 NEXT(1, 4);
NEXT(2, 4) /* surrogates pair */
#else
NEXT(1, 4)
#endif
continue; continue;
} }
...@@ -209,15 +203,15 @@ ENCODER(gb18030) ...@@ -209,15 +203,15 @@ ENCODER(gb18030)
tc = c - utrrange->first + tc = c - utrrange->first +
utrrange->base; utrrange->base;
OUT4((unsigned char)(tc % 10) + 0x30) OUTBYTE4((unsigned char)(tc % 10) + 0x30)
tc /= 10; tc /= 10;
OUT3((unsigned char)(tc % 126) + 0x81) OUTBYTE3((unsigned char)(tc % 126) + 0x81)
tc /= 126; tc /= 126;
OUT2((unsigned char)(tc % 10) + 0x30) OUTBYTE2((unsigned char)(tc % 10) + 0x30)
tc /= 10; tc /= 10;
OUT1((unsigned char)tc + 0x81) OUTBYTE1((unsigned char)tc + 0x81)
NEXT(1, 4) NEXT(1, 4);
break; break;
} }
...@@ -226,13 +220,13 @@ ENCODER(gb18030) ...@@ -226,13 +220,13 @@ ENCODER(gb18030)
continue; continue;
} }
OUT1((code >> 8) | 0x80) OUTBYTE1((code >> 8) | 0x80)
if (code & 0x8000) if (code & 0x8000)
OUT2((code & 0xFF)) /* MSB set: GBK or GB18030ext */ OUTBYTE2((code & 0xFF)) /* MSB set: GBK or GB18030ext */
else else
OUT2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */ OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: GB2312 */
NEXT(1, 2) NEXT(1, 2);
} }
return 0; return 0;
...@@ -241,7 +235,7 @@ ENCODER(gb18030) ...@@ -241,7 +235,7 @@ ENCODER(gb18030)
DECODER(gb18030) DECODER(gb18030)
{ {
while (inleft > 0) { while (inleft > 0) {
unsigned char c = IN1, c2; unsigned char c = INBYTE1, c2;
if (c < 0x80) { if (c < 0x80) {
OUTCHAR(c); OUTCHAR(c);
...@@ -251,15 +245,15 @@ DECODER(gb18030) ...@@ -251,15 +245,15 @@ DECODER(gb18030)
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
c2 = IN2; c2 = INBYTE2;
if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */ if (c2 >= 0x30 && c2 <= 0x39) { /* 4 bytes seq */
const struct _gb18030_to_unibmp_ranges *utr; const struct _gb18030_to_unibmp_ranges *utr;
unsigned char c3, c4; unsigned char c3, c4;
Py_UCS4 lseq; Py_UCS4 lseq;
REQUIRE_INBUF(4) REQUIRE_INBUF(4)
c3 = IN3; c3 = INBYTE3;
c4 = IN4; c4 = INBYTE4;
if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39) if (c < 0x81 || c3 < 0x81 || c4 < 0x30 || c4 > 0x39)
return 1; return 1;
c -= 0x81; c2 -= 0x30; c -= 0x81; c2 -= 0x30;
...@@ -313,33 +307,34 @@ ENCODER_INIT(hz) ...@@ -313,33 +307,34 @@ ENCODER_INIT(hz)
ENCODER_RESET(hz) ENCODER_RESET(hz)
{ {
if (state->i != 0) { if (state->i != 0) {
WRITE2('~', '}') WRITEBYTE2('~', '}')
state->i = 0; state->i = 0;
NEXT_OUT(2) NEXT_OUT(2);
} }
return 0; return 0;
} }
ENCODER(hz) ENCODER(hz)
{ {
while (inleft > 0) { while (*inpos < inlen) {
Py_UCS4 c = IN1; Py_UCS4 c = INCHAR1;
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
if (state->i == 0) { if (state->i == 0) {
WRITE1((unsigned char)c) WRITEBYTE1((unsigned char)c)
NEXT(1, 1) NEXT(1, 1);
} }
else { else {
WRITE3('~', '}', (unsigned char)c) WRITEBYTE3('~', '}', (unsigned char)c)
NEXT(1, 3) NEXT(1, 3);
state->i = 0; state->i = 0;
} }
continue; continue;
} }
UCS4INVALID(c) if (c > 0xFFFF)
return 1;
TRYMAP_ENC(gbcommon, code, c); TRYMAP_ENC(gbcommon, code, c);
else return 1; else return 1;
...@@ -348,13 +343,13 @@ ENCODER(hz) ...@@ -348,13 +343,13 @@ ENCODER(hz)
return 1; return 1;
if (state->i == 0) { if (state->i == 0) {
WRITE4('~', '{', code >> 8, code & 0xff) WRITEBYTE4('~', '{', code >> 8, code & 0xff)
NEXT(1, 4) NEXT(1, 4);
state->i = 1; state->i = 1;
} }
else { else {
WRITE2(code >> 8, code & 0xff) WRITEBYTE2(code >> 8, code & 0xff)
NEXT(1, 2) NEXT(1, 2);
} }
} }
...@@ -376,10 +371,10 @@ DECODER_RESET(hz) ...@@ -376,10 +371,10 @@ DECODER_RESET(hz)
DECODER(hz) DECODER(hz)
{ {
while (inleft > 0) { while (inleft > 0) {
unsigned char c = IN1; unsigned char c = INBYTE1;
if (c == '~') { if (c == '~') {
unsigned char c2 = IN2; unsigned char c2 = INBYTE2;
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
if (c2 == '~') { if (c2 == '~') {
...@@ -408,7 +403,7 @@ DECODER(hz) ...@@ -408,7 +403,7 @@ DECODER(hz)
} }
else { /* GB mode */ else { /* GB mode */
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
TRYMAP_DEC(gb2312, writer, c, IN2) { TRYMAP_DEC(gb2312, writer, c, INBYTE2) {
NEXT_IN(2); NEXT_IN(2);
} }
else else
......
...@@ -38,35 +38,39 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5 ...@@ -38,35 +38,39 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5
ENCODER(big5hkscs) ENCODER(big5hkscs)
{ {
while (inleft > 0) { while (*inpos < inlen) {
Py_UCS4 c = **inbuf; Py_UCS4 c = INCHAR1;
DBCHAR code; DBCHAR code;
Py_ssize_t insize; Py_ssize_t insize;
if (c < 0x80) { if (c < 0x80) {
REQUIRE_OUTBUF(1) REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c; **outbuf = (unsigned char)c;
NEXT(1, 1) NEXT(1, 1);
continue; continue;
} }
DECODE_SURROGATE(c) insize = 1;
insize = GET_INSIZE(c);
REQUIRE_OUTBUF(2) REQUIRE_OUTBUF(2)
if (c < 0x10000) { if (c < 0x10000) {
TRYMAP_ENC(big5hkscs_bmp, code, c) { TRYMAP_ENC(big5hkscs_bmp, code, c) {
if (code == MULTIC) { if (code == MULTIC) {
if (inleft >= 2 && Py_UCS4 c2;
if (inlen - *inpos >= 2)
c2 = INCHAR2;
else
c2 = 0;
if (inlen - *inpos >= 2 &&
((c & 0xffdf) == 0x00ca) && ((c & 0xffdf) == 0x00ca) &&
(((*inbuf)[1] & 0xfff7) == 0x0304)) { ((c2 & 0xfff7) == 0x0304)) {
code = big5hkscs_pairenc_table[ code = big5hkscs_pairenc_table[
((c >> 4) | ((c >> 4) |
((*inbuf)[1] >> 3)) & 3]; (c2 >> 3)) & 3];
insize = 2; insize = 2;
} }
else if (inleft < 2 && else if (inlen - *inpos < 2 &&
!(flags & MBENC_FLUSH)) !(flags & MBENC_FLUSH))
return MBERR_TOOFEW; return MBERR_TOOFEW;
else { else {
...@@ -89,9 +93,9 @@ ENCODER(big5hkscs) ...@@ -89,9 +93,9 @@ ENCODER(big5hkscs)
else else
return insize; return insize;
OUT1(code >> 8) OUTBYTE1(code >> 8)
OUT2(code & 0xFF) OUTBYTE2(code & 0xFF)
NEXT(insize, 2) NEXT(insize, 2);
} }
return 0; return 0;
...@@ -102,7 +106,7 @@ ENCODER(big5hkscs) ...@@ -102,7 +106,7 @@ ENCODER(big5hkscs)
DECODER(big5hkscs) DECODER(big5hkscs)
{ {
while (inleft > 0) { while (inleft > 0) {
unsigned char c = IN1; unsigned char c = INBYTE1;
Py_UCS4 decoded; Py_UCS4 decoded;
if (c < 0x80) { if (c < 0x80) {
...@@ -113,20 +117,20 @@ DECODER(big5hkscs) ...@@ -113,20 +117,20 @@ DECODER(big5hkscs)
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
if (0xc6 > c || c > 0xc8 || (c < 0xc7 && IN2 < 0xa1)) { if (0xc6 > c || c > 0xc8 || (c < 0xc7 && INBYTE2 < 0xa1)) {
TRYMAP_DEC(big5, writer, c, IN2) { TRYMAP_DEC(big5, writer, c, INBYTE2) {
NEXT_IN(2); NEXT_IN(2);
continue; continue;
} }
} }
TRYMAP_DEC_CHAR(big5hkscs, decoded, c, IN2) TRYMAP_DEC_CHAR(big5hkscs, decoded, c, INBYTE2)
{ {
int s = BH2S(c, IN2); int s = BH2S(c, INBYTE2);
const unsigned char *hintbase; const unsigned char *hintbase;
assert(0x87 <= c && c <= 0xfe); assert(0x87 <= c && c <= 0xfe);
assert(0x40 <= IN2 && IN2 <= 0xfe); assert(0x40 <= INBYTE2 && INBYTE2 <= 0xfe);
if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) { if (BH2S(0x87, 0x40) <= s && s <= BH2S(0xa0, 0xfe)) {
hintbase = big5hkscs_phint_0; hintbase = big5hkscs_phint_0;
...@@ -154,7 +158,7 @@ DECODER(big5hkscs) ...@@ -154,7 +158,7 @@ DECODER(big5hkscs)
continue; continue;
} }
switch ((c << 8) | IN2) { switch ((c << 8) | INBYTE2) {
case 0x8862: OUTCHAR2(0x00ca, 0x0304); break; case 0x8862: OUTCHAR2(0x00ca, 0x0304); break;
case 0x8864: OUTCHAR2(0x00ca, 0x030c); break; case 0x8864: OUTCHAR2(0x00ca, 0x030c); break;
case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break; case 0x88a3: OUTCHAR2(0x00ea, 0x0304); break;
......
...@@ -141,13 +141,13 @@ ENCODER_INIT(iso2022) ...@@ -141,13 +141,13 @@ ENCODER_INIT(iso2022)
ENCODER_RESET(iso2022) ENCODER_RESET(iso2022)
{ {
if (STATE_GETFLAG(F_SHIFTED)) { if (STATE_GETFLAG(F_SHIFTED)) {
WRITE1(SI) WRITEBYTE1(SI)
NEXT_OUT(1) NEXT_OUT(1);
STATE_CLEARFLAG(F_SHIFTED) STATE_CLEARFLAG(F_SHIFTED)
} }
if (STATE_G0 != CHARSET_ASCII) { if (STATE_G0 != CHARSET_ASCII) {
WRITE3(ESC, '(', 'B') WRITEBYTE3(ESC, '(', 'B')
NEXT_OUT(3) NEXT_OUT(3);
STATE_SETG0(CHARSET_ASCII) STATE_SETG0(CHARSET_ASCII)
} }
return 0; return 0;
...@@ -155,30 +155,29 @@ ENCODER_RESET(iso2022) ...@@ -155,30 +155,29 @@ ENCODER_RESET(iso2022)
ENCODER(iso2022) ENCODER(iso2022)
{ {
while (inleft > 0) { while (*inpos < inlen) {
const struct iso2022_designation *dsg; const struct iso2022_designation *dsg;
DBCHAR encoded; DBCHAR encoded;
Py_UCS4 c = **inbuf; Py_UCS4 c = INCHAR1;
Py_ssize_t insize; Py_ssize_t insize;
if (c < 0x80) { if (c < 0x80) {
if (STATE_G0 != CHARSET_ASCII) { if (STATE_G0 != CHARSET_ASCII) {
WRITE3(ESC, '(', 'B') WRITEBYTE3(ESC, '(', 'B')
STATE_SETG0(CHARSET_ASCII) STATE_SETG0(CHARSET_ASCII)
NEXT_OUT(3) NEXT_OUT(3);
} }
if (STATE_GETFLAG(F_SHIFTED)) { if (STATE_GETFLAG(F_SHIFTED)) {
WRITE1(SI) WRITEBYTE1(SI)
STATE_CLEARFLAG(F_SHIFTED) STATE_CLEARFLAG(F_SHIFTED)
NEXT_OUT(1) NEXT_OUT(1);
} }
WRITE1((unsigned char)c) WRITEBYTE1((unsigned char)c)
NEXT(1, 1) NEXT(1, 1);
continue; continue;
} }
DECODE_SURROGATE(c) insize = 1;
insize = GET_INSIZE(c);
encoded = MAP_UNMAPPABLE; encoded = MAP_UNMAPPABLE;
for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
...@@ -187,24 +186,14 @@ ENCODER(iso2022) ...@@ -187,24 +186,14 @@ ENCODER(iso2022)
if (encoded == MAP_MULTIPLE_AVAIL) { if (encoded == MAP_MULTIPLE_AVAIL) {
/* this implementation won't work for pair /* this implementation won't work for pair
* of non-bmp characters. */ * of non-bmp characters. */
if (inleft < 2) { if (inlen - *inpos < 2) {
if (!(flags & MBENC_FLUSH)) if (!(flags & MBENC_FLUSH))
return MBERR_TOOFEW; return MBERR_TOOFEW;
length = -1; length = -1;
} }
else else
length = 2; length = 2;
#if Py_UNICODE_SIZE == 2
if (length == 2) {
Py_UCS4 u4in[2];
u4in[0] = (Py_UCS4)IN1;
u4in[1] = (Py_UCS4)IN2;
encoded = dsg->encoder(u4in, &length);
} else
encoded = dsg->encoder(&c, &length);
#else
encoded = dsg->encoder(&c, &length); encoded = dsg->encoder(&c, &length);
#endif
if (encoded != MAP_UNMAPPABLE) { if (encoded != MAP_UNMAPPABLE) {
insize = length; insize = length;
break; break;
...@@ -221,47 +210,47 @@ ENCODER(iso2022) ...@@ -221,47 +210,47 @@ ENCODER(iso2022)
switch (dsg->plane) { switch (dsg->plane) {
case 0: /* G0 */ case 0: /* G0 */
if (STATE_GETFLAG(F_SHIFTED)) { if (STATE_GETFLAG(F_SHIFTED)) {
WRITE1(SI) WRITEBYTE1(SI)
STATE_CLEARFLAG(F_SHIFTED) STATE_CLEARFLAG(F_SHIFTED)
NEXT_OUT(1) NEXT_OUT(1);
} }
if (STATE_G0 != dsg->mark) { if (STATE_G0 != dsg->mark) {
if (dsg->width == 1) { if (dsg->width == 1) {
WRITE3(ESC, '(', ESCMARK(dsg->mark)) WRITEBYTE3(ESC, '(', ESCMARK(dsg->mark))
STATE_SETG0(dsg->mark) STATE_SETG0(dsg->mark)
NEXT_OUT(3) NEXT_OUT(3);
} }
else if (dsg->mark == CHARSET_JISX0208) { else if (dsg->mark == CHARSET_JISX0208) {
WRITE3(ESC, '$', ESCMARK(dsg->mark)) WRITEBYTE3(ESC, '$', ESCMARK(dsg->mark))
STATE_SETG0(dsg->mark) STATE_SETG0(dsg->mark)
NEXT_OUT(3) NEXT_OUT(3);
} }
else { else {
WRITE4(ESC, '$', '(', WRITEBYTE4(ESC, '$', '(',
ESCMARK(dsg->mark)) ESCMARK(dsg->mark))
STATE_SETG0(dsg->mark) STATE_SETG0(dsg->mark)
NEXT_OUT(4) NEXT_OUT(4);
} }
} }
break; break;
case 1: /* G1 */ case 1: /* G1 */
if (STATE_G1 != dsg->mark) { if (STATE_G1 != dsg->mark) {
if (dsg->width == 1) { if (dsg->width == 1) {
WRITE3(ESC, ')', ESCMARK(dsg->mark)) WRITEBYTE3(ESC, ')', ESCMARK(dsg->mark))
STATE_SETG1(dsg->mark) STATE_SETG1(dsg->mark)
NEXT_OUT(3) NEXT_OUT(3);
} }
else { else {
WRITE4(ESC, '$', ')', WRITEBYTE4(ESC, '$', ')',
ESCMARK(dsg->mark)) ESCMARK(dsg->mark))
STATE_SETG1(dsg->mark) STATE_SETG1(dsg->mark)
NEXT_OUT(4) NEXT_OUT(4);
} }
} }
if (!STATE_GETFLAG(F_SHIFTED)) { if (!STATE_GETFLAG(F_SHIFTED)) {
WRITE1(SO) WRITEBYTE1(SO)
STATE_SETFLAG(F_SHIFTED) STATE_SETFLAG(F_SHIFTED)
NEXT_OUT(1) NEXT_OUT(1);
} }
break; break;
default: /* G2 and G3 is not supported: no encoding in default: /* G2 and G3 is not supported: no encoding in
...@@ -270,14 +259,14 @@ ENCODER(iso2022) ...@@ -270,14 +259,14 @@ ENCODER(iso2022)
} }
if (dsg->width == 1) { if (dsg->width == 1) {
WRITE1((unsigned char)encoded) WRITEBYTE1((unsigned char)encoded)
NEXT_OUT(1) NEXT_OUT(1);
} }
else { else {
WRITE2(encoded >> 8, encoded & 0xff) WRITEBYTE2(encoded >> 8, encoded & 0xff)
NEXT_OUT(2) NEXT_OUT(2);
} }
NEXT_IN(insize); NEXT_INCHAR(insize);
} }
return 0; return 0;
...@@ -323,26 +312,26 @@ iso2022processesc(const void *config, MultibyteCodec_State *state, ...@@ -323,26 +312,26 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
switch (esclen) { switch (esclen) {
case 3: case 3:
if (IN2 == '$') { if (INBYTE2 == '$') {
charset = IN3 | CHARSET_DBCS; charset = INBYTE3 | CHARSET_DBCS;
designation = 0; designation = 0;
} }
else { else {
charset = IN3; charset = INBYTE3;
if (IN2 == '(') designation = 0; if (INBYTE2 == '(') designation = 0;
else if (IN2 == ')') designation = 1; else if (INBYTE2 == ')') designation = 1;
else if (CONFIG_ISSET(USE_G2) && IN2 == '.') else if (CONFIG_ISSET(USE_G2) && INBYTE2 == '.')
designation = 2; designation = 2;
else return 3; else return 3;
} }
break; break;
case 4: case 4:
if (IN2 != '$') if (INBYTE2 != '$')
return 4; return 4;
charset = IN4 | CHARSET_DBCS; charset = INBYTE4 | CHARSET_DBCS;
if (IN3 == '(') designation = 0; if (INBYTE3 == '(') designation = 0;
else if (IN3 == ')') designation = 1; else if (INBYTE3 == ')') designation = 1;
else return 4; else return 4;
break; break;
case 6: /* designation with prefix */ case 6: /* designation with prefix */
...@@ -395,18 +384,18 @@ iso2022processg2(const void *config, MultibyteCodec_State *state, ...@@ -395,18 +384,18 @@ iso2022processg2(const void *config, MultibyteCodec_State *state,
/* not written to use encoder, decoder functions because only few /* not written to use encoder, decoder functions because only few
* encodings use G2 designations in CJKCodecs */ * encodings use G2 designations in CJKCodecs */
if (STATE_G2 == CHARSET_ISO8859_1) { if (STATE_G2 == CHARSET_ISO8859_1) {
if (IN3 < 0x80) if (INBYTE3 < 0x80)
OUTCHAR(IN3 + 0x80); OUTCHAR(INBYTE3 + 0x80);
else else
return 3; return 3;
} }
else if (STATE_G2 == CHARSET_ISO8859_7) { else if (STATE_G2 == CHARSET_ISO8859_7) {
ISO8859_7_DECODE(IN3 ^ 0x80, writer) ISO8859_7_DECODE(INBYTE3 ^ 0x80, writer)
else return 3; else return 3;
} }
else if (STATE_G2 == CHARSET_ASCII) { else if (STATE_G2 == CHARSET_ASCII) {
if (IN3 & 0x80) return 3; if (INBYTE3 & 0x80) return 3;
else OUTCHAR(IN3); else OUTCHAR(INBYTE3);
} }
else else
return MBERR_INTERNAL; return MBERR_INTERNAL;
...@@ -421,7 +410,7 @@ DECODER(iso2022) ...@@ -421,7 +410,7 @@ DECODER(iso2022)
const struct iso2022_designation *dsgcache = NULL; const struct iso2022_designation *dsgcache = NULL;
while (inleft > 0) { while (inleft > 0) {
unsigned char c = IN1; unsigned char c = INBYTE1;
Py_ssize_t err; Py_ssize_t err;
if (STATE_GETFLAG(F_ESCTHROUGHOUT)) { if (STATE_GETFLAG(F_ESCTHROUGHOUT)) {
...@@ -438,13 +427,13 @@ DECODER(iso2022) ...@@ -438,13 +427,13 @@ DECODER(iso2022)
switch (c) { switch (c) {
case ESC: case ESC:
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
if (IS_ISO2022ESC(IN2)) { if (IS_ISO2022ESC(INBYTE2)) {
err = iso2022processesc(config, state, err = iso2022processesc(config, state,
inbuf, &inleft); inbuf, &inleft);
if (err != 0) if (err != 0)
return err; return err;
} }
else if (CONFIG_ISSET(USE_G2) && IN2 == 'N') {/* SS2 */ else if (CONFIG_ISSET(USE_G2) && INBYTE2 == 'N') {/* SS2 */
REQUIRE_INBUF(3) REQUIRE_INBUF(3)
err = iso2022processg2(config, state, err = iso2022processg2(config, state,
inbuf, &inleft, writer); inbuf, &inleft, writer);
......
This diff is collapsed.
...@@ -33,16 +33,18 @@ static const unsigned char u2cgk_jongseong[28] = { ...@@ -33,16 +33,18 @@ static const unsigned char u2cgk_jongseong[28] = {
ENCODER(euc_kr) ENCODER(euc_kr)
{ {
while (inleft > 0) { while (*inpos < inlen) {
Py_UCS4 c = IN1; Py_UCS4 c = INCHAR1;
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
WRITE1((unsigned char)c) WRITEBYTE1((unsigned char)c)
NEXT(1, 1) NEXT(1, 1);
continue; continue;
} }
UCS4INVALID(c)
if (c > 0xFFFF)
return 1;
REQUIRE_OUTBUF(2) REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c); TRYMAP_ENC(cp949, code, c);
...@@ -50,9 +52,9 @@ ENCODER(euc_kr) ...@@ -50,9 +52,9 @@ ENCODER(euc_kr)
if ((code & 0x8000) == 0) { if ((code & 0x8000) == 0) {
/* KS X 1001 coded character */ /* KS X 1001 coded character */
OUT1((code >> 8) | 0x80) OUTBYTE1((code >> 8) | 0x80)
OUT2((code & 0xFF) | 0x80) OUTBYTE2((code & 0xFF) | 0x80)
NEXT(1, 2) NEXT(1, 2);
} }
else { /* Mapping is found in CP949 extension, else { /* Mapping is found in CP949 extension,
* but we encode it in KS X 1001:1998 Annex 3, * but we encode it in KS X 1001:1998 Annex 3,
...@@ -61,23 +63,23 @@ ENCODER(euc_kr) ...@@ -61,23 +63,23 @@ ENCODER(euc_kr)
REQUIRE_OUTBUF(8) REQUIRE_OUTBUF(8)
/* syllable composition precedence */ /* syllable composition precedence */
OUT1(EUCKR_JAMO_FIRSTBYTE) OUTBYTE1(EUCKR_JAMO_FIRSTBYTE)
OUT2(EUCKR_JAMO_FILLER) OUTBYTE2(EUCKR_JAMO_FILLER)
/* All codepoints in CP949 extension are in unicode /* All codepoints in CP949 extension are in unicode
* Hangul Syllable area. */ * Hangul Syllable area. */
assert(0xac00 <= c && c <= 0xd7a3); assert(0xac00 <= c && c <= 0xd7a3);
c -= 0xac00; c -= 0xac00;
OUT3(EUCKR_JAMO_FIRSTBYTE) OUTBYTE3(EUCKR_JAMO_FIRSTBYTE)
OUT4(u2cgk_choseong[c / 588]) OUTBYTE4(u2cgk_choseong[c / 588])
NEXT_OUT(4) NEXT_OUT(4);
OUT1(EUCKR_JAMO_FIRSTBYTE) OUTBYTE1(EUCKR_JAMO_FIRSTBYTE)
OUT2(u2cgk_jungseong[(c / 28) % 21]) OUTBYTE2(u2cgk_jungseong[(c / 28) % 21])
OUT3(EUCKR_JAMO_FIRSTBYTE) OUTBYTE3(EUCKR_JAMO_FIRSTBYTE)
OUT4(u2cgk_jongseong[c % 28]) OUTBYTE4(u2cgk_jongseong[c % 28])
NEXT(1, 4) NEXT(1, 4);
} }
} }
...@@ -102,7 +104,7 @@ static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */ ...@@ -102,7 +104,7 @@ static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
DECODER(euc_kr) DECODER(euc_kr)
{ {
while (inleft > 0) { while (inleft > 0) {
unsigned char c = IN1; unsigned char c = INBYTE1;
if (c < 0x80) { if (c < 0x80) {
OUTCHAR(c); OUTCHAR(c);
...@@ -113,7 +115,7 @@ DECODER(euc_kr) ...@@ -113,7 +115,7 @@ DECODER(euc_kr)
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
if (c == EUCKR_JAMO_FIRSTBYTE && if (c == EUCKR_JAMO_FIRSTBYTE &&
IN2 == EUCKR_JAMO_FILLER) { INBYTE2 == EUCKR_JAMO_FILLER) {
/* KS X 1001:1998 Annex 3 make-up sequence */ /* KS X 1001:1998 Annex 3 make-up sequence */
DBCHAR cho, jung, jong; DBCHAR cho, jung, jong;
...@@ -146,7 +148,7 @@ DECODER(euc_kr) ...@@ -146,7 +148,7 @@ DECODER(euc_kr)
OUTCHAR(0xac00 + cho*588 + jung*28 + jong); OUTCHAR(0xac00 + cho*588 + jung*28 + jong);
NEXT_IN(8); NEXT_IN(8);
} }
else TRYMAP_DEC(ksx1001, writer, c ^ 0x80, IN2 ^ 0x80) { else TRYMAP_DEC(ksx1001, writer, c ^ 0x80, INBYTE2 ^ 0x80) {
NEXT_IN(2); NEXT_IN(2);
} }
else else
...@@ -164,27 +166,29 @@ DECODER(euc_kr) ...@@ -164,27 +166,29 @@ DECODER(euc_kr)
ENCODER(cp949) ENCODER(cp949)
{ {
while (inleft > 0) { while (*inpos < inlen) {
Py_UCS4 c = IN1; Py_UCS4 c = INCHAR1;
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
WRITE1((unsigned char)c) WRITEBYTE1((unsigned char)c)
NEXT(1, 1) NEXT(1, 1);
continue; continue;
} }
UCS4INVALID(c)
if (c > 0xFFFF)
return 1;
REQUIRE_OUTBUF(2) REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp949, code, c); TRYMAP_ENC(cp949, code, c);
else return 1; else return 1;
OUT1((code >> 8) | 0x80) OUTBYTE1((code >> 8) | 0x80)
if (code & 0x8000) if (code & 0x8000)
OUT2(code & 0xFF) /* MSB set: CP949 */ OUTBYTE2(code & 0xFF) /* MSB set: CP949 */
else else
OUT2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */ OUTBYTE2((code & 0xFF) | 0x80) /* MSB unset: ks x 1001 */
NEXT(1, 2) NEXT(1, 2);
} }
return 0; return 0;
...@@ -193,7 +197,7 @@ ENCODER(cp949) ...@@ -193,7 +197,7 @@ ENCODER(cp949)
DECODER(cp949) DECODER(cp949)
{ {
while (inleft > 0) { while (inleft > 0) {
unsigned char c = IN1; unsigned char c = INBYTE1;
if (c < 0x80) { if (c < 0x80) {
OUTCHAR(c); OUTCHAR(c);
...@@ -202,8 +206,8 @@ DECODER(cp949) ...@@ -202,8 +206,8 @@ DECODER(cp949)
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
TRYMAP_DEC(ksx1001, writer, c ^ 0x80, IN2 ^ 0x80); TRYMAP_DEC(ksx1001, writer, c ^ 0x80, INBYTE2 ^ 0x80);
else TRYMAP_DEC(cp949ext, writer, c, IN2); else TRYMAP_DEC(cp949ext, writer, c, INBYTE2);
else return 1; else return 1;
NEXT_IN(2); NEXT_IN(2);
...@@ -246,16 +250,18 @@ static const DBCHAR u2johabjamo[] = { ...@@ -246,16 +250,18 @@ static const DBCHAR u2johabjamo[] = {
ENCODER(johab) ENCODER(johab)
{ {
while (inleft > 0) { while (*inpos < inlen) {
Py_UCS4 c = IN1; Py_UCS4 c = INCHAR1;
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
WRITE1((unsigned char)c) WRITEBYTE1((unsigned char)c)
NEXT(1, 1) NEXT(1, 1);
continue; continue;
} }
UCS4INVALID(c)
if (c > 0xFFFF)
return 1;
REQUIRE_OUTBUF(2) REQUIRE_OUTBUF(2)
...@@ -281,9 +287,9 @@ ENCODER(johab) ...@@ -281,9 +287,9 @@ ENCODER(johab)
t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) : t1 = (c1 < 0x4a ? (c1 - 0x21 + 0x1b2) :
(c1 - 0x21 + 0x197)); (c1 - 0x21 + 0x197));
t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21); t2 = ((t1 & 1) ? 0x5e : 0) + (c2 - 0x21);
OUT1(t1 >> 1) OUTBYTE1(t1 >> 1)
OUT2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43) OUTBYTE2(t2 < 0x4e ? t2 + 0x31 : t2 + 0x43)
NEXT(1, 2) NEXT(1, 2);
continue; continue;
} }
else else
...@@ -292,9 +298,9 @@ ENCODER(johab) ...@@ -292,9 +298,9 @@ ENCODER(johab)
else else
return 1; return 1;
OUT1(code >> 8) OUTBYTE1(code >> 8)
OUT2(code & 0xff) OUTBYTE2(code & 0xff)
NEXT(1, 2) NEXT(1, 2);
} }
return 0; return 0;
...@@ -344,7 +350,7 @@ static const unsigned char johabjamo_jongseong[32] = { ...@@ -344,7 +350,7 @@ static const unsigned char johabjamo_jongseong[32] = {
DECODER(johab) DECODER(johab)
{ {
while (inleft > 0) { while (inleft > 0) {
unsigned char c = IN1, c2; unsigned char c = INBYTE1, c2;
if (c < 0x80) { if (c < 0x80) {
OUTCHAR(c); OUTCHAR(c);
...@@ -353,7 +359,7 @@ DECODER(johab) ...@@ -353,7 +359,7 @@ DECODER(johab)
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
c2 = IN2; c2 = INBYTE2;
if (c < 0xd8) { if (c < 0xd8) {
/* johab hangul */ /* johab hangul */
......
...@@ -13,26 +13,28 @@ ...@@ -13,26 +13,28 @@
ENCODER(big5) ENCODER(big5)
{ {
while (inleft > 0) { while (*inpos < inlen) {
Py_UCS4 c = **inbuf; Py_UCS4 c = INCHAR1;
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
REQUIRE_OUTBUF(1) REQUIRE_OUTBUF(1)
**outbuf = (unsigned char)c; **outbuf = (unsigned char)c;
NEXT(1, 1) NEXT(1, 1);
continue; continue;
} }
UCS4INVALID(c)
if (c > 0xFFFF)
return 1;
REQUIRE_OUTBUF(2) REQUIRE_OUTBUF(2)
TRYMAP_ENC(big5, code, c); TRYMAP_ENC(big5, code, c);
else return 1; else return 1;
OUT1(code >> 8) OUTBYTE1(code >> 8)
OUT2(code & 0xFF) OUTBYTE2(code & 0xFF)
NEXT(1, 2) NEXT(1, 2);
} }
return 0; return 0;
...@@ -41,7 +43,7 @@ ENCODER(big5) ...@@ -41,7 +43,7 @@ ENCODER(big5)
DECODER(big5) DECODER(big5)
{ {
while (inleft > 0) { while (inleft > 0) {
unsigned char c = IN1; unsigned char c = INBYTE1;
if (c < 0x80) { if (c < 0x80) {
OUTCHAR(c); OUTCHAR(c);
...@@ -50,7 +52,7 @@ DECODER(big5) ...@@ -50,7 +52,7 @@ DECODER(big5)
} }
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
TRYMAP_DEC(big5, writer, c, IN2) { TRYMAP_DEC(big5, writer, c, INBYTE2) {
NEXT_IN(2); NEXT_IN(2);
} }
else return 1; else return 1;
...@@ -66,25 +68,27 @@ DECODER(big5) ...@@ -66,25 +68,27 @@ DECODER(big5)
ENCODER(cp950) ENCODER(cp950)
{ {
while (inleft > 0) { while (*inpos < inlen) {
Py_UCS4 c = IN1; Py_UCS4 c = INCHAR1;
DBCHAR code; DBCHAR code;
if (c < 0x80) { if (c < 0x80) {
WRITE1((unsigned char)c) WRITEBYTE1((unsigned char)c)
NEXT(1, 1) NEXT(1, 1);
continue; continue;
} }
UCS4INVALID(c)
if (c > 0xFFFF)
return 1;
REQUIRE_OUTBUF(2) REQUIRE_OUTBUF(2)
TRYMAP_ENC(cp950ext, code, c); TRYMAP_ENC(cp950ext, code, c);
else TRYMAP_ENC(big5, code, c); else TRYMAP_ENC(big5, code, c);
else return 1; else return 1;
OUT1(code >> 8) OUTBYTE1(code >> 8)
OUT2(code & 0xFF) OUTBYTE2(code & 0xFF)
NEXT(1, 2) NEXT(1, 2);
} }
return 0; return 0;
...@@ -93,7 +97,7 @@ ENCODER(cp950) ...@@ -93,7 +97,7 @@ ENCODER(cp950)
DECODER(cp950) DECODER(cp950)
{ {
while (inleft > 0) { while (inleft > 0) {
unsigned char c = IN1; unsigned char c = INBYTE1;
if (c < 0x80) { if (c < 0x80) {
OUTCHAR(c); OUTCHAR(c);
...@@ -103,8 +107,8 @@ DECODER(cp950) ...@@ -103,8 +107,8 @@ DECODER(cp950)
REQUIRE_INBUF(2) REQUIRE_INBUF(2)
TRYMAP_DEC(cp950ext, writer, c, IN2); TRYMAP_DEC(cp950ext, writer, c, INBYTE2);
else TRYMAP_DEC(big5, writer, c, IN2); else TRYMAP_DEC(big5, writer, c, INBYTE2);
else return 1; else return 1;
NEXT_IN(2); NEXT_IN(2);
......
...@@ -72,7 +72,8 @@ static const struct dbcs_map *mapping_list; ...@@ -72,7 +72,8 @@ static const struct dbcs_map *mapping_list;
#define ENCODER(encoding) \ #define ENCODER(encoding) \
static Py_ssize_t encoding##_encode( \ static Py_ssize_t encoding##_encode( \
MultibyteCodec_State *state, const void *config, \ MultibyteCodec_State *state, const void *config, \
const Py_UNICODE **inbuf, Py_ssize_t inleft, \ int kind, void *data, \
Py_ssize_t *inpos, Py_ssize_t inlen, \
unsigned char **outbuf, Py_ssize_t outleft, int flags) unsigned char **outbuf, Py_ssize_t outleft, int flags)
#define ENCODER_RESET(encoding) \ #define ENCODER_RESET(encoding) \
static Py_ssize_t encoding##_encode_reset( \ static Py_ssize_t encoding##_encode_reset( \
...@@ -91,25 +92,25 @@ static const struct dbcs_map *mapping_list; ...@@ -91,25 +92,25 @@ static const struct dbcs_map *mapping_list;
static Py_ssize_t encoding##_decode_reset( \ static Py_ssize_t encoding##_decode_reset( \
MultibyteCodec_State *state, const void *config) MultibyteCodec_State *state, const void *config)
#if Py_UNICODE_SIZE == 4
#define UCS4INVALID(code) \
if ((code) > 0xFFFF) \
return 1;
#else
#define UCS4INVALID(code) \
if (0) ;
#endif
#define NEXT_IN(i) \ #define NEXT_IN(i) \
do { \ do { \
(*inbuf) += (i); \ (*inbuf) += (i); \
(inleft) -= (i); \ (inleft) -= (i); \
} while (0) } while (0)
#define NEXT_INCHAR(i) \
do { \
(*inpos) += (i); \
} while (0)
#define NEXT_OUT(o) \ #define NEXT_OUT(o) \
(*outbuf) += (o); \ do { \
(outleft) -= (o); (*outbuf) += (o); \
(outleft) -= (o); \
} while (0)
#define NEXT(i, o) \ #define NEXT(i, o) \
NEXT_IN(i); NEXT_OUT(o) do { \
NEXT_INCHAR(i); \
NEXT_OUT(o); \
} while (0)
#define REQUIRE_INBUF(n) \ #define REQUIRE_INBUF(n) \
if (inleft < (n)) \ if (inleft < (n)) \
...@@ -118,10 +119,13 @@ static const struct dbcs_map *mapping_list; ...@@ -118,10 +119,13 @@ static const struct dbcs_map *mapping_list;
if (outleft < (n)) \ if (outleft < (n)) \
return MBERR_TOOSMALL; return MBERR_TOOSMALL;
#define IN1 ((*inbuf)[0]) #define INBYTE1 ((*inbuf)[0])
#define IN2 ((*inbuf)[1]) #define INBYTE2 ((*inbuf)[1])
#define IN3 ((*inbuf)[2]) #define INBYTE3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3]) #define INBYTE4 ((*inbuf)[3])
#define INCHAR1 PyUnicode_READ(kind, data, *inpos)
#define INCHAR2 PyUnicode_READ(kind, data, *inpos + 1)
#define OUTCHAR(c) \ #define OUTCHAR(c) \
do { \ do { \
...@@ -140,24 +144,24 @@ static const struct dbcs_map *mapping_list; ...@@ -140,24 +144,24 @@ static const struct dbcs_map *mapping_list;
writer->pos += 2; \ writer->pos += 2; \
} while (0) } while (0)
#define OUT1(c) ((*outbuf)[0]) = (c); #define OUTBYTE1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c); #define OUTBYTE2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c); #define OUTBYTE3(c) ((*outbuf)[2]) = (c);
#define OUT4(c) ((*outbuf)[3]) = (c); #define OUTBYTE4(c) ((*outbuf)[3]) = (c);
#define WRITE1(c1) \ #define WRITEBYTE1(c1) \
REQUIRE_OUTBUF(1) \ REQUIRE_OUTBUF(1) \
(*outbuf)[0] = (c1); (*outbuf)[0] = (c1);
#define WRITE2(c1, c2) \ #define WRITEBYTE2(c1, c2) \
REQUIRE_OUTBUF(2) \ REQUIRE_OUTBUF(2) \
(*outbuf)[0] = (c1); \ (*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); (*outbuf)[1] = (c2);
#define WRITE3(c1, c2, c3) \ #define WRITEBYTE3(c1, c2, c3) \
REQUIRE_OUTBUF(3) \ REQUIRE_OUTBUF(3) \
(*outbuf)[0] = (c1); \ (*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \ (*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3); (*outbuf)[2] = (c3);
#define WRITE4(c1, c2, c3, c4) \ #define WRITEBYTE4(c1, c2, c3, c4) \
REQUIRE_OUTBUF(4) \ REQUIRE_OUTBUF(4) \
(*outbuf)[0] = (c1); \ (*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \ (*outbuf)[1] = (c2); \
...@@ -209,20 +213,6 @@ _TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c) ...@@ -209,20 +213,6 @@ _TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c)
#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \ #define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \
if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2) if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2)
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
if (Py_UNICODE_IS_HIGH_SURROGATE(c)) { \
REQUIRE_INBUF(2) \
if (Py_UNICODE_IS_LOW_SURROGATE(IN2)) { \
c = Py_UNICODE_JOIN_SURROGATES(c, IN2); \
} \
}
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
#else
#define DECODE_SURROGATE(c) {;}
#define GET_INSIZE(c) 1
#endif
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = { #define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL}, #define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap}, #define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
......
...@@ -27,7 +27,8 @@ typedef union { ...@@ -27,7 +27,8 @@ typedef union {
typedef int (*mbcodec_init)(const void *config); typedef int (*mbcodec_init)(const void *config);
typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state, typedef Py_ssize_t (*mbencode_func)(MultibyteCodec_State *state,
const void *config, const void *config,
const Py_UNICODE **inbuf, Py_ssize_t inleft, int kind, void *data,
Py_ssize_t *inpos, Py_ssize_t inlen,
unsigned char **outbuf, Py_ssize_t outleft, unsigned char **outbuf, Py_ssize_t outleft,
int flags); int flags);
typedef int (*mbencodeinit_func)(MultibyteCodec_State *state, typedef int (*mbencodeinit_func)(MultibyteCodec_State *state,
...@@ -75,8 +76,7 @@ typedef struct { ...@@ -75,8 +76,7 @@ typedef struct {
#define MAXENCPENDING 2 #define MAXENCPENDING 2
#define _MultibyteStatefulEncoder_HEAD \ #define _MultibyteStatefulEncoder_HEAD \
_MultibyteStatefulCodec_HEAD \ _MultibyteStatefulCodec_HEAD \
Py_UNICODE pending[MAXENCPENDING]; \ PyObject *pending;
Py_ssize_t pendingsize;
typedef struct { typedef struct {
_MultibyteStatefulEncoder_HEAD _MultibyteStatefulEncoder_HEAD
} MultibyteStatefulEncoderContext; } MultibyteStatefulEncoderContext;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment