Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
d9491269
Kaydet (Commit)
d9491269
authored
Nis 14, 2013
tarafından
Victor Stinner
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue #17693: CJK encoders now use the new Unicode API (PEP 393)
üst
71557596
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
430 additions
and
418 deletions
+430
-418
_codecs_cn.c
Modules/cjkcodecs/_codecs_cn.c
+65
-70
_codecs_hk.c
Modules/cjkcodecs/_codecs_hk.c
+24
-20
_codecs_iso2022.c
Modules/cjkcodecs/_codecs_iso2022.c
+51
-62
_codecs_jp.c
Modules/cjkcodecs/_codecs_jp.c
+86
-82
_codecs_kr.c
Modules/cjkcodecs/_codecs_kr.c
+52
-46
_codecs_tw.c
Modules/cjkcodecs/_codecs_tw.c
+24
-20
cjkcodecs.h
Modules/cjkcodecs/cjkcodecs.h
+29
-39
multibytecodec.c
Modules/cjkcodecs/multibytecodec.c
+96
-76
multibytecodec.h
Modules/cjkcodecs/multibytecodec.h
+3
-3
No files found.
Modules/cjkcodecs/_codecs_cn.c
Dosyayı görüntüle @
d9491269
...
...
@@ -42,16 +42,18 @@
ENCODER
(
gb2312
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
;
continue
;
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
gbcommon
,
code
,
c
);
...
...
@@ -60,9 +62,9 @@ ENCODER(gb2312)
if
(
code
&
0x8000
)
/* MSB set: GBK */
return
1
;
OUT1
((
code
>>
8
)
|
0x80
)
OUT2
((
code
&
0xFF
)
|
0x80
)
NEXT
(
1
,
2
)
OUT
BYTE
1
((
code
>>
8
)
|
0x80
)
OUT
BYTE
2
((
code
&
0xFF
)
|
0x80
)
NEXT
(
1
,
2
)
;
}
return
0
;
...
...
@@ -80,7 +82,7 @@ DECODER(gb2312)
}
REQUIRE_INBUF
(
2
)
TRYMAP_DEC
(
gb2312
,
writer
,
c
^
0x80
,
IN2
^
0x80
)
{
TRYMAP_DEC
(
gb2312
,
writer
,
c
^
0x80
,
IN
BYTE
2
^
0x80
)
{
NEXT_IN
(
2
);
}
else
return
1
;
...
...
@@ -96,28 +98,30 @@ DECODER(gb2312)
ENCODER
(
gbk
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
;
continue
;
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
GBK_ENCODE
(
c
,
code
)
else
return
1
;
OUT1
((
code
>>
8
)
|
0x80
)
OUT
BYTE
1
((
code
>>
8
)
|
0x80
)
if
(
code
&
0x8000
)
OUT2
((
code
&
0xFF
))
/* MSB set: GBK */
OUT
BYTE
2
((
code
&
0xFF
))
/* MSB set: GBK */
else
OUT2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: GB2312 */
NEXT
(
1
,
2
)
OUT
BYTE
2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: GB2312 */
NEXT
(
1
,
2
)
;
}
return
0
;
...
...
@@ -126,7 +130,7 @@ ENCODER(gbk)
DECODER
(
gbk
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
...
...
@@ -136,7 +140,7 @@ DECODER(gbk)
REQUIRE_INBUF
(
2
)
GBK_DECODE
(
c
,
IN2
,
writer
)
GBK_DECODE
(
c
,
IN
BYTE
2
,
writer
)
else
return
1
;
NEXT_IN
(
2
);
...
...
@@ -152,41 +156,31 @@ DECODER(gbk)
ENCODER
(
gb18030
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
WRITE1
(
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
(
c
)
NEXT
(
1
,
1
)
;
continue
;
}
DECODE_SURROGATE
(
c
)
if
(
c
>
0x10FFFF
)
#if Py_UNICODE_SIZE == 2
return
2
;
/* surrogates pair */
#else
return
1
;
#endif
else
if
(
c
>=
0x10000
)
{
if
(
c
>=
0x10000
)
{
Py_UCS4
tc
=
c
-
0x10000
;
assert
(
c
<=
0x10FFFF
);
REQUIRE_OUTBUF
(
4
)
OUT4
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
OUT
BYTE
4
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
tc
/=
10
;
OUT3
((
unsigned
char
)(
tc
%
126
)
+
0x81
)
OUT
BYTE
3
((
unsigned
char
)(
tc
%
126
)
+
0x81
)
tc
/=
126
;
OUT2
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
OUT
BYTE
2
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
tc
/=
10
;
OUT1
((
unsigned
char
)(
tc
+
0x90
))
OUT
BYTE
1
((
unsigned
char
)(
tc
+
0x90
))
#if Py_UNICODE_SIZE == 2
NEXT
(
2
,
4
)
/* surrogates pair */
#else
NEXT
(
1
,
4
)
#endif
NEXT
(
1
,
4
);
continue
;
}
...
...
@@ -209,15 +203,15 @@ ENCODER(gb18030)
tc
=
c
-
utrrange
->
first
+
utrrange
->
base
;
OUT4
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
OUT
BYTE
4
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
tc
/=
10
;
OUT3
((
unsigned
char
)(
tc
%
126
)
+
0x81
)
OUT
BYTE
3
((
unsigned
char
)(
tc
%
126
)
+
0x81
)
tc
/=
126
;
OUT2
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
OUT
BYTE
2
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
tc
/=
10
;
OUT1
((
unsigned
char
)
tc
+
0x81
)
OUT
BYTE
1
((
unsigned
char
)
tc
+
0x81
)
NEXT
(
1
,
4
)
NEXT
(
1
,
4
)
;
break
;
}
...
...
@@ -226,13 +220,13 @@ ENCODER(gb18030)
continue
;
}
OUT1
((
code
>>
8
)
|
0x80
)
OUT
BYTE
1
((
code
>>
8
)
|
0x80
)
if
(
code
&
0x8000
)
OUT2
((
code
&
0xFF
))
/* MSB set: GBK or GB18030ext */
OUT
BYTE
2
((
code
&
0xFF
))
/* MSB set: GBK or GB18030ext */
else
OUT2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: GB2312 */
OUT
BYTE
2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: GB2312 */
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
return
0
;
...
...
@@ -241,7 +235,7 @@ ENCODER(gb18030)
DECODER
(
gb18030
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
,
c2
;
unsigned
char
c
=
IN
BYTE
1
,
c2
;
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
...
...
@@ -251,15 +245,15 @@ DECODER(gb18030)
REQUIRE_INBUF
(
2
)
c2
=
IN2
;
c2
=
IN
BYTE
2
;
if
(
c2
>=
0x30
&&
c2
<=
0x39
)
{
/* 4 bytes seq */
const
struct
_gb18030_to_unibmp_ranges
*
utr
;
unsigned
char
c3
,
c4
;
Py_UCS4
lseq
;
REQUIRE_INBUF
(
4
)
c3
=
IN3
;
c4
=
IN4
;
c3
=
IN
BYTE
3
;
c4
=
IN
BYTE
4
;
if
(
c
<
0x81
||
c3
<
0x81
||
c4
<
0x30
||
c4
>
0x39
)
return
1
;
c
-=
0x81
;
c2
-=
0x30
;
...
...
@@ -313,33 +307,34 @@ ENCODER_INIT(hz)
ENCODER_RESET
(
hz
)
{
if
(
state
->
i
!=
0
)
{
WRITE2
(
'~'
,
'}'
)
WRITE
BYTE
2
(
'~'
,
'}'
)
state
->
i
=
0
;
NEXT_OUT
(
2
)
NEXT_OUT
(
2
)
;
}
return
0
;
}
ENCODER
(
hz
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
if
(
state
->
i
==
0
)
{
WRITE1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
;
}
else
{
WRITE3
(
'~'
,
'}'
,
(
unsigned
char
)
c
)
NEXT
(
1
,
3
)
WRITE
BYTE
3
(
'~'
,
'}'
,
(
unsigned
char
)
c
)
NEXT
(
1
,
3
)
;
state
->
i
=
0
;
}
continue
;
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
TRYMAP_ENC
(
gbcommon
,
code
,
c
);
else
return
1
;
...
...
@@ -348,13 +343,13 @@ ENCODER(hz)
return
1
;
if
(
state
->
i
==
0
)
{
WRITE4
(
'~'
,
'{'
,
code
>>
8
,
code
&
0xff
)
NEXT
(
1
,
4
)
WRITE
BYTE
4
(
'~'
,
'{'
,
code
>>
8
,
code
&
0xff
)
NEXT
(
1
,
4
)
;
state
->
i
=
1
;
}
else
{
WRITE2
(
code
>>
8
,
code
&
0xff
)
NEXT
(
1
,
2
)
WRITE
BYTE
2
(
code
>>
8
,
code
&
0xff
)
NEXT
(
1
,
2
)
;
}
}
...
...
@@ -376,10 +371,10 @@ DECODER_RESET(hz)
DECODER
(
hz
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
==
'~'
)
{
unsigned
char
c2
=
IN2
;
unsigned
char
c2
=
IN
BYTE
2
;
REQUIRE_INBUF
(
2
)
if
(
c2
==
'~'
)
{
...
...
@@ -408,7 +403,7 @@ DECODER(hz)
}
else
{
/* GB mode */
REQUIRE_INBUF
(
2
)
TRYMAP_DEC
(
gb2312
,
writer
,
c
,
IN2
)
{
TRYMAP_DEC
(
gb2312
,
writer
,
c
,
IN
BYTE
2
)
{
NEXT_IN
(
2
);
}
else
...
...
Modules/cjkcodecs/_codecs_hk.c
Dosyayı görüntüle @
d9491269
...
...
@@ -38,35 +38,39 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5
ENCODER
(
big5hkscs
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
**
inbuf
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
INCHAR1
;
DBCHAR
code
;
Py_ssize_t
insize
;
if
(
c
<
0x80
)
{
REQUIRE_OUTBUF
(
1
)
**
outbuf
=
(
unsigned
char
)
c
;
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
}
DECODE_SURROGATE
(
c
)
insize
=
GET_INSIZE
(
c
);
insize
=
1
;
REQUIRE_OUTBUF
(
2
)
if
(
c
<
0x10000
)
{
TRYMAP_ENC
(
big5hkscs_bmp
,
code
,
c
)
{
if
(
code
==
MULTIC
)
{
if
(
inleft
>=
2
&&
Py_UCS4
c2
;
if
(
inlen
-
*
inpos
>=
2
)
c2
=
INCHAR2
;
else
c2
=
0
;
if
(
inlen
-
*
inpos
>=
2
&&
((
c
&
0xffdf
)
==
0x00ca
)
&&
((
(
*
inbuf
)[
1
]
&
0xfff7
)
==
0x0304
))
{
((
c2
&
0xfff7
)
==
0x0304
))
{
code
=
big5hkscs_pairenc_table
[
((
c
>>
4
)
|
(
(
*
inbuf
)[
1
]
>>
3
))
&
3
];
(
c2
>>
3
))
&
3
];
insize
=
2
;
}
else
if
(
inle
ft
<
2
&&
else
if
(
inle
n
-
*
inpos
<
2
&&
!
(
flags
&
MBENC_FLUSH
))
return
MBERR_TOOFEW
;
else
{
...
...
@@ -89,9 +93,9 @@ ENCODER(big5hkscs)
else
return
insize
;
OUT1
(
code
>>
8
)
OUT2
(
code
&
0xFF
)
NEXT
(
insize
,
2
)
OUT
BYTE
1
(
code
>>
8
)
OUT
BYTE
2
(
code
&
0xFF
)
NEXT
(
insize
,
2
)
;
}
return
0
;
...
...
@@ -102,7 +106,7 @@ ENCODER(big5hkscs)
DECODER
(
big5hkscs
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
Py_UCS4
decoded
;
if
(
c
<
0x80
)
{
...
...
@@ -113,20 +117,20 @@ DECODER(big5hkscs)
REQUIRE_INBUF
(
2
)
if
(
0xc6
>
c
||
c
>
0xc8
||
(
c
<
0xc7
&&
IN2
<
0xa1
))
{
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN2
)
{
if
(
0xc6
>
c
||
c
>
0xc8
||
(
c
<
0xc7
&&
IN
BYTE
2
<
0xa1
))
{
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN
BYTE
2
)
{
NEXT_IN
(
2
);
continue
;
}
}
TRYMAP_DEC_CHAR
(
big5hkscs
,
decoded
,
c
,
IN2
)
TRYMAP_DEC_CHAR
(
big5hkscs
,
decoded
,
c
,
IN
BYTE
2
)
{
int
s
=
BH2S
(
c
,
IN2
);
int
s
=
BH2S
(
c
,
IN
BYTE
2
);
const
unsigned
char
*
hintbase
;
assert
(
0x87
<=
c
&&
c
<=
0xfe
);
assert
(
0x40
<=
IN
2
&&
IN
2
<=
0xfe
);
assert
(
0x40
<=
IN
BYTE2
&&
INBYTE
2
<=
0xfe
);
if
(
BH2S
(
0x87
,
0x40
)
<=
s
&&
s
<=
BH2S
(
0xa0
,
0xfe
))
{
hintbase
=
big5hkscs_phint_0
;
...
...
@@ -154,7 +158,7 @@ DECODER(big5hkscs)
continue
;
}
switch
((
c
<<
8
)
|
IN2
)
{
switch
((
c
<<
8
)
|
IN
BYTE
2
)
{
case
0x8862
:
OUTCHAR2
(
0x00ca
,
0x0304
);
break
;
case
0x8864
:
OUTCHAR2
(
0x00ca
,
0x030c
);
break
;
case
0x88a3
:
OUTCHAR2
(
0x00ea
,
0x0304
);
break
;
...
...
Modules/cjkcodecs/_codecs_iso2022.c
Dosyayı görüntüle @
d9491269
...
...
@@ -141,13 +141,13 @@ ENCODER_INIT(iso2022)
ENCODER_RESET
(
iso2022
)
{
if
(
STATE_GETFLAG
(
F_SHIFTED
))
{
WRITE1
(
SI
)
NEXT_OUT
(
1
)
WRITE
BYTE
1
(
SI
)
NEXT_OUT
(
1
)
;
STATE_CLEARFLAG
(
F_SHIFTED
)
}
if
(
STATE_G0
!=
CHARSET_ASCII
)
{
WRITE3
(
ESC
,
'('
,
'B'
)
NEXT_OUT
(
3
)
WRITE
BYTE
3
(
ESC
,
'('
,
'B'
)
NEXT_OUT
(
3
)
;
STATE_SETG0
(
CHARSET_ASCII
)
}
return
0
;
...
...
@@ -155,30 +155,29 @@ ENCODER_RESET(iso2022)
ENCODER
(
iso2022
)
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
const
struct
iso2022_designation
*
dsg
;
DBCHAR
encoded
;
Py_UCS4
c
=
**
inbuf
;
Py_UCS4
c
=
INCHAR1
;
Py_ssize_t
insize
;
if
(
c
<
0x80
)
{
if
(
STATE_G0
!=
CHARSET_ASCII
)
{
WRITE3
(
ESC
,
'('
,
'B'
)
WRITE
BYTE
3
(
ESC
,
'('
,
'B'
)
STATE_SETG0
(
CHARSET_ASCII
)
NEXT_OUT
(
3
)
NEXT_OUT
(
3
)
;
}
if
(
STATE_GETFLAG
(
F_SHIFTED
))
{
WRITE1
(
SI
)
WRITE
BYTE
1
(
SI
)
STATE_CLEARFLAG
(
F_SHIFTED
)
NEXT_OUT
(
1
)
NEXT_OUT
(
1
)
;
}
WRITE1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
;
continue
;
}
DECODE_SURROGATE
(
c
)
insize
=
GET_INSIZE
(
c
);
insize
=
1
;
encoded
=
MAP_UNMAPPABLE
;
for
(
dsg
=
CONFIG_DESIGNATIONS
;
dsg
->
mark
;
dsg
++
)
{
...
...
@@ -187,24 +186,14 @@ ENCODER(iso2022)
if
(
encoded
==
MAP_MULTIPLE_AVAIL
)
{
/* this implementation won't work for pair
* of non-bmp characters. */
if
(
inle
ft
<
2
)
{
if
(
inle
n
-
*
inpos
<
2
)
{
if
(
!
(
flags
&
MBENC_FLUSH
))
return
MBERR_TOOFEW
;
length
=
-
1
;
}
else
length
=
2
;
#if Py_UNICODE_SIZE == 2
if
(
length
==
2
)
{
Py_UCS4
u4in
[
2
];
u4in
[
0
]
=
(
Py_UCS4
)
IN1
;
u4in
[
1
]
=
(
Py_UCS4
)
IN2
;
encoded
=
dsg
->
encoder
(
u4in
,
&
length
);
}
else
encoded
=
dsg
->
encoder
(
&
c
,
&
length
);
#else
encoded
=
dsg
->
encoder
(
&
c
,
&
length
);
#endif
if
(
encoded
!=
MAP_UNMAPPABLE
)
{
insize
=
length
;
break
;
...
...
@@ -221,47 +210,47 @@ ENCODER(iso2022)
switch
(
dsg
->
plane
)
{
case
0
:
/* G0 */
if
(
STATE_GETFLAG
(
F_SHIFTED
))
{
WRITE1
(
SI
)
WRITE
BYTE
1
(
SI
)
STATE_CLEARFLAG
(
F_SHIFTED
)
NEXT_OUT
(
1
)
NEXT_OUT
(
1
)
;
}
if
(
STATE_G0
!=
dsg
->
mark
)
{
if
(
dsg
->
width
==
1
)
{
WRITE3
(
ESC
,
'('
,
ESCMARK
(
dsg
->
mark
))
WRITE
BYTE
3
(
ESC
,
'('
,
ESCMARK
(
dsg
->
mark
))
STATE_SETG0
(
dsg
->
mark
)
NEXT_OUT
(
3
)
NEXT_OUT
(
3
)
;
}
else
if
(
dsg
->
mark
==
CHARSET_JISX0208
)
{
WRITE3
(
ESC
,
'$'
,
ESCMARK
(
dsg
->
mark
))
WRITE
BYTE
3
(
ESC
,
'$'
,
ESCMARK
(
dsg
->
mark
))
STATE_SETG0
(
dsg
->
mark
)
NEXT_OUT
(
3
)
NEXT_OUT
(
3
)
;
}
else
{
WRITE4
(
ESC
,
'$'
,
'('
,
WRITE
BYTE
4
(
ESC
,
'$'
,
'('
,
ESCMARK
(
dsg
->
mark
))
STATE_SETG0
(
dsg
->
mark
)
NEXT_OUT
(
4
)
NEXT_OUT
(
4
)
;
}
}
break
;
case
1
:
/* G1 */
if
(
STATE_G1
!=
dsg
->
mark
)
{
if
(
dsg
->
width
==
1
)
{
WRITE3
(
ESC
,
')'
,
ESCMARK
(
dsg
->
mark
))
WRITE
BYTE
3
(
ESC
,
')'
,
ESCMARK
(
dsg
->
mark
))
STATE_SETG1
(
dsg
->
mark
)
NEXT_OUT
(
3
)
NEXT_OUT
(
3
)
;
}
else
{
WRITE4
(
ESC
,
'$'
,
')'
,
WRITE
BYTE
4
(
ESC
,
'$'
,
')'
,
ESCMARK
(
dsg
->
mark
))
STATE_SETG1
(
dsg
->
mark
)
NEXT_OUT
(
4
)
NEXT_OUT
(
4
)
;
}
}
if
(
!
STATE_GETFLAG
(
F_SHIFTED
))
{
WRITE1
(
SO
)
WRITE
BYTE
1
(
SO
)
STATE_SETFLAG
(
F_SHIFTED
)
NEXT_OUT
(
1
)
NEXT_OUT
(
1
)
;
}
break
;
default
:
/* G2 and G3 is not supported: no encoding in
...
...
@@ -270,14 +259,14 @@ ENCODER(iso2022)
}
if
(
dsg
->
width
==
1
)
{
WRITE1
((
unsigned
char
)
encoded
)
NEXT_OUT
(
1
)
WRITE
BYTE
1
((
unsigned
char
)
encoded
)
NEXT_OUT
(
1
)
;
}
else
{
WRITE2
(
encoded
>>
8
,
encoded
&
0xff
)
NEXT_OUT
(
2
)
WRITE
BYTE
2
(
encoded
>>
8
,
encoded
&
0xff
)
NEXT_OUT
(
2
)
;
}
NEXT_IN
(
insize
);
NEXT_IN
CHAR
(
insize
);
}
return
0
;
...
...
@@ -323,26 +312,26 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
switch
(
esclen
)
{
case
3
:
if
(
IN2
==
'$'
)
{
charset
=
IN3
|
CHARSET_DBCS
;
if
(
IN
BYTE
2
==
'$'
)
{
charset
=
IN
BYTE
3
|
CHARSET_DBCS
;
designation
=
0
;
}
else
{
charset
=
IN3
;
if
(
IN2
==
'('
)
designation
=
0
;
else
if
(
IN2
==
')'
)
designation
=
1
;
else
if
(
CONFIG_ISSET
(
USE_G2
)
&&
IN2
==
'.'
)
charset
=
IN
BYTE
3
;
if
(
IN
BYTE
2
==
'('
)
designation
=
0
;
else
if
(
IN
BYTE
2
==
')'
)
designation
=
1
;
else
if
(
CONFIG_ISSET
(
USE_G2
)
&&
IN
BYTE
2
==
'.'
)
designation
=
2
;
else
return
3
;
}
break
;
case
4
:
if
(
IN2
!=
'$'
)
if
(
IN
BYTE
2
!=
'$'
)
return
4
;
charset
=
IN4
|
CHARSET_DBCS
;
if
(
IN3
==
'('
)
designation
=
0
;
else
if
(
IN3
==
')'
)
designation
=
1
;
charset
=
IN
BYTE
4
|
CHARSET_DBCS
;
if
(
IN
BYTE
3
==
'('
)
designation
=
0
;
else
if
(
IN
BYTE
3
==
')'
)
designation
=
1
;
else
return
4
;
break
;
case
6
:
/* designation with prefix */
...
...
@@ -395,18 +384,18 @@ iso2022processg2(const void *config, MultibyteCodec_State *state,
/* not written to use encoder, decoder functions because only few
* encodings use G2 designations in CJKCodecs */
if
(
STATE_G2
==
CHARSET_ISO8859_1
)
{
if
(
IN3
<
0x80
)
OUTCHAR
(
IN3
+
0x80
);
if
(
IN
BYTE
3
<
0x80
)
OUTCHAR
(
IN
BYTE
3
+
0x80
);
else
return
3
;
}
else
if
(
STATE_G2
==
CHARSET_ISO8859_7
)
{
ISO8859_7_DECODE
(
IN3
^
0x80
,
writer
)
ISO8859_7_DECODE
(
IN
BYTE
3
^
0x80
,
writer
)
else
return
3
;
}
else
if
(
STATE_G2
==
CHARSET_ASCII
)
{
if
(
IN3
&
0x80
)
return
3
;
else
OUTCHAR
(
IN3
);
if
(
IN
BYTE
3
&
0x80
)
return
3
;
else
OUTCHAR
(
IN
BYTE
3
);
}
else
return
MBERR_INTERNAL
;
...
...
@@ -421,7 +410,7 @@ DECODER(iso2022)
const
struct
iso2022_designation
*
dsgcache
=
NULL
;
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
Py_ssize_t
err
;
if
(
STATE_GETFLAG
(
F_ESCTHROUGHOUT
))
{
...
...
@@ -438,13 +427,13 @@ DECODER(iso2022)
switch
(
c
)
{
case
ESC
:
REQUIRE_INBUF
(
2
)
if
(
IS_ISO2022ESC
(
IN2
))
{
if
(
IS_ISO2022ESC
(
IN
BYTE
2
))
{
err
=
iso2022processesc
(
config
,
state
,
inbuf
,
&
inleft
);
if
(
err
!=
0
)
return
err
;
}
else
if
(
CONFIG_ISSET
(
USE_G2
)
&&
IN2
==
'N'
)
{
/* SS2 */
else
if
(
CONFIG_ISSET
(
USE_G2
)
&&
IN
BYTE
2
==
'N'
)
{
/* SS2 */
REQUIRE_INBUF
(
3
)
err
=
iso2022processg2
(
config
,
state
,
inbuf
,
&
inleft
,
writer
);
...
...
Modules/cjkcodecs/_codecs_jp.c
Dosyayı görüntüle @
d9491269
...
...
@@ -19,38 +19,39 @@
ENCODER
(
cp932
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
unsigned
char
c1
,
c2
;
if
(
c
<=
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
;
continue
;
}
else
if
(
c
>=
0xff61
&&
c
<=
0xff9f
)
{
WRITE1
(
c
-
0xfec0
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
(
c
-
0xfec0
)
NEXT
(
1
,
1
)
;
continue
;
}
else
if
(
c
>=
0xf8f0
&&
c
<=
0xf8f3
)
{
/* Windows compatibility */
REQUIRE_OUTBUF
(
1
)
if
(
c
==
0xf8f0
)
OUT1
(
0xa0
)
OUT
BYTE
1
(
0xa0
)
else
OUT1
(
c
-
0xfef1
+
0xfd
)
NEXT
(
1
,
1
)
OUT
BYTE
1
(
c
-
0xfef1
+
0xfd
)
NEXT
(
1
,
1
)
;
continue
;
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
cp932ext
,
code
,
c
)
{
OUT1
(
code
>>
8
)
OUT2
(
code
&
0xff
)
OUT
BYTE
1
(
code
>>
8
)
OUT
BYTE
2
(
code
&
0xff
)
}
else
TRYMAP_ENC
(
jisxcommon
,
code
,
c
)
{
if
(
code
&
0x8000
)
/* MSB set: JIS X 0212 */
...
...
@@ -61,20 +62,20 @@ ENCODER(cp932)
c2
=
code
&
0xff
;
c2
=
(((
c1
-
0x21
)
&
1
)
?
0x5e
:
0
)
+
(
c2
-
0x21
);
c1
=
(
c1
-
0x21
)
>>
1
;
OUT1
(
c1
<
0x1f
?
c1
+
0x81
:
c1
+
0xc1
)
OUT2
(
c2
<
0x3f
?
c2
+
0x40
:
c2
+
0x41
)
OUT
BYTE
1
(
c1
<
0x1f
?
c1
+
0x81
:
c1
+
0xc1
)
OUT
BYTE
2
(
c2
<
0x3f
?
c2
+
0x40
:
c2
+
0x41
)
}
else
if
(
c
>=
0xe000
&&
c
<
0xe758
)
{
/* User-defined area */
c1
=
(
Py_UCS4
)(
c
-
0xe000
)
/
188
;
c2
=
(
Py_UCS4
)(
c
-
0xe000
)
%
188
;
OUT1
(
c1
+
0xf0
)
OUT2
(
c2
<
0x3f
?
c2
+
0x40
:
c2
+
0x41
)
OUT
BYTE
1
(
c1
+
0xf0
)
OUT
BYTE
2
(
c2
<
0x3f
?
c2
+
0x40
:
c2
+
0x41
)
}
else
return
1
;
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
return
0
;
...
...
@@ -83,7 +84,7 @@ ENCODER(cp932)
DECODER
(
cp932
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
,
c2
;
unsigned
char
c
=
IN
BYTE
1
,
c2
;
if
(
c
<=
0x80
)
{
OUTCHAR
(
c
);
...
...
@@ -106,7 +107,7 @@ DECODER(cp932)
}
REQUIRE_INBUF
(
2
)
c2
=
IN2
;
c2
=
IN
BYTE
2
;
TRYMAP_DEC
(
cp932ext
,
writer
,
c
,
c2
);
else
if
((
c
>=
0x81
&&
c
<=
0x9f
)
||
(
c
>=
0xe0
&&
c
<=
0xea
)){
...
...
@@ -145,25 +146,24 @@ DECODER(cp932)
ENCODER
(
euc_jis_2004
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
Py_ssize_t
insize
;
if
(
c
<
0x80
)
{
WRITE1
(
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
(
c
)
NEXT
(
1
,
1
)
;
continue
;
}
DECODE_SURROGATE
(
c
)
insize
=
GET_INSIZE
(
c
);
insize
=
1
;
if
(
c
<=
0xFFFF
)
{
EMULATE_JISX0213_2000_ENCODE_BMP
(
code
,
c
)
else
TRYMAP_ENC
(
jisx0213_bmp
,
code
,
c
)
{
if
(
code
==
MULTIC
)
{
if
(
inle
ft
<
2
)
{
if
(
inle
n
-
*
inpos
<
2
)
{
if
(
flags
&
MBENC_FLUSH
)
{
code
=
find_pairencmap
(
(
ucs2_t
)
c
,
0
,
...
...
@@ -176,8 +176,9 @@ ENCODER(euc_jis_2004)
return
MBERR_TOOFEW
;
}
else
{
Py_UCS4
c2
=
INCHAR2
;
code
=
find_pairencmap
(
(
ucs2_t
)
c
,
(
*
inbuf
)[
1
]
,
(
ucs2_t
)
c
,
c2
,
jisx0213_pair_encmap
,
JISX0213_ENCPAIRS
);
if
(
code
==
DBCINV
)
{
...
...
@@ -195,8 +196,8 @@ ENCODER(euc_jis_2004)
else
TRYMAP_ENC
(
jisxcommon
,
code
,
c
);
else
if
(
c
>=
0xff61
&&
c
<=
0xff9f
)
{
/* JIS X 0201 half-width katakana */
WRITE2
(
0x8e
,
c
-
0xfec0
)
NEXT
(
1
,
2
)
WRITE
BYTE
2
(
0x8e
,
c
-
0xfec0
)
NEXT
(
1
,
2
)
;
continue
;
}
else
if
(
c
==
0xff3c
)
...
...
@@ -218,12 +219,12 @@ ENCODER(euc_jis_2004)
if
(
code
&
0x8000
)
{
/* Codeset 2 */
WRITE3
(
0x8f
,
code
>>
8
,
(
code
&
0xFF
)
|
0x80
)
NEXT
(
insize
,
3
)
WRITE
BYTE
3
(
0x8f
,
code
>>
8
,
(
code
&
0xFF
)
|
0x80
)
NEXT
(
insize
,
3
)
;
}
else
{
/* Codeset 1 */
WRITE2
((
code
>>
8
)
|
0x80
,
(
code
&
0xFF
)
|
0x80
)
NEXT
(
insize
,
2
)
WRITE
BYTE
2
((
code
>>
8
)
|
0x80
,
(
code
&
0xFF
)
|
0x80
)
NEXT
(
insize
,
2
)
;
}
}
...
...
@@ -233,7 +234,7 @@ ENCODER(euc_jis_2004)
DECODER
(
euc_jis_2004
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
Py_UCS4
code
;
if
(
c
<
0x80
)
{
...
...
@@ -247,7 +248,7 @@ DECODER(euc_jis_2004)
unsigned
char
c2
;
REQUIRE_INBUF
(
2
)
c2
=
IN2
;
c2
=
IN
BYTE
2
;
if
(
c2
>=
0xa1
&&
c2
<=
0xdf
)
{
OUTCHAR
(
0xfec0
+
c2
);
NEXT_IN
(
2
);
...
...
@@ -259,8 +260,8 @@ DECODER(euc_jis_2004)
unsigned
char
c2
,
c3
;
REQUIRE_INBUF
(
3
)
c2
=
IN2
^
0x80
;
c3
=
IN3
^
0x80
;
c2
=
IN
BYTE
2
^
0x80
;
c3
=
IN
BYTE
3
^
0x80
;
/* JIS X 0213 Plane 2 or JIS X 0212 (see NOTES) */
EMULATE_JISX0213_2000_DECODE_PLANE2
(
writer
,
c2
,
c3
)
...
...
@@ -279,7 +280,7 @@ DECODER(euc_jis_2004)
REQUIRE_INBUF
(
2
)
c
^=
0x80
;
c2
=
IN2
^
0x80
;
c2
=
IN
BYTE
2
^
0x80
;
/* JIS X 0213 Plane 1 */
EMULATE_JISX0213_2000_DECODE_PLANE1
(
writer
,
c
,
c2
)
...
...
@@ -312,35 +313,36 @@ DECODER(euc_jis_2004)
ENCODER
(
euc_jp
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
;
continue
;
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
TRYMAP_ENC
(
jisxcommon
,
code
,
c
);
else
if
(
c
>=
0xff61
&&
c
<=
0xff9f
)
{
/* JIS X 0201 half-width katakana */
WRITE2
(
0x8e
,
c
-
0xfec0
)
NEXT
(
1
,
2
)
WRITE
BYTE
2
(
0x8e
,
c
-
0xfec0
)
NEXT
(
1
,
2
)
;
continue
;
}
#ifndef STRICT_BUILD
else
if
(
c
==
0xff3c
)
/* FULL-WIDTH REVERSE SOLIDUS */
code
=
0x2140
;
else
if
(
c
==
0xa5
)
{
/* YEN SIGN */
WRITE1
(
0x5c
);
NEXT
(
1
,
1
)
WRITE
BYTE
1
(
0x5c
);
NEXT
(
1
,
1
)
;
continue
;
}
else
if
(
c
==
0x203e
)
{
/* OVERLINE */
WRITE1
(
0x7e
);
NEXT
(
1
,
1
)
WRITE
BYTE
1
(
0x7e
);
NEXT
(
1
,
1
)
;
continue
;
}
#endif
...
...
@@ -349,12 +351,12 @@ ENCODER(euc_jp)
if
(
code
&
0x8000
)
{
/* JIS X 0212 */
WRITE3
(
0x8f
,
code
>>
8
,
(
code
&
0xFF
)
|
0x80
)
NEXT
(
1
,
3
)
WRITE
BYTE
3
(
0x8f
,
code
>>
8
,
(
code
&
0xFF
)
|
0x80
)
NEXT
(
1
,
3
)
;
}
else
{
/* JIS X 0208 */
WRITE2
((
code
>>
8
)
|
0x80
,
(
code
&
0xFF
)
|
0x80
)
NEXT
(
1
,
2
)
WRITE
BYTE
2
((
code
>>
8
)
|
0x80
,
(
code
&
0xFF
)
|
0x80
)
NEXT
(
1
,
2
)
;
}
}
...
...
@@ -364,7 +366,7 @@ ENCODER(euc_jp)
DECODER
(
euc_jp
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
...
...
@@ -377,7 +379,7 @@ DECODER(euc_jp)
unsigned
char
c2
;
REQUIRE_INBUF
(
2
)
c2
=
IN2
;
c2
=
IN
BYTE
2
;
if
(
c2
>=
0xa1
&&
c2
<=
0xdf
)
{
OUTCHAR
(
0xfec0
+
c2
);
NEXT_IN
(
2
);
...
...
@@ -389,8 +391,8 @@ DECODER(euc_jp)
unsigned
char
c2
,
c3
;
REQUIRE_INBUF
(
3
)
c2
=
IN2
;
c3
=
IN3
;
c2
=
IN
BYTE
2
;
c3
=
IN
BYTE
3
;
/* JIS X 0212 */
TRYMAP_DEC
(
jisx0212
,
writer
,
c2
^
0x80
,
c3
^
0x80
)
{
NEXT_IN
(
3
);
...
...
@@ -402,7 +404,7 @@ DECODER(euc_jp)
unsigned
char
c2
;
REQUIRE_INBUF
(
2
)
c2
=
IN2
;
c2
=
IN
BYTE
2
;
/* JIS X 0208 */
#ifndef STRICT_BUILD
if
(
c
==
0xa1
&&
c2
==
0xc0
)
...
...
@@ -427,8 +429,8 @@ DECODER(euc_jp)
ENCODER
(
shift_jis
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
unsigned
char
c1
,
c2
;
...
...
@@ -440,14 +442,16 @@ ENCODER(shift_jis)
else
if
(
c
==
0x203e
)
code
=
0x7e
;
/* OVERLINE */
#endif
else
JISX0201_K_ENCODE
(
c
,
code
)
else
UCS4INVALID
(
c
)
else
code
=
NOCHAR
;
else
if
(
c
>
0xFFFF
)
return
1
;
else
code
=
NOCHAR
;
if
(
code
<
0x80
||
(
code
>=
0xa1
&&
code
<=
0xdf
))
{
REQUIRE_OUTBUF
(
1
)
OUT1
((
unsigned
char
)
code
)
NEXT
(
1
,
1
)
OUT
BYTE
1
((
unsigned
char
)
code
)
NEXT
(
1
,
1
)
;
continue
;
}
...
...
@@ -470,9 +474,9 @@ ENCODER(shift_jis)
c2
=
code
&
0xff
;
c2
=
(((
c1
-
0x21
)
&
1
)
?
0x5e
:
0
)
+
(
c2
-
0x21
);
c1
=
(
c1
-
0x21
)
>>
1
;
OUT1
(
c1
<
0x1f
?
c1
+
0x81
:
c1
+
0xc1
)
OUT2
(
c2
<
0x3f
?
c2
+
0x40
:
c2
+
0x41
)
NEXT
(
1
,
2
)
OUT
BYTE
1
(
c1
<
0x1f
?
c1
+
0x81
:
c1
+
0xc1
)
OUT
BYTE
2
(
c2
<
0x3f
?
c2
+
0x40
:
c2
+
0x41
)
NEXT
(
1
,
2
)
;
}
return
0
;
...
...
@@ -481,7 +485,7 @@ ENCODER(shift_jis)
DECODER
(
shift_jis
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
#ifdef STRICT_BUILD
JISX0201_R_DECODE
(
c
,
writer
)
...
...
@@ -493,7 +497,7 @@ DECODER(shift_jis)
unsigned
char
c1
,
c2
;
REQUIRE_INBUF
(
2
)
c2
=
IN2
;
c2
=
IN
BYTE
2
;
if
(
c2
<
0x40
||
(
c2
>
0x7e
&&
c2
<
0x80
)
||
c2
>
0xfc
)
return
1
;
...
...
@@ -533,30 +537,29 @@ DECODER(shift_jis)
ENCODER
(
shift_jis_2004
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
=
NOCHAR
;
int
c1
,
c2
;
Py_ssize_t
insize
;
JISX0201_ENCODE
(
c
,
code
)
else
DECODE_SURROGATE
(
c
)
if
(
code
<
0x80
||
(
code
>=
0xa1
&&
code
<=
0xdf
))
{
WRITE1
((
unsigned
char
)
code
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
code
)
NEXT
(
1
,
1
)
;
continue
;
}
REQUIRE_OUTBUF
(
2
)
insize
=
GET_INSIZE
(
c
)
;
insize
=
1
;
if
(
code
==
NOCHAR
)
{
if
(
c
<=
0xffff
)
{
EMULATE_JISX0213_2000_ENCODE_BMP
(
code
,
c
)
else
TRYMAP_ENC
(
jisx0213_bmp
,
code
,
c
)
{
if
(
code
==
MULTIC
)
{
if
(
inle
ft
<
2
)
{
if
(
inle
n
-
*
inpos
<
2
)
{
if
(
flags
&
MBENC_FLUSH
)
{
code
=
find_pairencmap
((
ucs2_t
)
c
,
0
,
...
...
@@ -569,8 +572,9 @@ ENCODER(shift_jis_2004)
return
MBERR_TOOFEW
;
}
else
{
Py_UCS4
ch2
=
INCHAR2
;
code
=
find_pairencmap
(
(
ucs2_t
)
c
,
IN
2
,
(
ucs2_t
)
c
,
ch
2
,
jisx0213_pair_encmap
,
JISX0213_ENCPAIRS
);
if
(
code
==
DBCINV
)
{
...
...
@@ -615,10 +619,10 @@ ENCODER(shift_jis_2004)
if
(
c1
&
1
)
c2
+=
0x5e
;
c1
>>=
1
;
OUT1
(
c1
+
(
c1
<
0x1f
?
0x81
:
0xc1
))
OUT2
(
c2
+
(
c2
<
0x3f
?
0x40
:
0x41
))
OUT
BYTE
1
(
c1
+
(
c1
<
0x1f
?
0x81
:
0xc1
))
OUT
BYTE
2
(
c2
+
(
c2
<
0x3f
?
0x40
:
0x41
))
NEXT
(
insize
,
2
)
NEXT
(
insize
,
2
)
;
}
return
0
;
...
...
@@ -627,7 +631,7 @@ ENCODER(shift_jis_2004)
DECODER
(
shift_jis_2004
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
JISX0201_DECODE
(
c
,
writer
)
else
if
((
c
>=
0x81
&&
c
<=
0x9f
)
||
(
c
>=
0xe0
&&
c
<=
0xfc
)){
...
...
@@ -635,7 +639,7 @@ DECODER(shift_jis_2004)
Py_UCS4
code
;
REQUIRE_INBUF
(
2
)
c2
=
IN2
;
c2
=
IN
BYTE
2
;
if
(
c2
<
0x40
||
(
c2
>
0x7e
&&
c2
<
0x80
)
||
c2
>
0xfc
)
return
1
;
...
...
Modules/cjkcodecs/_codecs_kr.c
Dosyayı görüntüle @
d9491269
...
...
@@ -33,16 +33,18 @@ static const unsigned char u2cgk_jongseong[28] = {
ENCODER
(
euc_kr
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
;
continue
;
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
cp949
,
code
,
c
);
...
...
@@ -50,9 +52,9 @@ ENCODER(euc_kr)
if
((
code
&
0x8000
)
==
0
)
{
/* KS X 1001 coded character */
OUT1
((
code
>>
8
)
|
0x80
)
OUT2
((
code
&
0xFF
)
|
0x80
)
NEXT
(
1
,
2
)
OUT
BYTE
1
((
code
>>
8
)
|
0x80
)
OUT
BYTE
2
((
code
&
0xFF
)
|
0x80
)
NEXT
(
1
,
2
)
;
}
else
{
/* Mapping is found in CP949 extension,
* but we encode it in KS X 1001:1998 Annex 3,
...
...
@@ -61,23 +63,23 @@ ENCODER(euc_kr)
REQUIRE_OUTBUF
(
8
)
/* syllable composition precedence */
OUT1
(
EUCKR_JAMO_FIRSTBYTE
)
OUT2
(
EUCKR_JAMO_FILLER
)
OUT
BYTE
1
(
EUCKR_JAMO_FIRSTBYTE
)
OUT
BYTE
2
(
EUCKR_JAMO_FILLER
)
/* All codepoints in CP949 extension are in unicode
* Hangul Syllable area. */
assert
(
0xac00
<=
c
&&
c
<=
0xd7a3
);
c
-=
0xac00
;
OUT3
(
EUCKR_JAMO_FIRSTBYTE
)
OUT4
(
u2cgk_choseong
[
c
/
588
])
NEXT_OUT
(
4
)
OUT
BYTE
3
(
EUCKR_JAMO_FIRSTBYTE
)
OUT
BYTE
4
(
u2cgk_choseong
[
c
/
588
])
NEXT_OUT
(
4
)
;
OUT1
(
EUCKR_JAMO_FIRSTBYTE
)
OUT2
(
u2cgk_jungseong
[(
c
/
28
)
%
21
])
OUT3
(
EUCKR_JAMO_FIRSTBYTE
)
OUT4
(
u2cgk_jongseong
[
c
%
28
])
NEXT
(
1
,
4
)
OUT
BYTE
1
(
EUCKR_JAMO_FIRSTBYTE
)
OUT
BYTE
2
(
u2cgk_jungseong
[(
c
/
28
)
%
21
])
OUT
BYTE
3
(
EUCKR_JAMO_FIRSTBYTE
)
OUT
BYTE
4
(
u2cgk_jongseong
[
c
%
28
])
NEXT
(
1
,
4
)
;
}
}
...
...
@@ -102,7 +104,7 @@ static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
DECODER
(
euc_kr
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
...
...
@@ -113,7 +115,7 @@ DECODER(euc_kr)
REQUIRE_INBUF
(
2
)
if
(
c
==
EUCKR_JAMO_FIRSTBYTE
&&
IN2
==
EUCKR_JAMO_FILLER
)
{
IN
BYTE
2
==
EUCKR_JAMO_FILLER
)
{
/* KS X 1001:1998 Annex 3 make-up sequence */
DBCHAR
cho
,
jung
,
jong
;
...
...
@@ -146,7 +148,7 @@ DECODER(euc_kr)
OUTCHAR
(
0xac00
+
cho
*
588
+
jung
*
28
+
jong
);
NEXT_IN
(
8
);
}
else
TRYMAP_DEC
(
ksx1001
,
writer
,
c
^
0x80
,
IN2
^
0x80
)
{
else
TRYMAP_DEC
(
ksx1001
,
writer
,
c
^
0x80
,
IN
BYTE
2
^
0x80
)
{
NEXT_IN
(
2
);
}
else
...
...
@@ -164,27 +166,29 @@ DECODER(euc_kr)
ENCODER
(
cp949
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
;
continue
;
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
cp949
,
code
,
c
);
else
return
1
;
OUT1
((
code
>>
8
)
|
0x80
)
OUT
BYTE
1
((
code
>>
8
)
|
0x80
)
if
(
code
&
0x8000
)
OUT2
(
code
&
0xFF
)
/* MSB set: CP949 */
OUT
BYTE
2
(
code
&
0xFF
)
/* MSB set: CP949 */
else
OUT2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: ks x 1001 */
NEXT
(
1
,
2
)
OUT
BYTE
2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: ks x 1001 */
NEXT
(
1
,
2
)
;
}
return
0
;
...
...
@@ -193,7 +197,7 @@ ENCODER(cp949)
DECODER
(
cp949
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
...
...
@@ -202,8 +206,8 @@ DECODER(cp949)
}
REQUIRE_INBUF
(
2
)
TRYMAP_DEC
(
ksx1001
,
writer
,
c
^
0x80
,
IN2
^
0x80
);
else
TRYMAP_DEC
(
cp949ext
,
writer
,
c
,
IN2
);
TRYMAP_DEC
(
ksx1001
,
writer
,
c
^
0x80
,
IN
BYTE
2
^
0x80
);
else
TRYMAP_DEC
(
cp949ext
,
writer
,
c
,
IN
BYTE
2
);
else
return
1
;
NEXT_IN
(
2
);
...
...
@@ -246,16 +250,18 @@ static const DBCHAR u2johabjamo[] = {
ENCODER
(
johab
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
;
continue
;
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
...
...
@@ -281,9 +287,9 @@ ENCODER(johab)
t1
=
(
c1
<
0x4a
?
(
c1
-
0x21
+
0x1b2
)
:
(
c1
-
0x21
+
0x197
));
t2
=
((
t1
&
1
)
?
0x5e
:
0
)
+
(
c2
-
0x21
);
OUT1
(
t1
>>
1
)
OUT2
(
t2
<
0x4e
?
t2
+
0x31
:
t2
+
0x43
)
NEXT
(
1
,
2
)
OUT
BYTE
1
(
t1
>>
1
)
OUT
BYTE
2
(
t2
<
0x4e
?
t2
+
0x31
:
t2
+
0x43
)
NEXT
(
1
,
2
)
;
continue
;
}
else
...
...
@@ -292,9 +298,9 @@ ENCODER(johab)
else
return
1
;
OUT1
(
code
>>
8
)
OUT2
(
code
&
0xff
)
NEXT
(
1
,
2
)
OUT
BYTE
1
(
code
>>
8
)
OUT
BYTE
2
(
code
&
0xff
)
NEXT
(
1
,
2
)
;
}
return
0
;
...
...
@@ -344,7 +350,7 @@ static const unsigned char johabjamo_jongseong[32] = {
DECODER
(
johab
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
,
c2
;
unsigned
char
c
=
IN
BYTE
1
,
c2
;
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
...
...
@@ -353,7 +359,7 @@ DECODER(johab)
}
REQUIRE_INBUF
(
2
)
c2
=
IN2
;
c2
=
IN
BYTE
2
;
if
(
c
<
0xd8
)
{
/* johab hangul */
...
...
Modules/cjkcodecs/_codecs_tw.c
Dosyayı görüntüle @
d9491269
...
...
@@ -13,26 +13,28 @@
ENCODER
(
big5
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
**
inbuf
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
INCHAR1
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
REQUIRE_OUTBUF
(
1
)
**
outbuf
=
(
unsigned
char
)
c
;
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
big5
,
code
,
c
);
else
return
1
;
OUT1
(
code
>>
8
)
OUT2
(
code
&
0xFF
)
NEXT
(
1
,
2
)
OUT
BYTE
1
(
code
>>
8
)
OUT
BYTE
2
(
code
&
0xFF
)
NEXT
(
1
,
2
)
;
}
return
0
;
...
...
@@ -41,7 +43,7 @@ ENCODER(big5)
DECODER
(
big5
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
...
...
@@ -50,7 +52,7 @@ DECODER(big5)
}
REQUIRE_INBUF
(
2
)
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN2
)
{
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN
BYTE
2
)
{
NEXT_IN
(
2
);
}
else
return
1
;
...
...
@@ -66,25 +68,27 @@ DECODER(big5)
ENCODER
(
cp950
)
{
while
(
inleft
>
0
)
{
Py_UCS4
c
=
IN1
;
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
;
continue
;
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
cp950ext
,
code
,
c
);
else
TRYMAP_ENC
(
big5
,
code
,
c
);
else
return
1
;
OUT1
(
code
>>
8
)
OUT2
(
code
&
0xFF
)
NEXT
(
1
,
2
)
OUT
BYTE
1
(
code
>>
8
)
OUT
BYTE
2
(
code
&
0xFF
)
NEXT
(
1
,
2
)
;
}
return
0
;
...
...
@@ -93,7 +97,7 @@ ENCODER(cp950)
DECODER
(
cp950
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
...
...
@@ -103,8 +107,8 @@ DECODER(cp950)
REQUIRE_INBUF
(
2
)
TRYMAP_DEC
(
cp950ext
,
writer
,
c
,
IN2
);
else
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN2
);
TRYMAP_DEC
(
cp950ext
,
writer
,
c
,
IN
BYTE
2
);
else
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN
BYTE
2
);
else
return
1
;
NEXT_IN
(
2
);
...
...
Modules/cjkcodecs/cjkcodecs.h
Dosyayı görüntüle @
d9491269
...
...
@@ -72,7 +72,8 @@ static const struct dbcs_map *mapping_list;
#define ENCODER(encoding) \
static Py_ssize_t encoding##_encode( \
MultibyteCodec_State *state, const void *config, \
const Py_UNICODE **inbuf, Py_ssize_t inleft, \
int kind, void *data, \
Py_ssize_t *inpos, Py_ssize_t inlen, \
unsigned char **outbuf, Py_ssize_t outleft, int flags)
#define ENCODER_RESET(encoding) \
static Py_ssize_t encoding##_encode_reset( \
...
...
@@ -91,25 +92,25 @@ static const struct dbcs_map *mapping_list;
static Py_ssize_t encoding##_decode_reset( \
MultibyteCodec_State *state, const void *config)
#if Py_UNICODE_SIZE == 4
#define UCS4INVALID(code) \
if ((code) > 0xFFFF) \
return 1;
#else
#define UCS4INVALID(code) \
if (0) ;
#endif
#define NEXT_IN(i) \
do { \
(*inbuf) += (i); \
(inleft) -= (i); \
} while (0)
#define NEXT_INCHAR(i) \
do { \
(*inpos) += (i); \
} while (0)
#define NEXT_OUT(o) \
(*outbuf) += (o); \
(outleft) -= (o);
do { \
(*outbuf) += (o); \
(outleft) -= (o); \
} while (0)
#define NEXT(i, o) \
NEXT_IN(i); NEXT_OUT(o)
do { \
NEXT_INCHAR(i); \
NEXT_OUT(o); \
} while (0)
#define REQUIRE_INBUF(n) \
if (inleft < (n)) \
...
...
@@ -118,10 +119,13 @@ static const struct dbcs_map *mapping_list;
if (outleft < (n)) \
return MBERR_TOOSMALL;
#define IN1 ((*inbuf)[0])
#define IN2 ((*inbuf)[1])
#define IN3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3])
#define INBYTE1 ((*inbuf)[0])
#define INBYTE2 ((*inbuf)[1])
#define INBYTE3 ((*inbuf)[2])
#define INBYTE4 ((*inbuf)[3])
#define INCHAR1 PyUnicode_READ(kind, data, *inpos)
#define INCHAR2 PyUnicode_READ(kind, data, *inpos + 1)
#define OUTCHAR(c) \
do { \
...
...
@@ -140,24 +144,24 @@ static const struct dbcs_map *mapping_list;
writer->pos += 2; \
} while (0)
#define OUT1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c);
#define OUT4(c) ((*outbuf)[3]) = (c);
#define OUT
BYTE
1(c) ((*outbuf)[0]) = (c);
#define OUT
BYTE
2(c) ((*outbuf)[1]) = (c);
#define OUT
BYTE
3(c) ((*outbuf)[2]) = (c);
#define OUT
BYTE
4(c) ((*outbuf)[3]) = (c);
#define WRITE1(c1) \
#define WRITE
BYTE
1(c1) \
REQUIRE_OUTBUF(1) \
(*outbuf)[0] = (c1);
#define WRITE2(c1, c2) \
#define WRITE
BYTE
2(c1, c2) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2);
#define WRITE3(c1, c2, c3) \
#define WRITE
BYTE
3(c1, c2, c3) \
REQUIRE_OUTBUF(3) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3);
#define WRITE4(c1, c2, c3, c4) \
#define WRITE
BYTE
4(c1, c2, c3, c4) \
REQUIRE_OUTBUF(4) \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
...
...
@@ -209,20 +213,6 @@ _TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c)
#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \
if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2)
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
if (Py_UNICODE_IS_HIGH_SURROGATE(c)) { \
REQUIRE_INBUF(2) \
if (Py_UNICODE_IS_LOW_SURROGATE(IN2)) { \
c = Py_UNICODE_JOIN_SURROGATES(c, IN2); \
} \
}
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
#else
#define DECODE_SURROGATE(c) {;}
#define GET_INSIZE(c) 1
#endif
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
...
...
Modules/cjkcodecs/multibytecodec.c
Dosyayı görüntüle @
d9491269
...
...
@@ -10,7 +10,8 @@
#include "multibytecodec.h"
typedef
struct
{
const
Py_UNICODE
*
inbuf
,
*
inbuf_top
,
*
inbuf_end
;
PyObject
*
inobj
;
Py_ssize_t
inpos
,
inlen
;
unsigned
char
*
outbuf
,
*
outbuf_end
;
PyObject
*
excobj
,
*
outobj
;
}
MultibyteEncodeBuffer
;
...
...
@@ -45,7 +46,7 @@ static char *incrementalkwarglist[] = {"input", "final", NULL};
static
char
*
streamkwarglist
[]
=
{
"stream"
,
"errors"
,
NULL
};
static
PyObject
*
multibytecodec_encode
(
MultibyteCodec
*
,
MultibyteCodec_State
*
,
const
Py_UNICODE
**
,
Py_ssize_t
,
MultibyteCodec_State
*
,
PyObject
*
,
Py_ssize_t
*
,
PyObject
*
,
int
);
#define MBENC_RESET MBENC_MAX<<1
/* reset after an encoding session */
...
...
@@ -224,7 +225,7 @@ multibytecodec_encerror(MultibyteCodec *codec,
return
0
;
/* retry it */
case
MBERR_TOOFEW
:
reason
=
"incomplete multibyte sequence"
;
esize
=
(
Py_ssize_t
)
(
buf
->
inbuf_end
-
buf
->
inbuf
)
;
esize
=
(
Py_ssize_t
)
buf
->
inpos
;
break
;
case
MBERR_INTERNAL
:
PyErr_SetString
(
PyExc_RuntimeError
,
...
...
@@ -238,14 +239,24 @@ multibytecodec_encerror(MultibyteCodec *codec,
}
if
(
errors
==
ERROR_REPLACE
)
{
const
Py_UNICODE
replchar
=
'?'
,
*
inbuf
=
&
replchar
;
PyObject
*
replchar
;
Py_ssize_t
r
;
Py_ssize_t
inpos
;
int
kind
;
void
*
data
;
replchar
=
PyUnicode_FromOrdinal
(
'?'
);
if
(
replchar
==
NULL
)
goto
errorexit
;
kind
=
PyUnicode_KIND
(
replchar
);
data
=
PyUnicode_DATA
(
replchar
);
inpos
=
0
;
for
(;;)
{
Py_ssize_t
outleft
;
Py_ssize_t
outleft
=
(
Py_ssize_t
)(
buf
->
outbuf_end
-
buf
->
outbuf
)
;
outleft
=
(
Py_ssize_t
)(
buf
->
outbuf_end
-
buf
->
outbuf
);
r
=
codec
->
encode
(
state
,
codec
->
config
,
&
inbuf
,
1
,
r
=
codec
->
encode
(
state
,
codec
->
config
,
kind
,
data
,
&
inpos
,
1
,
&
buf
->
outbuf
,
outleft
,
0
);
if
(
r
==
MBERR_TOOSMALL
)
{
REQUIRE_ENCODEBUFFER
(
buf
,
-
1
);
...
...
@@ -255,25 +266,27 @@ multibytecodec_encerror(MultibyteCodec *codec,
break
;
}
Py_DECREF
(
replchar
);
if
(
r
!=
0
)
{
REQUIRE_ENCODEBUFFER
(
buf
,
1
);
*
buf
->
outbuf
++
=
'?'
;
}
}
if
(
errors
==
ERROR_IGNORE
||
errors
==
ERROR_REPLACE
)
{
buf
->
in
buf
+=
esize
;
buf
->
in
pos
+=
esize
;
return
0
;
}
start
=
(
Py_ssize_t
)
(
buf
->
inbuf
-
buf
->
inbuf_top
)
;
start
=
(
Py_ssize_t
)
buf
->
inpos
;
end
=
start
+
esize
;
/* use cached exception object if available */
if
(
buf
->
excobj
==
NULL
)
{
buf
->
excobj
=
PyUnicodeEncodeError_Create
(
codec
->
encoding
,
buf
->
inbuf_top
,
buf
->
inbuf_end
-
buf
->
inbuf_top
,
start
,
end
,
reason
);
buf
->
excobj
=
PyObject_CallFunction
(
PyExc_UnicodeEncodeError
,
"sOnns"
,
codec
->
encoding
,
buf
->
inobj
,
start
,
end
,
reason
);
if
(
buf
->
excobj
==
NULL
)
goto
errorexit
;
}
...
...
@@ -302,10 +315,10 @@ multibytecodec_encerror(MultibyteCodec *codec,
}
if
(
PyUnicode_Check
(
tobj
))
{
const
Py_UNICODE
*
uraw
=
PyUnicode_AS_UNICODE
(
tobj
)
;
Py_ssize_t
inpos
;
retstr
=
multibytecodec_encode
(
codec
,
state
,
&
uraw
,
PyUnicode_GET_SIZE
(
tobj
)
,
ERROR_STRICT
,
retstr
=
multibytecodec_encode
(
codec
,
state
,
tobj
,
&
inpos
,
ERROR_STRICT
,
MBENC_FLUSH
);
if
(
retstr
==
NULL
)
goto
errorexit
;
...
...
@@ -324,15 +337,15 @@ multibytecodec_encerror(MultibyteCodec *codec,
newpos
=
PyLong_AsSsize_t
(
PyTuple_GET_ITEM
(
retobj
,
1
));
if
(
newpos
<
0
&&
!
PyErr_Occurred
())
newpos
+=
(
Py_ssize_t
)
(
buf
->
inbuf_end
-
buf
->
inbuf_top
)
;
if
(
newpos
<
0
||
buf
->
inbuf_top
+
newpos
>
buf
->
inbuf_end
)
{
newpos
+=
(
Py_ssize_t
)
buf
->
inlen
;
if
(
newpos
<
0
||
newpos
>
buf
->
inlen
)
{
PyErr_Clear
();
PyErr_Format
(
PyExc_IndexError
,
"position %zd from error handler out of bounds"
,
newpos
);
goto
errorexit
;
}
buf
->
in
buf
=
buf
->
inbuf_top
+
newpos
;
buf
->
in
pos
=
newpos
;
Py_DECREF
(
retobj
);
Py_DECREF
(
retstr
);
...
...
@@ -449,19 +462,29 @@ errorexit:
static
PyObject
*
multibytecodec_encode
(
MultibyteCodec
*
codec
,
MultibyteCodec_State
*
state
,
const
Py_UNICODE
**
data
,
Py_ssize_t
datalen
,
PyObject
*
text
,
Py_ssize_t
*
inpos_t
,
PyObject
*
errors
,
int
flags
)
{
MultibyteEncodeBuffer
buf
;
Py_ssize_t
finalsize
,
r
=
0
;
Py_ssize_t
datalen
;
int
kind
;
void
*
data
;
if
(
PyUnicode_READY
(
text
)
<
0
)
return
NULL
;
datalen
=
PyUnicode_GET_LENGTH
(
text
);
if
(
datalen
==
0
&&
!
(
flags
&
MBENC_RESET
))
return
PyBytes_FromStringAndSize
(
NULL
,
0
);
buf
.
excobj
=
NULL
;
buf
.
outobj
=
NULL
;
buf
.
inbuf
=
buf
.
inbuf_top
=
*
data
;
buf
.
inbuf_end
=
buf
.
inbuf_top
+
datalen
;
buf
.
inobj
=
text
;
/* borrowed reference */
buf
.
inpos
=
0
;
buf
.
inlen
=
datalen
;
kind
=
PyUnicode_KIND
(
buf
.
inobj
);
data
=
PyUnicode_DATA
(
buf
.
inobj
);
if
(
datalen
>
(
PY_SSIZE_T_MAX
-
16
)
/
2
)
{
PyErr_NoMemory
();
...
...
@@ -474,14 +497,14 @@ multibytecodec_encode(MultibyteCodec *codec,
buf
.
outbuf
=
(
unsigned
char
*
)
PyBytes_AS_STRING
(
buf
.
outobj
);
buf
.
outbuf_end
=
buf
.
outbuf
+
PyBytes_GET_SIZE
(
buf
.
outobj
);
while
(
buf
.
inbuf
<
buf
.
inbuf_end
)
{
Py_ssize_t
inleft
,
outleft
;
while
(
buf
.
inpos
<
buf
.
inlen
)
{
/* we don't reuse inleft and outleft here.
* error callbacks can relocate the cursor anywhere on buffer*/
inleft
=
(
Py_ssize_t
)(
buf
.
inbuf_end
-
buf
.
inbuf
);
outleft
=
(
Py_ssize_t
)(
buf
.
outbuf_end
-
buf
.
outbuf
);
r
=
codec
->
encode
(
state
,
codec
->
config
,
&
buf
.
inbuf
,
inleft
,
Py_ssize_t
outleft
=
(
Py_ssize_t
)(
buf
.
outbuf_end
-
buf
.
outbuf
);
r
=
codec
->
encode
(
state
,
codec
->
config
,
kind
,
data
,
&
buf
.
inpos
,
buf
.
inlen
,
&
buf
.
outbuf
,
outleft
,
flags
);
if
((
r
==
0
)
||
(
r
==
MBERR_TOOFEW
&&
!
(
flags
&
MBENC_FLUSH
)))
break
;
...
...
@@ -512,7 +535,8 @@ multibytecodec_encode(MultibyteCodec *codec,
if
(
_PyBytes_Resize
(
&
buf
.
outobj
,
finalsize
)
==
-
1
)
goto
errorexit
;
*
data
=
buf
.
inbuf
;
if
(
inpos_t
)
*
inpos_t
=
buf
.
inpos
;
Py_XDECREF
(
buf
.
excobj
);
return
buf
.
outobj
;
...
...
@@ -527,7 +551,6 @@ MultibyteCodec_Encode(MultibyteCodecObject *self,
PyObject
*
args
,
PyObject
*
kwargs
)
{
MultibyteCodec_State
state
;
Py_UNICODE
*
data
;
PyObject
*
errorcb
,
*
r
,
*
arg
,
*
ucvt
;
const
char
*
errors
=
NULL
;
Py_ssize_t
datalen
;
...
...
@@ -550,11 +573,11 @@ MultibyteCodec_Encode(MultibyteCodecObject *self,
}
}
data
=
PyUnicode_AsUnicodeAndSize
(
arg
,
&
datalen
);
if
(
data
==
NULL
)
{
if
(
PyUnicode_READY
(
arg
)
<
0
)
{
Py_XDECREF
(
ucvt
);
return
NULL
;
}
datalen
=
PyUnicode_GET_LENGTH
(
arg
);
errorcb
=
internal_error_callback
(
errors
);
if
(
errorcb
==
NULL
)
{
...
...
@@ -566,7 +589,7 @@ MultibyteCodec_Encode(MultibyteCodecObject *self,
self
->
codec
->
encinit
(
&
state
,
self
->
codec
->
config
)
!=
0
)
goto
errorexit
;
r
=
multibytecodec_encode
(
self
->
codec
,
&
state
,
(
const
Py_UNICODE
**
)
&
data
,
datalen
,
errorcb
,
arg
,
NULL
,
errorcb
,
MBENC_FLUSH
|
MBENC_RESET
);
if
(
r
==
NULL
)
goto
errorexit
;
...
...
@@ -712,8 +735,9 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
PyObject
*
unistr
,
int
final
)
{
PyObject
*
ucvt
,
*
r
=
NULL
;
Py_UNICODE
*
inbuf
,
*
inbuf_end
,
*
inbuf_tmp
=
NULL
;
Py_ssize_t
datalen
,
origpending
;
PyObject
*
inbuf
=
NULL
;
Py_ssize_t
inpos
,
datalen
;
PyObject
*
origpending
=
NULL
;
wchar_t
*
data
;
if
(
PyUnicode_Check
(
unistr
))
...
...
@@ -733,66 +757,64 @@ encoder_encode_stateful(MultibyteStatefulEncoderContext *ctx,
data
=
PyUnicode_AsUnicodeAndSize
(
unistr
,
&
datalen
);
if
(
data
==
NULL
)
goto
errorexit
;
origpending
=
ctx
->
pendingsize
;
if
(
origpending
>
0
)
{
if
(
datalen
>
PY_SSIZE_T_MAX
-
ctx
->
pendingsize
)
{
PyErr_NoMemory
();
/* inbuf_tmp == NULL */
goto
errorexit
;
}
inbuf_tmp
=
PyMem_New
(
Py_UNICODE
,
datalen
+
ctx
->
pendingsize
);
if
(
ctx
->
pending
)
{
PyObject
*
inbuf_tmp
;
Py_INCREF
(
ctx
->
pending
);
origpending
=
ctx
->
pending
;
Py_INCREF
(
ctx
->
pending
);
inbuf_tmp
=
ctx
->
pending
;
PyUnicode_Append
(
&
inbuf_tmp
,
unistr
);
if
(
inbuf_tmp
==
NULL
)
goto
errorexit
;
memcpy
(
inbuf_tmp
,
ctx
->
pending
,
Py_UNICODE_SIZE
*
ctx
->
pendingsize
);
memcpy
(
inbuf_tmp
+
ctx
->
pendingsize
,
PyUnicode_AS_UNICODE
(
unistr
),
Py_UNICODE_SIZE
*
datalen
);
datalen
+=
ctx
->
pendingsize
;
ctx
->
pendingsize
=
0
;
Py_CLEAR
(
ctx
->
pending
);
inbuf
=
inbuf_tmp
;
}
else
inbuf
=
(
Py_UNICODE
*
)
PyUnicode_AS_UNICODE
(
unistr
)
;
else
{
origpending
=
NULL
;
inbuf_end
=
inbuf
+
datalen
;
Py_INCREF
(
unistr
);
inbuf
=
unistr
;
}
if
(
PyUnicode_READY
(
inbuf
)
<
0
)
goto
errorexit
;
inpos
=
0
;
datalen
=
PyUnicode_GET_LENGTH
(
inbuf
);
r
=
multibytecodec_encode
(
ctx
->
codec
,
&
ctx
->
state
,
(
const
Py_UNICODE
**
)
&
inbuf
,
datalen
,
ctx
->
errors
,
final
?
MBENC_FLUSH
|
MBENC_RESET
:
0
);
inbuf
,
&
inpos
,
ctx
->
errors
,
final
?
MBENC_FLUSH
|
MBENC_RESET
:
0
);
if
(
r
==
NULL
)
{
/* recover the original pending buffer */
if
(
origpending
>
0
)
memcpy
(
ctx
->
pending
,
inbuf_tmp
,
Py_UNICODE_SIZE
*
origpending
);
ctx
->
pendingsize
=
origpending
;
Py_CLEAR
(
ctx
->
pending
);
ctx
->
pending
=
origpending
;
origpending
=
NULL
;
goto
errorexit
;
}
if
(
inbuf
<
inbuf_end
)
{
ctx
->
pendingsize
=
(
Py_ssize_t
)(
inbuf_end
-
inbuf
);
if
(
ctx
->
pendingsize
>
MAXENCPENDING
)
{
if
(
inpos
<
datalen
)
{
if
(
datalen
-
inpos
>
MAXENCPENDING
)
{
/* normal codecs can't reach here */
ctx
->
pendingsize
=
0
;
PyErr_SetString
(
PyExc_UnicodeError
,
"pending buffer overflow"
);
goto
errorexit
;
}
memcpy
(
ctx
->
pending
,
inbuf
,
ctx
->
pendingsize
*
Py_UNICODE_SIZE
);
ctx
->
pending
=
PyUnicode_Substring
(
inbuf
,
inpos
,
datalen
);
if
(
ctx
->
pending
==
NULL
)
{
/* normal codecs can't reach here */
goto
errorexit
;
}
}
if
(
inbuf_tmp
!=
NULL
)
PyMem_Del
(
inbuf_tmp
);
Py_XDECREF
(
ucvt
);
return
r
;
errorexit:
if
(
inbuf_tmp
!=
NULL
)
PyMem_Del
(
inbuf_tmp
);
Py_XDECREF
(
r
);
Py_XDECREF
(
ucvt
);
Py_XDECREF
(
origpending
);
return
NULL
;
}
...
...
@@ -876,7 +898,7 @@ mbiencoder_reset(MultibyteIncrementalEncoderObject *self)
if
(
r
!=
0
)
return
NULL
;
}
self
->
pendingsize
=
0
;
Py_CLEAR
(
self
->
pending
)
;
Py_RETURN_NONE
;
}
...
...
@@ -912,7 +934,7 @@ mbiencoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
}
self
->
codec
=
((
MultibyteCodecObject
*
)
codec
)
->
codec
;
self
->
pending
size
=
0
;
self
->
pending
=
NULL
;
self
->
errors
=
internal_error_callback
(
errors
);
if
(
self
->
errors
==
NULL
)
goto
errorexit
;
...
...
@@ -1598,18 +1620,16 @@ mbstreamwriter_writelines(MultibyteStreamWriterObject *self, PyObject *lines)
static
PyObject
*
mbstreamwriter_reset
(
MultibyteStreamWriterObject
*
self
)
{
const
Py_UNICODE
*
pending
;
PyObject
*
pwrt
;
pending
=
self
->
pending
;
pwrt
=
multibytecodec_encode
(
self
->
codec
,
&
self
->
state
,
&
pending
,
self
->
pendingsize
,
self
->
errors
,
self
->
pending
,
NULL
,
self
->
errors
,
MBENC_FLUSH
|
MBENC_RESET
);
/* some pending buffer can be truncated when UnicodeEncodeError is
* raised on 'strict' mode. but, 'reset' method is designed to
* reset the pending buffer or states so failed string sequence
* ought to be missed */
self
->
pendingsize
=
0
;
Py_CLEAR
(
self
->
pending
)
;
if
(
pwrt
==
NULL
)
return
NULL
;
...
...
@@ -1655,7 +1675,7 @@ mbstreamwriter_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
self
->
codec
=
((
MultibyteCodecObject
*
)
codec
)
->
codec
;
self
->
stream
=
stream
;
Py_INCREF
(
stream
);
self
->
pending
size
=
0
;
self
->
pending
=
NULL
;
self
->
errors
=
internal_error_callback
(
errors
);
if
(
self
->
errors
==
NULL
)
goto
errorexit
;
...
...
Modules/cjkcodecs/multibytecodec.h
Dosyayı görüntüle @
d9491269
...
...
@@ -27,7 +27,8 @@ typedef union {
typedef
int
(
*
mbcodec_init
)(
const
void
*
config
);
typedef
Py_ssize_t
(
*
mbencode_func
)(
MultibyteCodec_State
*
state
,
const
void
*
config
,
const
Py_UNICODE
**
inbuf
,
Py_ssize_t
inleft
,
int
kind
,
void
*
data
,
Py_ssize_t
*
inpos
,
Py_ssize_t
inlen
,
unsigned
char
**
outbuf
,
Py_ssize_t
outleft
,
int
flags
);
typedef
int
(
*
mbencodeinit_func
)(
MultibyteCodec_State
*
state
,
...
...
@@ -75,8 +76,7 @@ typedef struct {
#define MAXENCPENDING 2
#define _MultibyteStatefulEncoder_HEAD \
_MultibyteStatefulCodec_HEAD \
Py_UNICODE pending[MAXENCPENDING]; \
Py_ssize_t pendingsize;
PyObject *pending;
typedef
struct
{
_MultibyteStatefulEncoder_HEAD
}
MultibyteStatefulEncoderContext
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment