Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
d9491269
Kaydet (Commit)
d9491269
authored
Nis 14, 2013
tarafından
Victor Stinner
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue #17693: CJK encoders now use the new Unicode API (PEP 393)
üst
71557596
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
248 additions
and
260 deletions
+248
-260
_codecs_cn.c
Modules/cjkcodecs/_codecs_cn.c
+65
-70
_codecs_hk.c
Modules/cjkcodecs/_codecs_hk.c
+24
-20
_codecs_iso2022.c
Modules/cjkcodecs/_codecs_iso2022.c
+51
-62
_codecs_jp.c
Modules/cjkcodecs/_codecs_jp.c
+0
-0
_codecs_kr.c
Modules/cjkcodecs/_codecs_kr.c
+52
-46
_codecs_tw.c
Modules/cjkcodecs/_codecs_tw.c
+24
-20
cjkcodecs.h
Modules/cjkcodecs/cjkcodecs.h
+29
-39
multibytecodec.c
Modules/cjkcodecs/multibytecodec.c
+0
-0
multibytecodec.h
Modules/cjkcodecs/multibytecodec.h
+3
-3
No files found.
Modules/cjkcodecs/_codecs_cn.c
Dosyayı görüntüle @
d9491269
...
@@ -42,16 +42,18 @@
...
@@ -42,16 +42,18 @@
ENCODER
(
gb2312
)
ENCODER
(
gb2312
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN1
;
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
continue
;
}
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
gbcommon
,
code
,
c
);
TRYMAP_ENC
(
gbcommon
,
code
,
c
);
...
@@ -60,9 +62,9 @@ ENCODER(gb2312)
...
@@ -60,9 +62,9 @@ ENCODER(gb2312)
if
(
code
&
0x8000
)
/* MSB set: GBK */
if
(
code
&
0x8000
)
/* MSB set: GBK */
return
1
;
return
1
;
OUT1
((
code
>>
8
)
|
0x80
)
OUT
BYTE
1
((
code
>>
8
)
|
0x80
)
OUT2
((
code
&
0xFF
)
|
0x80
)
OUT
BYTE
2
((
code
&
0xFF
)
|
0x80
)
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
}
return
0
;
return
0
;
...
@@ -80,7 +82,7 @@ DECODER(gb2312)
...
@@ -80,7 +82,7 @@ DECODER(gb2312)
}
}
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
TRYMAP_DEC
(
gb2312
,
writer
,
c
^
0x80
,
IN2
^
0x80
)
{
TRYMAP_DEC
(
gb2312
,
writer
,
c
^
0x80
,
IN
BYTE
2
^
0x80
)
{
NEXT_IN
(
2
);
NEXT_IN
(
2
);
}
}
else
return
1
;
else
return
1
;
...
@@ -96,28 +98,30 @@ DECODER(gb2312)
...
@@ -96,28 +98,30 @@ DECODER(gb2312)
ENCODER
(
gbk
)
ENCODER
(
gbk
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN1
;
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
continue
;
}
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
REQUIRE_OUTBUF
(
2
)
GBK_ENCODE
(
c
,
code
)
GBK_ENCODE
(
c
,
code
)
else
return
1
;
else
return
1
;
OUT1
((
code
>>
8
)
|
0x80
)
OUT
BYTE
1
((
code
>>
8
)
|
0x80
)
if
(
code
&
0x8000
)
if
(
code
&
0x8000
)
OUT2
((
code
&
0xFF
))
/* MSB set: GBK */
OUT
BYTE
2
((
code
&
0xFF
))
/* MSB set: GBK */
else
else
OUT2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: GB2312 */
OUT
BYTE
2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: GB2312 */
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
}
return
0
;
return
0
;
...
@@ -126,7 +130,7 @@ ENCODER(gbk)
...
@@ -126,7 +130,7 @@ ENCODER(gbk)
DECODER
(
gbk
)
DECODER
(
gbk
)
{
{
while
(
inleft
>
0
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
OUTCHAR
(
c
);
...
@@ -136,7 +140,7 @@ DECODER(gbk)
...
@@ -136,7 +140,7 @@ DECODER(gbk)
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
GBK_DECODE
(
c
,
IN2
,
writer
)
GBK_DECODE
(
c
,
IN
BYTE
2
,
writer
)
else
return
1
;
else
return
1
;
NEXT_IN
(
2
);
NEXT_IN
(
2
);
...
@@ -152,41 +156,31 @@ DECODER(gbk)
...
@@ -152,41 +156,31 @@ DECODER(gbk)
ENCODER
(
gb18030
)
ENCODER
(
gb18030
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN1
;
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
WRITE1
(
c
)
WRITE
BYTE
1
(
c
)
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
continue
;
}
}
DECODE_SURROGATE
(
c
)
if
(
c
>=
0x10000
)
{
if
(
c
>
0x10FFFF
)
#if Py_UNICODE_SIZE == 2
return
2
;
/* surrogates pair */
#else
return
1
;
#endif
else
if
(
c
>=
0x10000
)
{
Py_UCS4
tc
=
c
-
0x10000
;
Py_UCS4
tc
=
c
-
0x10000
;
assert
(
c
<=
0x10FFFF
);
REQUIRE_OUTBUF
(
4
)
REQUIRE_OUTBUF
(
4
)
OUT4
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
OUT
BYTE
4
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
tc
/=
10
;
tc
/=
10
;
OUT3
((
unsigned
char
)(
tc
%
126
)
+
0x81
)
OUT
BYTE
3
((
unsigned
char
)(
tc
%
126
)
+
0x81
)
tc
/=
126
;
tc
/=
126
;
OUT2
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
OUT
BYTE
2
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
tc
/=
10
;
tc
/=
10
;
OUT1
((
unsigned
char
)(
tc
+
0x90
))
OUT
BYTE
1
((
unsigned
char
)(
tc
+
0x90
))
#if Py_UNICODE_SIZE == 2
NEXT
(
1
,
4
);
NEXT
(
2
,
4
)
/* surrogates pair */
#else
NEXT
(
1
,
4
)
#endif
continue
;
continue
;
}
}
...
@@ -209,15 +203,15 @@ ENCODER(gb18030)
...
@@ -209,15 +203,15 @@ ENCODER(gb18030)
tc
=
c
-
utrrange
->
first
+
tc
=
c
-
utrrange
->
first
+
utrrange
->
base
;
utrrange
->
base
;
OUT4
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
OUT
BYTE
4
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
tc
/=
10
;
tc
/=
10
;
OUT3
((
unsigned
char
)(
tc
%
126
)
+
0x81
)
OUT
BYTE
3
((
unsigned
char
)(
tc
%
126
)
+
0x81
)
tc
/=
126
;
tc
/=
126
;
OUT2
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
OUT
BYTE
2
((
unsigned
char
)(
tc
%
10
)
+
0x30
)
tc
/=
10
;
tc
/=
10
;
OUT1
((
unsigned
char
)
tc
+
0x81
)
OUT
BYTE
1
((
unsigned
char
)
tc
+
0x81
)
NEXT
(
1
,
4
)
NEXT
(
1
,
4
)
;
break
;
break
;
}
}
...
@@ -226,13 +220,13 @@ ENCODER(gb18030)
...
@@ -226,13 +220,13 @@ ENCODER(gb18030)
continue
;
continue
;
}
}
OUT1
((
code
>>
8
)
|
0x80
)
OUT
BYTE
1
((
code
>>
8
)
|
0x80
)
if
(
code
&
0x8000
)
if
(
code
&
0x8000
)
OUT2
((
code
&
0xFF
))
/* MSB set: GBK or GB18030ext */
OUT
BYTE
2
((
code
&
0xFF
))
/* MSB set: GBK or GB18030ext */
else
else
OUT2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: GB2312 */
OUT
BYTE
2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: GB2312 */
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
}
return
0
;
return
0
;
...
@@ -241,7 +235,7 @@ ENCODER(gb18030)
...
@@ -241,7 +235,7 @@ ENCODER(gb18030)
DECODER
(
gb18030
)
DECODER
(
gb18030
)
{
{
while
(
inleft
>
0
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
,
c2
;
unsigned
char
c
=
IN
BYTE
1
,
c2
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
OUTCHAR
(
c
);
...
@@ -251,15 +245,15 @@ DECODER(gb18030)
...
@@ -251,15 +245,15 @@ DECODER(gb18030)
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
c2
=
IN2
;
c2
=
IN
BYTE
2
;
if
(
c2
>=
0x30
&&
c2
<=
0x39
)
{
/* 4 bytes seq */
if
(
c2
>=
0x30
&&
c2
<=
0x39
)
{
/* 4 bytes seq */
const
struct
_gb18030_to_unibmp_ranges
*
utr
;
const
struct
_gb18030_to_unibmp_ranges
*
utr
;
unsigned
char
c3
,
c4
;
unsigned
char
c3
,
c4
;
Py_UCS4
lseq
;
Py_UCS4
lseq
;
REQUIRE_INBUF
(
4
)
REQUIRE_INBUF
(
4
)
c3
=
IN3
;
c3
=
IN
BYTE
3
;
c4
=
IN4
;
c4
=
IN
BYTE
4
;
if
(
c
<
0x81
||
c3
<
0x81
||
c4
<
0x30
||
c4
>
0x39
)
if
(
c
<
0x81
||
c3
<
0x81
||
c4
<
0x30
||
c4
>
0x39
)
return
1
;
return
1
;
c
-=
0x81
;
c2
-=
0x30
;
c
-=
0x81
;
c2
-=
0x30
;
...
@@ -313,33 +307,34 @@ ENCODER_INIT(hz)
...
@@ -313,33 +307,34 @@ ENCODER_INIT(hz)
ENCODER_RESET
(
hz
)
ENCODER_RESET
(
hz
)
{
{
if
(
state
->
i
!=
0
)
{
if
(
state
->
i
!=
0
)
{
WRITE2
(
'~'
,
'}'
)
WRITE
BYTE
2
(
'~'
,
'}'
)
state
->
i
=
0
;
state
->
i
=
0
;
NEXT_OUT
(
2
)
NEXT_OUT
(
2
)
;
}
}
return
0
;
return
0
;
}
}
ENCODER
(
hz
)
ENCODER
(
hz
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN1
;
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
if
(
state
->
i
==
0
)
{
if
(
state
->
i
==
0
)
{
WRITE1
((
unsigned
char
)
c
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
}
}
else
{
else
{
WRITE3
(
'~'
,
'}'
,
(
unsigned
char
)
c
)
WRITE
BYTE
3
(
'~'
,
'}'
,
(
unsigned
char
)
c
)
NEXT
(
1
,
3
)
NEXT
(
1
,
3
)
;
state
->
i
=
0
;
state
->
i
=
0
;
}
}
continue
;
continue
;
}
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
TRYMAP_ENC
(
gbcommon
,
code
,
c
);
TRYMAP_ENC
(
gbcommon
,
code
,
c
);
else
return
1
;
else
return
1
;
...
@@ -348,13 +343,13 @@ ENCODER(hz)
...
@@ -348,13 +343,13 @@ ENCODER(hz)
return
1
;
return
1
;
if
(
state
->
i
==
0
)
{
if
(
state
->
i
==
0
)
{
WRITE4
(
'~'
,
'{'
,
code
>>
8
,
code
&
0xff
)
WRITE
BYTE
4
(
'~'
,
'{'
,
code
>>
8
,
code
&
0xff
)
NEXT
(
1
,
4
)
NEXT
(
1
,
4
)
;
state
->
i
=
1
;
state
->
i
=
1
;
}
}
else
{
else
{
WRITE2
(
code
>>
8
,
code
&
0xff
)
WRITE
BYTE
2
(
code
>>
8
,
code
&
0xff
)
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
}
}
}
...
@@ -376,10 +371,10 @@ DECODER_RESET(hz)
...
@@ -376,10 +371,10 @@ DECODER_RESET(hz)
DECODER
(
hz
)
DECODER
(
hz
)
{
{
while
(
inleft
>
0
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
==
'~'
)
{
if
(
c
==
'~'
)
{
unsigned
char
c2
=
IN2
;
unsigned
char
c2
=
IN
BYTE
2
;
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
if
(
c2
==
'~'
)
{
if
(
c2
==
'~'
)
{
...
@@ -408,7 +403,7 @@ DECODER(hz)
...
@@ -408,7 +403,7 @@ DECODER(hz)
}
}
else
{
/* GB mode */
else
{
/* GB mode */
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
TRYMAP_DEC
(
gb2312
,
writer
,
c
,
IN2
)
{
TRYMAP_DEC
(
gb2312
,
writer
,
c
,
IN
BYTE
2
)
{
NEXT_IN
(
2
);
NEXT_IN
(
2
);
}
}
else
else
...
...
Modules/cjkcodecs/_codecs_hk.c
Dosyayı görüntüle @
d9491269
...
@@ -38,35 +38,39 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5
...
@@ -38,35 +38,39 @@ static const DBCHAR big5hkscs_pairenc_table[4] = {0x8862, 0x8864, 0x88a3, 0x88a5
ENCODER
(
big5hkscs
)
ENCODER
(
big5hkscs
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
**
inbuf
;
Py_UCS4
c
=
INCHAR1
;
DBCHAR
code
;
DBCHAR
code
;
Py_ssize_t
insize
;
Py_ssize_t
insize
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
REQUIRE_OUTBUF
(
1
)
REQUIRE_OUTBUF
(
1
)
**
outbuf
=
(
unsigned
char
)
c
;
**
outbuf
=
(
unsigned
char
)
c
;
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
continue
;
}
}
DECODE_SURROGATE
(
c
)
insize
=
1
;
insize
=
GET_INSIZE
(
c
);
REQUIRE_OUTBUF
(
2
)
REQUIRE_OUTBUF
(
2
)
if
(
c
<
0x10000
)
{
if
(
c
<
0x10000
)
{
TRYMAP_ENC
(
big5hkscs_bmp
,
code
,
c
)
{
TRYMAP_ENC
(
big5hkscs_bmp
,
code
,
c
)
{
if
(
code
==
MULTIC
)
{
if
(
code
==
MULTIC
)
{
if
(
inleft
>=
2
&&
Py_UCS4
c2
;
if
(
inlen
-
*
inpos
>=
2
)
c2
=
INCHAR2
;
else
c2
=
0
;
if
(
inlen
-
*
inpos
>=
2
&&
((
c
&
0xffdf
)
==
0x00ca
)
&&
((
c
&
0xffdf
)
==
0x00ca
)
&&
((
(
*
inbuf
)[
1
]
&
0xfff7
)
==
0x0304
))
{
((
c2
&
0xfff7
)
==
0x0304
))
{
code
=
big5hkscs_pairenc_table
[
code
=
big5hkscs_pairenc_table
[
((
c
>>
4
)
|
((
c
>>
4
)
|
(
(
*
inbuf
)[
1
]
>>
3
))
&
3
];
(
c2
>>
3
))
&
3
];
insize
=
2
;
insize
=
2
;
}
}
else
if
(
inle
ft
<
2
&&
else
if
(
inle
n
-
*
inpos
<
2
&&
!
(
flags
&
MBENC_FLUSH
))
!
(
flags
&
MBENC_FLUSH
))
return
MBERR_TOOFEW
;
return
MBERR_TOOFEW
;
else
{
else
{
...
@@ -89,9 +93,9 @@ ENCODER(big5hkscs)
...
@@ -89,9 +93,9 @@ ENCODER(big5hkscs)
else
else
return
insize
;
return
insize
;
OUT1
(
code
>>
8
)
OUT
BYTE
1
(
code
>>
8
)
OUT2
(
code
&
0xFF
)
OUT
BYTE
2
(
code
&
0xFF
)
NEXT
(
insize
,
2
)
NEXT
(
insize
,
2
)
;
}
}
return
0
;
return
0
;
...
@@ -102,7 +106,7 @@ ENCODER(big5hkscs)
...
@@ -102,7 +106,7 @@ ENCODER(big5hkscs)
DECODER
(
big5hkscs
)
DECODER
(
big5hkscs
)
{
{
while
(
inleft
>
0
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
Py_UCS4
decoded
;
Py_UCS4
decoded
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
...
@@ -113,20 +117,20 @@ DECODER(big5hkscs)
...
@@ -113,20 +117,20 @@ DECODER(big5hkscs)
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
if
(
0xc6
>
c
||
c
>
0xc8
||
(
c
<
0xc7
&&
IN2
<
0xa1
))
{
if
(
0xc6
>
c
||
c
>
0xc8
||
(
c
<
0xc7
&&
IN
BYTE
2
<
0xa1
))
{
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN2
)
{
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN
BYTE
2
)
{
NEXT_IN
(
2
);
NEXT_IN
(
2
);
continue
;
continue
;
}
}
}
}
TRYMAP_DEC_CHAR
(
big5hkscs
,
decoded
,
c
,
IN2
)
TRYMAP_DEC_CHAR
(
big5hkscs
,
decoded
,
c
,
IN
BYTE
2
)
{
{
int
s
=
BH2S
(
c
,
IN2
);
int
s
=
BH2S
(
c
,
IN
BYTE
2
);
const
unsigned
char
*
hintbase
;
const
unsigned
char
*
hintbase
;
assert
(
0x87
<=
c
&&
c
<=
0xfe
);
assert
(
0x87
<=
c
&&
c
<=
0xfe
);
assert
(
0x40
<=
IN
2
&&
IN
2
<=
0xfe
);
assert
(
0x40
<=
IN
BYTE2
&&
INBYTE
2
<=
0xfe
);
if
(
BH2S
(
0x87
,
0x40
)
<=
s
&&
s
<=
BH2S
(
0xa0
,
0xfe
))
{
if
(
BH2S
(
0x87
,
0x40
)
<=
s
&&
s
<=
BH2S
(
0xa0
,
0xfe
))
{
hintbase
=
big5hkscs_phint_0
;
hintbase
=
big5hkscs_phint_0
;
...
@@ -154,7 +158,7 @@ DECODER(big5hkscs)
...
@@ -154,7 +158,7 @@ DECODER(big5hkscs)
continue
;
continue
;
}
}
switch
((
c
<<
8
)
|
IN2
)
{
switch
((
c
<<
8
)
|
IN
BYTE
2
)
{
case
0x8862
:
OUTCHAR2
(
0x00ca
,
0x0304
);
break
;
case
0x8862
:
OUTCHAR2
(
0x00ca
,
0x0304
);
break
;
case
0x8864
:
OUTCHAR2
(
0x00ca
,
0x030c
);
break
;
case
0x8864
:
OUTCHAR2
(
0x00ca
,
0x030c
);
break
;
case
0x88a3
:
OUTCHAR2
(
0x00ea
,
0x0304
);
break
;
case
0x88a3
:
OUTCHAR2
(
0x00ea
,
0x0304
);
break
;
...
...
Modules/cjkcodecs/_codecs_iso2022.c
Dosyayı görüntüle @
d9491269
...
@@ -141,13 +141,13 @@ ENCODER_INIT(iso2022)
...
@@ -141,13 +141,13 @@ ENCODER_INIT(iso2022)
ENCODER_RESET
(
iso2022
)
ENCODER_RESET
(
iso2022
)
{
{
if
(
STATE_GETFLAG
(
F_SHIFTED
))
{
if
(
STATE_GETFLAG
(
F_SHIFTED
))
{
WRITE1
(
SI
)
WRITE
BYTE
1
(
SI
)
NEXT_OUT
(
1
)
NEXT_OUT
(
1
)
;
STATE_CLEARFLAG
(
F_SHIFTED
)
STATE_CLEARFLAG
(
F_SHIFTED
)
}
}
if
(
STATE_G0
!=
CHARSET_ASCII
)
{
if
(
STATE_G0
!=
CHARSET_ASCII
)
{
WRITE3
(
ESC
,
'('
,
'B'
)
WRITE
BYTE
3
(
ESC
,
'('
,
'B'
)
NEXT_OUT
(
3
)
NEXT_OUT
(
3
)
;
STATE_SETG0
(
CHARSET_ASCII
)
STATE_SETG0
(
CHARSET_ASCII
)
}
}
return
0
;
return
0
;
...
@@ -155,30 +155,29 @@ ENCODER_RESET(iso2022)
...
@@ -155,30 +155,29 @@ ENCODER_RESET(iso2022)
ENCODER
(
iso2022
)
ENCODER
(
iso2022
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
const
struct
iso2022_designation
*
dsg
;
const
struct
iso2022_designation
*
dsg
;
DBCHAR
encoded
;
DBCHAR
encoded
;
Py_UCS4
c
=
**
inbuf
;
Py_UCS4
c
=
INCHAR1
;
Py_ssize_t
insize
;
Py_ssize_t
insize
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
if
(
STATE_G0
!=
CHARSET_ASCII
)
{
if
(
STATE_G0
!=
CHARSET_ASCII
)
{
WRITE3
(
ESC
,
'('
,
'B'
)
WRITE
BYTE
3
(
ESC
,
'('
,
'B'
)
STATE_SETG0
(
CHARSET_ASCII
)
STATE_SETG0
(
CHARSET_ASCII
)
NEXT_OUT
(
3
)
NEXT_OUT
(
3
)
;
}
}
if
(
STATE_GETFLAG
(
F_SHIFTED
))
{
if
(
STATE_GETFLAG
(
F_SHIFTED
))
{
WRITE1
(
SI
)
WRITE
BYTE
1
(
SI
)
STATE_CLEARFLAG
(
F_SHIFTED
)
STATE_CLEARFLAG
(
F_SHIFTED
)
NEXT_OUT
(
1
)
NEXT_OUT
(
1
)
;
}
}
WRITE1
((
unsigned
char
)
c
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
continue
;
}
}
DECODE_SURROGATE
(
c
)
insize
=
1
;
insize
=
GET_INSIZE
(
c
);
encoded
=
MAP_UNMAPPABLE
;
encoded
=
MAP_UNMAPPABLE
;
for
(
dsg
=
CONFIG_DESIGNATIONS
;
dsg
->
mark
;
dsg
++
)
{
for
(
dsg
=
CONFIG_DESIGNATIONS
;
dsg
->
mark
;
dsg
++
)
{
...
@@ -187,24 +186,14 @@ ENCODER(iso2022)
...
@@ -187,24 +186,14 @@ ENCODER(iso2022)
if
(
encoded
==
MAP_MULTIPLE_AVAIL
)
{
if
(
encoded
==
MAP_MULTIPLE_AVAIL
)
{
/* this implementation won't work for pair
/* this implementation won't work for pair
* of non-bmp characters. */
* of non-bmp characters. */
if
(
inle
ft
<
2
)
{
if
(
inle
n
-
*
inpos
<
2
)
{
if
(
!
(
flags
&
MBENC_FLUSH
))
if
(
!
(
flags
&
MBENC_FLUSH
))
return
MBERR_TOOFEW
;
return
MBERR_TOOFEW
;
length
=
-
1
;
length
=
-
1
;
}
}
else
else
length
=
2
;
length
=
2
;
#if Py_UNICODE_SIZE == 2
if
(
length
==
2
)
{
Py_UCS4
u4in
[
2
];
u4in
[
0
]
=
(
Py_UCS4
)
IN1
;
u4in
[
1
]
=
(
Py_UCS4
)
IN2
;
encoded
=
dsg
->
encoder
(
u4in
,
&
length
);
}
else
encoded
=
dsg
->
encoder
(
&
c
,
&
length
);
#else
encoded
=
dsg
->
encoder
(
&
c
,
&
length
);
encoded
=
dsg
->
encoder
(
&
c
,
&
length
);
#endif
if
(
encoded
!=
MAP_UNMAPPABLE
)
{
if
(
encoded
!=
MAP_UNMAPPABLE
)
{
insize
=
length
;
insize
=
length
;
break
;
break
;
...
@@ -221,47 +210,47 @@ ENCODER(iso2022)
...
@@ -221,47 +210,47 @@ ENCODER(iso2022)
switch
(
dsg
->
plane
)
{
switch
(
dsg
->
plane
)
{
case
0
:
/* G0 */
case
0
:
/* G0 */
if
(
STATE_GETFLAG
(
F_SHIFTED
))
{
if
(
STATE_GETFLAG
(
F_SHIFTED
))
{
WRITE1
(
SI
)
WRITE
BYTE
1
(
SI
)
STATE_CLEARFLAG
(
F_SHIFTED
)
STATE_CLEARFLAG
(
F_SHIFTED
)
NEXT_OUT
(
1
)
NEXT_OUT
(
1
)
;
}
}
if
(
STATE_G0
!=
dsg
->
mark
)
{
if
(
STATE_G0
!=
dsg
->
mark
)
{
if
(
dsg
->
width
==
1
)
{
if
(
dsg
->
width
==
1
)
{
WRITE3
(
ESC
,
'('
,
ESCMARK
(
dsg
->
mark
))
WRITE
BYTE
3
(
ESC
,
'('
,
ESCMARK
(
dsg
->
mark
))
STATE_SETG0
(
dsg
->
mark
)
STATE_SETG0
(
dsg
->
mark
)
NEXT_OUT
(
3
)
NEXT_OUT
(
3
)
;
}
}
else
if
(
dsg
->
mark
==
CHARSET_JISX0208
)
{
else
if
(
dsg
->
mark
==
CHARSET_JISX0208
)
{
WRITE3
(
ESC
,
'$'
,
ESCMARK
(
dsg
->
mark
))
WRITE
BYTE
3
(
ESC
,
'$'
,
ESCMARK
(
dsg
->
mark
))
STATE_SETG0
(
dsg
->
mark
)
STATE_SETG0
(
dsg
->
mark
)
NEXT_OUT
(
3
)
NEXT_OUT
(
3
)
;
}
}
else
{
else
{
WRITE4
(
ESC
,
'$'
,
'('
,
WRITE
BYTE
4
(
ESC
,
'$'
,
'('
,
ESCMARK
(
dsg
->
mark
))
ESCMARK
(
dsg
->
mark
))
STATE_SETG0
(
dsg
->
mark
)
STATE_SETG0
(
dsg
->
mark
)
NEXT_OUT
(
4
)
NEXT_OUT
(
4
)
;
}
}
}
}
break
;
break
;
case
1
:
/* G1 */
case
1
:
/* G1 */
if
(
STATE_G1
!=
dsg
->
mark
)
{
if
(
STATE_G1
!=
dsg
->
mark
)
{
if
(
dsg
->
width
==
1
)
{
if
(
dsg
->
width
==
1
)
{
WRITE3
(
ESC
,
')'
,
ESCMARK
(
dsg
->
mark
))
WRITE
BYTE
3
(
ESC
,
')'
,
ESCMARK
(
dsg
->
mark
))
STATE_SETG1
(
dsg
->
mark
)
STATE_SETG1
(
dsg
->
mark
)
NEXT_OUT
(
3
)
NEXT_OUT
(
3
)
;
}
}
else
{
else
{
WRITE4
(
ESC
,
'$'
,
')'
,
WRITE
BYTE
4
(
ESC
,
'$'
,
')'
,
ESCMARK
(
dsg
->
mark
))
ESCMARK
(
dsg
->
mark
))
STATE_SETG1
(
dsg
->
mark
)
STATE_SETG1
(
dsg
->
mark
)
NEXT_OUT
(
4
)
NEXT_OUT
(
4
)
;
}
}
}
}
if
(
!
STATE_GETFLAG
(
F_SHIFTED
))
{
if
(
!
STATE_GETFLAG
(
F_SHIFTED
))
{
WRITE1
(
SO
)
WRITE
BYTE
1
(
SO
)
STATE_SETFLAG
(
F_SHIFTED
)
STATE_SETFLAG
(
F_SHIFTED
)
NEXT_OUT
(
1
)
NEXT_OUT
(
1
)
;
}
}
break
;
break
;
default
:
/* G2 and G3 is not supported: no encoding in
default
:
/* G2 and G3 is not supported: no encoding in
...
@@ -270,14 +259,14 @@ ENCODER(iso2022)
...
@@ -270,14 +259,14 @@ ENCODER(iso2022)
}
}
if
(
dsg
->
width
==
1
)
{
if
(
dsg
->
width
==
1
)
{
WRITE1
((
unsigned
char
)
encoded
)
WRITE
BYTE
1
((
unsigned
char
)
encoded
)
NEXT_OUT
(
1
)
NEXT_OUT
(
1
)
;
}
}
else
{
else
{
WRITE2
(
encoded
>>
8
,
encoded
&
0xff
)
WRITE
BYTE
2
(
encoded
>>
8
,
encoded
&
0xff
)
NEXT_OUT
(
2
)
NEXT_OUT
(
2
)
;
}
}
NEXT_IN
(
insize
);
NEXT_IN
CHAR
(
insize
);
}
}
return
0
;
return
0
;
...
@@ -323,26 +312,26 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
...
@@ -323,26 +312,26 @@ iso2022processesc(const void *config, MultibyteCodec_State *state,
switch
(
esclen
)
{
switch
(
esclen
)
{
case
3
:
case
3
:
if
(
IN2
==
'$'
)
{
if
(
IN
BYTE
2
==
'$'
)
{
charset
=
IN3
|
CHARSET_DBCS
;
charset
=
IN
BYTE
3
|
CHARSET_DBCS
;
designation
=
0
;
designation
=
0
;
}
}
else
{
else
{
charset
=
IN3
;
charset
=
IN
BYTE
3
;
if
(
IN2
==
'('
)
designation
=
0
;
if
(
IN
BYTE
2
==
'('
)
designation
=
0
;
else
if
(
IN2
==
')'
)
designation
=
1
;
else
if
(
IN
BYTE
2
==
')'
)
designation
=
1
;
else
if
(
CONFIG_ISSET
(
USE_G2
)
&&
IN2
==
'.'
)
else
if
(
CONFIG_ISSET
(
USE_G2
)
&&
IN
BYTE
2
==
'.'
)
designation
=
2
;
designation
=
2
;
else
return
3
;
else
return
3
;
}
}
break
;
break
;
case
4
:
case
4
:
if
(
IN2
!=
'$'
)
if
(
IN
BYTE
2
!=
'$'
)
return
4
;
return
4
;
charset
=
IN4
|
CHARSET_DBCS
;
charset
=
IN
BYTE
4
|
CHARSET_DBCS
;
if
(
IN3
==
'('
)
designation
=
0
;
if
(
IN
BYTE
3
==
'('
)
designation
=
0
;
else
if
(
IN3
==
')'
)
designation
=
1
;
else
if
(
IN
BYTE
3
==
')'
)
designation
=
1
;
else
return
4
;
else
return
4
;
break
;
break
;
case
6
:
/* designation with prefix */
case
6
:
/* designation with prefix */
...
@@ -395,18 +384,18 @@ iso2022processg2(const void *config, MultibyteCodec_State *state,
...
@@ -395,18 +384,18 @@ iso2022processg2(const void *config, MultibyteCodec_State *state,
/* not written to use encoder, decoder functions because only few
/* not written to use encoder, decoder functions because only few
* encodings use G2 designations in CJKCodecs */
* encodings use G2 designations in CJKCodecs */
if
(
STATE_G2
==
CHARSET_ISO8859_1
)
{
if
(
STATE_G2
==
CHARSET_ISO8859_1
)
{
if
(
IN3
<
0x80
)
if
(
IN
BYTE
3
<
0x80
)
OUTCHAR
(
IN3
+
0x80
);
OUTCHAR
(
IN
BYTE
3
+
0x80
);
else
else
return
3
;
return
3
;
}
}
else
if
(
STATE_G2
==
CHARSET_ISO8859_7
)
{
else
if
(
STATE_G2
==
CHARSET_ISO8859_7
)
{
ISO8859_7_DECODE
(
IN3
^
0x80
,
writer
)
ISO8859_7_DECODE
(
IN
BYTE
3
^
0x80
,
writer
)
else
return
3
;
else
return
3
;
}
}
else
if
(
STATE_G2
==
CHARSET_ASCII
)
{
else
if
(
STATE_G2
==
CHARSET_ASCII
)
{
if
(
IN3
&
0x80
)
return
3
;
if
(
IN
BYTE
3
&
0x80
)
return
3
;
else
OUTCHAR
(
IN3
);
else
OUTCHAR
(
IN
BYTE
3
);
}
}
else
else
return
MBERR_INTERNAL
;
return
MBERR_INTERNAL
;
...
@@ -421,7 +410,7 @@ DECODER(iso2022)
...
@@ -421,7 +410,7 @@ DECODER(iso2022)
const
struct
iso2022_designation
*
dsgcache
=
NULL
;
const
struct
iso2022_designation
*
dsgcache
=
NULL
;
while
(
inleft
>
0
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
Py_ssize_t
err
;
Py_ssize_t
err
;
if
(
STATE_GETFLAG
(
F_ESCTHROUGHOUT
))
{
if
(
STATE_GETFLAG
(
F_ESCTHROUGHOUT
))
{
...
@@ -438,13 +427,13 @@ DECODER(iso2022)
...
@@ -438,13 +427,13 @@ DECODER(iso2022)
switch
(
c
)
{
switch
(
c
)
{
case
ESC
:
case
ESC
:
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
if
(
IS_ISO2022ESC
(
IN2
))
{
if
(
IS_ISO2022ESC
(
IN
BYTE
2
))
{
err
=
iso2022processesc
(
config
,
state
,
err
=
iso2022processesc
(
config
,
state
,
inbuf
,
&
inleft
);
inbuf
,
&
inleft
);
if
(
err
!=
0
)
if
(
err
!=
0
)
return
err
;
return
err
;
}
}
else
if
(
CONFIG_ISSET
(
USE_G2
)
&&
IN2
==
'N'
)
{
/* SS2 */
else
if
(
CONFIG_ISSET
(
USE_G2
)
&&
IN
BYTE
2
==
'N'
)
{
/* SS2 */
REQUIRE_INBUF
(
3
)
REQUIRE_INBUF
(
3
)
err
=
iso2022processg2
(
config
,
state
,
err
=
iso2022processg2
(
config
,
state
,
inbuf
,
&
inleft
,
writer
);
inbuf
,
&
inleft
,
writer
);
...
...
Modules/cjkcodecs/_codecs_jp.c
Dosyayı görüntüle @
d9491269
This diff is collapsed.
Click to expand it.
Modules/cjkcodecs/_codecs_kr.c
Dosyayı görüntüle @
d9491269
...
@@ -33,16 +33,18 @@ static const unsigned char u2cgk_jongseong[28] = {
...
@@ -33,16 +33,18 @@ static const unsigned char u2cgk_jongseong[28] = {
ENCODER
(
euc_kr
)
ENCODER
(
euc_kr
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN1
;
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
continue
;
}
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
cp949
,
code
,
c
);
TRYMAP_ENC
(
cp949
,
code
,
c
);
...
@@ -50,9 +52,9 @@ ENCODER(euc_kr)
...
@@ -50,9 +52,9 @@ ENCODER(euc_kr)
if
((
code
&
0x8000
)
==
0
)
{
if
((
code
&
0x8000
)
==
0
)
{
/* KS X 1001 coded character */
/* KS X 1001 coded character */
OUT1
((
code
>>
8
)
|
0x80
)
OUT
BYTE
1
((
code
>>
8
)
|
0x80
)
OUT2
((
code
&
0xFF
)
|
0x80
)
OUT
BYTE
2
((
code
&
0xFF
)
|
0x80
)
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
}
else
{
/* Mapping is found in CP949 extension,
else
{
/* Mapping is found in CP949 extension,
* but we encode it in KS X 1001:1998 Annex 3,
* but we encode it in KS X 1001:1998 Annex 3,
...
@@ -61,23 +63,23 @@ ENCODER(euc_kr)
...
@@ -61,23 +63,23 @@ ENCODER(euc_kr)
REQUIRE_OUTBUF
(
8
)
REQUIRE_OUTBUF
(
8
)
/* syllable composition precedence */
/* syllable composition precedence */
OUT1
(
EUCKR_JAMO_FIRSTBYTE
)
OUT
BYTE
1
(
EUCKR_JAMO_FIRSTBYTE
)
OUT2
(
EUCKR_JAMO_FILLER
)
OUT
BYTE
2
(
EUCKR_JAMO_FILLER
)
/* All codepoints in CP949 extension are in unicode
/* All codepoints in CP949 extension are in unicode
* Hangul Syllable area. */
* Hangul Syllable area. */
assert
(
0xac00
<=
c
&&
c
<=
0xd7a3
);
assert
(
0xac00
<=
c
&&
c
<=
0xd7a3
);
c
-=
0xac00
;
c
-=
0xac00
;
OUT3
(
EUCKR_JAMO_FIRSTBYTE
)
OUT
BYTE
3
(
EUCKR_JAMO_FIRSTBYTE
)
OUT4
(
u2cgk_choseong
[
c
/
588
])
OUT
BYTE
4
(
u2cgk_choseong
[
c
/
588
])
NEXT_OUT
(
4
)
NEXT_OUT
(
4
)
;
OUT1
(
EUCKR_JAMO_FIRSTBYTE
)
OUT
BYTE
1
(
EUCKR_JAMO_FIRSTBYTE
)
OUT2
(
u2cgk_jungseong
[(
c
/
28
)
%
21
])
OUT
BYTE
2
(
u2cgk_jungseong
[(
c
/
28
)
%
21
])
OUT3
(
EUCKR_JAMO_FIRSTBYTE
)
OUT
BYTE
3
(
EUCKR_JAMO_FIRSTBYTE
)
OUT4
(
u2cgk_jongseong
[
c
%
28
])
OUT
BYTE
4
(
u2cgk_jongseong
[
c
%
28
])
NEXT
(
1
,
4
)
NEXT
(
1
,
4
)
;
}
}
}
}
...
@@ -102,7 +104,7 @@ static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
...
@@ -102,7 +104,7 @@ static const unsigned char cgk2u_jongseong[] = { /* [A1, BE] */
DECODER
(
euc_kr
)
DECODER
(
euc_kr
)
{
{
while
(
inleft
>
0
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
OUTCHAR
(
c
);
...
@@ -113,7 +115,7 @@ DECODER(euc_kr)
...
@@ -113,7 +115,7 @@ DECODER(euc_kr)
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
if
(
c
==
EUCKR_JAMO_FIRSTBYTE
&&
if
(
c
==
EUCKR_JAMO_FIRSTBYTE
&&
IN2
==
EUCKR_JAMO_FILLER
)
{
IN
BYTE
2
==
EUCKR_JAMO_FILLER
)
{
/* KS X 1001:1998 Annex 3 make-up sequence */
/* KS X 1001:1998 Annex 3 make-up sequence */
DBCHAR
cho
,
jung
,
jong
;
DBCHAR
cho
,
jung
,
jong
;
...
@@ -146,7 +148,7 @@ DECODER(euc_kr)
...
@@ -146,7 +148,7 @@ DECODER(euc_kr)
OUTCHAR
(
0xac00
+
cho
*
588
+
jung
*
28
+
jong
);
OUTCHAR
(
0xac00
+
cho
*
588
+
jung
*
28
+
jong
);
NEXT_IN
(
8
);
NEXT_IN
(
8
);
}
}
else
TRYMAP_DEC
(
ksx1001
,
writer
,
c
^
0x80
,
IN2
^
0x80
)
{
else
TRYMAP_DEC
(
ksx1001
,
writer
,
c
^
0x80
,
IN
BYTE
2
^
0x80
)
{
NEXT_IN
(
2
);
NEXT_IN
(
2
);
}
}
else
else
...
@@ -164,27 +166,29 @@ DECODER(euc_kr)
...
@@ -164,27 +166,29 @@ DECODER(euc_kr)
ENCODER
(
cp949
)
ENCODER
(
cp949
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN1
;
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
continue
;
}
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
cp949
,
code
,
c
);
TRYMAP_ENC
(
cp949
,
code
,
c
);
else
return
1
;
else
return
1
;
OUT1
((
code
>>
8
)
|
0x80
)
OUT
BYTE
1
((
code
>>
8
)
|
0x80
)
if
(
code
&
0x8000
)
if
(
code
&
0x8000
)
OUT2
(
code
&
0xFF
)
/* MSB set: CP949 */
OUT
BYTE
2
(
code
&
0xFF
)
/* MSB set: CP949 */
else
else
OUT2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: ks x 1001 */
OUT
BYTE
2
((
code
&
0xFF
)
|
0x80
)
/* MSB unset: ks x 1001 */
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
}
return
0
;
return
0
;
...
@@ -193,7 +197,7 @@ ENCODER(cp949)
...
@@ -193,7 +197,7 @@ ENCODER(cp949)
DECODER
(
cp949
)
DECODER
(
cp949
)
{
{
while
(
inleft
>
0
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
OUTCHAR
(
c
);
...
@@ -202,8 +206,8 @@ DECODER(cp949)
...
@@ -202,8 +206,8 @@ DECODER(cp949)
}
}
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
TRYMAP_DEC
(
ksx1001
,
writer
,
c
^
0x80
,
IN2
^
0x80
);
TRYMAP_DEC
(
ksx1001
,
writer
,
c
^
0x80
,
IN
BYTE
2
^
0x80
);
else
TRYMAP_DEC
(
cp949ext
,
writer
,
c
,
IN2
);
else
TRYMAP_DEC
(
cp949ext
,
writer
,
c
,
IN
BYTE
2
);
else
return
1
;
else
return
1
;
NEXT_IN
(
2
);
NEXT_IN
(
2
);
...
@@ -246,16 +250,18 @@ static const DBCHAR u2johabjamo[] = {
...
@@ -246,16 +250,18 @@ static const DBCHAR u2johabjamo[] = {
ENCODER
(
johab
)
ENCODER
(
johab
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN1
;
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
continue
;
}
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
REQUIRE_OUTBUF
(
2
)
...
@@ -281,9 +287,9 @@ ENCODER(johab)
...
@@ -281,9 +287,9 @@ ENCODER(johab)
t1
=
(
c1
<
0x4a
?
(
c1
-
0x21
+
0x1b2
)
:
t1
=
(
c1
<
0x4a
?
(
c1
-
0x21
+
0x1b2
)
:
(
c1
-
0x21
+
0x197
));
(
c1
-
0x21
+
0x197
));
t2
=
((
t1
&
1
)
?
0x5e
:
0
)
+
(
c2
-
0x21
);
t2
=
((
t1
&
1
)
?
0x5e
:
0
)
+
(
c2
-
0x21
);
OUT1
(
t1
>>
1
)
OUT
BYTE
1
(
t1
>>
1
)
OUT2
(
t2
<
0x4e
?
t2
+
0x31
:
t2
+
0x43
)
OUT
BYTE
2
(
t2
<
0x4e
?
t2
+
0x31
:
t2
+
0x43
)
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
continue
;
continue
;
}
}
else
else
...
@@ -292,9 +298,9 @@ ENCODER(johab)
...
@@ -292,9 +298,9 @@ ENCODER(johab)
else
else
return
1
;
return
1
;
OUT1
(
code
>>
8
)
OUT
BYTE
1
(
code
>>
8
)
OUT2
(
code
&
0xff
)
OUT
BYTE
2
(
code
&
0xff
)
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
}
return
0
;
return
0
;
...
@@ -344,7 +350,7 @@ static const unsigned char johabjamo_jongseong[32] = {
...
@@ -344,7 +350,7 @@ static const unsigned char johabjamo_jongseong[32] = {
DECODER
(
johab
)
DECODER
(
johab
)
{
{
while
(
inleft
>
0
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
,
c2
;
unsigned
char
c
=
IN
BYTE
1
,
c2
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
OUTCHAR
(
c
);
...
@@ -353,7 +359,7 @@ DECODER(johab)
...
@@ -353,7 +359,7 @@ DECODER(johab)
}
}
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
c2
=
IN2
;
c2
=
IN
BYTE
2
;
if
(
c
<
0xd8
)
{
if
(
c
<
0xd8
)
{
/* johab hangul */
/* johab hangul */
...
...
Modules/cjkcodecs/_codecs_tw.c
Dosyayı görüntüle @
d9491269
...
@@ -13,26 +13,28 @@
...
@@ -13,26 +13,28 @@
ENCODER
(
big5
)
ENCODER
(
big5
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
**
inbuf
;
Py_UCS4
c
=
INCHAR1
;
DBCHAR
code
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
REQUIRE_OUTBUF
(
1
)
REQUIRE_OUTBUF
(
1
)
**
outbuf
=
(
unsigned
char
)
c
;
**
outbuf
=
(
unsigned
char
)
c
;
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
continue
;
}
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
big5
,
code
,
c
);
TRYMAP_ENC
(
big5
,
code
,
c
);
else
return
1
;
else
return
1
;
OUT1
(
code
>>
8
)
OUT
BYTE
1
(
code
>>
8
)
OUT2
(
code
&
0xFF
)
OUT
BYTE
2
(
code
&
0xFF
)
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
}
return
0
;
return
0
;
...
@@ -41,7 +43,7 @@ ENCODER(big5)
...
@@ -41,7 +43,7 @@ ENCODER(big5)
DECODER
(
big5
)
DECODER
(
big5
)
{
{
while
(
inleft
>
0
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
OUTCHAR
(
c
);
...
@@ -50,7 +52,7 @@ DECODER(big5)
...
@@ -50,7 +52,7 @@ DECODER(big5)
}
}
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN2
)
{
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN
BYTE
2
)
{
NEXT_IN
(
2
);
NEXT_IN
(
2
);
}
}
else
return
1
;
else
return
1
;
...
@@ -66,25 +68,27 @@ DECODER(big5)
...
@@ -66,25 +68,27 @@ DECODER(big5)
ENCODER
(
cp950
)
ENCODER
(
cp950
)
{
{
while
(
inleft
>
0
)
{
while
(
*
inpos
<
inlen
)
{
Py_UCS4
c
=
IN1
;
Py_UCS4
c
=
IN
CHAR
1
;
DBCHAR
code
;
DBCHAR
code
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
WRITE1
((
unsigned
char
)
c
)
WRITE
BYTE
1
((
unsigned
char
)
c
)
NEXT
(
1
,
1
)
NEXT
(
1
,
1
)
;
continue
;
continue
;
}
}
UCS4INVALID
(
c
)
if
(
c
>
0xFFFF
)
return
1
;
REQUIRE_OUTBUF
(
2
)
REQUIRE_OUTBUF
(
2
)
TRYMAP_ENC
(
cp950ext
,
code
,
c
);
TRYMAP_ENC
(
cp950ext
,
code
,
c
);
else
TRYMAP_ENC
(
big5
,
code
,
c
);
else
TRYMAP_ENC
(
big5
,
code
,
c
);
else
return
1
;
else
return
1
;
OUT1
(
code
>>
8
)
OUT
BYTE
1
(
code
>>
8
)
OUT2
(
code
&
0xFF
)
OUT
BYTE
2
(
code
&
0xFF
)
NEXT
(
1
,
2
)
NEXT
(
1
,
2
)
;
}
}
return
0
;
return
0
;
...
@@ -93,7 +97,7 @@ ENCODER(cp950)
...
@@ -93,7 +97,7 @@ ENCODER(cp950)
DECODER
(
cp950
)
DECODER
(
cp950
)
{
{
while
(
inleft
>
0
)
{
while
(
inleft
>
0
)
{
unsigned
char
c
=
IN1
;
unsigned
char
c
=
IN
BYTE
1
;
if
(
c
<
0x80
)
{
if
(
c
<
0x80
)
{
OUTCHAR
(
c
);
OUTCHAR
(
c
);
...
@@ -103,8 +107,8 @@ DECODER(cp950)
...
@@ -103,8 +107,8 @@ DECODER(cp950)
REQUIRE_INBUF
(
2
)
REQUIRE_INBUF
(
2
)
TRYMAP_DEC
(
cp950ext
,
writer
,
c
,
IN2
);
TRYMAP_DEC
(
cp950ext
,
writer
,
c
,
IN
BYTE
2
);
else
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN2
);
else
TRYMAP_DEC
(
big5
,
writer
,
c
,
IN
BYTE
2
);
else
return
1
;
else
return
1
;
NEXT_IN
(
2
);
NEXT_IN
(
2
);
...
...
Modules/cjkcodecs/cjkcodecs.h
Dosyayı görüntüle @
d9491269
...
@@ -72,7 +72,8 @@ static const struct dbcs_map *mapping_list;
...
@@ -72,7 +72,8 @@ static const struct dbcs_map *mapping_list;
#define ENCODER(encoding) \
#define ENCODER(encoding) \
static Py_ssize_t encoding##_encode( \
static Py_ssize_t encoding##_encode( \
MultibyteCodec_State *state, const void *config, \
MultibyteCodec_State *state, const void *config, \
const Py_UNICODE **inbuf, Py_ssize_t inleft, \
int kind, void *data, \
Py_ssize_t *inpos, Py_ssize_t inlen, \
unsigned char **outbuf, Py_ssize_t outleft, int flags)
unsigned char **outbuf, Py_ssize_t outleft, int flags)
#define ENCODER_RESET(encoding) \
#define ENCODER_RESET(encoding) \
static Py_ssize_t encoding##_encode_reset( \
static Py_ssize_t encoding##_encode_reset( \
...
@@ -91,25 +92,25 @@ static const struct dbcs_map *mapping_list;
...
@@ -91,25 +92,25 @@ static const struct dbcs_map *mapping_list;
static Py_ssize_t encoding##_decode_reset( \
static Py_ssize_t encoding##_decode_reset( \
MultibyteCodec_State *state, const void *config)
MultibyteCodec_State *state, const void *config)
#if Py_UNICODE_SIZE == 4
#define UCS4INVALID(code) \
if ((code) > 0xFFFF) \
return 1;
#else
#define UCS4INVALID(code) \
if (0) ;
#endif
#define NEXT_IN(i) \
#define NEXT_IN(i) \
do { \
do { \
(*inbuf) += (i); \
(*inbuf) += (i); \
(inleft) -= (i); \
(inleft) -= (i); \
} while (0)
} while (0)
#define NEXT_INCHAR(i) \
do { \
(*inpos) += (i); \
} while (0)
#define NEXT_OUT(o) \
#define NEXT_OUT(o) \
(*outbuf) += (o); \
do { \
(outleft) -= (o);
(*outbuf) += (o); \
(outleft) -= (o); \
} while (0)
#define NEXT(i, o) \
#define NEXT(i, o) \
NEXT_IN(i); NEXT_OUT(o)
do { \
NEXT_INCHAR(i); \
NEXT_OUT(o); \
} while (0)
#define REQUIRE_INBUF(n) \
#define REQUIRE_INBUF(n) \
if (inleft < (n)) \
if (inleft < (n)) \
...
@@ -118,10 +119,13 @@ static const struct dbcs_map *mapping_list;
...
@@ -118,10 +119,13 @@ static const struct dbcs_map *mapping_list;
if (outleft < (n)) \
if (outleft < (n)) \
return MBERR_TOOSMALL;
return MBERR_TOOSMALL;
#define IN1 ((*inbuf)[0])
#define INBYTE1 ((*inbuf)[0])
#define IN2 ((*inbuf)[1])
#define INBYTE2 ((*inbuf)[1])
#define IN3 ((*inbuf)[2])
#define INBYTE3 ((*inbuf)[2])
#define IN4 ((*inbuf)[3])
#define INBYTE4 ((*inbuf)[3])
#define INCHAR1 PyUnicode_READ(kind, data, *inpos)
#define INCHAR2 PyUnicode_READ(kind, data, *inpos + 1)
#define OUTCHAR(c) \
#define OUTCHAR(c) \
do { \
do { \
...
@@ -140,24 +144,24 @@ static const struct dbcs_map *mapping_list;
...
@@ -140,24 +144,24 @@ static const struct dbcs_map *mapping_list;
writer->pos += 2; \
writer->pos += 2; \
} while (0)
} while (0)
#define OUT1(c) ((*outbuf)[0]) = (c);
#define OUT
BYTE
1(c) ((*outbuf)[0]) = (c);
#define OUT2(c) ((*outbuf)[1]) = (c);
#define OUT
BYTE
2(c) ((*outbuf)[1]) = (c);
#define OUT3(c) ((*outbuf)[2]) = (c);
#define OUT
BYTE
3(c) ((*outbuf)[2]) = (c);
#define OUT4(c) ((*outbuf)[3]) = (c);
#define OUT
BYTE
4(c) ((*outbuf)[3]) = (c);
#define WRITE1(c1) \
#define WRITE
BYTE
1(c1) \
REQUIRE_OUTBUF(1) \
REQUIRE_OUTBUF(1) \
(*outbuf)[0] = (c1);
(*outbuf)[0] = (c1);
#define WRITE2(c1, c2) \
#define WRITE
BYTE
2(c1, c2) \
REQUIRE_OUTBUF(2) \
REQUIRE_OUTBUF(2) \
(*outbuf)[0] = (c1); \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2);
(*outbuf)[1] = (c2);
#define WRITE3(c1, c2, c3) \
#define WRITE
BYTE
3(c1, c2, c3) \
REQUIRE_OUTBUF(3) \
REQUIRE_OUTBUF(3) \
(*outbuf)[0] = (c1); \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[1] = (c2); \
(*outbuf)[2] = (c3);
(*outbuf)[2] = (c3);
#define WRITE4(c1, c2, c3, c4) \
#define WRITE
BYTE
4(c1, c2, c3, c4) \
REQUIRE_OUTBUF(4) \
REQUIRE_OUTBUF(4) \
(*outbuf)[0] = (c1); \
(*outbuf)[0] = (c1); \
(*outbuf)[1] = (c2); \
(*outbuf)[1] = (c2); \
...
@@ -209,20 +213,6 @@ _TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c)
...
@@ -209,20 +213,6 @@ _TRYMAP_DEC_WRITE(_PyUnicodeWriter *writer, Py_UCS4 c)
#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \
#define TRYMAP_DEC_MPLANE(charset, writer, plane, c1, c2) \
if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2)
if _TRYMAP_DEC(&charset##_decmap[plane][c1], writer, c2)
#if Py_UNICODE_SIZE == 2
#define DECODE_SURROGATE(c) \
if (Py_UNICODE_IS_HIGH_SURROGATE(c)) { \
REQUIRE_INBUF(2) \
if (Py_UNICODE_IS_LOW_SURROGATE(IN2)) { \
c = Py_UNICODE_JOIN_SURROGATES(c, IN2); \
} \
}
#define GET_INSIZE(c) ((c) > 0xffff ? 2 : 1)
#else
#define DECODE_SURROGATE(c) {;}
#define GET_INSIZE(c) 1
#endif
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define BEGIN_MAPPINGS_LIST static const struct dbcs_map _mapping_list[] = {
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_ENCONLY(enc) {#enc, (void*)enc##_encmap, NULL},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
#define MAPPING_DECONLY(enc) {#enc, NULL, (void*)enc##_decmap},
...
...
Modules/cjkcodecs/multibytecodec.c
Dosyayı görüntüle @
d9491269
This diff is collapsed.
Click to expand it.
Modules/cjkcodecs/multibytecodec.h
Dosyayı görüntüle @
d9491269
...
@@ -27,7 +27,8 @@ typedef union {
...
@@ -27,7 +27,8 @@ typedef union {
typedef
int
(
*
mbcodec_init
)(
const
void
*
config
);
typedef
int
(
*
mbcodec_init
)(
const
void
*
config
);
typedef
Py_ssize_t
(
*
mbencode_func
)(
MultibyteCodec_State
*
state
,
typedef
Py_ssize_t
(
*
mbencode_func
)(
MultibyteCodec_State
*
state
,
const
void
*
config
,
const
void
*
config
,
const
Py_UNICODE
**
inbuf
,
Py_ssize_t
inleft
,
int
kind
,
void
*
data
,
Py_ssize_t
*
inpos
,
Py_ssize_t
inlen
,
unsigned
char
**
outbuf
,
Py_ssize_t
outleft
,
unsigned
char
**
outbuf
,
Py_ssize_t
outleft
,
int
flags
);
int
flags
);
typedef
int
(
*
mbencodeinit_func
)(
MultibyteCodec_State
*
state
,
typedef
int
(
*
mbencodeinit_func
)(
MultibyteCodec_State
*
state
,
...
@@ -75,8 +76,7 @@ typedef struct {
...
@@ -75,8 +76,7 @@ typedef struct {
#define MAXENCPENDING 2
#define MAXENCPENDING 2
#define _MultibyteStatefulEncoder_HEAD \
#define _MultibyteStatefulEncoder_HEAD \
_MultibyteStatefulCodec_HEAD \
_MultibyteStatefulCodec_HEAD \
Py_UNICODE pending[MAXENCPENDING]; \
PyObject *pending;
Py_ssize_t pendingsize;
typedef
struct
{
typedef
struct
{
_MultibyteStatefulEncoder_HEAD
_MultibyteStatefulEncoder_HEAD
}
MultibyteStatefulEncoderContext
;
}
MultibyteStatefulEncoderContext
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment