Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
13c3e380
Kaydet (Commit)
13c3e380
authored
Agu 14, 2007
tarafından
Martin v. Löwis
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Add XID_Start and XID_Continue properties to unicodectype.
üst
ff398c6f
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
90 additions
and
13 deletions
+90
-13
unicodeobject.h
Include/unicodeobject.h
+12
-0
unicodectype.c
Objects/unicodectype.c
+22
-0
unicodetype_db.h
Objects/unicodetype_db.h
+0
-0
makeunicodedata.py
Tools/unicode/makeunicodedata.py
+56
-13
No files found.
Include/unicodeobject.h
Dosyayı görüntüle @
13c3e380
...
...
@@ -205,6 +205,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define _PyUnicode_IsLowercase _PyUnicodeUCS2_IsLowercase
# define _PyUnicode_IsNumeric _PyUnicodeUCS2_IsNumeric
# define _PyUnicode_IsTitlecase _PyUnicodeUCS2_IsTitlecase
# define _PyUnicode_IsXidStart _PyUnicodeUCS2_IsXidStart
# define _PyUnicode_IsXidContinue _PyUnicodeUCS2_IsXidContinue
# define _PyUnicode_IsUppercase _PyUnicodeUCS2_IsUppercase
# define _PyUnicode_IsWhitespace _PyUnicodeUCS2_IsWhitespace
# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS2_ToDecimalDigit
...
...
@@ -289,6 +291,8 @@ typedef PY_UNICODE_TYPE Py_UNICODE;
# define _PyUnicode_IsLowercase _PyUnicodeUCS4_IsLowercase
# define _PyUnicode_IsNumeric _PyUnicodeUCS4_IsNumeric
# define _PyUnicode_IsTitlecase _PyUnicodeUCS4_IsTitlecase
# define _PyUnicode_IsXidStart _PyUnicodeUCS4_IsXidStart
# define _PyUnicode_IsXidContinue _PyUnicodeUCS4_IsXidContinue
# define _PyUnicode_IsUppercase _PyUnicodeUCS4_IsUppercase
# define _PyUnicode_IsWhitespace _PyUnicodeUCS4_IsWhitespace
# define _PyUnicode_ToDecimalDigit _PyUnicodeUCS4_ToDecimalDigit
...
...
@@ -1274,6 +1278,14 @@ PyAPI_FUNC(int) _PyUnicode_IsTitlecase(
Py_UNICODE
ch
/* Unicode character */
);
PyAPI_FUNC
(
int
)
_PyUnicode_IsXidStart
(
Py_UNICODE
ch
/* Unicode character */
);
PyAPI_FUNC
(
int
)
_PyUnicode_IsXidContinue
(
Py_UNICODE
ch
/* Unicode character */
);
PyAPI_FUNC
(
int
)
_PyUnicode_IsWhitespace
(
const
Py_UNICODE
ch
/* Unicode character */
);
...
...
Objects/unicodectype.c
Dosyayı görüntüle @
13c3e380
...
...
@@ -19,6 +19,8 @@
#define SPACE_MASK 0x20
#define TITLE_MASK 0x40
#define UPPER_MASK 0x80
#define XID_START_MASK 0x100
#define XID_CONTINUE_MASK 0x200
typedef
struct
{
const
Py_UNICODE
upper
;
...
...
@@ -98,6 +100,26 @@ int _PyUnicode_IsTitlecase(Py_UNICODE ch)
return
(
ctype
->
flags
&
TITLE_MASK
)
!=
0
;
}
/* Test whether the type record looked up for ch has the XID_Start
   flag set.  The result is 1 when the flag is present and 0 when it
   is not. */

int _PyUnicode_IsXidStart(Py_UNICODE ch)
{
    const _PyUnicode_TypeRecord *record = gettyperecord(ch);

    /* Collapse the masked bit into a strict 0/1 answer. */
    if (record->flags & XID_START_MASK)
        return 1;
    return 0;
}
/* Test whether the type record looked up for ch has the XID_Continue
   flag set.  The result is 1 when the flag is present and 0 when it
   is not. */

int _PyUnicode_IsXidContinue(Py_UNICODE ch)
{
    const _PyUnicode_TypeRecord *record = gettyperecord(ch);

    /* Collapse the masked bit into a strict 0/1 answer. */
    if (record->flags & XID_CONTINUE_MASK)
        return 1;
    return 0;
}
/* Returns the integer decimal (0-9) for Unicode characters having
this property, -1 otherwise. */
...
...
Objects/unicodetype_db.h
Dosyayı görüntüle @
13c3e380
This source diff could not be displayed because it is too large. You can
view the blob
instead.
Tools/unicode/makeunicodedata.py
Dosyayı görüntüle @
13c3e380
...
...
@@ -34,6 +34,7 @@ UNIDATA_VERSION = "4.1.0"
UNICODE_DATA
=
"UnicodeData
%
s.txt"
COMPOSITION_EXCLUSIONS
=
"CompositionExclusions
%
s.txt"
EASTASIAN_WIDTH
=
"EastAsianWidth
%
s.txt"
DERIVED_CORE_PROPERTIES
=
"DerivedCoreProperties
%
s.txt"
old_versions
=
[
"3.2.0"
]
...
...
@@ -57,6 +58,8 @@ LINEBREAK_MASK = 0x10
SPACE_MASK
=
0x20
TITLE_MASK
=
0x40
UPPER_MASK
=
0x80
XID_START_MASK
=
0x100
XID_CONTINUE_MASK
=
0x200
def
maketables
(
trace
=
0
):
...
...
@@ -65,16 +68,18 @@ def maketables(trace=0):
version
=
""
unicode
=
UnicodeData
(
UNICODE_DATA
%
version
,
COMPOSITION_EXCLUSIONS
%
version
,
EASTASIAN_WIDTH
%
version
)
EASTASIAN_WIDTH
%
version
,
DERIVED_CORE_PROPERTIES
%
version
)
print
(
len
(
filter
(
None
,
unicode
.
table
)),
"characters"
)
print
(
len
(
list
(
filter
(
None
,
unicode
.
table
)
)),
"characters"
)
for
version
in
old_versions
:
print
(
"--- Reading"
,
UNICODE_DATA
%
(
"-"
+
version
),
"..."
)
old_unicode
=
UnicodeData
(
UNICODE_DATA
%
(
"-"
+
version
),
COMPOSITION_EXCLUSIONS
%
(
"-"
+
version
),
EASTASIAN_WIDTH
%
(
"-"
+
version
))
print
(
len
(
filter
(
None
,
old_unicode
.
table
)),
"characters"
)
EASTASIAN_WIDTH
%
(
"-"
+
version
),
DERIVED_CORE_PROPERTIES
%
(
"-"
+
version
))
print
(
len
(
list
(
filter
(
None
,
old_unicode
.
table
))),
"characters"
)
merge_old_version
(
version
,
unicode
,
old_unicode
)
makeunicodename
(
unicode
,
trace
)
...
...
@@ -148,7 +153,7 @@ def makeunicodedata(unicode, trace):
assert
prefix
<
256
# content
decomp
=
[
prefix
+
(
len
(
decomp
)
<<
8
)]
+
\
map
(
lambda
s
:
int
(
s
,
16
),
decomp
)
list
(
map
(
lambda
s
:
int
(
s
,
16
),
decomp
)
)
# Collect NFC pairs
if
not
prefix
and
len
(
decomp
)
==
3
and
\
char
not
in
unicode
.
exclusions
and
\
...
...
@@ -353,6 +358,7 @@ def makeunicodetype(unicode, trace):
# extract database properties
category
=
record
[
2
]
bidirectional
=
record
[
4
]
properties
=
record
[
16
]
flags
=
0
if
category
in
[
"Lm"
,
"Lt"
,
"Lu"
,
"Ll"
,
"Lo"
]:
flags
|=
ALPHA_MASK
...
...
@@ -366,6 +372,10 @@ def makeunicodetype(unicode, trace):
flags
|=
TITLE_MASK
if
category
==
"Lu"
:
flags
|=
UPPER_MASK
if
"XID_Start"
in
properties
:
flags
|=
XID_START_MASK
if
"XID_Continue"
in
properties
:
flags
|=
XID_CONTINUE_MASK
# use delta predictor for upper/lower/title
if
record
[
12
]:
upper
=
int
(
record
[
12
],
16
)
-
char
...
...
@@ -447,7 +457,7 @@ def makeunicodename(unicode, trace):
if
name
and
name
[
0
]
!=
"<"
:
names
[
char
]
=
name
+
chr
(
0
)
print
(
len
(
filter
(
lambda
n
:
n
is
not
None
,
names
)),
"distinct names"
)
print
(
len
(
list
(
filter
(
lambda
n
:
n
is
not
None
,
names
)
)),
"distinct names"
)
# collect unique words from names (note that we differ between
# words inside a sentence, and words ending a sentence. the
...
...
@@ -470,10 +480,12 @@ def makeunicodename(unicode, trace):
print
(
n
,
"words in text;"
,
b
,
"bytes"
)
wordlist
=
words
.
items
(
)
wordlist
=
list
(
words
.
items
()
)
# sort on falling frequency, then by name
def
cmpwords
((
aword
,
alist
),(
bword
,
blist
)):
def
cmpwords
(
a
,
b
):
aword
,
alist
=
a
bword
,
blist
=
b
r
=
-
cmp
(
len
(
alist
),
len
(
blist
))
if
r
:
return
r
...
...
@@ -526,7 +538,7 @@ def makeunicodename(unicode, trace):
words
[
w
]
=
len
(
lexicon_offset
)
lexicon_offset
.
append
(
o
)
lexicon
=
map
(
ord
,
lexicon
)
lexicon
=
list
(
map
(
ord
,
lexicon
)
)
# generate phrasebook from names and lexicon
phrasebook
=
[
0
]
...
...
@@ -660,11 +672,14 @@ def merge_old_version(version, new, old):
elif
k
==
14
:
# change to simple titlecase mapping; ignore
pass
elif
k
==
16
:
# derived property changes; not yet
pass
else
:
class
Difference
(
Exception
):
pass
raise
Difference
,
(
hex
(
i
),
k
,
old
.
table
[
i
],
new
.
table
[
i
])
new
.
changed
.
append
((
version
,
zip
(
bidir_changes
,
category_changes
,
decimal_changes
,
numeric_changes
),
new
.
changed
.
append
((
version
,
list
(
zip
(
bidir_changes
,
category_changes
,
decimal_changes
,
numeric_changes
)
)
,
normalization_changes
))
...
...
@@ -677,8 +692,14 @@ def merge_old_version(version, new, old):
import
sys
class
UnicodeData
:
def
__init__
(
self
,
filename
,
exclusions
,
eastasianwidth
,
expand
=
1
):
# Record structure:
# [ID, name, category, combining, bidi, decomp, (6)
# decimal, digit, numeric, bidi-mirrored, Unicode-1-name, (11)
# ISO-comment, uppercase, lowercase, titlecase, ea-width, (16)
# derived-props] (17)
def
__init__
(
self
,
filename
,
exclusions
,
eastasianwidth
,
derivedprops
,
expand
=
1
):
self
.
changed
=
[]
file
=
open
(
filename
)
table
=
[
None
]
*
0x110000
...
...
@@ -742,6 +763,28 @@ class UnicodeData:
if
table
[
i
]
is
not
None
:
table
[
i
]
.
append
(
widths
[
i
])
for
i
in
range
(
0
,
0x110000
):
if
table
[
i
]
is
not
None
:
table
[
i
]
.
append
(
set
())
for
s
in
open
(
derivedprops
):
s
=
s
.
split
(
'#'
,
1
)[
0
]
.
strip
()
if
not
s
:
continue
r
,
p
=
s
.
split
(
";"
)
r
=
r
.
strip
()
p
=
p
.
strip
()
if
".."
in
r
:
first
,
last
=
[
int
(
c
,
16
)
for
c
in
r
.
split
(
'..'
)]
chars
=
range
(
first
,
last
+
1
)
else
:
chars
=
[
int
(
r
,
16
)]
for
char
in
chars
:
if
table
[
char
]:
# Some properties (e.g. Default_Ignorable_Code_Point)
# apply to unassigned code points; ignore them
table
[
char
][
-
1
]
.
add
(
p
)
def
uselatin1
(
self
):
# restrict character range to ISO Latin 1
self
.
chars
=
range
(
256
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment