Skip to content
Projeler
Gruplar
Parçacıklar
Yardım
Yükleniyor...
Oturum aç / Kaydol
Gezinmeyi değiştir
C
cpython
Proje
Proje
Ayrıntılar
Etkinlik
Cycle Analytics
Depo (repository)
Depo (repository)
Dosyalar
Kayıtlar (commit)
Dallar (branch)
Etiketler
Katkıda bulunanlar
Grafik
Karşılaştır
Grafikler
Konular (issue)
0
Konular (issue)
0
Liste
Pano
Etiketler
Kilometre Taşları
Birleştirme (merge) Talepleri
0
Birleştirme (merge) Talepleri
0
CI / CD
CI / CD
İş akışları (pipeline)
İşler
Zamanlamalar
Grafikler
Paketler
Paketler
Wiki
Wiki
Parçacıklar
Parçacıklar
Üyeler
Üyeler
Collapse sidebar
Close sidebar
Etkinlik
Grafik
Grafikler
Yeni bir konu (issue) oluştur
İşler
Kayıtlar (commit)
Konu (issue) Panoları
Kenar çubuğunu aç
Batuhan Osman TASKAYA
cpython
Commits
d23d3930
Kaydet (Commit)
d23d3930
authored
Mar 02, 2010
tarafından
Victor Stinner
Dosyalara gözat
Seçenekler
Dosyalara Gözat
İndir
Eposta Yamaları
Sade Fark
Issue #7820: The parser tokenizer restores all bytes in the right order if the BOM
check fails. Fix an assertion in pydebug mode.
üst
0e717add
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
46 additions
and
22 deletions
+46
-22
test_pep263.py
Lib/test/test_pep263.py
+11
-0
NEWS
Misc/NEWS
+3
-0
tokenizer.c
Parser/tokenizer.c
+32
-22
No files found.
Lib/test/test_pep263.py
Dosyayı görüntüle @
d23d3930
...
...
@@ -30,6 +30,17 @@ class PEP263Test(unittest.TestCase):
self
.
assertEqual
(
d
[
'a'
],
d
[
'b'
])
self
.
assertEqual
(
len
(
d
[
'a'
]),
len
(
d
[
'b'
]))
def
test_issue7820
(
self
):
# Ensure that check_bom() restores all bytes in the right order if
# check_bom() fails in pydebug mode: a buffer starts with the first
# byte of a valid BOM, but next bytes are different
# one byte in common with the UTF-16-LE BOM
self
.
assertRaises
(
SyntaxError
,
eval
,
'
\xff\x20
'
)
# two bytes in common with the UTF-8 BOM
self
.
assertRaises
(
SyntaxError
,
eval
,
'
\xef\xbb\x20
'
)
def
test_main
():
test_support
.
run_unittest
(
PEP263Test
)
...
...
Misc/NEWS
Dosyayı görüntüle @
d23d3930
...
...
@@ -12,6 +12,9 @@ What's New in Python 2.7 alpha 4?
Core and Builtins
-----------------
- Issue #7820: The parser tokenizer restores all bytes in the right order if
the BOM check fails.
- Issue #7309: Fix unchecked attribute access when converting
UnicodeEncodeError, UnicodeDecodeError, and UnicodeTranslateError to
strings.
...
...
Parser/tokenizer.c
Dosyayı görüntüle @
d23d3930
...
...
@@ -312,47 +312,57 @@ check_bom(int get_char(struct tok_state *),
int
set_readline
(
struct
tok_state
*
,
const
char
*
),
struct
tok_state
*
tok
)
{
int
ch
=
get_char
(
tok
);
int
ch1
,
ch2
,
ch3
;
ch1
=
get_char
(
tok
);
tok
->
decoding_state
=
1
;
if
(
ch
==
EOF
)
{
if
(
ch
1
==
EOF
)
{
return
1
;
}
else
if
(
ch
==
0xEF
)
{
ch
=
get_char
(
tok
);
if
(
ch
!=
0xBB
)
goto
NON_BOM
;
ch
=
get_char
(
tok
);
if
(
ch
!=
0xBF
)
goto
NON_BOM
;
}
else
if
(
ch1
==
0xEF
)
{
ch2
=
get_char
(
tok
);
if
(
ch2
!=
0xBB
)
{
unget_char
(
ch2
,
tok
);
unget_char
(
ch1
,
tok
);
return
1
;
}
ch3
=
get_char
(
tok
);
if
(
ch3
!=
0xBF
)
{
unget_char
(
ch3
,
tok
);
unget_char
(
ch2
,
tok
);
unget_char
(
ch1
,
tok
);
return
1
;
}
#if 0
/* Disable support for UTF-16 BOMs until a decision
is made whether this needs to be supported. */
} else if (ch == 0xFE) {
ch = get_char(tok);
if (ch != 0xFF)
goto NON_BOM;
} else if (ch1 == 0xFE) {
ch2 = get_char(tok);
if (ch2 != 0xFF) {
unget_char(ch2, tok);
unget_char(ch1, tok);
return 1;
}
if (!set_readline(tok, "utf-16-be"))
return 0;
tok->decoding_state = -1;
} else if (ch == 0xFF) {
ch = get_char(tok);
if (ch != 0xFE)
goto NON_BOM;
} else if (ch1 == 0xFF) {
ch2 = get_char(tok);
if (ch2 != 0xFE) {
unget_char(ch2, tok);
unget_char(ch1, tok);
return 1;
}
if (!set_readline(tok, "utf-16-le"))
return 0;
tok->decoding_state = -1;
#endif
}
else
{
unget_char
(
ch
,
tok
);
unget_char
(
ch
1
,
tok
);
return
1
;
}
if
(
tok
->
encoding
!=
NULL
)
PyMem_FREE
(
tok
->
encoding
);
tok
->
encoding
=
new_string
(
"utf-8"
,
5
);
/* resulting is in utf-8 */
return
1
;
NON_BOM:
/* any token beginning with '\xEF', '\xFE', '\xFF' is a bad token */
unget_char
(
0xFF
,
tok
);
/* XXX this will cause a syntax error */
return
1
;
}
/* Read a line of text from TOK into S, using the stream in TOK.
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment