Commit 6c863d1a, authored by Walter Dörwald

Merged revisions 71894 via svnmerge from

svn+ssh://pythondev@svn.python.org/python/trunk

........
  r71894 | walter.doerwald | 2009-04-25 16:03:16 +0200 (Sa, 25 Apr 2009) | 4 lines

  Issue #5828 (Invalid behavior of unicode.lower): Fixed bogus logic in
  makeunicodedata.py and regenerated the Unicode database (This fixes
  u'\u1d79'.lower() == '\x00').
........
parent 22999a69
...@@ -20,7 +20,7 @@ encoding = 'utf-8' ...@@ -20,7 +20,7 @@ encoding = 'utf-8'
class UnicodeMethodsTest(unittest.TestCase): class UnicodeMethodsTest(unittest.TestCase):
# update this, if the database changes # update this, if the database changes
expectedchecksum = 'aef99984a58c8e1e5363a3175f2ff9608599a93e' expectedchecksum = 'b7db9b5f1d804976fa921d2009cbef6f025620c1'
def test_method_checksum(self): def test_method_checksum(self):
h = hashlib.sha1() h = hashlib.sha1()
...@@ -257,6 +257,19 @@ class UnicodeMiscTest(UnicodeDatabaseTest): ...@@ -257,6 +257,19 @@ class UnicodeMiscTest(UnicodeDatabaseTest):
# the upper-case mapping: as delta, or as absolute value # the upper-case mapping: as delta, or as absolute value
self.assert_(u"a".upper()==u'A') self.assert_(u"a".upper()==u'A')
self.assert_(u"\u1d79".upper()==u'\ua77d') self.assert_(u"\u1d79".upper()==u'\ua77d')
self.assert_(u".".upper()==u".")
def test_bug_5828(self):
self.assertEqual(u"\u1d79".lower(), u"\u1d79")
# Only U+0000 should have U+0000 as its upper/lower/titlecase variant
self.assertEqual(
[
c for c in range(sys.maxunicode+1)
if u"\x00" in unichr(c).lower()+unichr(c).upper()+unichr(c).title()
],
[0]
)
def test_main(): def test_main():
test.test_support.run_unittest( test.test_support.run_unittest(
......
...@@ -34,6 +34,10 @@ Library ...@@ -34,6 +34,10 @@ Library
dispatcher now has an 'ignore_log_types' attribute for suppressing dispatcher now has an 'ignore_log_types' attribute for suppressing
log messages, which is set to 'warning' by default. log messages, which is set to 'warning' by default.
- Issue #5828 (Invalid behavior of unicode.lower): Fixed bogus logic in
makeunicodedata.py and regenerated the Unicode database (This fixes
u'\u1d79'.lower() == '\x00').
Tests Tests
----- -----
......
...@@ -118,7 +118,7 @@ const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = { ...@@ -118,7 +118,7 @@ const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {
{0, 0, 0, 0, 7, 4}, {0, 0, 0, 0, 7, 4},
{0, 0, 0, 0, 8, 4}, {0, 0, 0, 0, 8, 4},
{0, 0, 0, 0, 9, 4}, {0, 0, 0, 0, 9, 4},
{42877, 0, 42877, 0, 0, 265}, {42877, 7545, 42877, 0, 0, 265},
{3814, 0, 3814, 0, 0, 9}, {3814, 0, 3814, 0, 0, 9},
{65477, 0, 65477, 0, 0, 9}, {65477, 0, 65477, 0, 0, 9},
{0, 57921, 0, 0, 0, 129}, {0, 57921, 0, 0, 0, 129},
...@@ -159,7 +159,7 @@ const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = { ...@@ -159,7 +159,7 @@ const _PyUnicode_TypeRecord _PyUnicode_TypeRecords[] = {
{0, 54787, 0, 0, 0, 129}, {0, 54787, 0, 0, 0, 129},
{0, 54753, 0, 0, 0, 129}, {0, 54753, 0, 0, 0, 129},
{58272, 0, 58272, 0, 0, 9}, {58272, 0, 58272, 0, 0, 9},
{0, 7545, 0, 0, 0, 385}, {42877, 7545, 42877, 0, 0, 385},
{0, 40, 0, 0, 0, 129}, {0, 40, 0, 0, 0, 129},
{65496, 0, 65496, 0, 0, 9}, {65496, 0, 65496, 0, 0, 9},
}; };
......
...@@ -371,33 +371,32 @@ def makeunicodetype(unicode, trace): ...@@ -371,33 +371,32 @@ def makeunicodetype(unicode, trace):
flags |= UPPER_MASK flags |= UPPER_MASK
# use delta predictor for upper/lower/title if it fits # use delta predictor for upper/lower/title if it fits
if record[12]: if record[12]:
upper = int(record[12], 16) - char upper = int(record[12], 16)
if -32768 <= upper <= 32767 and delta:
upper = upper & 0xffff
else:
upper += char
delta = False
else: else:
upper = 0 upper = char
if record[13]: if record[13]:
lower = int(record[13], 16) - char lower = int(record[13], 16)
if -32768 <= lower <= 32767 and delta:
lower = lower & 0xffff
else:
lower += char
delta = False
else: else:
lower = 0 lower = char
if record[14]: if record[14]:
title = int(record[14], 16) - char title = int(record[14], 16)
if -32768 <= lower <= 32767 and delta: else:
title = title & 0xffff # UCD.html says that a missing title char means that
else: # it defaults to the uppercase character, not to the
title += char # character itself. Apparently, in the current UCD (5.x)
delta = False # this feature is never used
title = upper
upper_d = upper - char
lower_d = lower - char
title_d = title - char
if -32768 <= upper_d <= 32767 and \
-32768 <= lower_d <= 32767 and \
-32768 <= title_d <= 32767:
# use deltas
upper = upper_d & 0xffff
lower = lower_d & 0xffff
title = title_d & 0xffff
else: else:
title = 0
if not delta:
flags |= NODELTA_MASK flags |= NODELTA_MASK
# decimal digit, integer digit # decimal digit, integer digit
decimal = 0 decimal = 0
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment