Commit 26cb4657, authored by Serhiy Storchaka, committed by GitHub

bpo-29755: Fixed the lgettext() family of functions in the gettext module. (#2266)

They now always return bytes.

Updated the gettext documentation.
parent 8457706e
This diff is collapsed.
...@@ -279,7 +279,9 @@ class NullTranslations: ...@@ -279,7 +279,9 @@ class NullTranslations:
def lgettext(self, message): def lgettext(self, message):
if self._fallback: if self._fallback:
return self._fallback.lgettext(message) return self._fallback.lgettext(message)
return message if self._output_charset:
return message.encode(self._output_charset)
return message.encode(locale.getpreferredencoding())
def ngettext(self, msgid1, msgid2, n): def ngettext(self, msgid1, msgid2, n):
if self._fallback: if self._fallback:
...@@ -293,9 +295,12 @@ class NullTranslations: ...@@ -293,9 +295,12 @@ class NullTranslations:
if self._fallback: if self._fallback:
return self._fallback.lngettext(msgid1, msgid2, n) return self._fallback.lngettext(msgid1, msgid2, n)
if n == 1: if n == 1:
return msgid1 tmsg = msgid1
else: else:
return msgid2 tmsg = msgid2
if self._output_charset:
return tmsg.encode(self._output_charset)
return tmsg.encode(locale.getpreferredencoding())
def info(self): def info(self):
return self._info return self._info
...@@ -377,7 +382,7 @@ class GNUTranslations(NullTranslations): ...@@ -377,7 +382,7 @@ class GNUTranslations(NullTranslations):
if mlen == 0: if mlen == 0:
# Catalog description # Catalog description
lastk = None lastk = None
for b_item in tmsg.split('\n'.encode("ascii")): for b_item in tmsg.split(b'\n'):
item = b_item.decode().strip() item = b_item.decode().strip()
if not item: if not item:
continue continue
...@@ -425,7 +430,7 @@ class GNUTranslations(NullTranslations): ...@@ -425,7 +430,7 @@ class GNUTranslations(NullTranslations):
if tmsg is missing: if tmsg is missing:
if self._fallback: if self._fallback:
return self._fallback.lgettext(message) return self._fallback.lgettext(message)
return message tmsg = message
if self._output_charset: if self._output_charset:
return tmsg.encode(self._output_charset) return tmsg.encode(self._output_charset)
return tmsg.encode(locale.getpreferredencoding()) return tmsg.encode(locale.getpreferredencoding())
...@@ -433,16 +438,16 @@ class GNUTranslations(NullTranslations): ...@@ -433,16 +438,16 @@ class GNUTranslations(NullTranslations):
def lngettext(self, msgid1, msgid2, n): def lngettext(self, msgid1, msgid2, n):
try: try:
tmsg = self._catalog[(msgid1, self.plural(n))] tmsg = self._catalog[(msgid1, self.plural(n))]
if self._output_charset:
return tmsg.encode(self._output_charset)
return tmsg.encode(locale.getpreferredencoding())
except KeyError: except KeyError:
if self._fallback: if self._fallback:
return self._fallback.lngettext(msgid1, msgid2, n) return self._fallback.lngettext(msgid1, msgid2, n)
if n == 1: if n == 1:
return msgid1 tmsg = msgid1
else: else:
return msgid2 tmsg = msgid2
if self._output_charset:
return tmsg.encode(self._output_charset)
return tmsg.encode(locale.getpreferredencoding())
def gettext(self, message): def gettext(self, message):
missing = object() missing = object()
...@@ -582,11 +587,11 @@ def dgettext(domain, message): ...@@ -582,11 +587,11 @@ def dgettext(domain, message):
return t.gettext(message) return t.gettext(message)
def ldgettext(domain, message): def ldgettext(domain, message):
codeset = _localecodesets.get(domain)
try: try:
t = translation(domain, _localedirs.get(domain, None), t = translation(domain, _localedirs.get(domain, None), codeset=codeset)
codeset=_localecodesets.get(domain))
except OSError: except OSError:
return message return message.encode(codeset or locale.getpreferredencoding())
return t.lgettext(message) return t.lgettext(message)
def dngettext(domain, msgid1, msgid2, n): def dngettext(domain, msgid1, msgid2, n):
...@@ -601,14 +606,15 @@ def dngettext(domain, msgid1, msgid2, n): ...@@ -601,14 +606,15 @@ def dngettext(domain, msgid1, msgid2, n):
return t.ngettext(msgid1, msgid2, n) return t.ngettext(msgid1, msgid2, n)
def ldngettext(domain, msgid1, msgid2, n): def ldngettext(domain, msgid1, msgid2, n):
codeset = _localecodesets.get(domain)
try: try:
t = translation(domain, _localedirs.get(domain, None), t = translation(domain, _localedirs.get(domain, None), codeset=codeset)
codeset=_localecodesets.get(domain))
except OSError: except OSError:
if n == 1: if n == 1:
return msgid1 tmsg = msgid1
else: else:
return msgid2 tmsg = msgid2
return tmsg.encode(codeset or locale.getpreferredencoding())
return t.lngettext(msgid1, msgid2, n) return t.lngettext(msgid1, msgid2, n)
def gettext(message): def gettext(message):
......
import os import os
import base64 import base64
import gettext import gettext
import locale
import unittest import unittest
from test import support from test import support
...@@ -455,6 +456,122 @@ class PluralFormsTestCase(GettextBaseTest): ...@@ -455,6 +456,122 @@ class PluralFormsTestCase(GettextBaseTest):
self.assertRaises(TypeError, f, object()) self.assertRaises(TypeError, f, object())
class LGettextTestCase(GettextBaseTest):
    """Exercise the lgettext() family and check that it returns bytes.

    Each scenario is run through the module-level functions (lgettext,
    ldgettext, lngettext, ldngettext) and through the methods of a
    GNUTranslations instance, both with the default encoding and with an
    explicitly selected 'utf-16' codeset / output charset.
    """

    # (msgid, expected text) pairs for the singular lookups.
    _SINGULAR_CASES = (
        ('mullusk', 'bacon'),
        ('spam', 'spam'),
    )

    # ((msgid1, msgid2, n), expected text) pairs for the plural lookups.
    _PLURAL_CASES = (
        (('There is %s file', 'There are %s files', 1),
         'Hay %s fichero'),
        (('There is %s file', 'There are %s files', 2),
         'Hay %s ficheros'),
        (('There is %s directory', 'There are %s directories', 1),
         'There is %s directory'),
        (('There is %s directory', 'There are %s directories', 2),
         'There are %s directories'),
    )

    def setUp(self):
        super().setUp()
        self.mofile = MOFILE

    # -- helpers ----------------------------------------------------------

    def _load_translations(self):
        """Return a GNUTranslations object read from self.mofile."""
        with open(self.mofile, 'rb') as fp:
            return gettext.GNUTranslations(fp)

    def _check_singular(self, translate, encoding='ascii'):
        """Assert translate(msgid) equals the expected text as bytes.

        The expected text is encoded with *encoding*; all expected strings
        are pure ASCII, so the default yields plain bytes literals such as
        b'bacon'.
        """
        for msgid, expected in self._SINGULAR_CASES:
            self.assertEqual(translate(msgid), expected.encode(encoding))

    def _check_plural(self, translate, encoding='ascii'):
        """Assert translate(msgid1, msgid2, n) equals the expected bytes."""
        for args, expected in self._PLURAL_CASES:
            x = translate(*args)
            self.assertEqual(x, expected.encode(encoding))

    # -- singular forms ---------------------------------------------------

    def test_lgettext(self):
        self._check_singular(gettext.lgettext)
        self._check_singular(
            lambda message: gettext.ldgettext('gettext', message))

    def test_lgettext_2(self):
        t = self._load_translations()
        self._check_singular(t.lgettext)

    def test_lgettext_bind_textdomain_codeset(self):
        saved_codeset = gettext.bind_textdomain_codeset('gettext')
        try:
            gettext.bind_textdomain_codeset('gettext', 'utf-16')
            self._check_singular(gettext.lgettext, 'utf-16')
            self._check_singular(
                lambda message: gettext.ldgettext('gettext', message),
                'utf-16')
        finally:
            # Drop the utf-16 binding, then restore whatever was bound before.
            del gettext._localecodesets['gettext']
            gettext.bind_textdomain_codeset('gettext', saved_codeset)

    def test_lgettext_output_encoding(self):
        t = self._load_translations()
        t.set_output_charset('utf-16')
        self._check_singular(t.lgettext, 'utf-16')

    # -- plural forms -----------------------------------------------------

    def test_lngettext(self):
        self._check_plural(gettext.lngettext)
        self._check_plural(
            lambda *args: gettext.ldngettext('gettext', *args))

    def test_lngettext_2(self):
        t = self._load_translations()
        self._check_plural(t.lngettext)

    def test_lngettext_bind_textdomain_codeset(self):
        saved_codeset = gettext.bind_textdomain_codeset('gettext')
        try:
            gettext.bind_textdomain_codeset('gettext', 'utf-16')
            self._check_plural(gettext.lngettext, 'utf-16')
            self._check_plural(
                lambda *args: gettext.ldngettext('gettext', *args),
                'utf-16')
        finally:
            # Drop the utf-16 binding, then restore whatever was bound before.
            del gettext._localecodesets['gettext']
            gettext.bind_textdomain_codeset('gettext', saved_codeset)

    def test_lngettext_output_encoding(self):
        t = self._load_translations()
        t.set_output_charset('utf-16')
        self._check_plural(t.lngettext, 'utf-16')
class GNUTranslationParsingTest(GettextBaseTest): class GNUTranslationParsingTest(GettextBaseTest):
def test_plural_form_error_issue17898(self): def test_plural_form_error_issue17898(self):
with open(MOFILE, 'wb') as fp: with open(MOFILE, 'wb') as fp:
...@@ -472,13 +589,10 @@ class UnicodeTranslationsTest(GettextBaseTest): ...@@ -472,13 +589,10 @@ class UnicodeTranslationsTest(GettextBaseTest):
self._ = self.t.gettext self._ = self.t.gettext
def test_unicode_msgid(self): def test_unicode_msgid(self):
unless = self.assertTrue self.assertIsInstance(self._(''), str)
unless(isinstance(self._(''), str))
unless(isinstance(self._(''), str))
def test_unicode_msgstr(self): def test_unicode_msgstr(self):
eq = self.assertEqual self.assertEqual(self._('ab\xde'), '\xa4yz')
eq(self._('ab\xde'), '\xa4yz')
class WeirdMetadataTest(GettextBaseTest): class WeirdMetadataTest(GettextBaseTest):
...@@ -547,7 +661,7 @@ if __name__ == '__main__': ...@@ -547,7 +661,7 @@ if __name__ == '__main__':
# The original version was automatically generated from the sources with # The original version was automatically generated from the sources with
# pygettext. Later it was manually modified to add plural forms support. # pygettext. Later it was manually modified to add plural forms support.
''' b'''
# Dummy translation for the Python test_gettext.py module. # Dummy translation for the Python test_gettext.py module.
# Copyright (C) 2001 Python Software Foundation # Copyright (C) 2001 Python Software Foundation
# Barry Warsaw <barry@python.org>, 2000. # Barry Warsaw <barry@python.org>, 2000.
...@@ -607,7 +721,7 @@ msgstr[1] "Hay %s ficheros" ...@@ -607,7 +721,7 @@ msgstr[1] "Hay %s ficheros"
# Here's the second example po file example, used to generate the UMO_DATA # Here's the second example po file example, used to generate the UMO_DATA
# containing utf-8 encoded Unicode strings # containing utf-8 encoded Unicode strings
''' b'''
# Dummy translation for the Python test_gettext.py module. # Dummy translation for the Python test_gettext.py module.
# Copyright (C) 2001 Python Software Foundation # Copyright (C) 2001 Python Software Foundation
# Barry Warsaw <barry@python.org>, 2000. # Barry Warsaw <barry@python.org>, 2000.
...@@ -630,7 +744,7 @@ msgstr "\xc2\xa4yz" ...@@ -630,7 +744,7 @@ msgstr "\xc2\xa4yz"
# Here's the third example po file, used to generate MMO_DATA # Here's the third example po file, used to generate MMO_DATA
''' b'''
msgid "" msgid ""
msgstr "" msgstr ""
"Project-Id-Version: No Project 0.0\n" "Project-Id-Version: No Project 0.0\n"
...@@ -649,7 +763,7 @@ msgstr "" ...@@ -649,7 +763,7 @@ msgstr ""
# messages.po, used for bug 17898 # messages.po, used for bug 17898
# #
''' b'''
# test file for http://bugs.python.org/issue17898 # test file for http://bugs.python.org/issue17898
msgid "" msgid ""
msgstr "" msgstr ""
......
...@@ -368,6 +368,9 @@ Extension Modules ...@@ -368,6 +368,9 @@ Extension Modules
Library Library
------- -------
- bpo-29755: Fixed the lgettext() family of functions in the gettext module.
They now always return bytes.
- [Security] bpo-30500: Fix urllib.parse.splithost() to correctly parse - [Security] bpo-30500: Fix urllib.parse.splithost() to correctly parse
fragments. For example, ``splithost('//127.0.0.1#@evil.com/')`` now fragments. For example, ``splithost('//127.0.0.1#@evil.com/')`` now
correctly returns the ``127.0.0.1`` host, instead of treating ``@evil.com`` correctly returns the ``127.0.0.1`` host, instead of treating ``@evil.com``
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment