Kaydet (Commit) 7bd33c5e authored tarafından Gustavo Niemeyer's avatar Gustavo Niemeyer

This change implements the following gettext features, as

discussed recently in python-dev:

In _locale module:

- bind_textdomain_codeset() binding

In gettext module:

- bind_textdomain_codeset() function
- lgettext(), lngettext(), ldgettext(), ldngettext(),
  which return translated strings encoded in
  preferred system encoding, if
  bind_textdomain_codeset() was not used.
- Added equivalent functionality in translate()
  function and catalog classes.

Every change was also documented.
üst 5980ff2d
This diff is collapsed.
......@@ -469,15 +469,16 @@ that the \module{_locale} module is not accessible as a shared library.
The locale module exposes the C library's gettext interface on systems
that provide this interface. It consists of the functions
\function{gettext()}, \function{dgettext()}, \function{dcgettext()},
\function{textdomain()}, and \function{bindtextdomain()}. These are
similar to the same functions in the \refmodule{gettext} module, but use
the C library's binary format for message catalogs, and the C
library's search algorithms for locating message catalogs.
\function{textdomain()}, \function{bindtextdomain()}, and
\function{bind_textdomain_codeset()}. These are similar to the same
functions in the \refmodule{gettext} module, but use the C library's
binary format for message catalogs, and the C library's search
algorithms for locating message catalogs.
Python applications should normally find no need to invoke these
functions, and should use \refmodule{gettext} instead. A known
exception to this rule are applications that link use additional C
libraries which internally invoke \cfunction{gettext()} or
\function{cdgettext()}. For these applications, it may be necessary to
\function{dcgettext()}. For these applications, it may be necessary to
bind the text domain, so that the libraries can properly locate their
message catalogs.
......@@ -46,7 +46,7 @@ internationalized, to the local language and cultural habits.
# find this format documented anywhere.
import copy, os, re, struct, sys
import locale, copy, os, re, struct, sys
from errno import ENOENT
......@@ -171,6 +171,7 @@ class NullTranslations:
def __init__(self, fp=None):
self._info = {}
self._charset = None
self._output_charset = None
self._fallback = None
if fp is not None:
self._parse(fp)
......@@ -189,6 +190,11 @@ class NullTranslations:
return self._fallback.gettext(message)
return message
def lgettext(self, message):
if self._fallback:
return self._fallback.lgettext(message)
return message
def ngettext(self, msgid1, msgid2, n):
if self._fallback:
return self._fallback.ngettext(msgid1, msgid2, n)
......@@ -197,6 +203,14 @@ class NullTranslations:
else:
return msgid2
def lngettext(self, msgid1, msgid2, n):
if self._fallback:
return self._fallback.lngettext(msgid1, msgid2, n)
if n == 1:
return msgid1
else:
return msgid2
def ugettext(self, message):
if self._fallback:
return self._fallback.ugettext(message)
......@@ -216,6 +230,12 @@ class NullTranslations:
def charset(self):
return self._charset
def output_charset(self):
return self._output_charset
def set_output_charset(self, charset):
self._output_charset = charset
def install(self, unicode=False):
import __builtin__
__builtin__.__dict__['_'] = unicode and self.ugettext or self.gettext
......@@ -315,14 +335,29 @@ class GNUTranslations(NullTranslations):
return self._fallback.gettext(message)
return message
# Encode the Unicode tmsg back to an 8-bit string, if possible
if self._charset:
if self._output_charset:
return tmsg.encode(self._output_charset)
elif self._charset:
return tmsg.encode(self._charset)
return tmsg
def lgettext(self, message):
missing = object()
tmsg = self._catalog.get(message, missing)
if tmsg is missing:
if self._fallback:
return self._fallback.lgettext(message)
return message
if self._output_charset:
return tmsg.encode(self._output_charset)
return tmsg.encode(locale.getpreferredencoding())
def ngettext(self, msgid1, msgid2, n):
try:
tmsg = self._catalog[(msgid1, self.plural(n))]
if self._charset:
if self._output_charset:
return tmsg.encode(self._output_charset)
elif self._charset:
return tmsg.encode(self._charset)
return tmsg
except KeyError:
......@@ -333,6 +368,20 @@ class GNUTranslations(NullTranslations):
else:
return msgid2
def lngettext(self, msgid1, msgid2, n):
try:
tmsg = self._catalog[(msgid1, self.plural(n))]
if self._output_charset:
return tmsg.encode(self._output_charset)
return tmsg.encode(locale.getpreferredencoding())
except KeyError:
if self._fallback:
return self._fallback.lngettext(msgid1, msgid2, n)
if n == 1:
return msgid1
else:
return msgid2
def ugettext(self, message):
missing = object()
tmsg = self._catalog.get(message, missing)
......@@ -397,7 +446,7 @@ def find(domain, localedir=None, languages=None, all=0):
_translations = {}
def translation(domain, localedir=None, languages=None,
class_=None, fallback=False):
class_=None, fallback=False, codeset=None):
if class_ is None:
class_ = GNUTranslations
mofiles = find(domain, localedir, languages, all=1)
......@@ -414,9 +463,12 @@ def translation(domain, localedir=None, languages=None,
t = _translations.get(key)
if t is None:
t = _translations.setdefault(key, class_(open(mofile, 'rb')))
# Copy the translation object to allow setting fallbacks.
# All other instance data is shared with the cached object.
# Copy the translation object to allow setting fallbacks and
# output charset. All other instance data is shared with the
# cached object.
t = copy.copy(t)
if codeset:
t.set_output_charset(codeset)
if result is None:
result = t
else:
......@@ -424,13 +476,16 @@ def translation(domain, localedir=None, languages=None,
return result
def install(domain, localedir=None, unicode=False):
translation(domain, localedir, fallback=True).install(unicode)
def install(domain, localedir=None, unicode=False, codeset=None):
t = translation(domain, localedir, fallback=True, codeset=codeset)
t.install(unicode)
# a mapping b/w domains and locale directories
_localedirs = {}
# a mapping b/w domains and codesets
_localecodesets = {}
# current global domain, `messages' used for compatibility w/ GNU gettext
_current_domain = 'messages'
......@@ -449,17 +504,33 @@ def bindtextdomain(domain, localedir=None):
return _localedirs.get(domain, _default_localedir)
def bind_textdomain_codeset(domain, codeset=None):
global _localecodesets
if codeset is not None:
_localecodesets[domain] = codeset
return _localecodesets.get(domain)
def dgettext(domain, message):
try:
t = translation(domain, _localedirs.get(domain, None))
t = translation(domain, _localedirs.get(domain, None),
codeset=_localecodesets.get(domain))
except IOError:
return message
return t.gettext(message)
def ldgettext(domain, message):
try:
t = translation(domain, _localedirs.get(domain, None),
codeset=_localecodesets.get(domain))
except IOError:
return message
return t.lgettext(message)
def dngettext(domain, msgid1, msgid2, n):
try:
t = translation(domain, _localedirs.get(domain, None))
t = translation(domain, _localedirs.get(domain, None),
codeset=_localecodesets.get(domain))
except IOError:
if n == 1:
return msgid1
......@@ -467,14 +538,28 @@ def dngettext(domain, msgid1, msgid2, n):
return msgid2
return t.ngettext(msgid1, msgid2, n)
def ldngettext(domain, msgid1, msgid2, n):
try:
t = translation(domain, _localedirs.get(domain, None),
codeset=_localecodesets.get(domain))
except IOError:
if n == 1:
return msgid1
else:
return msgid2
return t.lngettext(msgid1, msgid2, n)
def gettext(message):
return dgettext(_current_domain, message)
def lgettext(message):
return ldgettext(_current_domain, message)
def ngettext(msgid1, msgid2, n):
return dngettext(_current_domain, msgid1, msgid2, n)
def lngettext(msgid1, msgid2, n):
return ldngettext(_current_domain, msgid1, msgid2, n)
# dcgettext() has been deemed unnecessary and is not implemented.
......
......@@ -33,6 +33,8 @@ Core and builtins
will cause a TypeError to be raised. This matches the behavior of
Jython.
- Implemented bind_textdomain_codeset() in locale module.
Extension modules
-----------------
......@@ -112,6 +114,12 @@ Library
- Bug #990307: when keep_empty_values is True, cgi.parse_qsl()
no longer returns spurious empty fields.
- Implemented bind_textdomain_codeset() in gettext module.
- Introduced in gettext module the l*gettext() family of functions,
which return translation strings encoded in the preferred encoding,
as informed by locale module's getpreferredencoding().
Tools/Demos
-----------
......
......@@ -649,6 +649,24 @@ PyIntl_bindtextdomain(PyObject* self,PyObject*args)
return PyString_FromString(dirname);
}
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
PyDoc_STRVAR(bind_textdomain_codeset__doc__,
"bind_textdomain_codeset(domain, codeset) -> string\n"
"Bind the C library's domain to codeset.");
static PyObject*
PyIntl_bind_textdomain_codeset(PyObject* self,PyObject*args)
{
char *domain,*codeset;
if (!PyArg_ParseTuple(args, "sz", &domain, &codeset))
return NULL;
codeset = bind_textdomain_codeset(domain, codeset);
if (codeset)
return PyString_FromString(codeset);
Py_RETURN_NONE;
}
#endif
#endif
static struct PyMethodDef PyLocale_Methods[] = {
......@@ -678,6 +696,10 @@ static struct PyMethodDef PyLocale_Methods[] = {
textdomain__doc__},
{"bindtextdomain",(PyCFunction)PyIntl_bindtextdomain,METH_VARARGS,
bindtextdomain__doc__},
#ifdef HAVE_BIND_TEXTDOMAIN_CODESET
{"bind_textdomain_codeset",(PyCFunction)PyIntl_bind_textdomain_codeset,
METH_VARARGS, bind_textdomain_codeset__doc__},
#endif
#endif
{NULL, NULL}
};
......
......@@ -2044,8 +2044,8 @@ fi
AC_MSG_RESULT(MACHDEP_OBJS)
# checks for library functions
AC_CHECK_FUNCS(alarm chown clock confstr ctermid execv \
fork fpathconf ftime ftruncate \
AC_CHECK_FUNCS(alarm bind_textdomain_codeset chown clock confstr ctermid \
execv fork fpathconf ftime ftruncate \
gai_strerror getgroups getlogin getloadavg getpeername getpgid getpid \
getpriority getpwent getsid getwd \
kill killpg lchown lstat mkfifo mknod mktime \
......
......@@ -37,6 +37,9 @@
/* Define this if your time.h defines altzone. */
#undef HAVE_ALTZONE
/* Define to 1 if you have the `bind_textdomain_codeset' function. */
#undef HAVE_BIND_TEXTDOMAIN_CODESET
/* Define to 1 if you have the <bluetooth/bluetooth.h> header file. */
#undef HAVE_BLUETOOTH_BLUETOOTH_H
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment