locale.py 27.4 KB
Newer Older
1
""" Locale support.
2

3 4 5 6 7 8 9 10 11 12
    The module provides low-level access to the C lib's locale APIs
    and adds high level number formatting APIs as well as a locale
    aliasing engine to complement these.

    The aliasing engine includes support for many commonly used locale
    names and maps them to values suitable for passing to the C lib's
    setlocale() function. It also includes default encodings for all
    supported locale names.

"""
13

14
import sys
15

16 17 18
# Try importing the _locale module.
#
# If this fails, fall back on a basic 'C' locale emulation.
19

20 21
# Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
# trying the import.  So __all__ is also fiddled at the end of the file.
22 23
# Names exported by "from locale import *".  LC_MESSAGES is appended at
# the end of the file when it turns out to be defined.
__all__ = [
    # functions and the exception type
    "setlocale", "Error", "localeconv", "strcoll", "strxfrm",
    "format", "str", "atof", "atoi",
    # locale categories and limits
    "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY", "LC_NUMERIC",
    "LC_ALL", "CHAR_MAX",
]
25

26
try:

    from _locale import *

except ImportError:

    # Locale emulation: the _locale extension module is not available,
    # so provide a minimal pure-Python stand-in that only knows the
    # 'C' locale.

    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation always answers 'C'.  None (query), '' (use
            the default locale) and 'C' are accepted; any other value
            raises Error.
        """
        # '' asks for the default locale; the only locale this
        # emulation can offer is 'C', so treat it like an explicit 'C'
        # instead of rejecting the standard setlocale(LC_ALL, "") call.
        if value not in (None, '', 'C'):
            raise Error('_locale emulation only supports "C" locale')
        return 'C'

    def strcoll(a, b):
        """ strcoll(string,string) -> int.
            Compares two strings according to the locale.
        """
        # Plain ordering, yielding -1, 0 or 1 like cmp() would.
        return (a > b) - (a < b)

    def strxfrm(s):
        """ strxfrm(string) -> string.
            Returns a string that behaves for cmp locale-aware.
        """
        # In the 'C' locale the string already sorts correctly.
        return s
87 88 89 90 91

### Number formatting APIs

# Author: Martin von Loewis

92 93 94 95
#perform the grouping from right to left
def _group(s):
    """Insert the locale's thousands separator into the number string s.

    s is the integer part of an already formatted number.  It may start
    with padding or a sign and may end with padding spaces.  Digits are
    grouped from right to left following localeconv()['grouping'].

    Returns a tuple (grouped string, number of separators inserted).
    Raises ValueError if the grouping list starts with 0 ("repeat the
    previous group size"), since there is no previous size to repeat.
    """
    conv = localeconv()
    grouping = conv['grouping']
    if not grouping or not s:
        # nothing to group with, or nothing to group
        return (s, 0)
    thousands_sep = conv['thousands_sep']
    # Set trailing padding aside so that only the number itself is
    # grouped; it is re-attached unchanged on return.
    stripped = s.rstrip(' ')
    spaces = s[len(stripped):]
    s = stripped
    result = ""
    seps = 0
    group = None
    while s and grouping:
        # CHAR_MAX: no further grouping is to be performed
        if grouping[0] == CHAR_MAX:
            break
        # 0: re-use last group ad infinitum
        elif grouping[0] != 0:
            # consume the next group size
            group = grouping[0]
            grouping = grouping[1:]
        elif group is None:
            raise ValueError("invalid grouping")
        if result:
            result = s[-group:] + thousands_sep + result
            seps += 1
        else:
            result = s[-group:]
        s = s[:-group]
        if s and s[-1] not in "0123456789":
            # the leading string is only spaces and signs
            return s + result + spaces, seps
    if not result:
        return s + spaces, seps
    if s:
        # digits left over after the grouping list ran out
        result = s + thousands_sep + result
        seps += 1
    return result + spaces, seps
128 129 130

def format(f, val, grouping=0):
    """Format val with the % format f, honouring the current locale.

    The locale's decimal point replaces the '.' produced by the %
    operator.  If grouping is true, the integer part is additionally
    run through _group() to insert thousands separators.

    Raises Error if the formatted text contains more than one '.'.
    """
    formatted = f % val
    parts = formatted.split(".")
    inserted = 0
    if grouping:
        parts[0], inserted = _group(parts[0])
    if len(parts) > 2:
        raise Error("Too many decimal points in result string")
    if len(parts) == 2:
        formatted = parts[0] + localeconv()['decimal_point'] + parts[1]
    else:
        formatted = parts[0]

    if inserted:
        # If the number was formatted for a specific width, it may have
        # been padded with spaces on the left or right.  Drop one space
        # (first come, first served) for every separator that was
        # inserted.  Leading zeroes used as fillers are not dealt with.
        formatted = formatted.replace(" ", "", inserted)

    return formatted
157

158 159 160 161
def str(val):
    """Convert a float to a string, taking the locale into account.

    The value is rendered with 12 significant digits ("%.12g") through
    format(), so the locale's decimal point is used.
    """
    text = format("%.12g", val)
    return text

162
def atof(str, func=float):
    """Parse a locale-formatted number string and return func(number).

    Thousands separators are removed and the locale's decimal point is
    replaced by '.' before the text is handed to func (float unless
    another converter is given).
    """
    conv = localeconv()
    # Drop the grouping characters first ...
    thousands_sep = conv['thousands_sep']
    if thousands_sep:
        str = str.replace(thousands_sep, "")
    # ... then turn the locale's decimal point into a plain dot ...
    decimal_point = conv['decimal_point']
    if decimal_point:
        str = str.replace(decimal_point, ".")
    # ... and let the converter do the actual parsing.
    return func(str)

def atoi(str):
    """Parse a locale-formatted string as an integer.

    Same preprocessing as atof(), with int as the converter.
    """
    return atof(str, func=int)
180

181
def _test():
    """Print a small round-trip demo of the number formatting APIs."""
    # switch to the default locale so that grouping can show up
    setlocale(LC_ALL, "")
    # grouping: format with separators, then parse the result back
    grouped = format("%d", 123456789, 1)
    print("%s is %s" % (grouped, atoi(grouped)))
    # standard formatting
    plain = str(3.14)
    print("%s is %s" % (plain, atof(plain)))
189 190 191 192

### Locale name aliasing engine

# Author: Marc-Andre Lemburg, mal@lemburg.com
193 194 195 196 197
# Various tweaks by Fredrik Lundh <effbot@telia.com>

# Keep a reference to the low-level setlocale() (the one imported from
# _locale, or the 'C'-only emulation defined when that import fails).
# The name "setlocale" is rebound further down to a wrapper that also
# accepts locale tuples; that wrapper, getlocale() and resetlocale()
# call this saved function.
_setlocale = setlocale
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215

def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The result is formatted for use with setlocale().  The name
        is looked up in locale_alias, first together with its
        encoding and then by language name alone.

        If the name is not known at all, it is returned unchanged.

        If the given encoding is not known, it is kept as given; if
        no encoding is given, the default encoding listed for the
        locale code is used.

    """
    # Lowercase the name; ':' is sometimes used as encoding delimiter
    # and is treated like '.'.
    fullname = localename.lower().replace(':', '.')
    pieces = fullname.split('.')
    if len(pieces) > 1:
        # anything after the second component is ignored
        langname, encoding = pieces[0], pieces[1]
        fullname = langname + '.' + encoding
    else:
        langname, encoding = fullname, ''

    # First lookup: fullname (possibly with encoding)
    code = locale_alias.get(fullname)
    if code is not None:
        return code

    # Second try: langname (without encoding)
    code = locale_alias.get(langname)
    if code is None:
        return localename

    if '.' in code:
        langname, defenc = code.split('.')
    else:
        langname, defenc = code, ''
    if encoding:
        # map the requested encoding to its canonical spelling
        encoding = encoding_alias.get(encoding, encoding)
    else:
        encoding = defenc
    if not encoding:
        return langname
    return langname + '.' + encoding

def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    if '.' in code:
        # Always hand back a tuple, as documented and as the 'C'
        # branch below does (split() alone would yield a list).
        return tuple(code.split('.')[:2])
    elif code == 'C':
        return None, None
    # Wrap the argument so that a tuple passed by mistake is reported
    # instead of breaking the % formatting.
    raise ValueError('unknown locale: %s' % (localename,))
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287

def _build_localename(localetuple):

    """ Joins a (language code, encoding) tuple into a locale code
        of the form 'language.encoding'.

        A language code of None stands for 'C'; an encoding of None
        leaves out the '.encoding' part.  No aliasing or normalizing
        takes place.

    """
    name, encoding = localetuple
    if name is None:
        name = 'C'
    if encoding is not None:
        name = name + '.' + encoding
    return name
288 289

def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined and non-empty
        will be used; if none is, 'C' is assumed. envvars defaults to
        the search path used in GNU gettext; it must always contain
        the variable name 'LANG'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

        A ValueError is raised if the locale name found in the
        environment cannot be parsed.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code[:2] == "0x":
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    for variable in envvars:
        localename = os.environ.get(variable)
        # A variable that is set but empty counts as unset; using it
        # would only end in "unknown locale: " below.
        if localename:
            break
    else:
        localename = 'C'
    return _parse_localename(localename)

339 340

def getlocale(category=LC_CTYPE):

    """ Queries the current setting of the given locale category and
        returns it as tuple (language code, encoding).

        category may be one of the LC_* value except LC_ALL. It
        defaults to LC_CTYPE.  A TypeError is raised for LC_ALL when
        the low-level setting is a ';'-separated composite.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    # calling the low-level function without a value only queries
    current = _setlocale(category)
    if category == LC_ALL:
        if ';' in current:
            raise TypeError('category LC_ALL is not supported')
    return _parse_localename(current)

358
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine.  Locale strings (byte strings, unicode strings
        and subclasses of either) are passed directly to the C lib.

        category may be given as one of the LC_* values.

        Returns the result of the low-level setlocale() call.

    """
    # The module-level name "str" is rebound to the locale-aware
    # formatting function, so the string types are taken from literals.
    # isinstance() is used so that unicode strings and string subclasses
    # are not mistaken for locale tuples.
    is_string = isinstance(locale, type("")) or isinstance(locale, type(u""))
    if locale and not is_string:
        # convert to string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)
373

374
def resetlocale(category=LC_ALL):

    """ Switches the given category back to the default locale.

        The default is whatever getdefaultlocale() reports; it is
        turned into a locale string and handed to the low-level
        setlocale().  category defaults to LC_ALL.

    """
    default_name = _build_localename(getdefaultlocale())
    _setlocale(category, default_name)
383 384 385 386 387 388 389

### Database
#
# The following data was extracted from the locale.alias file which
# comes with X11 and then hand edited removing the explicit encoding
# definitions and adding some more aliases. The file is usually
# available as /usr/lib/X11/locale/locale.alias.
390
#
391 392 393 394 395 396

#
# The encoding_alias table maps lowercase encoding alias names to C
# locale encoding names (case-sensitive).
#
encoding_alias = {
    # plain C / code page 437
    '437': 'C',
    'c': 'C',
    # Latin-1
    'iso8859': 'ISO8859-1',
    '8859': 'ISO8859-1',
    '88591': 'ISO8859-1',
    'ascii': 'ISO8859-1',
    'en': 'ISO8859-1',
    'iso88591': 'ISO8859-1',
    'iso_8859-1': 'ISO8859-1',
    # Latin-9
    '885915': 'ISO8859-15',
    'iso885915': 'ISO8859-15',
    'iso_8859-15': 'ISO8859-15',
    # Latin-2
    'iso8859-2': 'ISO8859-2',
    'iso88592': 'ISO8859-2',
    'iso_8859-2': 'ISO8859-2',
    # other ISO 8859 parts
    'iso88595': 'ISO8859-5',
    'iso88596': 'ISO8859-6',
    'iso88597': 'ISO8859-7',
    'iso88598': 'ISO8859-8',
    'iso88599': 'ISO8859-9',
    # Japanese
    'iso-2022-jp': 'JIS7',
    'jis': 'JIS7',
    'jis7': 'JIS7',
    'sjis': 'SJIS',
    'ajec': 'eucJP',
    'eucjp': 'eucJP',
    'ujis': 'eucJP',
    # Thai
    'tis620': 'TACTIS',
    # Unicode
    'utf-8': 'utf',
    'utf8': 'utf',
    'utf8@ucs4': 'utf',
}

430
#
431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621
# The locale_alias table maps lowercase alias names to C locale names
# (case-sensitive). Encodings are always separated from the locale
# name using a dot ('.'); they should only be given in case the
# language name is needed to interpret the given encoding alias
# correctly (CJK codes often have this need).
#
locale_alias = {
        # Keys are written in lowercase because normalize() lowercases
        # the requested name before looking it up.  Keys that contain
        # a '.' ('ja_jp.sjis', 'zh_cn.big5', ...) are only reachable
        # through normalize()'s first lookup, which uses the name
        # together with its encoding; all other keys also serve as the
        # fallback when only the language part is looked up.  The part
        # after the '.' in a value is the default encoding used when
        # the caller did not give one.
        'american':                      'en_US.ISO8859-1',
        'ar':                            'ar_AA.ISO8859-6',
        'ar_aa':                         'ar_AA.ISO8859-6',
        'ar_sa':                         'ar_SA.ISO8859-6',
        'arabic':                        'ar_AA.ISO8859-6',
        'bg':                            'bg_BG.ISO8859-5',
        'bg_bg':                         'bg_BG.ISO8859-5',
        'bulgarian':                     'bg_BG.ISO8859-5',
        'c-french':                      'fr_CA.ISO8859-1',
        'c':                             'C',
        'c_c':                           'C',
        'cextend':                       'en_US.ISO8859-1',
        'chinese-s':                     'zh_CN.eucCN',
        'chinese-t':                     'zh_TW.eucTW',
        'croatian':                      'hr_HR.ISO8859-2',
        'cs':                            'cs_CZ.ISO8859-2',
        'cs_cs':                         'cs_CZ.ISO8859-2',
        'cs_cz':                         'cs_CZ.ISO8859-2',
        'cz':                            'cz_CZ.ISO8859-2',
        'cz_cz':                         'cz_CZ.ISO8859-2',
        'czech':                         'cs_CS.ISO8859-2',
        'da':                            'da_DK.ISO8859-1',
        'da_dk':                         'da_DK.ISO8859-1',
        'danish':                        'da_DK.ISO8859-1',
        'de':                            'de_DE.ISO8859-1',
        'de_at':                         'de_AT.ISO8859-1',
        'de_ch':                         'de_CH.ISO8859-1',
        'de_de':                         'de_DE.ISO8859-1',
        'dutch':                         'nl_BE.ISO8859-1',
        'ee':                            'ee_EE.ISO8859-4',
        'el':                            'el_GR.ISO8859-7',
        'el_gr':                         'el_GR.ISO8859-7',
        'en':                            'en_US.ISO8859-1',
        'en_au':                         'en_AU.ISO8859-1',
        'en_ca':                         'en_CA.ISO8859-1',
        'en_gb':                         'en_GB.ISO8859-1',
        'en_ie':                         'en_IE.ISO8859-1',
        'en_nz':                         'en_NZ.ISO8859-1',
        'en_uk':                         'en_GB.ISO8859-1',
        'en_us':                         'en_US.ISO8859-1',
        'eng_gb':                        'en_GB.ISO8859-1',
        'english':                       'en_EN.ISO8859-1',
        'english_uk':                    'en_GB.ISO8859-1',
        'english_united-states':         'en_US.ISO8859-1',
        'english_us':                    'en_US.ISO8859-1',
        'es':                            'es_ES.ISO8859-1',
        'es_ar':                         'es_AR.ISO8859-1',
        'es_bo':                         'es_BO.ISO8859-1',
        'es_cl':                         'es_CL.ISO8859-1',
        'es_co':                         'es_CO.ISO8859-1',
        'es_cr':                         'es_CR.ISO8859-1',
        'es_ec':                         'es_EC.ISO8859-1',
        'es_es':                         'es_ES.ISO8859-1',
        'es_gt':                         'es_GT.ISO8859-1',
        'es_mx':                         'es_MX.ISO8859-1',
        'es_ni':                         'es_NI.ISO8859-1',
        'es_pa':                         'es_PA.ISO8859-1',
        'es_pe':                         'es_PE.ISO8859-1',
        'es_py':                         'es_PY.ISO8859-1',
        'es_sv':                         'es_SV.ISO8859-1',
        'es_uy':                         'es_UY.ISO8859-1',
        'es_ve':                         'es_VE.ISO8859-1',
        'et':                            'et_EE.ISO8859-4',
        'et_ee':                         'et_EE.ISO8859-4',
        'fi':                            'fi_FI.ISO8859-1',
        'fi_fi':                         'fi_FI.ISO8859-1',
        'finnish':                       'fi_FI.ISO8859-1',
        'fr':                            'fr_FR.ISO8859-1',
        'fr_be':                         'fr_BE.ISO8859-1',
        'fr_ca':                         'fr_CA.ISO8859-1',
        'fr_ch':                         'fr_CH.ISO8859-1',
        'fr_fr':                         'fr_FR.ISO8859-1',
        'fre_fr':                        'fr_FR.ISO8859-1',
        'french':                        'fr_FR.ISO8859-1',
        'french_france':                 'fr_FR.ISO8859-1',
        'ger_de':                        'de_DE.ISO8859-1',
        'german':                        'de_DE.ISO8859-1',
        'german_germany':                'de_DE.ISO8859-1',
        'greek':                         'el_GR.ISO8859-7',
        'hebrew':                        'iw_IL.ISO8859-8',
        'hr':                            'hr_HR.ISO8859-2',
        'hr_hr':                         'hr_HR.ISO8859-2',
        'hu':                            'hu_HU.ISO8859-2',
        'hu_hu':                         'hu_HU.ISO8859-2',
        'hungarian':                     'hu_HU.ISO8859-2',
        'icelandic':                     'is_IS.ISO8859-1',
        'id':                            'id_ID.ISO8859-1',
        'id_id':                         'id_ID.ISO8859-1',
        'is':                            'is_IS.ISO8859-1',
        'is_is':                         'is_IS.ISO8859-1',
        'iso-8859-1':                    'en_US.ISO8859-1',
        'iso-8859-15':                   'en_US.ISO8859-15',
        'iso8859-1':                     'en_US.ISO8859-1',
        'iso8859-15':                    'en_US.ISO8859-15',
        'iso_8859_1':                    'en_US.ISO8859-1',
        'iso_8859_15':                   'en_US.ISO8859-15',
        'it':                            'it_IT.ISO8859-1',
        'it_ch':                         'it_CH.ISO8859-1',
        'it_it':                         'it_IT.ISO8859-1',
        'italian':                       'it_IT.ISO8859-1',
        'iw':                            'iw_IL.ISO8859-8',
        'iw_il':                         'iw_IL.ISO8859-8',
        'ja':                            'ja_JP.eucJP',
        'ja.jis':                        'ja_JP.JIS7',
        'ja.sjis':                       'ja_JP.SJIS',
        'ja_jp':                         'ja_JP.eucJP',
        'ja_jp.ajec':                    'ja_JP.eucJP',
        'ja_jp.euc':                     'ja_JP.eucJP',
        'ja_jp.eucjp':                   'ja_JP.eucJP',
        'ja_jp.iso-2022-jp':             'ja_JP.JIS7',
        'ja_jp.jis':                     'ja_JP.JIS7',
        'ja_jp.jis7':                    'ja_JP.JIS7',
        'ja_jp.mscode':                  'ja_JP.SJIS',
        'ja_jp.sjis':                    'ja_JP.SJIS',
        'ja_jp.ujis':                    'ja_JP.eucJP',
        'japan':                         'ja_JP.eucJP',
        'japanese':                      'ja_JP.SJIS',
        'japanese-euc':                  'ja_JP.eucJP',
        'japanese.euc':                  'ja_JP.eucJP',
        'jp_jp':                         'ja_JP.eucJP',
        'ko':                            'ko_KR.eucKR',
        'ko_kr':                         'ko_KR.eucKR',
        'ko_kr.euc':                     'ko_KR.eucKR',
        'korean':                        'ko_KR.eucKR',
        'lt':                            'lt_LT.ISO8859-4',
        'lv':                            'lv_LV.ISO8859-4',
        'mk':                            'mk_MK.ISO8859-5',
        'mk_mk':                         'mk_MK.ISO8859-5',
        'nl':                            'nl_NL.ISO8859-1',
        'nl_be':                         'nl_BE.ISO8859-1',
        'nl_nl':                         'nl_NL.ISO8859-1',
        'no':                            'no_NO.ISO8859-1',
        'no_no':                         'no_NO.ISO8859-1',
        'norwegian':                     'no_NO.ISO8859-1',
        'pl':                            'pl_PL.ISO8859-2',
        'pl_pl':                         'pl_PL.ISO8859-2',
        'polish':                        'pl_PL.ISO8859-2',
        'portuguese':                    'pt_PT.ISO8859-1',
        'portuguese_brazil':             'pt_BR.ISO8859-1',
        'posix':                         'C',
        'posix-utf2':                    'C',
        'pt':                            'pt_PT.ISO8859-1',
        'pt_br':                         'pt_BR.ISO8859-1',
        'pt_pt':                         'pt_PT.ISO8859-1',
        'ro':                            'ro_RO.ISO8859-2',
        'ro_ro':                         'ro_RO.ISO8859-2',
        'ru':                            'ru_RU.ISO8859-5',
        'ru_ru':                         'ru_RU.ISO8859-5',
        'rumanian':                      'ro_RO.ISO8859-2',
        'russian':                       'ru_RU.ISO8859-5',
        'serbocroatian':                 'sh_YU.ISO8859-2',
        'sh':                            'sh_YU.ISO8859-2',
        'sh_hr':                         'sh_HR.ISO8859-2',
        'sh_sp':                         'sh_YU.ISO8859-2',
        'sh_yu':                         'sh_YU.ISO8859-2',
        'sk':                            'sk_SK.ISO8859-2',
        'sk_sk':                         'sk_SK.ISO8859-2',
        'sl':                            'sl_CS.ISO8859-2',
        'sl_cs':                         'sl_CS.ISO8859-2',
        'sl_si':                         'sl_SI.ISO8859-2',
        'slovak':                        'sk_SK.ISO8859-2',
        'slovene':                       'sl_CS.ISO8859-2',
        'sp':                            'sp_YU.ISO8859-5',
        'sp_yu':                         'sp_YU.ISO8859-5',
        'spanish':                       'es_ES.ISO8859-1',
        'spanish_spain':                 'es_ES.ISO8859-1',
        'sr_sp':                         'sr_SP.ISO8859-2',
        'sv':                            'sv_SE.ISO8859-1',
        'sv_se':                         'sv_SE.ISO8859-1',
        'swedish':                       'sv_SE.ISO8859-1',
        'th_th':                         'th_TH.TACTIS',
        'tr':                            'tr_TR.ISO8859-9',
        'tr_tr':                         'tr_TR.ISO8859-9',
        'turkish':                       'tr_TR.ISO8859-9',
        'univ':                          'en_US.utf',
        'universal':                     'en_US.utf',
        'zh':                            'zh_CN.eucCN',
        'zh_cn':                         'zh_CN.eucCN',
        'zh_cn.big5':                    'zh_TW.eucTW',
        'zh_cn.euc':                     'zh_CN.eucCN',
        'zh_tw':                         'zh_TW.eucTW',
        'zh_tw.euc':                     'zh_TW.eucTW',
}

622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661
#
# this maps windows language identifiers (as used on Windows 95 and
# earlier) to locale strings.
#
# NOTE: this mapping is incomplete.  If your language is missing, send
# a note with the missing language identifier and the suggested locale
# code to Fredrik Lundh <effbot@telia.com>.  Thanks /F

# Keys are Windows language identifiers; values are locale codes of the
# form language_TERRITORY.  getdefaultlocale() returns these values to
# the caller as they are, so the territory part must be a real country
# code: the United Kingdom is GB (not UK) and Japan is JP (not JA).
windows_locale = {
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0409: "en_US", # English (United States)
    0x0809: "en_GB", # English (United Kingdom)
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x0407: "de_DE", # German (Standard)
    0x0408: "el_GR", # Greek
    0x040d: "iw_IL", # Hebrew
    0x040f: "is_IS", # Icelandic
    0x0410: "it_IT", # Italian (Standard)
    0x0411: "ja_JP", # Japanese
    0x0414: "no_NO", # Norwegian (Bokmal)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0c0a: "es_ES", # Spanish (Modern Sort)
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x041f: "tr_TR", # Turkish
}

662 663 664 665 666 667 668 669 670 671 672 673
def _print_locale():

    """ Test function.

        Prints the default locale found by getdefaultlocale(), then
        the per-category settings reported by getlocale() at startup,
        after resetlocale() and after setlocale(LC_ALL, "").
    """
    # Collect every LC_* name defined in this module.  LC_ALL is left
    # out: getlocale() rejects it when the setting is a composite.
    categories = {}
    for name, value in list(globals().items()):
        if name[:3] == 'LC_':
            categories[name] = value
    del categories['LC_ALL']

    def _print_settings(categories=categories):
        # One "name ... / Language / Encoding" paragraph per category,
        # in sorted order so that the report is reproducible.
        names = list(categories.keys())
        names.sort()
        for name in names:
            print('%s ...' % (name,))
            lang, enc = getlocale(categories[name])
            print('   Language:  %s' % (lang or '(undefined)',))
            print('   Encoding:  %s' % (enc or '(undefined)',))
            print('')

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('Language:  %s' % (lang or '(undefined)',))
    print('Encoding:  %s' % (enc or '(undefined)',))
    print('')

    print('Locale settings on startup:')
    print('-'*72)
    _print_settings()

    print('')
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _print_settings()

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only a locale failure is reported here; anything else (for
        # instance a keyboard interrupt) is allowed to propagate.
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print('')
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _print_settings()
717

718
###
719

720 721 722 723 724 725 726
# LC_MESSAGES is non-standard and may not have been provided by the
# _locale import at the top of the file; export it only if it exists.
try:
    LC_MESSAGES
except NameError:
    pass
else:
    __all__.append("LC_MESSAGES")

727
if __name__ == '__main__':
    # Run both self-tests, each under its own heading.
    print('Locale aliasing:')
    print('')
    _print_locale()
    print('')
    print('Number formatting:')
    print('')
    _test()