""" Locale support.

    The module provides low-level access to the C lib's locale APIs
    and adds high level number formatting APIs as well as a locale
    aliasing engine to complement these.

    The aliasing engine includes support for many commonly used locale
    names and maps them to values suitable for passing to the C lib's
    setlocale() function. It also includes default encodings for all
    supported locale names.

"""
13

14
import sys
15

# Try importing the _locale module.
#
# If this fails, fall back on a basic 'C' locale emulation.

# Yuck:  LC_MESSAGES is non-standard:  can't tell whether it exists before
# trying the import.  So __all__ is also fiddled at the end of the file.
# Names exported by "from locale import *".  LC_MESSAGES is appended at
# the end of the file when the platform provides it.
__all__ = ["setlocale","Error","localeconv","strcoll","strxfrm",
           "format","str","atof","atoi","LC_CTYPE","LC_COLLATE",
           "LC_TIME","LC_MONETARY","LC_NUMERIC", "LC_ALL","CHAR_MAX"]
25

26
try:

    from _locale import *

except ImportError:

    # Locale emulation: the _locale extension module could not be
    # imported, so define stand-ins that only know the 'C' locale.

    CHAR_MAX = 127
    LC_ALL = 6
    LC_COLLATE = 3
    LC_CTYPE = 0
    LC_MESSAGES = 5
    LC_MONETARY = 4
    LC_NUMERIC = 1
    LC_TIME = 2
    Error = ValueError

    def localeconv():
        """ localeconv() -> dict.
            Returns numeric and monetary locale-specific parameters.
        """
        # 'C' locale default values
        return {'grouping': [127],
                'currency_symbol': '',
                'n_sign_posn': 127,
                'p_cs_precedes': 127,
                'n_cs_precedes': 127,
                'mon_grouping': [],
                'n_sep_by_space': 127,
                'decimal_point': '.',
                'negative_sign': '',
                'positive_sign': '',
                'p_sep_by_space': 127,
                'int_curr_symbol': '',
                'p_sign_posn': 127,
                'thousands_sep': '',
                'mon_thousands_sep': '',
                'frac_digits': 127,
                'mon_decimal_point': '',
                'int_frac_digits': 127}

    def setlocale(category, value=None):
        """ setlocale(integer,string=None) -> string.
            Activates/queries locale processing.

            The emulation accepts only None (query) or 'C' as value and
            always returns 'C'; any other value raises Error.
        """
        # Compare by value: an identity test against a string literal
        # depends on interning and may reject an equal 'C' string.
        if value is not None and value != 'C':
            raise Error('_locale emulation only supports "C" locale')
        return 'C'

    def strcoll(a,b):
        """ strcoll(string,string) -> int.
            Compares two strings according to the locale.

            Returns -1, 0 or 1 using the plain ordering of a and b.
        """
        # Same result as cmp(a, b) without relying on the cmp builtin.
        return (a > b) - (a < b)

    def strxfrm(s):
        """ strxfrm(string) -> string.
            Returns a string that can be compared with the ordinary
            comparison operators to get a locale-aware ordering.

            The emulation returns s unchanged.
        """
        return s
88 89 90 91 92

### Number formatting APIs

# Author: Martin von Loewis

93 94 95 96 97 98 99
# Perform the digit grouping from right to left.
def _group(s):
    """Insert the locale's thousands separator into the digit string s.

    The group sizes come from localeconv()['grouping'] and are applied
    starting at the right-hand end of s.  A CHAR_MAX entry stops the
    grouping; a 0 entry repeats the previous group size for the rest of
    the string.  Returns s unchanged when no grouping applies.
    """
    conv = localeconv()
    grouping = conv['grouping']
    if not grouping:
        return s
    sep = conv['thousands_sep']
    result = ""
    group = 0   # size of the most recently used group; 0 = none yet
    while s and grouping:
        if grouping[0] == CHAR_MAX:
            # no further grouping is to be performed
            break
        elif grouping[0] != 0:
            # consume the next group size
            group = grouping[0]
            grouping = grouping[1:]
        elif not group:
            # a leading 0 has no previous group size to repeat
            break
        # otherwise grouping[0] == 0: re-use the last group ad infinitum
        if result:
            result = s[-group:] + sep + result
        else:
            result = s[-group:]
        s = s[:-group]
    if not result:
        return s
    if s:
        # digits left over once the grouping list was exhausted
        result = s + sep + result
    return result

def format(f,val,grouping=0):
    """Formats a value in the same way that the % formatting would use,
    but takes the current locale into account.
    Grouping is applied if the third parameter is true.

    The absolute value of val is formatted with f; the text before the
    first "." is optionally grouped, the "." is replaced by the locale's
    decimal point, and a leading '-' is added for negative values.
    Raises Error if the formatted text contains more than one "."."""
    result = f % abs(val)
    fields = result.split(".")
    if grouping:
        fields[0] = _group(fields[0])
    if len(fields) == 2:
        res = fields[0] + localeconv()['decimal_point'] + fields[1]
    elif len(fields) == 1:
        res = fields[0]
    else:
        raise Error("Too many decimal points in result string")

    # The sign was stripped by abs() above; put it back.
    if val < 0:
        return '-' + res
    return res

139 140 141 142
def str(val):
    """Convert float to string, taking the locale into account.

    The value is formatted with "%.12g" through format(), without
    grouping."""
    return format("%.12g",val)

143
def atof(str,func=float):
    """Parses a string as a float according to the locale settings.

    The locale's thousands separator is removed and its decimal point
    is replaced with a dot; the result is passed to func (float by
    default), whose return value is returned."""
    conv = localeconv()
    # First, get rid of the grouping
    ts = conv['thousands_sep']
    if ts:
        str = str.replace(ts, '')
    # next, replace the decimal point with a dot
    dd = conv['decimal_point']
    if dd:
        str = str.replace(dd, '.')
    # finally, parse the string
    return func(str)

def atoi(str):
    """Converts a string to an integer according to the locale settings.

    Delegates to atof() with int as the parsing function."""
    return atof(str, int)
161

162
def _test():
    """Print a grouped integer and a formatted float, each followed by
    the value parsed back from that text, after switching to the
    default locale with setlocale(LC_ALL, "")."""
    setlocale(LC_ALL, "")
    #do grouping
    s1 = format("%d", 123456789, 1)
    print("%s is %s" % (s1, atoi(s1)))
    #standard formatting
    s1 = str(3.14)
    print("%s is %s" % (s1, atof(s1)))
170 171 172 173

### Locale name aliasing engine

# Author: Marc-Andre Lemburg, mal@lemburg.com
# Various tweaks by Fredrik Lundh <effbot@telia.com>

# Store away the low-level version of setlocale under a private name.
# The setlocale() defined further down in this module rebinds the
# public name to a wrapper that also accepts locale tuples and calls
# this saved function to do the actual work.
_setlocale = setlocale
179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196

def normalize(localename):

    """ Returns a normalized locale code for the given locale
        name.

        The returned locale code is formatted for use with
        setlocale().

        If normalization fails, the original name is returned
        unchanged.

        If the given encoding is not known, the function defaults to
        the default encoding for the locale code just like setlocale()
        does.

    """
    # Normalize the locale name and extract the encoding
    fullname = localename.lower()
    if ':' in fullname:
        # ':' is sometimes used as encoding delimiter.
        fullname = fullname.replace(':', '.')
    if '.' in fullname:
        # Only the first two dot-separated parts are used.
        langname, encoding = fullname.split('.')[:2]
        fullname = langname + '.' + encoding
    else:
        langname = fullname
        encoding = ''

    # First lookup: fullname (possibly with encoding)
    code = locale_alias.get(fullname, None)
    if code is not None:
        return code

    # Second try: langname (without encoding)
    code = locale_alias.get(langname, None)
    if code is None:
        return localename

    # Split the alias into language and default encoding; split once
    # so that the unpacking cannot fail on an alias with several dots.
    if '.' in code:
        langname, defenc = code.split('.', 1)
    else:
        langname = code
        defenc = ''
    if encoding:
        # An unknown encoding alias is passed through unchanged.
        encoding = encoding_alias.get(encoding, encoding)
    else:
        encoding = defenc
    if encoding:
        return langname + '.' + encoding
    return langname

def _parse_localename(localename):

    """ Parses the locale code for localename and returns the
        result as tuple (language code, encoding).

        The localename is normalized and passed through the locale
        alias engine. A ValueError is raised in case the locale name
        cannot be parsed.

        The language code corresponds to RFC 1766.  code and encoding
        can be None in case the values cannot be determined or are
        unknown to this implementation.

    """
    code = normalize(localename)
    if '.' in code:
        # Return a real tuple, as documented, rather than a list.
        return tuple(code.split('.')[:2])
    elif code == 'C':
        return None, None
    raise ValueError('unknown locale: %s' % localename)

def _build_localename(localetuple):

    """ Assembles a locale code string from a (language code,
        encoding) tuple.

        A language code of None stands for 'C'; an encoding of None
        yields the bare language code.  The parts are joined as given:
        no aliasing or normalizing takes place.

    """
    lang, enc = localetuple
    if lang is None:
        lang = 'C'
    if enc is not None:
        return lang + '.' + enc
    return lang
271 272

def getdefaultlocale(envvars=('LANGUAGE', 'LC_ALL', 'LC_CTYPE', 'LANG')):

    """ Tries to determine the default locale settings and returns
        them as tuple (language code, encoding).

        According to POSIX, a program which has not called
        setlocale(LC_ALL, "") runs using the portable 'C' locale.
        Calling setlocale(LC_ALL, "") lets it use the default locale as
        defined by the LANG variable. Since we don't want to interfere
        with the current locale setting we thus emulate the behavior
        in the way described above.

        To maintain compatibility with other platforms, not only the
        LANG variable is tested, but a list of variables given as
        envvars parameter. The first found to be defined will be
        used. envvars defaults to the search path used in GNU gettext;
        it must always contain the variable name 'LANG'.  Variables
        that are set to an empty string are treated as undefined.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """

    try:
        # check if it's supported by the _locale module
        import _locale
        code, encoding = _locale._getdefaultlocale()
    except (ImportError, AttributeError):
        pass
    else:
        # make sure the code/encoding values are valid
        if sys.platform == "win32" and code and code[:2] == "0x":
            # map windows language identifier to language name
            code = windows_locale.get(int(code, 0))
        # ...add other platform-specific processing here, if
        # necessary...
        return code, encoding

    # fall back on POSIX behaviour
    import os
    lookup = os.environ.get
    for variable in envvars:
        localename = lookup(variable, None)
        # Skip unset and empty variables; an empty name would otherwise
        # be rejected as an unknown locale.
        if localename:
            break
    else:
        localename = 'C'
    return _parse_localename(localename)

322 323

def getlocale(category=LC_CTYPE):

    """ Returns the current setting for the given locale category as
        tuple (language code, encoding).

        category may be one of the LC_* value except LC_ALL. It
        defaults to LC_CTYPE.  A TypeError is raised for LC_ALL when
        the reported setting contains a ';'.

        Except for the code 'C', the language code corresponds to RFC
        1766.  code and encoding can be None in case the values cannot
        be determined.

    """
    # Calling the low-level setlocale without a value only queries.
    localename = _setlocale(category)
    if category == LC_ALL and ';' in localename:
        raise TypeError('category LC_ALL is not supported')
    return _parse_localename(localename)

341
def setlocale(category, locale=None):

    """ Set the locale for the given category.  The locale can be
        a string, a locale tuple (language code, encoding), or None.

        Locale tuples are converted to strings using the locale
        aliasing engine.  Locale strings are passed directly to the
        C lib.

        category may be given as one of the LC_* values.

        Returns whatever the low-level setlocale returns.

    """
    # type("") is used for the check because the name str is rebound
    # to a function in this module.
    if locale and type(locale) is not type(""):
        # convert to string
        locale = normalize(_build_localename(locale))
    return _setlocale(category, locale)
356

357
def resetlocale(category=LC_ALL):

    """ Sets the locale for category to the default setting.

        The default setting is determined by calling
        getdefaultlocale(). category defaults to LC_ALL.

    """
    # The tuple from getdefaultlocale() is turned back into a locale
    # string and handed to the low-level setlocale.
    _setlocale(category, _build_localename(getdefaultlocale()))
366 367 368 369 370 371 372

### Database
#
# The following data was extracted from the locale.alias file which
# comes with X11 and then hand edited removing the explicit encoding
# definitions and adding some more aliases. The file is usually
# available as /usr/lib/X11/locale/locale.alias.
#

#
# The encoding_alias table maps lowercase encoding alias names to C
# locale encoding names (case-sensitive).
#
# Looked up by normalize() with the lowercased encoding part of a
# locale name; encodings not listed here are passed through unchanged.
encoding_alias = {
        '437':                          'C',
        'c':                            'C',
        'iso8859':                      'ISO8859-1',
        '8859':                         'ISO8859-1',
        '88591':                        'ISO8859-1',
        'ascii':                        'ISO8859-1',
        'en':                           'ISO8859-1',
        'iso88591':                     'ISO8859-1',
        'iso_8859-1':                   'ISO8859-1',
        '885915':                       'ISO8859-15',
        'iso885915':                    'ISO8859-15',
        'iso_8859-15':                  'ISO8859-15',
        'iso8859-2':                    'ISO8859-2',
        'iso88592':                     'ISO8859-2',
        'iso_8859-2':                   'ISO8859-2',
        'iso88595':                     'ISO8859-5',
        'iso88596':                     'ISO8859-6',
        'iso88597':                     'ISO8859-7',
        'iso88598':                     'ISO8859-8',
        'iso88599':                     'ISO8859-9',
        'iso-2022-jp':                  'JIS7',
        'jis':                          'JIS7',
        'jis7':                         'JIS7',
        'sjis':                         'SJIS',
        'tis620':                       'TACTIS',
        'ajec':                         'eucJP',
        'eucjp':                        'eucJP',
        'ujis':                         'eucJP',
        'utf-8':                        'utf',
        'utf8':                         'utf',
        'utf8@ucs4':                    'utf',
}

413
#
# The locale_alias table maps lowercase alias names to C locale names
# (case-sensitive). Encodings are always separated from the locale
# name using a dot ('.'); they should only be given in case the
# language name is needed to interpret the given encoding alias
# correctly (CJK codes often have this need).
#
locale_alias = {
        'american':                      'en_US.ISO8859-1',
        'ar':                            'ar_AA.ISO8859-6',
        'ar_aa':                         'ar_AA.ISO8859-6',
        'ar_sa':                         'ar_SA.ISO8859-6',
        'arabic':                        'ar_AA.ISO8859-6',
        'bg':                            'bg_BG.ISO8859-5',
        'bg_bg':                         'bg_BG.ISO8859-5',
        'bulgarian':                     'bg_BG.ISO8859-5',
        'c-french':                      'fr_CA.ISO8859-1',
        'c':                             'C',
        'c_c':                           'C',
        'cextend':                       'en_US.ISO8859-1',
        'chinese-s':                     'zh_CN.eucCN',
        'chinese-t':                     'zh_TW.eucTW',
        'croatian':                      'hr_HR.ISO8859-2',
        'cs':                            'cs_CZ.ISO8859-2',
        'cs_cs':                         'cs_CZ.ISO8859-2',
        'cs_cz':                         'cs_CZ.ISO8859-2',
        'cz':                            'cz_CZ.ISO8859-2',
        'cz_cz':                         'cz_CZ.ISO8859-2',
        'czech':                         'cs_CS.ISO8859-2',
        'da':                            'da_DK.ISO8859-1',
        'da_dk':                         'da_DK.ISO8859-1',
        'danish':                        'da_DK.ISO8859-1',
        'de':                            'de_DE.ISO8859-1',
        'de_at':                         'de_AT.ISO8859-1',
        'de_ch':                         'de_CH.ISO8859-1',
        'de_de':                         'de_DE.ISO8859-1',
        'dutch':                         'nl_BE.ISO8859-1',
        'ee':                            'ee_EE.ISO8859-4',
        'el':                            'el_GR.ISO8859-7',
        'el_gr':                         'el_GR.ISO8859-7',
        'en':                            'en_US.ISO8859-1',
        'en_au':                         'en_AU.ISO8859-1',
        'en_ca':                         'en_CA.ISO8859-1',
        'en_gb':                         'en_GB.ISO8859-1',
        'en_ie':                         'en_IE.ISO8859-1',
        'en_nz':                         'en_NZ.ISO8859-1',
        'en_uk':                         'en_GB.ISO8859-1',
        'en_us':                         'en_US.ISO8859-1',
        'eng_gb':                        'en_GB.ISO8859-1',
        'english':                       'en_EN.ISO8859-1',
        'english_uk':                    'en_GB.ISO8859-1',
        'english_united-states':         'en_US.ISO8859-1',
        'english_us':                    'en_US.ISO8859-1',
        'es':                            'es_ES.ISO8859-1',
        'es_ar':                         'es_AR.ISO8859-1',
        'es_bo':                         'es_BO.ISO8859-1',
        'es_cl':                         'es_CL.ISO8859-1',
        'es_co':                         'es_CO.ISO8859-1',
        'es_cr':                         'es_CR.ISO8859-1',
        'es_ec':                         'es_EC.ISO8859-1',
        'es_es':                         'es_ES.ISO8859-1',
        'es_gt':                         'es_GT.ISO8859-1',
        'es_mx':                         'es_MX.ISO8859-1',
        'es_ni':                         'es_NI.ISO8859-1',
        'es_pa':                         'es_PA.ISO8859-1',
        'es_pe':                         'es_PE.ISO8859-1',
        'es_py':                         'es_PY.ISO8859-1',
        'es_sv':                         'es_SV.ISO8859-1',
        'es_uy':                         'es_UY.ISO8859-1',
        'es_ve':                         'es_VE.ISO8859-1',
        'et':                            'et_EE.ISO8859-4',
        'et_ee':                         'et_EE.ISO8859-4',
        'fi':                            'fi_FI.ISO8859-1',
        'fi_fi':                         'fi_FI.ISO8859-1',
        'finnish':                       'fi_FI.ISO8859-1',
        'fr':                            'fr_FR.ISO8859-1',
        'fr_be':                         'fr_BE.ISO8859-1',
        'fr_ca':                         'fr_CA.ISO8859-1',
        'fr_ch':                         'fr_CH.ISO8859-1',
        'fr_fr':                         'fr_FR.ISO8859-1',
        'fre_fr':                        'fr_FR.ISO8859-1',
        'french':                        'fr_FR.ISO8859-1',
        'french_france':                 'fr_FR.ISO8859-1',
        'ger_de':                        'de_DE.ISO8859-1',
        'german':                        'de_DE.ISO8859-1',
        'german_germany':                'de_DE.ISO8859-1',
        'greek':                         'el_GR.ISO8859-7',
        'hebrew':                        'iw_IL.ISO8859-8',
        'hr':                            'hr_HR.ISO8859-2',
        'hr_hr':                         'hr_HR.ISO8859-2',
        'hu':                            'hu_HU.ISO8859-2',
        'hu_hu':                         'hu_HU.ISO8859-2',
        'hungarian':                     'hu_HU.ISO8859-2',
        'icelandic':                     'is_IS.ISO8859-1',
        'id':                            'id_ID.ISO8859-1',
        'id_id':                         'id_ID.ISO8859-1',
        'is':                            'is_IS.ISO8859-1',
        'is_is':                         'is_IS.ISO8859-1',
        'iso-8859-1':                    'en_US.ISO8859-1',
        'iso-8859-15':                   'en_US.ISO8859-15',
        'iso8859-1':                     'en_US.ISO8859-1',
        'iso8859-15':                    'en_US.ISO8859-15',
        'iso_8859_1':                    'en_US.ISO8859-1',
        'iso_8859_15':                   'en_US.ISO8859-15',
        'it':                            'it_IT.ISO8859-1',
        'it_ch':                         'it_CH.ISO8859-1',
        'it_it':                         'it_IT.ISO8859-1',
        'italian':                       'it_IT.ISO8859-1',
        'iw':                            'iw_IL.ISO8859-8',
        'iw_il':                         'iw_IL.ISO8859-8',
        'ja':                            'ja_JP.eucJP',
        'ja.jis':                        'ja_JP.JIS7',
        'ja.sjis':                       'ja_JP.SJIS',
        'ja_jp':                         'ja_JP.eucJP',
        'ja_jp.ajec':                    'ja_JP.eucJP',
        'ja_jp.euc':                     'ja_JP.eucJP',
        'ja_jp.eucjp':                   'ja_JP.eucJP',
        'ja_jp.iso-2022-jp':             'ja_JP.JIS7',
        'ja_jp.jis':                     'ja_JP.JIS7',
        'ja_jp.jis7':                    'ja_JP.JIS7',
        'ja_jp.mscode':                  'ja_JP.SJIS',
        'ja_jp.sjis':                    'ja_JP.SJIS',
        'ja_jp.ujis':                    'ja_JP.eucJP',
        'japan':                         'ja_JP.eucJP',
        'japanese':                      'ja_JP.SJIS',
        'japanese-euc':                  'ja_JP.eucJP',
        'japanese.euc':                  'ja_JP.eucJP',
        'jp_jp':                         'ja_JP.eucJP',
        'ko':                            'ko_KR.eucKR',
        'ko_kr':                         'ko_KR.eucKR',
        'ko_kr.euc':                     'ko_KR.eucKR',
        'korean':                        'ko_KR.eucKR',
        'lt':                            'lt_LT.ISO8859-4',
        'lv':                            'lv_LV.ISO8859-4',
        'mk':                            'mk_MK.ISO8859-5',
        'mk_mk':                         'mk_MK.ISO8859-5',
        'nl':                            'nl_NL.ISO8859-1',
        'nl_be':                         'nl_BE.ISO8859-1',
        'nl_nl':                         'nl_NL.ISO8859-1',
        'no':                            'no_NO.ISO8859-1',
        'no_no':                         'no_NO.ISO8859-1',
        'norwegian':                     'no_NO.ISO8859-1',
        'pl':                            'pl_PL.ISO8859-2',
        'pl_pl':                         'pl_PL.ISO8859-2',
        'polish':                        'pl_PL.ISO8859-2',
        'portuguese':                    'pt_PT.ISO8859-1',
        'portuguese_brazil':             'pt_BR.ISO8859-1',
        'posix':                         'C',
        'posix-utf2':                    'C',
        'pt':                            'pt_PT.ISO8859-1',
        'pt_br':                         'pt_BR.ISO8859-1',
        'pt_pt':                         'pt_PT.ISO8859-1',
        'ro':                            'ro_RO.ISO8859-2',
        'ro_ro':                         'ro_RO.ISO8859-2',
        'ru':                            'ru_RU.ISO8859-5',
        'ru_ru':                         'ru_RU.ISO8859-5',
        'rumanian':                      'ro_RO.ISO8859-2',
        'russian':                       'ru_RU.ISO8859-5',
        'serbocroatian':                 'sh_YU.ISO8859-2',
        'sh':                            'sh_YU.ISO8859-2',
        'sh_hr':                         'sh_HR.ISO8859-2',
        'sh_sp':                         'sh_YU.ISO8859-2',
        'sh_yu':                         'sh_YU.ISO8859-2',
        'sk':                            'sk_SK.ISO8859-2',
        'sk_sk':                         'sk_SK.ISO8859-2',
        'sl':                            'sl_CS.ISO8859-2',
        'sl_cs':                         'sl_CS.ISO8859-2',
        'sl_si':                         'sl_SI.ISO8859-2',
        'slovak':                        'sk_SK.ISO8859-2',
        'slovene':                       'sl_CS.ISO8859-2',
        'sp':                            'sp_YU.ISO8859-5',
        'sp_yu':                         'sp_YU.ISO8859-5',
        'spanish':                       'es_ES.ISO8859-1',
        'spanish_spain':                 'es_ES.ISO8859-1',
        'sr_sp':                         'sr_SP.ISO8859-2',
        'sv':                            'sv_SE.ISO8859-1',
        'sv_se':                         'sv_SE.ISO8859-1',
        'swedish':                       'sv_SE.ISO8859-1',
        'th_th':                         'th_TH.TACTIS',
        'tr':                            'tr_TR.ISO8859-9',
        'tr_tr':                         'tr_TR.ISO8859-9',
        'turkish':                       'tr_TR.ISO8859-9',
        'univ':                          'en_US.utf',
        'universal':                     'en_US.utf',
        'zh':                            'zh_CN.eucCN',
        'zh_cn':                         'zh_CN.eucCN',
        'zh_cn.big5':                    'zh_TW.eucTW',
        'zh_cn.euc':                     'zh_CN.eucCN',
        'zh_tw':                         'zh_TW.eucTW',
        'zh_tw.euc':                     'zh_TW.eucTW',
}

605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644
#
# this maps windows language identifiers (as used on Windows 95 and
# earlier) to locale strings.
#
# NOTE: this mapping is incomplete.  If your language is missing, send
# a note with the missing language identifier and the suggested locale
# code to Fredrik Lundh <effbot@telia.com>.  Thanks /F

# Values are returned as the language code by getdefaultlocale() without
# further aliasing, so each one must already be a valid locale code.
windows_locale = {
    0x0404: "zh_TW", # Chinese (Taiwan)
    0x0804: "zh_CN", # Chinese (PRC)
    0x0406: "da_DK", # Danish
    0x0413: "nl_NL", # Dutch (Netherlands)
    0x0409: "en_US", # English (United States)
    0x0809: "en_GB", # English (United Kingdom); the territory code is GB, not UK
    0x0c09: "en_AU", # English (Australian)
    0x1009: "en_CA", # English (Canadian)
    0x1409: "en_NZ", # English (New Zealand)
    0x1809: "en_IE", # English (Ireland)
    0x1c09: "en_ZA", # English (South Africa)
    0x040b: "fi_FI", # Finnish
    0x040c: "fr_FR", # French (Standard)
    0x080c: "fr_BE", # French (Belgian)
    0x0c0c: "fr_CA", # French (Canadian)
    0x100c: "fr_CH", # French (Switzerland)
    0x0407: "de_DE", # German (Standard)
    0x0408: "el_GR", # Greek
    0x040d: "iw_IL", # Hebrew
    0x040f: "is_IS", # Icelandic
    0x0410: "it_IT", # Italian (Standard)
    0x0411: "ja_JP", # Japanese; the territory code is JP, not JA
    0x0414: "no_NO", # Norwegian (Bokmal)
    0x0816: "pt_PT", # Portuguese (Standard)
    0x0c0a: "es_ES", # Spanish (Modern Sort)
    0x0441: "sw_KE", # Swahili (Kenya)
    0x041d: "sv_SE", # Swedish
    0x081d: "sv_FI", # Swedish (Finland)
    0x041f: "tr_TR", # Turkish
}

645 646 647 648 649 650 651 652 653 654 655 656
def _print_locale():

    """ Test function.

        Prints the defaults reported by getdefaultlocale(), then the
        per-category settings reported by getlocale() at startup,
        after resetlocale() and after setlocale(LC_ALL, "").
    """
    # Collect every module-level LC_* constant.  LC_ALL is dropped
    # because the report queries one category at a time.
    categories = {}
    for k, v in globals().items():
        if k[:3] == 'LC_':
            categories[k] = v
    del categories['LC_ALL']

    def _report(categories):
        # Print language and encoding for each category in turn.
        for name, category in categories.items():
            print('%s ...' % name)
            lang, enc = getlocale(category)
            print('   Language:  %s' % (lang or '(undefined)'))
            print('   Encoding:  %s' % (enc or '(undefined)'))
            print('')

    print('Locale defaults as determined by getdefaultlocale():')
    print('-'*72)
    lang, enc = getdefaultlocale()
    print('Language:  %s' % (lang or '(undefined)'))
    print('Encoding:  %s' % (enc or '(undefined)'))
    print('')

    print('Locale settings on startup:')
    print('-'*72)
    _report(categories)

    print('')
    print('Locale settings after calling resetlocale():')
    print('-'*72)
    resetlocale()
    _report(categories)

    try:
        setlocale(LC_ALL, "")
    except Error:
        # Only a locale failure is reported here; anything else
        # (e.g. KeyboardInterrupt) propagates.
        print('NOTE:')
        print('setlocale(LC_ALL, "") does not support the default locale')
        print('given in the OS environment variables.')
    else:
        print('')
        print('Locale settings after calling setlocale(LC_ALL, ""):')
        print('-'*72)
        _report(categories)
700

701
###
702

703 704 705 706 707 708 709
# LC_MESSAGES is non-standard: export it only if it was defined above.
try:
    LC_MESSAGES
except NameError:
    # An undefined name is the only failure this probe can produce.
    pass
else:
    __all__.append("LC_MESSAGES")

710
# When run as a script, print the locale report and the number
# formatting demonstration.
if __name__=='__main__':
    print('Locale aliasing:')
    print('')
    _print_locale()
    print('')
    print('Number formatting:')
    print('')
    _test()