""" Standard "encodings" Package

    Standard Python encoding modules are stored in this package
    directory.

    Codec modules must have names corresponding to normalized encoding
    names as defined in the normalize_encoding() function below, e.g.
    'utf-8' must be implemented by the module 'utf_8.py'.

    Each codec module must export the following interface:

    * getregentry() -> (encoder, decoder, stream_reader, stream_writer)
    The getregentry() API must return callable objects which adhere to
    the Python Codec Interface Standard.

    In addition, a module may optionally also define the following
    APIs which are then used by the package's codec search function:

    * getaliases() -> sequence of encoding name strings to use as aliases

    Alias names returned by getaliases() must be normalized encoding
    names as defined by normalize_encoding().

Written by Marc-Andre Lemburg (mal@lemburg.com).

(c) Copyright CNRI, All Rights Reserved. NO WARRANTY.

"""#"

30
import codecs, exceptions, re
31 32

# Maps encoding names, exactly as passed to search_function(), to the
# codec registry entry found for them, or to None if the lookup failed
_cache = {}

# Default value for cache lookups; a dedicated marker is needed because
# None is itself a valid cached value (it records a failed lookup)
_unknown = '--unknown--'

# Non-empty "fromlist" argument for __import__(), which makes the call
# return the named submodule rather than the top-level package
_import_tail = ['*']

# Matches one character that is neither an ASCII letter or digit nor a dot
_norm_encoding_RE = re.compile('[^a-zA-Z0-9.]')
36

37 38 39 40
class CodecRegistryError(exceptions.LookupError, exceptions.SystemError):

    """ Raised by search_function() when a codec module hands back an
        invalid registry entry.

        The class derives from both LookupError and SystemError, so
        handlers written for either of the two also catch it.

    """

41 42 43 44 45 46 47 48
def normalize_encoding(encoding):

    """ Normalize an encoding name.

        Normalization works as follows: every run of one or more
        non-alphanumeric characters is collapsed and replaced with a
        single underscore, e.g. '  -;#' becomes '_' and 'utf--8'
        becomes 'utf_8'.  The dot used for Python package names is
        treated like an alphanumeric character and left untouched.

        encoding -- the encoding name to normalize (a string).

        Returns the normalized encoding name.

    """
    # The '+' makes the pattern consume a whole run of separator
    # characters per substitution.  Splitting on single characters
    # instead yields one underscore per separator ('utf--8' would
    # become 'utf__8'), which contradicts the collapsing rule above.
    return re.sub('[^a-zA-Z0-9.]+', '_', encoding)

53
def search_function(encoding):

    """ Codec search function of the encodings package.

        Looks for a codec module matching encoding and returns its
        registry entry, a 4-tuple of callable objects

            (encoder, decoder, stream_reader, stream_writer)

        or None if no codec module could be found.  Either result is
        cached under the encoding name exactly as it was passed in.

        The module is first looked up as 'encodings.<name>', where
        <name> is the normalized encoding name.  If that import fails,
        the name is translated through the aliases mapping and the
        import is retried using the default import module lookup
        scheme.

        Raises CodecRegistryError if the module's getregentry() does
        not return exactly four callable objects.

    """
    # Answer from the cache whenever possible; a cached None is a
    # remembered miss, which is why a separate marker is used as the
    # "not cached yet" default
    cached = _cache.get(encoding, _unknown)
    if cached is not _unknown:
        return cached

    # Locate the codec module, trying the encodings package first
    modname = normalize_encoding(encoding)
    try:
        module = __import__('encodings.' + modname,
                            globals(), locals(), _import_tail)
    except ImportError:
        # Not in the package: translate the name through the alias
        # mapping (also trying it with dots turned into underscores)
        # and import the result, falling back to the name itself
        import aliases
        modname = (aliases.aliases.get(modname) or
                   aliases.aliases.get(modname.replace('.', '_')) or
                   modname)
        try:
            module = __import__(modname, globals(), locals(), _import_tail)
        except ImportError:
            module = None

    # A module without getregentry() is not a codec module.  A failed
    # import (module is None) lands in the same branch, since None has
    # no such attribute either.
    try:
        getregentry = module.getregentry
    except AttributeError:
        # Cache the miss
        _cache[encoding] = None
        return None

    # Ask the module for the registry entry and validate it
    regentry = tuple(getregentry())
    if len(regentry) != 4:
        raise CodecRegistryError(
            'module "%s" (%s) failed to register' %
            (module.__name__, module.__file__))
    for item in regentry:
        if not callable(item):
            raise CodecRegistryError(
                'incompatible codecs in module "%s" (%s)' %
                (module.__name__, module.__file__))

    # Cache the codec registry entry
    _cache[encoding] = regentry

    # Register the module's own aliases; getaliases() is optional
    try:
        codecaliases = module.getaliases()
    except AttributeError:
        return regentry
    import aliases
    for alias in codecaliases:
        # setdefault() leaves previously registered aliases untouched
        aliases.aliases.setdefault(alias, modname)

    return regentry

# Hook the package's search function into the Python codec registry
codecs.register(search_function)

# iconv_codec is optional.  Only the import is attempted here;
# presumably the module registers its own lookup function as a side
# effect of being imported -- TODO confirm against iconv_codec.
try:
    import iconv_codec
except ImportError:
    # Module not available: carry on without it
    pass