#! /usr/bin/env python3
# Written by Martin v. Löwis <loewis@informatik.hu-berlin.de>

"""Generate binary message catalog from textual translation description.

This program converts a textual Uniforum-style message catalog (.po file) into
a binary GNU catalog (.mo file).  This is essentially the same function as the
GNU msgfmt program, however, it is a simpler implementation.

Usage: msgfmt.py [OPTIONS] filename.po

Options:
    -o file
    --output-file=file
        Specify the output file to write to.  If omitted, output will go to a
        file named filename.mo (based off the input file name).

    -h
    --help
        Print this message and exit.

    -V
    --version
        Display version information and exit.
"""

import os
import sys
import ast
import getopt
import struct
import array
from email.parser import HeaderParser

__version__ = "1.1"

# Catalog being built: maps each message id (bytes) to its translation
# (bytes).  Filled by add() and serialized by generate().
MESSAGES = {}



def usage(code, msg=''):
    """Print the module help text to stderr and exit.

    Args:
        code: Exit status handed to sys.exit().
        msg: Optional extra message printed after the help text; it is
            skipped when falsy.

    Raises:
        SystemExit: always, carrying *code*.
    """
    print(__doc__, file=sys.stderr)
    if msg:
        print(msg, file=sys.stderr)
    sys.exit(code)



def add(id, str, fuzzy):
    """Store the translation *str* for message *id* in MESSAGES.

    Nothing is stored when the entry is marked fuzzy or when the
    translation is empty.
    """
    if fuzzy or not str:
        return
    MESSAGES[id] = str



def generate():
    """Return the binary .mo catalog (bytes) for the entries in MESSAGES.

    The output consists of a 7-integer header, an index table holding a
    (length, file offset) pair for every key followed by one for every
    value, then all NUL-terminated keys and all NUL-terminated values.
    Integers are written in native byte order; no hash table is emitted.
    """
    # the keys are sorted in the .mo file
    keys = sorted(MESSAGES)
    offsets = []
    # bytearrays grow in place, avoiding quadratic bytes concatenation
    ids = bytearray()
    strs = bytearray()
    for key in keys:
        value = MESSAGES[key]
        # For each string, we need size and file offset.  Each string is NUL
        # terminated; the NUL does not count into the size.
        offsets.append((len(ids), len(key), len(strs), len(value)))
        ids += key + b'\0'
        strs += value + b'\0'
    # The header is 7 32-bit unsigned integers.  We don't use hash tables, so
    # the keys start right after the index tables.
    keystart = 7*4 + 16*len(keys)
    # and the values start after the keys
    valuestart = keystart + len(ids)
    koffsets = []
    voffsets = []
    # The string table first has the list of keys, then the list of values.
    # Each entry has first the size of the string, then the file offset.
    for o1, l1, o2, l2 in offsets:
        koffsets += [l1, o1 + keystart]
        voffsets += [l2, o2 + valuestart]
    header = struct.pack("Iiiiiii",
                         0x950412de,        # Magic
                         0,                 # Version
                         len(keys),         # # of entries
                         7*4,               # start of key index
                         7*4+len(keys)*8,   # start of value index
                         0, 0)              # size and offset of hash table
    # array.tostring() was removed in Python 3.9; tobytes() is the
    # replacement available on every Python 3 release.
    index = array.array("i", koffsets + voffsets).tobytes()
    return b''.join((header, index, ids, strs))



def make(filename, outfile):
    """Compile the .po catalog *filename* into a binary .mo file.

    Args:
        filename: Path of the input catalog; '.po' is appended when the
            name does not already end with it.
        outfile: Path of the output file, or None to derive it from the
            input name by swapping the extension for '.mo'.

    The process exits with status 1 when the input cannot be read or when
    the catalog is malformed.  A failure to write the output is reported on
    stderr but does not terminate the process.
    """
    ID = 1
    STR = 2

    # Compute .mo name from .po name and arguments
    if filename.endswith('.po'):
        infile = filename
    else:
        infile = filename + '.po'
    if outfile is None:
        outfile = os.path.splitext(infile)[0] + '.mo'

    try:
        # Context manager so the handle is closed even if reading fails.
        with open(infile, 'rb') as f:
            lines = f.readlines()
    except IOError as msg:
        print(msg, file=sys.stderr)
        sys.exit(1)

    section = None
    fuzzy = 0

    # Start off assuming Latin-1, so everything decodes without failure,
    # until we know the exact encoding
    encoding = 'latin-1'

    # Parse the catalog
    for lno, raw in enumerate(lines, 1):
        l = raw.decode(encoding)
        # If we get a comment line after a msgstr, this is a new entry
        if l.startswith('#') and section == STR:
            add(msgid, msgstr, fuzzy)
            section = None
            fuzzy = 0
        # Record a fuzzy mark
        if l[:2] == '#,' and 'fuzzy' in l:
            fuzzy = 1
        # Skip comments
        if l.startswith('#'):
            continue
        # Now we are in a msgid section, output previous section
        if l.startswith('msgid') and not l.startswith('msgid_plural'):
            if section == STR:
                add(msgid, msgstr, fuzzy)
                # section is still STR only when no comment line came
                # between the two entries, so the new entry carries no
                # flags of its own; do not let the previous entry's fuzzy
                # mark leak onto it.
                fuzzy = 0
                if not msgid:
                    # See whether there is an encoding declaration
                    p = HeaderParser()
                    charset = p.parsestr(msgstr.decode(encoding)).get_content_charset()
                    if charset:
                        encoding = charset
            section = ID
            l = l[5:]
            msgid = msgstr = b''
            is_plural = False
        # This is a message with plural forms
        elif l.startswith('msgid_plural'):
            if section != ID:
                print('msgid_plural not preceded by msgid on %s:%d' % (infile, lno),
                      file=sys.stderr)
                sys.exit(1)
            l = l[12:]
            msgid += b'\0'  # separator of singular and plural
            is_plural = True
        # Now we are in a msgstr section
        elif l.startswith('msgstr'):
            section = STR
            if l.startswith('msgstr['):
                if not is_plural:
                    print('plural without msgid_plural on %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l.split(']', 1)[1]
                if msgstr:
                    msgstr += b'\0'  # Separator of the various plural forms
            else:
                if is_plural:
                    print('indexed msgstr required for plural on  %s:%d' % (infile, lno),
                          file=sys.stderr)
                    sys.exit(1)
                l = l[6:]
        # Skip empty lines
        l = l.strip()
        if not l:
            continue
        # The remainder is a quoted string literal; evaluate it to resolve
        # the escape sequences.
        l = ast.literal_eval(l)
        if section == ID:
            msgid += l.encode(encoding)
        elif section == STR:
            msgstr += l.encode(encoding)
        else:
            print('Syntax error on %s:%d' % (infile, lno),
                  'before:', file=sys.stderr)
            print(l, file=sys.stderr)
            sys.exit(1)
    # Add last entry
    if section == STR:
        add(msgid, msgstr, fuzzy)

    # Compute output
    # NOTE(review): MESSAGES is not reset in this function, so entries added
    # by earlier make() calls in the same process are serialized here too.
    output = generate()

    try:
        with open(outfile, "wb") as f:
            f.write(output)
    except IOError as msg:
        print(msg, file=sys.stderr)


def main():
    """Command-line entry point: parse options and compile each .po file.

    Recognized options are -h/--help, -V/--version and
    -o/--output-file=FILE.  Every positional argument is passed to make()
    together with the selected output file (None when -o was not given).

    Raises:
        SystemExit: with status 0 for --help/--version, and status 1 for
            invalid options or when no input file is given.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
                                   ['help', 'version', 'output-file='])
    except getopt.error as msg:
        usage(1, msg)

    outfile = None
    # parse options
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-V', '--version'):
            print("msgfmt.py", __version__)
            sys.exit(0)
        elif opt in ('-o', '--output-file'):
            outfile = arg
    # do it
    if not args:
        print('No input file given', file=sys.stderr)
        print("Try `msgfmt --help' for more information.", file=sys.stderr)
        # This is an error path, so report failure to the calling shell
        # instead of returning normally (exit status 0).
        sys.exit(1)

    for filename in args:
        make(filename, outfile)
233 234 235 236


# Run the command-line interface only when executed as a script, not when
# the module is imported.
if __name__ == "__main__":
    main()