abitype.py 5.44 KB
Newer Older
1
#!/usr/bin/env python3
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
# This script converts a C file to use the PEP 384 type definition API
# Usage: abitype.py < old_code > new_code
import re, sys

###### Replacement of PyTypeObject static instances ##############

# classify each token, giving it a one-letter code:
# S: static
# T: PyTypeObject
# I: ident
# W: whitespace
# =, {, }, ; : themselves
def classify():
    res = []
    for t,v in tokens:
        if t == 'other' and v in "={};":
            res.append(v)
        elif t == 'ident':
            if v == 'PyTypeObject':
                res.append('T')
            elif v == 'static':
                res.append('S')
            else:
                res.append('I')
        elif t == 'ws':
            res.append('W')
        else:
            res.append('.')
    return ''.join(res)

# Obtain a list of fields of a PyTypeObject, in declaration order,
# skipping ob_base
# All comments are dropped from the variable (which are typically
# just the slot names, anyway), and information is discarded whether
# the original type was static.
def get_fields(start, real_end):
    pos = start
    # static?
    if tokens[pos][1] == 'static':
        pos += 2
    # PyTypeObject
    pos += 2
    # name
    name = tokens[pos][1]
    pos += 1
    while tokens[pos][1] != '{':
        pos += 1
    pos += 1
    # PyVarObject_HEAD_INIT
    while tokens[pos][0] in ('ws', 'comment'):
        pos += 1
    if tokens[pos][1] != 'PyVarObject_HEAD_INIT':
54
        raise Exception('%s has no PyVarObject_HEAD_INIT' % name)
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
    while tokens[pos][1] != ')':
        pos += 1
    pos += 1
    # field definitions: various tokens, comma-separated
    fields = []
    while True:
        while tokens[pos][0] in ('ws', 'comment'):
            pos += 1
        end = pos
        while tokens[end][1] not in ',}':
            if tokens[end][1] == '(':
                nesting = 1
                while nesting:
                    end += 1
                    if tokens[end][1] == '(': nesting+=1
                    if tokens[end][1] == ')': nesting-=1
            end += 1
        assert end < real_end
        # join field, excluding separator and trailing ws
        end1 = end-1
        while tokens[end1][0] in ('ws', 'comment'):
            end1 -= 1
        fields.append(''.join(t[1] for t in tokens[pos:end1+1]))
        if tokens[end][1] == '}':
            break
        pos = end+1
    return name, fields

# List of type slots as of Python 3.2, omitting ob_base
typeslots = [
    'tp_name',
    'tp_basicsize',
    'tp_itemsize',
    'tp_dealloc',
    'tp_print',
    'tp_getattr',
    'tp_setattr',
    'tp_reserved',
    'tp_repr',
    'tp_as_number',
    'tp_as_sequence',
    'tp_as_mapping',
    'tp_hash',
    'tp_call',
    'tp_str',
    'tp_getattro',
    'tp_setattro',
    'tp_as_buffer',
    'tp_flags',
    'tp_doc',
    'tp_traverse',
    'tp_clear',
    'tp_richcompare',
    'tp_weaklistoffset',
    'tp_iter',
    'iternextfunc',
    'tp_methods',
    'tp_members',
    'tp_getset',
    'tp_base',
    'tp_dict',
    'tp_descr_get',
    'tp_descr_set',
    'tp_dictoffset',
    'tp_init',
    'tp_alloc',
    'tp_new',
    'tp_free',
    'tp_is_gc',
    'tp_bases',
    'tp_mro',
    'tp_cache',
    'tp_subclasses',
    'tp_weaklist',
129 130
    'tp_del',
    'tp_version_tag',
131 132 133 134 135 136 137
]

# Generate a PyType_Spec definition
def make_slots(name, fields):
    res = []
    res.append('static PyType_Slot %s_slots[] = {' % name)
    # defaults for spec
138
    spec = { 'tp_itemsize':'0' }
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
    for i, val in enumerate(fields):
        if val.endswith('0'):
            continue
        if typeslots[i] in ('tp_name', 'tp_doc', 'tp_basicsize',
                         'tp_itemsize', 'tp_flags'):
            spec[typeslots[i]] = val
            continue
        res.append('    {Py_%s, %s},' % (typeslots[i], val))
    res.append('};')
    res.append('static PyType_Spec %s_spec = {' % name)
    res.append('    %s,' % spec['tp_name'])
    res.append('    %s,' % spec['tp_basicsize'])
    res.append('    %s,' % spec['tp_itemsize'])
    res.append('    %s,' % spec['tp_flags'])
    res.append('    %s_slots,' % name)
    res.append('};\n')
    return '\n'.join(res)


158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
if __name__ == '__main__':

    ############ Simplistic C scanner ##################################
    tokenizer = re.compile(
        r"(?P<preproc>#.*\n)"
        r"|(?P<comment>/\*.*?\*/)"
        r"|(?P<ident>[a-zA-Z_][a-zA-Z0-9_]*)"
        r"|(?P<ws>[ \t\n]+)"
        r"|(?P<other>.)",
        re.MULTILINE)

    tokens = []
    source = sys.stdin.read()
    pos = 0
    while pos != len(source):
        m = tokenizer.match(source, pos)
        tokens.append([m.lastgroup, m.group()])
        pos += len(tokens[-1][1])
        if tokens[-1][0] == 'preproc':
            # continuation lines are considered
            # only in preprocess statements
            while tokens[-1][1].endswith('\\\n'):
                nl = source.find('\n', pos)
                if nl == -1:
                    line = source[pos:]
                else:
                    line = source[pos:nl+1]
                tokens[-1][1] += line
                pos += len(line)

    # Main loop: replace all static PyTypeObjects until
    # there are none left.
    while 1:
        c = classify()
        m = re.search('(SW)?TWIW?=W?{.*?};', c)
        if not m:
            break
        start = m.start()
        end = m.end()
197
        name, fields = get_fields(start, end)
198
        tokens[start:end] = [('',make_slots(name, fields))]
199

200 201 202
    # Output result to stdout
    for t, v in tokens:
        sys.stdout.write(v)