parsetok.c 10.6 KB
Newer Older
1

Guido van Rossum's avatar
Guido van Rossum committed
2 3
/* Parser-tokenizer link implementation */

Guido van Rossum's avatar
Guido van Rossum committed
4
#include "pgenheaders.h"
Guido van Rossum's avatar
Guido van Rossum committed
5 6 7 8
#include "tokenizer.h"
#include "node.h"
#include "grammar.h"
#include "parser.h"
Guido van Rossum's avatar
Guido van Rossum committed
9
#include "parsetok.h"
Guido van Rossum's avatar
Guido van Rossum committed
10
#include "errcode.h"
11
#include "graminit.h"
Guido van Rossum's avatar
Guido van Rossum committed
12 13


Guido van Rossum's avatar
Guido van Rossum committed
14
/* Forward */
15
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
16
static int initerr(perrdetail *err_ret, PyObject * filename);
Guido van Rossum's avatar
Guido van Rossum committed
17

Guido van Rossum's avatar
Guido van Rossum committed
18
/* Parse input coming from a string.  Return error code, print some errors. */
19
node *
20
PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
21
{
22
    return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
23 24 25
}

node *
26
PyParser_ParseStringFlags(const char *s, grammar *g, int start,
27
                          perrdetail *err_ret, int flags)
28
{
29 30
    return PyParser_ParseStringFlagsFilename(s, NULL,
                                             g, start, err_ret, flags);
31 32 33
}

node *
34
PyParser_ParseStringFlagsFilename(const char *s, const char *filename,
35 36
                          grammar *g, int start,
                          perrdetail *err_ret, int flags)
37
{
38 39 40
    int iflags = flags;
    return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,
                                               err_ret, &iflags);
41 42 43
}

node *
44 45 46
PyParser_ParseStringObject(const char *s, PyObject *filename,
                           grammar *g, int start,
                           perrdetail *err_ret, int *flags)
Guido van Rossum's avatar
Guido van Rossum committed
47
{
48 49 50
    struct tok_state *tok;
    int exec_input = start == file_input;

51 52
    if (initerr(err_ret, filename) < 0)
        return NULL;
53 54 55 56 57 58 59 60 61 62

    if (*flags & PyPARSE_IGNORE_COOKIE)
        tok = PyTokenizer_FromUTF8(s, exec_input);
    else
        tok = PyTokenizer_FromString(s, exec_input);
    if (tok == NULL) {
        err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
        return NULL;
    }

63 64 65 66
#ifndef PGEN
    Py_INCREF(err_ret->filename);
    tok->filename = err_ret->filename;
#endif
67
    return parsetok(tok, g, start, err_ret, flags);
Guido van Rossum's avatar
Guido van Rossum committed
68 69
}

70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
/* Parse the source text `s`, with the file name given as a C string.

   `filename_str` may be NULL.  Otherwise (and unless PGEN is defined) it is
   decoded with PyUnicode_DecodeFSDefault() and the resulting object is
   handed to PyParser_ParseStringObject(), which does the actual work.  The
   temporary object is released again before returning.

   Returns the result of PyParser_ParseStringObject(), or NULL with
   err_ret->error == E_ERROR when the file name cannot be decoded.
   `flags` is passed through by pointer. */
node *
PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
                          grammar *g, int start,
                          perrdetail *err_ret, int *flags)
{
    node *n;
    PyObject *filename = NULL;
#ifndef PGEN
    if (filename_str != NULL) {
        filename = PyUnicode_DecodeFSDefault(filename_str);
        if (filename == NULL) {
            /* We bail out before PyParser_ParseStringObject() gets a chance
               to reset *err_ret, so put every field into a defined state
               here (same defaults as initerr(), but with no file name).
               Otherwise a caller inspecting or releasing err_ret->text or
               err_ret->filename would be reading whatever was in the
               struct before the call. */
            err_ret->error = E_ERROR;
            err_ret->filename = NULL;
            err_ret->lineno = 0;
            err_ret->offset = 0;
            err_ret->text = NULL;
            err_ret->token = -1;
            err_ret->expected = -1;
            return NULL;
        }
    }
#endif
    n = PyParser_ParseStringObject(s, filename, g, start, err_ret, flags);
#ifndef PGEN
    /* PyParser_ParseStringObject() took its own references. */
    Py_XDECREF(filename);
#endif
    return n;
}

Guido van Rossum's avatar
Guido van Rossum committed
93
/* Parse input coming from a file.  Return error code, print some errors. */
Guido van Rossum's avatar
Guido van Rossum committed
94

95
node *
96
PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
97 98
                   const char *ps1, const char *ps2,
                   perrdetail *err_ret)
99
{
100 101
    return PyParser_ParseFileFlags(fp, filename, NULL,
                                   g, start, ps1, ps2, err_ret, 0);
102 103 104
}

node *
105
PyParser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
106
                        grammar *g, int start,
107 108
                        const char *ps1, const char *ps2,
                        perrdetail *err_ret, int flags)
109
{
110 111 112
    int iflags = flags;
    return PyParser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
                                     ps2, err_ret, &iflags);
113 114 115
}

node *
116 117
PyParser_ParseFileObject(FILE *fp, PyObject *filename,
                         const char *enc, grammar *g, int start,
118 119
                         const char *ps1, const char *ps2,
                         perrdetail *err_ret, int *flags)
Guido van Rossum's avatar
Guido van Rossum committed
120
{
121
    struct tok_state *tok;
122

123 124
    if (initerr(err_ret, filename) < 0)
        return NULL;
125

126
    if ((tok = PyTokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
127 128 129
        err_ret->error = E_NOMEM;
        return NULL;
    }
130 131 132 133
#ifndef PGEN
    Py_INCREF(err_ret->filename);
    tok->filename = err_ret->filename;
#endif
134
    return parsetok(tok, g, start, err_ret, flags);
135
}
Guido van Rossum's avatar
Guido van Rossum committed
136

137 138 139
/* Parse input read from `fp`, with the file name given as a C string.

   `filename` may be NULL.  Otherwise (and unless PGEN is defined) it is
   decoded with PyUnicode_DecodeFSDefault() and the resulting object is
   handed to PyParser_ParseFileObject(), which does the actual work.  The
   temporary object is released again before returning.

   Returns the result of PyParser_ParseFileObject(), or NULL with
   err_ret->error == E_ERROR when the file name cannot be decoded.
   `flags` is passed through by pointer. */
node *
PyParser_ParseFileFlagsEx(FILE *fp, const char *filename,
                          const char *enc, grammar *g, int start,
                          const char *ps1, const char *ps2,
                          perrdetail *err_ret, int *flags)
{
    node *n;
    PyObject *fileobj = NULL;
#ifndef PGEN
    if (filename != NULL) {
        fileobj = PyUnicode_DecodeFSDefault(filename);
        if (fileobj == NULL) {
            /* We bail out before PyParser_ParseFileObject() gets a chance
               to reset *err_ret, so put every field into a defined state
               here (same defaults as initerr(), but with no file name).
               Otherwise a caller inspecting or releasing err_ret->text or
               err_ret->filename would be reading whatever was in the
               struct before the call. */
            err_ret->error = E_ERROR;
            err_ret->filename = NULL;
            err_ret->lineno = 0;
            err_ret->offset = 0;
            err_ret->text = NULL;
            err_ret->token = -1;
            err_ret->expected = -1;
            return NULL;
        }
    }
#endif
    n = PyParser_ParseFileObject(fp, fileobj, enc, g,
                                 start, ps1, ps2, err_ret, flags);
#ifndef PGEN
    /* PyParser_ParseFileObject() took its own references. */
    Py_XDECREF(fileobj);
#endif
    return n;
}

162
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Disabled: keyword-transition warnings.  Everything up to the matching
   #endif is excluded from compilation by the "#if 0" above. */
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write `msg` (a format taking a file name and a line number) to stderr,
   substituting "<string>" when no file name is available. */
static void
warn(const char *msg, const char *filename, int lineno)
{
    const char *shown_name = (filename != NULL) ? filename : "<string>";

    PySys_WriteStderr(msg, shown_name, lineno);
}
#endif
#endif
179

180 181 182
/* Parse input coming from the given tokenizer structure.
   Return error code. */

183
static node *
184
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
185
         int *flags)
Guido van Rossum's avatar
Guido van Rossum committed
186
{
187 188
    parser_state *ps;
    node *n;
189
    int started = 0;
190 191 192 193 194 195

    if ((ps = PyParser_New(g, start)) == NULL) {
        err_ret->error = E_NOMEM;
        PyTokenizer_Free(tok);
        return NULL;
    }
196
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
197 198
    if (*flags & PyPARSE_BARRY_AS_BDFL)
        ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
199
#endif
200

201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236
    for (;;) {
        char *a, *b;
        int type;
        size_t len;
        char *str;
        int col_offset;

        type = PyTokenizer_Get(tok, &a, &b);
        if (type == ERRORTOKEN) {
            err_ret->error = tok->done;
            break;
        }
        if (type == ENDMARKER && started) {
            type = NEWLINE; /* Add an extra newline */
            started = 0;
            /* Add the right number of dedent tokens,
               except if a certain flag is given --
               codeop.py uses this. */
            if (tok->indent &&
                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
            {
                tok->pendin = -tok->indent;
                tok->indent = 0;
            }
        }
        else
            started = 1;
        len = b - a; /* XXX this may compute NULL - NULL */
        str = (char *) PyObject_MALLOC(len + 1);
        if (str == NULL) {
            err_ret->error = E_NOMEM;
            break;
        }
        if (len > 0)
            strncpy(str, a, len);
        str[len] = '\0';
237

238
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
239 240 241
        if (type == NOTEQUAL) {
            if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
                            strcmp(str, "!=")) {
242
                PyObject_FREE(str);
243 244 245 246 247
                err_ret->error = E_SYNTAX;
                break;
            }
            else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
                            strcmp(str, "<>")) {
248
                PyObject_FREE(str);
249 250 251 252 253 254
                err_ret->text = "with Barry as BDFL, use '<>' "
                                "instead of '!='";
                err_ret->error = E_SYNTAX;
                break;
            }
        }
255
#endif
256
        if (a >= tok->line_start)
257
            col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
258
                                          intptr_t, int);
259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276
        else
            col_offset = -1;

        if ((err_ret->error =
             PyParser_AddToken(ps, (int)type, str,
                               tok->lineno, col_offset,
                               &(err_ret->expected))) != E_OK) {
            if (err_ret->error != E_DONE) {
                PyObject_FREE(str);
                err_ret->token = type;
            }
            break;
        }
    }

    if (err_ret->error == E_DONE) {
        n = ps->p_tree;
        ps->p_tree = NULL;
277

278
#ifndef PGEN
279 280 281 282 283 284 285 286
        /* Check that the source for a single input statement really
           is a single statement by looking at what is left in the
           buffer after parsing.  Trailing whitespace and comments
           are OK.  */
        if (start == single_input) {
            char *cur = tok->cur;
            char c = *tok->cur;

287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
            for (;;) {
                while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
                    c = *++cur;

                if (!c)
                    break;

                if (c != '#') {
                    err_ret->error = E_BADSINGLE;
                    PyNode_Free(n);
                    n = NULL;
                    break;
                }

                /* Suck up comment. */
                while (c && c != '\n')
                    c = *++cur;
304 305
            }
        }
306
#endif
307 308 309
    }
    else
        n = NULL;
310

311
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
312
    *flags = ps->p_flags;
313
#endif
314 315 316
    PyParser_Delete(ps);

    if (n == NULL) {
317
        if (tok->done == E_EOF)
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352
            err_ret->error = E_EOF;
        err_ret->lineno = tok->lineno;
        if (tok->buf != NULL) {
            size_t len;
            assert(tok->cur - tok->buf < INT_MAX);
            err_ret->offset = (int)(tok->cur - tok->buf);
            len = tok->inp - tok->buf;
            err_ret->text = (char *) PyObject_MALLOC(len + 1);
            if (err_ret->text != NULL) {
                if (len > 0)
                    strncpy(err_ret->text, tok->buf, len);
                err_ret->text[len] = '\0';
            }
        }
    } else if (tok->encoding != NULL) {
        /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
         * allocated using PyMem_
         */
        node* r = PyNode_New(encoding_decl);
        if (r)
            r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
        if (!r || !r->n_str) {
            err_ret->error = E_NOMEM;
            if (r)
                PyObject_FREE(r);
            n = NULL;
            goto done;
        }
        strcpy(r->n_str, tok->encoding);
        PyMem_FREE(tok->encoding);
        tok->encoding = NULL;
        r->n_nchildren = 1;
        r->n_child = n;
        n = r;
    }
353

354
done:
355
    PyTokenizer_Free(tok);
356

357
    return n;
Guido van Rossum's avatar
Guido van Rossum committed
358
}
359

360
static int
361
initerr(perrdetail *err_ret, PyObject *filename)
362
{
363 364 365 366 367 368
    err_ret->error = E_OK;
    err_ret->lineno = 0;
    err_ret->offset = 0;
    err_ret->text = NULL;
    err_ret->token = -1;
    err_ret->expected = -1;
369
#ifndef PGEN
370 371 372 373 374
    if (filename) {
        Py_INCREF(filename);
        err_ret->filename = filename;
    }
    else {
375
        err_ret->filename = PyUnicode_FromString("<string>");
376 377 378 379
        if (err_ret->filename == NULL) {
            err_ret->error = E_ERROR;
            return -1;
        }
380 381 382
    }
#endif
    return 0;
383
}