pgen.c 17.9 KB
Newer Older
Guido van Rossum's avatar
Guido van Rossum committed
1 2 3 4
/* Parser generator */

/* For a description, see the comments at end of this file */

5
#include "Python.h"
Guido van Rossum's avatar
Guido van Rossum committed
6
#include "pgenheaders.h"
Guido van Rossum's avatar
Guido van Rossum committed
7 8 9 10 11 12
#include "token.h"
#include "node.h"
#include "grammar.h"
#include "metagrammar.h"
#include "pgen.h"

13
extern int Py_DebugFlag;
14
extern int Py_IgnoreEnvironmentFlag; /* needed by Py_GETENV */
Guido van Rossum's avatar
Guido van Rossum committed
15 16 17 18 19


/* PART ONE -- CONSTRUCT NFA -- Cf. Algorithm 3.2 from [Aho&Ullman 77] */

typedef struct _nfaarc {
20 21
    int         ar_label;
    int         ar_arrow;
Guido van Rossum's avatar
Guido van Rossum committed
22 23 24
} nfaarc;

typedef struct _nfastate {
25 26
    int         st_narcs;
    nfaarc      *st_arc;
Guido van Rossum's avatar
Guido van Rossum committed
27 28 29
} nfastate;

typedef struct _nfa {
30 31 32 33 34
    int                 nf_type;
    char                *nf_name;
    int                 nf_nstates;
    nfastate            *nf_state;
    int                 nf_start, nf_finish;
Guido van Rossum's avatar
Guido van Rossum committed
35 36
} nfa;

37
/* Forward */
38
static void compile_rhs(labellist *ll,
39
                        nfa *nf, node *n, int *pa, int *pb);
40
static void compile_alt(labellist *ll,
41
                        nfa *nf, node *n, int *pa, int *pb);
42
static void compile_item(labellist *ll,
43
                         nfa *nf, node *n, int *pa, int *pb);
44
static void compile_atom(labellist *ll,
45
                         nfa *nf, node *n, int *pa, int *pb);
46

Guido van Rossum's avatar
Guido van Rossum committed
47
static int
Thomas Wouters's avatar
Thomas Wouters committed
48
addnfastate(nfa *nf)
Guido van Rossum's avatar
Guido van Rossum committed
49
{
50 51 52 53 54 55 56 57 58 59
    nfastate *st;

    nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state,
                                sizeof(nfastate) * (nf->nf_nstates + 1));
    if (nf->nf_state == NULL)
        Py_FatalError("out of mem");
    st = &nf->nf_state[nf->nf_nstates++];
    st->st_narcs = 0;
    st->st_arc = NULL;
    return st - nf->nf_state;
Guido van Rossum's avatar
Guido van Rossum committed
60 61 62
}

static void
Thomas Wouters's avatar
Thomas Wouters committed
63
addnfaarc(nfa *nf, int from, int to, int lbl)
Guido van Rossum's avatar
Guido van Rossum committed
64
{
65 66 67 68 69 70 71 72 73 74 75
    nfastate *st;
    nfaarc *ar;

    st = &nf->nf_state[from];
    st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,
                                  sizeof(nfaarc) * (st->st_narcs + 1));
    if (st->st_arc == NULL)
        Py_FatalError("out of mem");
    ar = &st->st_arc[st->st_narcs++];
    ar->ar_label = lbl;
    ar->ar_arrow = to;
Guido van Rossum's avatar
Guido van Rossum committed
76 77 78
}

static nfa *
Thomas Wouters's avatar
Thomas Wouters committed
79
newnfa(char *name)
Guido van Rossum's avatar
Guido van Rossum committed
80
{
81 82 83 84 85 86 87 88 89 90 91 92
    nfa *nf;
    static int type = NT_OFFSET; /* All types will be disjunct */

    nf = (nfa *)PyObject_MALLOC(sizeof(nfa));
    if (nf == NULL)
        Py_FatalError("no mem for new nfa");
    nf->nf_type = type++;
    nf->nf_name = name; /* XXX strdup(name) ??? */
    nf->nf_nstates = 0;
    nf->nf_state = NULL;
    nf->nf_start = nf->nf_finish = -1;
    return nf;
Guido van Rossum's avatar
Guido van Rossum committed
93 94 95
}

typedef struct _nfagrammar {
96 97 98
    int                 gr_nnfas;
    nfa                 **gr_nfa;
    labellist           gr_ll;
Guido van Rossum's avatar
Guido van Rossum committed
99 100
} nfagrammar;

101
/* Forward */
102
static void compile_rule(nfagrammar *gr, node *n);
103

Guido van Rossum's avatar
Guido van Rossum committed
104
static nfagrammar *
Thomas Wouters's avatar
Thomas Wouters committed
105
newnfagrammar(void)
Guido van Rossum's avatar
Guido van Rossum committed
106
{
107 108 109 110 111 112 113 114 115 116 117
    nfagrammar *gr;

    gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));
    if (gr == NULL)
        Py_FatalError("no mem for new nfa grammar");
    gr->gr_nnfas = 0;
    gr->gr_nfa = NULL;
    gr->gr_ll.ll_nlabels = 0;
    gr->gr_ll.ll_label = NULL;
    addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");
    return gr;
Guido van Rossum's avatar
Guido van Rossum committed
118 119
}

120 121 122 123 124 125 126 127 128 129
static void
freenfagrammar(nfagrammar *gr)
{
    for (int i = 0; i < gr->gr_nnfas; i++) {
        PyObject_FREE(gr->gr_nfa[i]->nf_state);
    }
    PyObject_FREE(gr->gr_nfa);
    PyObject_FREE(gr);
}

Guido van Rossum's avatar
Guido van Rossum committed
130
static nfa *
Thomas Wouters's avatar
Thomas Wouters committed
131
addnfa(nfagrammar *gr, char *name)
Guido van Rossum's avatar
Guido van Rossum committed
132
{
133 134 135 136 137 138 139 140 141 142
    nfa *nf;

    nf = newnfa(name);
    gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,
                                  sizeof(nfa*) * (gr->gr_nnfas + 1));
    if (gr->gr_nfa == NULL)
        Py_FatalError("out of mem");
    gr->gr_nfa[gr->gr_nnfas++] = nf;
    addlabel(&gr->gr_ll, NAME, nf->nf_name);
    return nf;
Guido van Rossum's avatar
Guido van Rossum committed
143 144
}

145
#ifdef Py_DEBUG
Guido van Rossum's avatar
Guido van Rossum committed
146

147
static const char REQNFMT[] = "metacompile: less than %d children\n";
Guido van Rossum's avatar
Guido van Rossum committed
148

149
#define REQN(i, count) do { \
150 151 152
    if (i < count) { \
        fprintf(stderr, REQNFMT, count); \
        Py_FatalError("REQN"); \
153 154
    } \
} while (0)
Guido van Rossum's avatar
Guido van Rossum committed
155 156

#else
157
#define REQN(i, count)  /* empty */
Guido van Rossum's avatar
Guido van Rossum committed
158 159 160
#endif

static nfagrammar *
Thomas Wouters's avatar
Thomas Wouters committed
161
metacompile(node *n)
Guido van Rossum's avatar
Guido van Rossum committed
162
{
163 164 165 166 167 168 169 170 171 172 173 174 175 176
    nfagrammar *gr;
    int i;

    if (Py_DebugFlag)
        printf("Compiling (meta-) parse tree into NFA grammar\n");
    gr = newnfagrammar();
    REQ(n, MSTART);
    i = n->n_nchildren - 1; /* Last child is ENDMARKER */
    n = n->n_child;
    for (; --i >= 0; n++) {
        if (n->n_type != NEWLINE)
            compile_rule(gr, n);
    }
    return gr;
Guido van Rossum's avatar
Guido van Rossum committed
177 178
}

Guido van Rossum's avatar
Guido van Rossum committed
179
static void
Thomas Wouters's avatar
Thomas Wouters committed
180
compile_rule(nfagrammar *gr, node *n)
Guido van Rossum's avatar
Guido van Rossum committed
181
{
182 183 184 185 186 187 188 189 190 191 192 193 194 195
    nfa *nf;

    REQ(n, RULE);
    REQN(n->n_nchildren, 4);
    n = n->n_child;
    REQ(n, NAME);
    nf = addnfa(gr, n->n_str);
    n++;
    REQ(n, COLON);
    n++;
    REQ(n, RHS);
    compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);
    n++;
    REQ(n, NEWLINE);
Guido van Rossum's avatar
Guido van Rossum committed
196 197
}

Guido van Rossum's avatar
Guido van Rossum committed
198
static void
Thomas Wouters's avatar
Thomas Wouters committed
199
compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
Guido van Rossum's avatar
Guido van Rossum committed
200
{
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
    int i;
    int a, b;

    REQ(n, RHS);
    i = n->n_nchildren;
    REQN(i, 1);
    n = n->n_child;
    REQ(n, ALT);
    compile_alt(ll, nf, n, pa, pb);
    if (--i <= 0)
        return;
    n++;
    a = *pa;
    b = *pb;
    *pa = addnfastate(nf);
    *pb = addnfastate(nf);
    addnfaarc(nf, *pa, a, EMPTY);
    addnfaarc(nf, b, *pb, EMPTY);
    for (; --i >= 0; n++) {
        REQ(n, VBAR);
        REQN(i, 1);
        --i;
        n++;
        REQ(n, ALT);
        compile_alt(ll, nf, n, &a, &b);
        addnfaarc(nf, *pa, a, EMPTY);
        addnfaarc(nf, b, *pb, EMPTY);
    }
Guido van Rossum's avatar
Guido van Rossum committed
229 230
}

Guido van Rossum's avatar
Guido van Rossum committed
231
static void
Thomas Wouters's avatar
Thomas Wouters committed
232
compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
Guido van Rossum's avatar
Guido van Rossum committed
233
{
234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250
    int i;
    int a, b;

    REQ(n, ALT);
    i = n->n_nchildren;
    REQN(i, 1);
    n = n->n_child;
    REQ(n, ITEM);
    compile_item(ll, nf, n, pa, pb);
    --i;
    n++;
    for (; --i >= 0; n++) {
        REQ(n, ITEM);
        compile_item(ll, nf, n, &a, &b);
        addnfaarc(nf, *pb, a, EMPTY);
        *pb = b;
    }
Guido van Rossum's avatar
Guido van Rossum committed
251 252
}

Guido van Rossum's avatar
Guido van Rossum committed
253
static void
Thomas Wouters's avatar
Thomas Wouters committed
254
compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
Guido van Rossum's avatar
Guido van Rossum committed
255
{
256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
    int i;
    int a, b;

    REQ(n, ITEM);
    i = n->n_nchildren;
    REQN(i, 1);
    n = n->n_child;
    if (n->n_type == LSQB) {
        REQN(i, 3);
        n++;
        REQ(n, RHS);
        *pa = addnfastate(nf);
        *pb = addnfastate(nf);
        addnfaarc(nf, *pa, *pb, EMPTY);
        compile_rhs(ll, nf, n, &a, &b);
        addnfaarc(nf, *pa, a, EMPTY);
        addnfaarc(nf, b, *pb, EMPTY);
        REQN(i, 1);
        n++;
        REQ(n, RSQB);
    }
    else {
        compile_atom(ll, nf, n, pa, pb);
        if (--i <= 0)
            return;
        n++;
        addnfaarc(nf, *pb, *pa, EMPTY);
        if (n->n_type == STAR)
            *pb = *pa;
        else
            REQ(n, PLUS);
    }
Guido van Rossum's avatar
Guido van Rossum committed
288 289
}

Guido van Rossum's avatar
Guido van Rossum committed
290
static void
Thomas Wouters's avatar
Thomas Wouters committed
291
compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb)
Guido van Rossum's avatar
Guido van Rossum committed
292
{
293 294 295 296
    int i;

    REQ(n, ATOM);
    i = n->n_nchildren;
297
    (void)i; /* Don't warn about set but unused */
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
    REQN(i, 1);
    n = n->n_child;
    if (n->n_type == LPAR) {
        REQN(i, 3);
        n++;
        REQ(n, RHS);
        compile_rhs(ll, nf, n, pa, pb);
        n++;
        REQ(n, RPAR);
    }
    else if (n->n_type == NAME || n->n_type == STRING) {
        *pa = addnfastate(nf);
        *pb = addnfastate(nf);
        addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));
    }
    else
        REQ(n, NAME);
Guido van Rossum's avatar
Guido van Rossum committed
315 316 317
}

static void
Thomas Wouters's avatar
Thomas Wouters committed
318
dumpstate(labellist *ll, nfa *nf, int istate)
Guido van Rossum's avatar
Guido van Rossum committed
319
{
320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337
    nfastate *st;
    int i;
    nfaarc *ar;

    printf("%c%2d%c",
        istate == nf->nf_start ? '*' : ' ',
        istate,
        istate == nf->nf_finish ? '.' : ' ');
    st = &nf->nf_state[istate];
    ar = st->st_arc;
    for (i = 0; i < st->st_narcs; i++) {
        if (i > 0)
            printf("\n    ");
        printf("-> %2d  %s", ar->ar_arrow,
            PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));
        ar++;
    }
    printf("\n");
Guido van Rossum's avatar
Guido van Rossum committed
338 339 340
}

static void
Thomas Wouters's avatar
Thomas Wouters committed
341
dumpnfa(labellist *ll, nfa *nf)
Guido van Rossum's avatar
Guido van Rossum committed
342
{
343 344 345 346 347 348
    int i;

    printf("NFA '%s' has %d states; start %d, finish %d\n",
        nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);
    for (i = 0; i < nf->nf_nstates; i++)
        dumpstate(ll, nf, i);
Guido van Rossum's avatar
Guido van Rossum committed
349 350 351 352 353
}


/* PART TWO -- CONSTRUCT DFA -- Algorithm 3.1 from [Aho&Ullman 77] */

354
static void
Thomas Wouters's avatar
Thomas Wouters committed
355
addclosure(bitset ss, nfa *nf, int istate)
Guido van Rossum's avatar
Guido van Rossum committed
356
{
357 358 359 360 361 362 363 364 365 366 367
    if (addbit(ss, istate)) {
        nfastate *st = &nf->nf_state[istate];
        nfaarc *ar = st->st_arc;
        int i;

        for (i = st->st_narcs; --i >= 0; ) {
            if (ar->ar_label == EMPTY)
                addclosure(ss, nf, ar->ar_arrow);
            ar++;
        }
    }
Guido van Rossum's avatar
Guido van Rossum committed
368 369 370
}

typedef struct _ss_arc {
371 372 373
    bitset      sa_bitset;
    int         sa_arrow;
    int         sa_label;
Guido van Rossum's avatar
Guido van Rossum committed
374 375 376
} ss_arc;

typedef struct _ss_state {
377 378 379 380 381 382
    bitset      ss_ss;
    int         ss_narcs;
    struct _ss_arc      *ss_arc;
    int         ss_deleted;
    int         ss_finish;
    int         ss_rename;
Guido van Rossum's avatar
Guido van Rossum committed
383 384 385
} ss_state;

typedef struct _ss_dfa {
386 387
    int         sd_nstates;
    ss_state *sd_state;
Guido van Rossum's avatar
Guido van Rossum committed
388 389
} ss_dfa;

390
/* Forward */
391
static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
392
                       labellist *ll, const char *msg);
393 394
static void simplify(int xx_nstates, ss_state *xx_state);
static void convert(dfa *d, int xx_nstates, ss_state *xx_state);
395

Guido van Rossum's avatar
Guido van Rossum committed
396
static void
Thomas Wouters's avatar
Thomas Wouters committed
397
makedfa(nfagrammar *gr, nfa *nf, dfa *d)
Guido van Rossum's avatar
Guido van Rossum committed
398
{
399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500
    int nbits = nf->nf_nstates;
    bitset ss;
    int xx_nstates;
    ss_state *xx_state, *yy;
    ss_arc *zz;
    int istate, jstate, iarc, jarc, ibit;
    nfastate *st;
    nfaarc *ar;

    ss = newbitset(nbits);
    addclosure(ss, nf, nf->nf_start);
    xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));
    if (xx_state == NULL)
        Py_FatalError("no mem for xx_state in makedfa");
    xx_nstates = 1;
    yy = &xx_state[0];
    yy->ss_ss = ss;
    yy->ss_narcs = 0;
    yy->ss_arc = NULL;
    yy->ss_deleted = 0;
    yy->ss_finish = testbit(ss, nf->nf_finish);
    if (yy->ss_finish)
        printf("Error: nonterminal '%s' may produce empty.\n",
            nf->nf_name);

    /* This algorithm is from a book written before
       the invention of structured programming... */

    /* For each unmarked state... */
    for (istate = 0; istate < xx_nstates; ++istate) {
        size_t size;
        yy = &xx_state[istate];
        ss = yy->ss_ss;
        /* For all its states... */
        for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {
            if (!testbit(ss, ibit))
                continue;
            st = &nf->nf_state[ibit];
            /* For all non-empty arcs from this state... */
            for (iarc = 0; iarc < st->st_narcs; iarc++) {
                ar = &st->st_arc[iarc];
                if (ar->ar_label == EMPTY)
                    continue;
                /* Look up in list of arcs from this state */
                for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {
                    zz = &yy->ss_arc[jarc];
                    if (ar->ar_label == zz->sa_label)
                        goto found;
                }
                /* Add new arc for this state */
                size = sizeof(ss_arc) * (yy->ss_narcs + 1);
                yy->ss_arc = (ss_arc *)PyObject_REALLOC(
                                            yy->ss_arc, size);
                if (yy->ss_arc == NULL)
                    Py_FatalError("out of mem");
                zz = &yy->ss_arc[yy->ss_narcs++];
                zz->sa_label = ar->ar_label;
                zz->sa_bitset = newbitset(nbits);
                zz->sa_arrow = -1;
             found:             ;
                /* Add destination */
                addclosure(zz->sa_bitset, nf, ar->ar_arrow);
            }
        }
        /* Now look up all the arrow states */
        for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {
            zz = &xx_state[istate].ss_arc[jarc];
            for (jstate = 0; jstate < xx_nstates; jstate++) {
                if (samebitset(zz->sa_bitset,
                    xx_state[jstate].ss_ss, nbits)) {
                    zz->sa_arrow = jstate;
                    goto done;
                }
            }
            size = sizeof(ss_state) * (xx_nstates + 1);
            xx_state = (ss_state *)PyObject_REALLOC(xx_state,
                                                        size);
            if (xx_state == NULL)
                Py_FatalError("out of mem");
            zz->sa_arrow = xx_nstates;
            yy = &xx_state[xx_nstates++];
            yy->ss_ss = zz->sa_bitset;
            yy->ss_narcs = 0;
            yy->ss_arc = NULL;
            yy->ss_deleted = 0;
            yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);
         done:          ;
        }
    }

    if (Py_DebugFlag)
        printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
                                        "before minimizing");

    simplify(xx_nstates, xx_state);

    if (Py_DebugFlag)
        printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,
                                        "after minimizing");

    convert(d, xx_nstates, xx_state);

501 502 503 504 505
    for (int i = 0; i < xx_nstates; i++) {
        for (int j = 0; j < xx_state[i].ss_narcs; j++)
            delbitset(xx_state[i].ss_arc[j].sa_bitset);
        PyObject_FREE(xx_state[i].ss_arc);
    }
506
    PyObject_FREE(xx_state);
Guido van Rossum's avatar
Guido van Rossum committed
507 508
}

Guido van Rossum's avatar
Guido van Rossum committed
509
static void
Thomas Wouters's avatar
Thomas Wouters committed
510
printssdfa(int xx_nstates, ss_state *xx_state, int nbits,
511
           labellist *ll, const char *msg)
Guido van Rossum's avatar
Guido van Rossum committed
512
{
513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538
    int i, ibit, iarc;
    ss_state *yy;
    ss_arc *zz;

    printf("Subset DFA %s\n", msg);
    for (i = 0; i < xx_nstates; i++) {
        yy = &xx_state[i];
        if (yy->ss_deleted)
            continue;
        printf(" Subset %d", i);
        if (yy->ss_finish)
            printf(" (finish)");
        printf(" { ");
        for (ibit = 0; ibit < nbits; ibit++) {
            if (testbit(yy->ss_ss, ibit))
                printf("%d ", ibit);
        }
        printf("}\n");
        for (iarc = 0; iarc < yy->ss_narcs; iarc++) {
            zz = &yy->ss_arc[iarc];
            printf("  Arc to state %d, label %s\n",
                zz->sa_arrow,
                PyGrammar_LabelRepr(
                    &ll->ll_label[zz->sa_label]));
        }
    }
Guido van Rossum's avatar
Guido van Rossum committed
539 540 541 542 543 544 545 546 547
}


/* PART THREE -- SIMPLIFY DFA */

/* Simplify the DFA by repeatedly eliminating states that are
   equivalent to another oner.  This is NOT Algorithm 3.3 from
   [Aho&Ullman 77].  It does not always finds the minimal DFA,
   but it does usually make a much smaller one...  (For an example
548
   of sub-optimal behavior, try S: x a b+ | y a b+.)
Guido van Rossum's avatar
Guido van Rossum committed
549 550 551
*/

static int
Thomas Wouters's avatar
Thomas Wouters committed
552
samestate(ss_state *s1, ss_state *s2)
Guido van Rossum's avatar
Guido van Rossum committed
553
{
554 555 556 557 558 559 560 561 562 563
    int i;

    if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)
        return 0;
    for (i = 0; i < s1->ss_narcs; i++) {
        if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||
            s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)
            return 0;
    }
    return 1;
Guido van Rossum's avatar
Guido van Rossum committed
564 565 566
}

static void
Thomas Wouters's avatar
Thomas Wouters committed
567
renamestates(int xx_nstates, ss_state *xx_state, int from, int to)
Guido van Rossum's avatar
Guido van Rossum committed
568
{
569 570 571 572 573 574 575 576 577 578 579 580
    int i, j;

    if (Py_DebugFlag)
        printf("Rename state %d to %d.\n", from, to);
    for (i = 0; i < xx_nstates; i++) {
        if (xx_state[i].ss_deleted)
            continue;
        for (j = 0; j < xx_state[i].ss_narcs; j++) {
            if (xx_state[i].ss_arc[j].sa_arrow == from)
                xx_state[i].ss_arc[j].sa_arrow = to;
        }
    }
Guido van Rossum's avatar
Guido van Rossum committed
581 582
}

Guido van Rossum's avatar
Guido van Rossum committed
583
static void
Thomas Wouters's avatar
Thomas Wouters committed
584
simplify(int xx_nstates, ss_state *xx_state)
Guido van Rossum's avatar
Guido van Rossum committed
585
{
586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605 606
    int changes;
    int i, j;

    do {
        changes = 0;
        for (i = 1; i < xx_nstates; i++) {
            if (xx_state[i].ss_deleted)
                continue;
            for (j = 0; j < i; j++) {
                if (xx_state[j].ss_deleted)
                    continue;
                if (samestate(&xx_state[i], &xx_state[j])) {
                    xx_state[i].ss_deleted++;
                    renamestates(xx_nstates, xx_state,
                                 i, j);
                    changes++;
                    break;
                }
            }
        }
    } while (changes);
Guido van Rossum's avatar
Guido van Rossum committed
607 608 609 610 611 612 613
}


/* PART FOUR -- GENERATE PARSING TABLES */

/* Convert the DFA into a grammar that can be used by our parser */

Guido van Rossum's avatar
Guido van Rossum committed
614
static void
Thomas Wouters's avatar
Thomas Wouters committed
615
convert(dfa *d, int xx_nstates, ss_state *xx_state)
Guido van Rossum's avatar
Guido van Rossum committed
616
{
617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
    int i, j;
    ss_state *yy;
    ss_arc *zz;

    for (i = 0; i < xx_nstates; i++) {
        yy = &xx_state[i];
        if (yy->ss_deleted)
            continue;
        yy->ss_rename = addstate(d);
    }

    for (i = 0; i < xx_nstates; i++) {
        yy = &xx_state[i];
        if (yy->ss_deleted)
            continue;
        for (j = 0; j < yy->ss_narcs; j++) {
            zz = &yy->ss_arc[j];
            addarc(d, yy->ss_rename,
                xx_state[zz->sa_arrow].ss_rename,
                zz->sa_label);
        }
        if (yy->ss_finish)
            addarc(d, yy->ss_rename, yy->ss_rename, 0);
    }

    d->d_initial = 0;
Guido van Rossum's avatar
Guido van Rossum committed
643 644 645 646 647 648
}


/* PART FIVE -- GLUE IT ALL TOGETHER */

static grammar *
Thomas Wouters's avatar
Thomas Wouters committed
649
maketables(nfagrammar *gr)
Guido van Rossum's avatar
Guido van Rossum committed
650
{
651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673
    int i;
    nfa *nf;
    dfa *d;
    grammar *g;

    if (gr->gr_nnfas == 0)
        return NULL;
    g = newgrammar(gr->gr_nfa[0]->nf_type);
                    /* XXX first rule must be start rule */
    g->g_ll = gr->gr_ll;

    for (i = 0; i < gr->gr_nnfas; i++) {
        nf = gr->gr_nfa[i];
        if (Py_DebugFlag) {
            printf("Dump of NFA for '%s' ...\n", nf->nf_name);
            dumpnfa(&gr->gr_ll, nf);
            printf("Making DFA for '%s' ...\n", nf->nf_name);
        }
        d = adddfa(g, nf->nf_type, nf->nf_name);
        makedfa(gr, gr->gr_nfa[i], d);
    }

    return g;
Guido van Rossum's avatar
Guido van Rossum committed
674 675 676
}

grammar *
Thomas Wouters's avatar
Thomas Wouters committed
677
pgen(node *n)
Guido van Rossum's avatar
Guido van Rossum committed
678
{
679 680 681 682 683 684 685
    nfagrammar *gr;
    grammar *g;

    gr = metacompile(n);
    g = maketables(gr);
    translatelabels(g);
    addfirstsets(g);
686
    freenfagrammar(gr);
687
    return g;
Guido van Rossum's avatar
Guido van Rossum committed
688 689
}

690 691 692 693 694
grammar *
Py_pgen(node *n)
{
  return pgen(n);
}
Guido van Rossum's avatar
Guido van Rossum committed
695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720

/*

Description
-----------

Input is a grammar in extended BNF (using * for repetition, + for
at-least-once repetition, [] for optional parts, | for alternatives and
() for grouping).  This has already been parsed and turned into a parse
tree.

Each rule is considered as a regular expression in its own right.
It is turned into a Non-deterministic Finite Automaton (NFA), which
is then turned into a Deterministic Finite Automaton (DFA), which is then
optimized to reduce the number of states.  See [Aho&Ullman 77] chapter 3,
or similar compiler books (this technique is more often used for lexical
analyzers).

The DFA's are used by the parser as parsing tables in a special way
that's probably unique.  Before they are usable, the FIRST sets of all
non-terminals are computed.

Reference
---------

[Aho&Ullman 77]
721 722
    Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977
    (first edition)
Guido van Rossum's avatar
Guido van Rossum committed
723 724

*/