Parser/pgen/token.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42

import itertools


def generate_tokens(tokens):
    """Yield ``(name, number)`` pairs for each token definition line.

    *tokens* is an iterable of text lines.  Lines that are empty after
    stripping, or whose first non-blank character is ``#``, are ignored.
    For every remaining line the first whitespace-separated word is taken
    as the token name and paired with a consecutive number starting at 0.

    After the input is exhausted two extra pairs are produced:
    ``('N_TOKENS', <count of names yielded>)`` and ``('NT_OFFSET', 256)``.
    """
    index = 0
    for raw in tokens:
        entry = raw.strip()

        # Blank lines and comment lines do not consume a number.
        if not entry or entry.startswith('#'):
            continue

        # Only the first word matters here; anything after it is ignored.
        yield (entry.split()[0], index)
        index += 1

    # The next unused number doubles as the total count of names.
    yield ('N_TOKENS', index)
    yield ('NT_OFFSET', 256)


def generate_opmap(tokens):
    """Yield ``(operator, name)`` pairs for lines that define an operator.

    *tokens* is an iterable of text lines.  Blank lines and lines whose
    first non-blank character is ``#`` are ignored, as is any line that
    does not consist of exactly two whitespace-separated words.  For the
    remaining lines the second word, with surrounding single quotes
    removed, is yielded together with the first word (the token name).

    A final ``('<>', 'NOTEQUAL')`` pair is always appended.
    """
    for raw in tokens:
        entry = raw.strip()

        if entry and not entry.startswith('#'):
            fields = entry.split()

            # Only "NAME 'op'" shaped lines describe an operator.
            if len(fields) == 2:
                token_name, quoted_op = fields
                yield (quoted_op.strip("'"), token_name)

    # "<>" is emitted separately on purpose: listing it in Grammar/Tokens
    # would make "generate_tokens" produce a second entry that collides
    # with "!=", since both share the same name (NOTEQUAL).
    yield ('<>', 'NOTEQUAL')