blob: d3ef935e4ce5d9f152970a54df7ebae6eeadec24 (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
/****************************************************************************
**
** This file is part of the $PACKAGE_NAME$.
**
** Copyright (C) $THISYEAR$ $COMPANY_NAME$.
**
** $QT_EXTENDED_DUAL_LICENSE$
**
****************************************************************************/
#include <QByteArray>
#include "lexer.h"
#include "keywords.cpp"
#include <QDebug>
QT_BEGIN_NAMESPACE
/*
    Splits the NUL-terminated string text into a list of LexerToken values by
    walking the keywords[] transition table (defined outside this function).

    For every byte of text, including the terminating NUL, the function looks
    up keywords[state].next[byte]. A non-zero entry is the next state; a zero
    entry means the current state cannot be extended by this byte. State 0 is
    the start state.

    Each emitted token records:
      - token:  the token type (keywords[state].token, or OTHER for text that
                matched no table entry),
      - start / end: inclusive byte offsets into text,
      - line:   lineNo + 1 at the moment the token is emitted,
      - offset: charNo - (end - start); for a token lying on one line this
                appears to be the 1-based column of its first character.

    Consecutive CHARACTER, DIGIT or WHITESPACE tokens are merged into a single
    token by extending the previous entry's end.

    Returns the list of tokens in the order they occur in text.

    NOTE(review): the table is indexed with a plain char cast to int. Where
    char is signed, bytes >= 0x80 yield a negative index. The size of next[]
    is not visible here - confirm how non-ASCII input is meant to be handled.
*/
QList<LexerToken> tokenize(const char *text)
{
QList<LexerToken> rv;
// Zero-based line counter; tokens store lineNo + 1.
int lineNo = 0;
// Characters consumed since the last '\n'.
int charNo = 0;
// Current index into keywords[]; 0 is the start state.
int state = 0;
// Byte offset in text where the token currently being built begins.
int tokenStart = 0;
// True while inside a run of text that matches no table entry. Every place
// that sets it leaves state at 0.
bool other = false;
// Read cursor.
const char *textprog = text;
bool done = false;
while (!done) {
char textchar = *textprog;
// The body still runs once for the terminating NUL so that a token ending
// at the end of the input is flushed.
done = !textchar;
if (other) {
// Inside an OTHER run: it ends as soon as the current character has a
// transition out of the current state (which is the start state here).
if (keywords[state].next[(int)textchar]) {
// Do other token
// The run covers tokenStart up to the character before the cursor.
LexerToken token;
token.token = OTHER;
token.start = tokenStart;
token.end = textprog - text - 1;
token.line = lineNo + 1;
token.offset = charNo - (token.end - token.start);
tokenStart = token.end + 1;
rv.append(token);
other = false;
} else {
// Still inside the OTHER run: just consume the character.
goto continue_loop;
}
}
if (keywords[state].next[(int)textchar]) {
// The table has a transition for this character: follow it.
state = keywords[state].next[(int)textchar];
} else if (0 == state ||
keywords[state].token == INCOMPLETE) {
// No transition, and either nothing has been matched yet or the current
// state's token is INCOMPLETE: treat the text as the start of an OTHER run.
other = true;
if (keywords[state].token == INCOMPLETE) {
state = 0;
// Deliberately skips continue_loop: the cursor and the line/column
// counters are not advanced, so the same character is examined again
// from the start state with 'other' set. If that character was the
// terminator, the loop condition ends the loop here.
continue;
}
} else {
// Token completed
// The current character cannot extend the match and the current state
// carries a token type, so everything before the cursor is one token.
Token tokenType = keywords[state].token;
bool tokenCollapsed = false;
if (tokenType == CHARACTER ||
tokenType == DIGIT ||
tokenType == WHITESPACE) {
// Merge with the previous token when it has the same type, so a run of
// these arrives as a single token. The merged token keeps the line and
// offset of its first piece.
Token lastTokenType =
rv.isEmpty()?NOTOKEN:rv.last().token;
if (tokenType == lastTokenType) {
rv.last().end = textprog - text - 1;
tokenStart = rv.last().end + 1;
tokenCollapsed = true;
}
}
if (!tokenCollapsed) {
LexerToken token;
token.token = keywords[state].token;
token.start = tokenStart;
token.end = textprog - text - 1;
token.line = lineNo + 1;
token.offset = charNo - (token.end - token.start);
tokenStart = token.end + 1;
rv.append(token);
}
// Restart matching from the start state using the current character; if
// it has no transition there either, it begins an OTHER run.
state = keywords[0].next[(int)textchar];
if (0 == state)
other = true;
}
continue_loop:
// Reset error reporting variables
// (line/column bookkeeping used for token.line and token.offset)
if (textchar == '\n') {
++lineNo;
charNo = 0;
} else {
charNo++;
}
// Increment ptrs
++textprog;
}
// Flush an OTHER run that was still open when the input ended, unless the
// computed end index coincides with tokenStart.
// NOTE(review): when the loop exits through continue_loop the cursor has
// already moved past the terminator, so textprog - text - 1 appears to index
// the terminating NUL rather than the last character. After the 'continue'
// path the cursor was not advanced and the same expression is one smaller.
// Confirm which end offset is intended.
if (other && ((textprog - text - 1) != tokenStart)) {
// Do other token
LexerToken token;
token.token = OTHER;
token.start = tokenStart;
token.end = textprog - text - 1;
token.line = lineNo + 1;
token.offset = charNo - (token.end - token.start);
tokenStart = token.end + 1;
rv.append(token);
other = false;
}
return rv;
}
void dumpTokens(const char *text, const QList<LexerToken> &tokens)
{
for (int ii = 0; ii < tokens.count(); ++ii) {
QByteArray ba(text + tokens.at(ii).start, tokens.at(ii).end - tokens.at(ii).start + 1);
qWarning() << tokens.at(ii).line << ":" << tokens.at(ii).offset << tokenToString(tokens.at(ii).token) << "(" << tokens.at(ii).start << "-" << tokens.at(ii).end << ")" << ba;
}
}
QT_END_NAMESPACE
|