summaryrefslogtreecommitdiffstats
path: root/util/lexgen
diff options
context:
space:
mode:
Diffstat (limited to 'util/lexgen')
-rw-r--r--util/lexgen/README16
-rw-r--r--util/lexgen/configfile.cpp99
-rw-r--r--util/lexgen/configfile.h81
-rw-r--r--util/lexgen/css2-simplified.lexgen93
-rw-r--r--util/lexgen/generator.cpp532
-rw-r--r--util/lexgen/generator.h221
-rw-r--r--util/lexgen/global.h113
-rw-r--r--util/lexgen/lexgen.lexgen24
-rw-r--r--util/lexgen/lexgen.pri3
-rw-r--r--util/lexgen/lexgen.pro6
-rw-r--r--util/lexgen/main.cpp323
-rw-r--r--util/lexgen/nfa.cpp508
-rw-r--r--util/lexgen/nfa.h127
-rw-r--r--util/lexgen/re2nfa.cpp547
-rw-r--r--util/lexgen/re2nfa.h116
-rw-r--r--util/lexgen/test.lexgen9
-rw-r--r--util/lexgen/tests/testdata/backtrack1/input1
-rw-r--r--util/lexgen/tests/testdata/backtrack1/output1
-rw-r--r--util/lexgen/tests/testdata/backtrack1/rules.lexgen3
-rw-r--r--util/lexgen/tests/testdata/backtrack2/input1
-rw-r--r--util/lexgen/tests/testdata/backtrack2/output2
-rw-r--r--util/lexgen/tests/testdata/backtrack2/rules.lexgen4
-rw-r--r--util/lexgen/tests/testdata/casesensitivity/input1
-rw-r--r--util/lexgen/tests/testdata/casesensitivity/output14
-rw-r--r--util/lexgen/tests/testdata/casesensitivity/rules.lexgen7
-rw-r--r--util/lexgen/tests/testdata/comments/input1
-rw-r--r--util/lexgen/tests/testdata/comments/output2
-rw-r--r--util/lexgen/tests/testdata/comments/rules.lexgen2
-rw-r--r--util/lexgen/tests/testdata/dot/input1
-rw-r--r--util/lexgen/tests/testdata/dot/output2
-rw-r--r--util/lexgen/tests/testdata/dot/rules.lexgen3
-rw-r--r--util/lexgen/tests/testdata/negation/input1
-rw-r--r--util/lexgen/tests/testdata/negation/output2
-rw-r--r--util/lexgen/tests/testdata/negation/rules.lexgen3
-rw-r--r--util/lexgen/tests/testdata/quoteinset/input1
-rw-r--r--util/lexgen/tests/testdata/quoteinset/output1
-rw-r--r--util/lexgen/tests/testdata/quoteinset/rules.lexgen2
-rw-r--r--util/lexgen/tests/testdata/quotes/input1
-rw-r--r--util/lexgen/tests/testdata/quotes/output1
-rw-r--r--util/lexgen/tests/testdata/quotes/rules.lexgen2
-rw-r--r--util/lexgen/tests/testdata/simple/input1
-rw-r--r--util/lexgen/tests/testdata/simple/output2
-rw-r--r--util/lexgen/tests/testdata/simple/rules.lexgen3
-rw-r--r--util/lexgen/tests/testdata/subsets1/input1
-rw-r--r--util/lexgen/tests/testdata/subsets1/output2
-rw-r--r--util/lexgen/tests/testdata/subsets1/rules.lexgen3
-rw-r--r--util/lexgen/tests/testdata/subsets2/input1
-rw-r--r--util/lexgen/tests/testdata/subsets2/output3
-rw-r--r--util/lexgen/tests/testdata/subsets2/rules.lexgen4
-rw-r--r--util/lexgen/tests/tests.pro6
-rw-r--r--util/lexgen/tests/tst_lexgen.cpp285
-rw-r--r--util/lexgen/tokenizer.cpp237
52 files changed, 3425 insertions, 0 deletions
diff --git a/util/lexgen/README b/util/lexgen/README
new file mode 100644
index 0000000..b8e9277
--- /dev/null
+++ b/util/lexgen/README
@@ -0,0 +1,16 @@
+Lexgen
+------
+
+This is a little tool to generate lexical scanners from a rather simplistic
+configuration file. We use it internally in Qt to generate the scanner for the
+CSS parser that is built into the toolkit (used for the widget styling and the
+HTML import into QTextDocument).
+
+Beware, it's very slow (in generating the code) and it may not generate what
+you want. But I like that it generates code that operates on QChar and friends.
+
+Use at your own risk ;-)
+
+
+--
+Simon Hausmann <simon@trolltech.com>
diff --git a/util/lexgen/configfile.cpp b/util/lexgen/configfile.cpp
new file mode 100644
index 0000000..8d77689
--- /dev/null
+++ b/util/lexgen/configfile.cpp
@@ -0,0 +1,99 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include "configfile.h"
+
+#include <QFile>
+
+ConfigFile::SectionMap ConfigFile::parse(const QString &fileName)
+{
+ QFile f(fileName);
+ if (!f.open(QIODevice::ReadOnly))
+ return ConfigFile::SectionMap();
+ return parse(&f);
+}
+
+ConfigFile::SectionMap ConfigFile::parse(QIODevice *dev)
+{
+ SectionMap sections;
+ SectionMap::Iterator currentSection = sections.end();
+
+ ConfigFile::SectionMap result;
+ int currentLineNumber = 0;
+ while (!dev->atEnd()) {
+ QString line = QString::fromUtf8(dev->readLine()).trimmed();
+ ++currentLineNumber;
+
+ if (line.isEmpty() || line.startsWith(QLatin1Char('#')))
+ continue;
+
+ if (line.startsWith(QLatin1Char('['))) {
+ if (!line.endsWith(']')) {
+ qWarning("Syntax error at line %d: Missing ']' at start of new section.", currentLineNumber);
+ return SectionMap();
+ }
+ line.remove(0, 1);
+ line.chop(1);
+ const QString sectionName = line;
+ currentSection = sections.insert(sectionName, Section());
+ continue;
+ }
+
+ if (currentSection == sections.end()) {
+ qWarning("Syntax error at line %d: Entry found outside of any section.", currentLineNumber);
+ return SectionMap();
+ }
+
+ Entry e;
+ e.lineNumber = currentLineNumber;
+
+ int equalPos = line.indexOf(QLatin1Char('='));
+ if (equalPos == -1) {
+ e.key = line;
+ } else {
+ e.key = line;
+ e.key.truncate(equalPos);
+ e.key = e.key.trimmed();
+ e.value = line.mid(equalPos + 1).trimmed();
+ }
+ currentSection->append(e);
+ }
+ return sections;
+}
diff --git a/util/lexgen/configfile.h b/util/lexgen/configfile.h
new file mode 100644
index 0000000..7bf3389
--- /dev/null
+++ b/util/lexgen/configfile.h
@@ -0,0 +1,81 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef CONFIGFILE_H
+#define CONFIGFILE_H
+
+#include <QStringList>
+#include <QMap>
+#include <QVector>
+
+struct ConfigFile
+{
+ struct Entry
+ {
+ inline Entry() : lineNumber(-1) {}
+ int lineNumber;
+ QString key;
+ QString value;
+ };
+ struct Section : public QVector<Entry>
+ {
+ inline bool contains(const QString &key) const
+ {
+ for (int i = 0; i < count(); ++i)
+ if (at(i).key == key)
+ return true;
+ return false;
+ }
+ inline QString value(const QString &key, const QString &defaultValue = QString()) const
+ {
+ for (int i = 0; i < count(); ++i)
+ if (at(i).key == key)
+ return at(i).value;
+ return defaultValue;
+ }
+ };
+ typedef QMap<QString, Section> SectionMap;
+
+ static SectionMap parse(const QString &fileName);
+ static SectionMap parse(QIODevice *dev);
+};
+
+#endif // CONFIGFILE_H
+
diff --git a/util/lexgen/css2-simplified.lexgen b/util/lexgen/css2-simplified.lexgen
new file mode 100644
index 0000000..3976632
--- /dev/null
+++ b/util/lexgen/css2-simplified.lexgen
@@ -0,0 +1,93 @@
+[Options]
+case-insensitive
+classname = QCssScanner_Generated
+
+[Code Generator Options]
+MapToCode[a-z] = (ch.unicode() >= 'a' && ch.unicode() <= 'z') || ch.unicode() >= 256
+TokenPrefix = QCss::
+FileHeader = ../moc/licenseheader.txt
+
+[Macros]
+escape = \\[^\r\n\f0-9a-f]
+nmstart = [_a-z]|{escape}
+nmchar = [_a-z0-9-]|{escape}
+nl = \n|\r\n|\r|\f
+string1 = \"([^\n\r\f\\"]|\\{nl}|{escape})*\"
+string2 = \'([^\n\r\f\\']|\\{nl}|{escape})*\'
+invalid1 = \"([^\n\r\f\\"]|\\{nl}|{escape})*
+invalid2 = \'([^\n\r\f\\']|\\{nl}|{escape})*
+
+ident = -?{nmstart}{nmchar}*
+name = {nmchar}+
+num = [0-9]+|[0-9]*"."[0-9]+
+string = {string1}|{string2}
+invalid = {invalid1}|{invalid2}
+url = ([!#$%&*-~]|{escape})*
+s = [ \t\r\n\f]
+w = {s}*
+
+[Tokens]
+
+S = {s}+
+
+handleCommentStart() = \/\*
+
+CDO = "<!--"
+CDC = "-->"
+INCLUDES = "~="
+DASHMATCH = "|="
+
+LBRACE = {w}"{"
+PLUS = {w}"+"
+GREATER = {w}">"
+COMMA = {w}","
+
+STRING = {string}
+INVALID = {invalid}
+
+IDENT = {ident}
+
+HASH = "#"{name}
+
+ATKEYWORD_SYM = "@"{ident}
+
+EXCLAMATION_SYM = "!"
+
+#EMS = {num}em
+#EXS = {num}ex
+#LENGTH = {num}px
+#LENGTH = {num}cm
+#LENGTH = {num}mm
+#LENGTH = {num}in
+#LENGTH = {num}pt
+#LENGTH = {num}pc
+#ANGLE = {num}deg
+#ANGLE = {num}rad
+#ANGLE = {num}grad
+#TIME = {num}ms
+#TIME = {num}s
+#FREQ = {num}hz
+#FREQ = {num}khz
+#DIMENSION = {num}{ident}
+LENGTH = {num}{ident}
+
+PERCENTAGE = {num}%
+NUMBER = {num}
+
+#URI = "url("{w}{string}{w}")"
+#URI = "url("{w}{url}{w}")"
+FUNCTION = {ident}"("
+
+COLON = :
+SEMICOLON = ;
+RBRACE = \}
+SLASH = /
+MINUS = -
+DOT = \.
+STAR = \*
+LBRACKET = \[
+RBRACKET = \]
+EQUAL = \=
+LPAREN = \(
+RPAREN = \)
+OR = \|
diff --git a/util/lexgen/generator.cpp b/util/lexgen/generator.cpp
new file mode 100644
index 0000000..a3c63f2
--- /dev/null
+++ b/util/lexgen/generator.cpp
@@ -0,0 +1,532 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "generator.h"
+
+#include <QFile>
+
+void Function::printDeclaration(CodeBlock &block, const QString &funcNamePrefix) const
+{
+ block << (iline ? "inline " : "") << signature(funcNamePrefix) << (iline ? QLatin1String(" {") : QLatin1String(";"));
+ if (!iline)
+ return;
+
+ block.indent();
+ QString tmp = body;
+ if (tmp.endsWith(QLatin1Char('\n')))
+ tmp.chop(1);
+ foreach (QString line, tmp.split(QLatin1Char('\n')))
+ block << line;
+ block.outdent();
+ block << "}";
+}
+
+QString Function::signature(const QString &funcNamePrefix) const
+{
+ QString sig;
+ if (!rtype.isEmpty()) {
+ sig += rtype;
+ sig += QLatin1Char(' ');
+ }
+ sig += funcNamePrefix;
+ sig += fname;
+ if (cnst)
+ sig += " const";
+ return sig;
+}
+
+QString Function::definition() const
+{
+ if (iline)
+ return QString();
+
+ QString result;
+ result += signature();
+ result += QLatin1String("\n{\n");
+
+ QString tmp = body;
+
+ if (tmp.endsWith(QLatin1Char('\n')))
+ tmp.chop(1);
+ if (!tmp.startsWith(QLatin1Char('\n')))
+ tmp.prepend(" ");
+
+ tmp.replace(QLatin1Char('\n'), QLatin1String("\n "));
+
+ result += tmp;
+
+ result += QLatin1String("\n}\n");
+
+ return result;
+}
+
+void Class::Section::printDeclaration(const Class *klass, CodeBlock &block) const
+{
+ foreach (Function ctor, constructors)
+ ctor.printDeclaration(block, klass->name());
+
+ if (!constructors.isEmpty())
+ block.addNewLine();
+
+ foreach (Function func, functions)
+ func.printDeclaration(block);
+
+ if (!functions.isEmpty())
+ block.addNewLine();
+
+ foreach (QString var, variables)
+ block << var << ';';
+}
+
+void Class::addConstructor(Access access, const QString &body, const QString &_args)
+{
+ Function ctor;
+ QString args = _args;
+ if (!args.startsWith(QLatin1Char('('))
+ && !args.endsWith(QLatin1Char(')'))) {
+ args.prepend('(');
+ args.append(')');
+ }
+ ctor.setName(args);
+ ctor.addBody(body);
+ sections[access].constructors.append(ctor);
+}
+
+QString Class::Section::definition(const Class *klass) const
+{
+ QString result;
+
+ foreach (Function ctor, constructors) {
+ ctor.setName(klass->name() + "::" + klass->name() + ctor.name());
+ result += ctor.definition();
+ result += QLatin1Char('\n');
+ }
+
+ foreach (Function func, functions) {
+ if (!func.hasBody()) continue;
+ func.setName(klass->name() + "::" + func.name());
+ result += func.definition();
+ result += QLatin1Char('\n');
+ }
+
+ return result;
+}
+
+QString Class::declaration() const
+{
+ CodeBlock block;
+
+ block << QLatin1String("class ") << cname;
+ block << "{";
+
+ if (!sections[PublicMember].isEmpty()) {
+ block << "public:";
+ block.indent();
+ sections[PublicMember].printDeclaration(this, block);
+ block.outdent();
+ }
+
+ if (!sections[ProtectedMember].isEmpty()) {
+ block << "protected:";
+ block.indent();
+ sections[ProtectedMember].printDeclaration(this, block);
+ block.outdent();
+ }
+
+ if (!sections[PrivateMember].isEmpty()) {
+ block << "private:";
+ block.indent();
+ sections[PrivateMember].printDeclaration(this, block);
+ block.outdent();
+ }
+
+ block << "};";
+ block.addNewLine();
+
+ return block.toString();
+}
+
+QString Class::definition() const
+{
+ return sections[PrivateMember].definition(this)
+ + sections[ProtectedMember].definition(this)
+ + sections[PublicMember].definition(this);
+}
+
+Generator::Generator(const DFA &_dfa, const Config &config)
+ : dfa(_dfa), cfg(config)
+{
+ QList<InputType> lst = cfg.maxInputSet.toList();
+ qSort(lst);
+ minInput = lst.first();
+ maxInput = lst.last();
+
+ ConfigFile::Section section = config.configSections.value("Code Generator Options");
+
+ foreach (ConfigFile::Entry entry, section) {
+ if (!entry.key.startsWith(QLatin1String("MapToCode["))
+ || !entry.key.endsWith(QLatin1Char(']')))
+ continue;
+ QString range = entry.key;
+ range.remove(0, qstrlen("MapToCode["));
+ range.chop(1);
+ if (range.length() != 3
+ || range.at(1) != QLatin1Char('-')) {
+ qWarning("Invalid range for char mapping function: %s", qPrintable(range));
+ continue;
+ }
+ TransitionSequence seq;
+ seq.first = range.at(0).unicode();
+ seq.last = range.at(2).unicode();
+ seq.testFunction = entry.value;
+ charFunctionRanges.append(seq);
+ }
+
+ QString tokenPrefix = section.value("TokenPrefix");
+ if (!tokenPrefix.isEmpty()) {
+ for (int i = 0; i < dfa.count(); ++i)
+ if (!dfa.at(i).symbol.isEmpty()
+ && !dfa.at(i).symbol.endsWith(QLatin1String("()")))
+ dfa[i].symbol.prepend(tokenPrefix);
+ }
+
+ headerFileName = section.value("FileHeader");
+}
+
+static inline bool adjacentKeys(int left, int right) { return left + 1 == right; }
+//static inline bool adjacentKeys(const InputType &left, const InputType &right)
+//{ return left.val + 1 == right.val; }
+
+static QVector<Generator::TransitionSequence> convertToSequences(const TransitionMap &transitions)
+{
+ QVector<Generator::TransitionSequence> sequences;
+ if (transitions.isEmpty())
+ return sequences;
+
+ QList<InputType> keys = transitions.keys();
+ qSort(keys);
+ int i = 0;
+ Generator::TransitionSequence sequence;
+ sequence.first = keys.at(0);
+ ++i;
+ for (; i < keys.count(); ++i) {
+ if (adjacentKeys(keys.at(i - 1), keys.at(i))
+ && transitions.value(keys.at(i)) == transitions.value(keys.at(i - 1))) {
+ continue;
+ }
+ sequence.last = keys.at(i - 1);
+ sequence.transition = transitions.value(sequence.last);
+ sequences.append(sequence);
+
+ sequence.first = keys.at(i);
+ }
+ sequence.last = keys.at(i - 1);
+ sequence.transition = transitions.value(sequence.last);
+ sequences.append(sequence);
+
+ return sequences;
+}
+
+QDebug &operator<<(QDebug &debug, const Generator::TransitionSequence &seq)
+{
+ return debug << "[first:" << seq.first << "; last:" << seq.last << "; transition:" << seq.transition
+ << (seq.testFunction.isEmpty() ? QString() : QString(QString("; testfunction:" + seq.testFunction)))
+ << "]";
+}
+
+bool Generator::isSingleReferencedFinalState(int i) const
+{
+ return backReferenceMap.value(i) == 1
+ && dfa.at(i).transitions.isEmpty()
+ && !dfa.at(i).symbol.isEmpty();
+}
+
+void Generator::generateTransitions(CodeBlock &body, const TransitionMap &transitions)
+{
+ if (transitions.isEmpty())
+ return;
+
+ QVector<TransitionSequence> sequences = convertToSequences(transitions);
+
+ bool needsCharFunction = false;
+ if (!charFunctionRanges.isEmpty()) {
+ int i = 0;
+ while (i < sequences.count()) {
+ const TransitionSequence &seq = sequences.at(i);
+ if (!seq.testFunction.isEmpty()) {
+ ++i;
+ continue;
+ }
+
+ foreach (TransitionSequence range, charFunctionRanges)
+ if (range.first >= seq.first && range.last <= seq.last) {
+ needsCharFunction = true;
+
+ TransitionSequence left, middle, right;
+
+ left.first = seq.first;
+ left.last = range.first - 1;
+ left.transition = seq.transition;
+
+ middle = range;
+ middle.transition = seq.transition;
+
+ right.first = range.last + 1;
+ right.last = seq.last;
+ right.transition = seq.transition;
+
+ sequences.remove(i);
+ if (left.last >= left.first) {
+ sequences.insert(i, left);
+ ++i;
+ }
+ sequences.insert(i, middle);
+ ++i;
+ if (right.last >= right.first) {
+ sequences.insert(i, right);
+ ++i;
+ }
+
+ i = -1;
+ break;
+ }
+
+ ++i;
+ }
+ }
+
+ //qDebug() << "sequence count" << sequences.count();
+ //qDebug() << sequences;
+
+ if (sequences.count() < 10
+ || sequences.last().last == maxInput
+ || needsCharFunction) {
+ foreach (TransitionSequence seq, sequences) {
+ const bool embedFinalState = isSingleReferencedFinalState(seq.transition);
+
+ QString brace;
+ if (embedFinalState)
+ brace = " {";
+
+ if (!seq.testFunction.isEmpty()) {
+ body << "if (" << seq.testFunction << ")" << brace;
+ } else if (seq.first == seq.last) {
+ body << "if (ch.unicode() == " << seq.first << ")" << brace;
+ } else {
+ if (seq.last < maxInput)
+ body << "if (ch.unicode() >= " << seq.first
+ << " && ch.unicode() <= " << seq.last << ")" << brace;
+ else
+ body << "if (ch.unicode() >= " << seq.first << ")" << brace;
+ }
+ body.indent();
+ if (embedFinalState) {
+ body << "token = " << dfa.at(seq.transition).symbol << ";";
+ body << "goto found;";
+
+ body.outdent();
+ body << "}";
+ } else {
+ body << "goto state_" << seq.transition << ";";
+ body.outdent();
+ }
+ }
+ } else {
+ QList<InputType> keys = transitions.keys();
+ qSort(keys);
+
+ body << "switch (ch.unicode()) {";
+ body.indent();
+ for (int k = 0; k < keys.count(); ++k) {
+ const InputType key = keys.at(k);
+ const int trans = transitions.value(key);
+
+ QString keyStr;
+ if (key == '\\')
+ keyStr = QString("\'\\\\\'");
+ else if (key >= 48 && key < 127)
+ keyStr = QString('\'') + QChar::fromLatin1(char(key)) + QChar('\'');
+ else
+ keyStr = QString::number(key);
+
+ if (k < keys.count() - 1
+ && transitions.value(keys.at(k + 1)) == trans) {
+ body << "case " << keyStr << ":";
+ } else {
+ if (isSingleReferencedFinalState(trans)) {
+ body << "case " << keyStr << ": token = " << dfa.at(trans).symbol << "; goto found;";
+ } else {
+ body << "case " << keyStr << ": goto state_" << trans << ";";
+ }
+ }
+ }
+ body.outdent();
+ body << "}";
+ }
+}
+
+QString Generator::generate()
+{
+ Class klass(cfg.className);
+
+ klass.addMember(Class::PublicMember, "QString input");
+ klass.addMember(Class::PublicMember, "int pos");
+ klass.addMember(Class::PublicMember, "int lexemStart");
+ klass.addMember(Class::PublicMember, "int lexemLength");
+
+ {
+ CodeBlock body;
+ body << "input = inp;";
+ body << "pos = 0;";
+ body << "lexemStart = 0;";
+ body << "lexemLength = 0;";
+ klass.addConstructor(Class::PublicMember, body, "const QString &inp");
+ }
+
+ {
+ Function next("QChar", "next()");
+ next.setInline(true);
+ if (cfg.caseSensitivity == Qt::CaseSensitive)
+ next.addBody("return (pos < input.length()) ? input.at(pos++) : QChar();");
+ else
+ next.addBody("return (pos < input.length()) ? input.at(pos++).toLower() : QChar();");
+ klass.addMember(Class::PublicMember, next);
+ }
+
+ /*
+ {
+ Function lexem("QString", "lexem()");
+ lexem.setConst(true);
+ lexem.setInline(true);
+ lexem.addBody("return input.mid(lexemStart, lexemLength);");
+ klass.addMember(Class::PublicMember, lexem);
+ }
+ */
+
+ for (int i = 0; i < dfa.count(); ++i)
+ if (dfa.at(i).symbol.endsWith(QLatin1String("()"))) {
+ Function handlerFunc("int", dfa.at(i).symbol);
+ klass.addMember(Class::PublicMember, handlerFunc);
+ }
+
+ Function lexFunc;
+ lexFunc.setReturnType("int");
+ lexFunc.setName("lex()");
+
+ CodeBlock body;
+ body << "lexemStart = pos;";
+ body << "lexemLength = 0;";
+ body << "int lastAcceptingPos = -1;";
+ body << "int token = -1;";
+ body << "QChar ch;";
+ body.addNewLine();
+
+ backReferenceMap.clear();
+ foreach (State s, dfa)
+ foreach (int state, s.transitions)
+ backReferenceMap[state]++;
+
+ bool haveSingleReferencedFinalState = false;
+
+ for (int i = 0; i < dfa.count(); ++i) {
+ if (isSingleReferencedFinalState(i)) {
+ haveSingleReferencedFinalState = true;
+ continue;
+ }
+
+ if (i > 0)
+ body << "state_" << i << ":";
+ else
+ body << "// initial state";
+
+ body.indent();
+
+ if (!dfa.at(i).symbol.isEmpty()) {
+ body << "lastAcceptingPos = pos;";
+ body << "token = " << dfa.at(i).symbol << ";";
+ }
+
+ body.outdent();
+
+ body.indent();
+
+ if (!dfa.at(i).transitions.isEmpty()) {
+ body << "ch = next();";
+ generateTransitions(body, dfa.at(i).transitions);
+ }
+
+ body << "goto out;";
+
+ body.outdent();
+ }
+
+ if (haveSingleReferencedFinalState) {
+ body << "found:";
+ body << "lastAcceptingPos = pos;";
+ body.addNewLine();
+ }
+
+ body << "out:";
+ body << "if (lastAcceptingPos != -1) {";
+ body.indent();
+ body << "lexemLength = lastAcceptingPos - lexemStart;";
+ body << "pos = lastAcceptingPos;";
+ body.outdent();
+ body << "}";
+ body << "return token;";
+
+ lexFunc.addBody(body);
+
+ klass.addMember(Class::PublicMember, lexFunc);
+
+ QString header;
+ QFile headerFile(headerFileName);
+ if (!headerFileName.isEmpty()
+ && headerFile.exists()
+ && headerFile.open(QIODevice::ReadOnly)) {
+ header = QString::fromUtf8(headerFile.readAll());
+ }
+
+ header += QLatin1String("// auto generated. DO NOT EDIT.\n");
+
+ return header + klass.declaration() + klass.definition();
+}
+
diff --git a/util/lexgen/generator.h b/util/lexgen/generator.h
new file mode 100644
index 0000000..047378b
--- /dev/null
+++ b/util/lexgen/generator.h
@@ -0,0 +1,221 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#ifndef GENERATOR_H
+#define GENERATOR_H
+
+#include <QTextStream>
+#include <QStringList>
+
+#include "nfa.h"
+
+class LineStream
+{
+private:
+ struct SharedStream
+ {
+ int ref;
+ QTextStream *stream;
+ };
+
+public:
+ LineStream(QTextStream *textStream)
+ {
+ shared = new SharedStream;
+ shared->ref = 1;
+ shared->stream = textStream;
+ }
+ LineStream(const LineStream &other)
+ {
+ shared = other.shared;
+ shared->ref++;
+ }
+ LineStream &operator=(const LineStream &other)
+ {
+ if (this == &other)
+ return *this;
+ LineStream copy(other); // keep refcount up
+ qSwap(*shared, *other.shared);
+ return *this;
+ }
+ ~LineStream()
+ {
+ if (!--shared->ref) {
+ (*shared->stream) << endl;
+ delete shared;
+ }
+ }
+
+ template <typename T>
+ LineStream &operator<<(const T &value)
+ { (*shared->stream) << value; return *this; }
+
+ SharedStream *shared;
+};
+
+class CodeBlock
+{
+public:
+ inline CodeBlock() { stream.setString(&output, QIODevice::WriteOnly); }
+
+ inline void indent() { indentStr += QLatin1String(" "); }
+ inline void outdent() { indentStr.remove(0, 4); }
+
+ template <typename T>
+ LineStream operator<<(const T &value)
+ { stream << indentStr; stream << value; return LineStream(&stream); }
+
+ inline void addNewLine() { stream << endl; }
+
+ inline QString toString() const { stream.flush(); return output; }
+
+private:
+ QString output;
+ mutable QTextStream stream;
+ QString indentStr;
+};
+
+class Function
+{
+public:
+ inline Function(const QString &returnType, const QString &name)
+ : rtype(returnType), fname(name), iline(false), cnst(false) {}
+ inline Function() : iline(false), cnst(false) {}
+
+ inline void setName(const QString &name) { fname = name; }
+ inline QString name() const { return fname; }
+
+ inline void setInline(bool i) { iline = i; }
+ inline bool isInline() const { return iline; }
+
+ inline void setReturnType(const QString &type) { rtype = type; }
+ inline QString returnType() const { return rtype; }
+
+ inline void addBody(const QString &_body) { body += _body; }
+ inline void addBody(const CodeBlock &block) { body += block.toString(); }
+ inline bool hasBody() const { return !body.isEmpty(); }
+
+ inline void setConst(bool konst) { cnst = konst; }
+ inline bool isConst() const { return cnst; }
+
+ void printDeclaration(CodeBlock &block, const QString &funcNamePrefix = QString()) const;
+ QString definition() const;
+
+private:
+ QString signature(const QString &funcNamePrefix = QString()) const;
+
+ QString rtype;
+ QString fname;
+ QString body;
+ bool iline;
+ bool cnst;
+};
+
+class Class
+{
+public:
+ enum Access { PublicMember, ProtectedMember, PrivateMember };
+
+ inline Class(const QString &name) : cname(name) {}
+
+ inline void setName(const QString &name) { cname = name; }
+ inline QString name() const { return cname; }
+
+ inline void addMember(Access access, const QString &name)
+ { sections[access].variables.append(name); }
+ inline void addMember(Access access, const Function &func)
+ { sections[access].functions.append(func); }
+
+ void addConstructor(Access access, const QString &body, const QString &args = QString());
+ inline void addConstructor(Access access, const CodeBlock &body, const QString &args = QString())
+ { addConstructor(access, body.toString(), args); }
+
+ QString declaration() const;
+ QString definition() const;
+
+private:
+ QString cname;
+ struct Section
+ {
+ QVector<Function> functions;
+ QStringList variables;
+ QVector<Function> constructors;
+
+ inline bool isEmpty() const
+ { return functions.isEmpty() && variables.isEmpty() && constructors.isEmpty(); }
+
+ void printDeclaration(const Class *klass, CodeBlock &block) const;
+ QString definition(const Class *klass) const;
+ };
+
+ Section sections[3];
+};
+
+class Generator
+{
+public:
+ Generator(const DFA &dfa, const Config &config);
+
+ QString generate();
+
+private:
+ void generateTransitions(CodeBlock &body, const TransitionMap &transitions);
+ bool isSingleReferencedFinalState(int i) const;
+
+ DFA dfa;
+ Config cfg;
+ InputType minInput;
+ InputType maxInput;
+ QHash<int, int> backReferenceMap;
+ QString headerFileName;
+public:
+ struct TransitionSequence
+ {
+ inline TransitionSequence() : first(-1), last(-1), transition(-1) {}
+ InputType first;
+ InputType last;
+ int transition;
+ QString testFunction;
+ };
+private:
+ QVector<TransitionSequence> charFunctionRanges;
+};
+
+#endif
diff --git a/util/lexgen/global.h b/util/lexgen/global.h
new file mode 100644
index 0000000..01f915d
--- /dev/null
+++ b/util/lexgen/global.h
@@ -0,0 +1,113 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef GLOBAL_H
+#define GLOBAL_H
+
+#include <QHash>
+#include <QDataStream>
+#include <QSet>
+
+#include "configfile.h"
+
+#if 1
+typedef int InputType;
+
+enum SpecialInputType {
+ DigitInput,
+ SpaceInput,
+ Letter
+};
+
+#else
+
+enum SpecialInputType {
+ NoSpecialInput = 0,
+ DigitInput,
+ SpaceInput,
+ LetterOrNumberInput
+};
+
+struct InputType
+{
+ inline InputType() : val(0), specialInput(NoSpecialInput) {}
+ inline InputType(const int &val) : val(val), specialInput(NoSpecialInput) {}
+
+ inline operator int() const { return val; }
+
+ inline bool operator==(const InputType &other) const
+ { return val == other.val; }
+ inline bool operator!=(const InputType &other) const
+ { return val != other.val; }
+
+ int val;
+ SpecialInputType specialInput;
+};
+
+inline int qHash(const InputType &t) { return qHash(t.val); }
+
+inline QDataStream &operator<<(QDataStream &stream, const InputType &i)
+{
+ return stream << i;
+}
+
+inline QDataStream &operator>>(QDataStream &stream, InputType &i)
+{
+ return stream >> i;
+}
+
+#endif
+
+const InputType Epsilon = -1;
+
+struct Config
+{
+ inline Config() : caseSensitivity(Qt::CaseSensitive), debug(false), cache(false) {}
+ QSet<InputType> maxInputSet;
+ Qt::CaseSensitivity caseSensitivity;
+ QString className;
+ bool debug;
+ bool cache;
+ QString ruleFile;
+ ConfigFile::SectionMap configSections;
+};
+
+#endif // GLOBAL_H
diff --git a/util/lexgen/lexgen.lexgen b/util/lexgen/lexgen.lexgen
new file mode 100644
index 0000000..33efb8b
--- /dev/null
+++ b/util/lexgen/lexgen.lexgen
@@ -0,0 +1,24 @@
+[Options]
+case-sensitive
+classname = RegExpTokenizer
+
+[Code Generator Options]
+TokenPrefix = RE2NFA::
+
+[Macros]
+Escape = \\.{1}
+
+[Tokens]
+TOK_QUOTED_STRING = \"[^\"]*\"
+TOK_STRING = [^\{\}\(\)\,\*\|\?\.\+\[]|{Escape}
+TOK_SEQUENCE = \[([^\]]|(\\\]))*\]
+TOK_LBRACE = \{
+TOK_RBRACE = \}
+TOK_LPAREN = \(
+TOK_RPAREN = \)
+TOK_COMMA = \,
+TOK_STAR = \*
+TOK_OR = \|
+TOK_QUESTION = \?
+TOK_DOT = \.
+TOK_PLUS = \+
diff --git a/util/lexgen/lexgen.pri b/util/lexgen/lexgen.pri
new file mode 100644
index 0000000..b36e00e
--- /dev/null
+++ b/util/lexgen/lexgen.pri
@@ -0,0 +1,3 @@
+VPATH += $$PWD
+SOURCES += nfa.cpp configfile.cpp re2nfa.cpp
+INCLUDEPATH += $$PWD
diff --git a/util/lexgen/lexgen.pro b/util/lexgen/lexgen.pro
new file mode 100644
index 0000000..89ac84d
--- /dev/null
+++ b/util/lexgen/lexgen.pro
@@ -0,0 +1,6 @@
+TEMPLATE = app
+TARGET = lexgen
+include(lexgen.pri)
+SOURCES += main.cpp \
+ generator.cpp
+QT = core
diff --git a/util/lexgen/main.cpp b/util/lexgen/main.cpp
new file mode 100644
index 0000000..b7b529a
--- /dev/null
+++ b/util/lexgen/main.cpp
@@ -0,0 +1,323 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "nfa.h"
+#include "re2nfa.h"
+#include "configfile.h"
+#include "generator.h"
+
+#include <QFile>
+#include <QCoreApplication>
+#include <QFileInfo>
+#include <QDateTime>
+
+struct Symbol
+{
+ QString token;
+ QString lexem;
+};
+
+static QList<Symbol> tokenize(const DFA &dfa, const QString &input, Config *cfg, bool *ok = 0)
+{
+ QList<Symbol> symbols;
+ Symbol lastSymbol;
+ int state = 0;
+ int lastAcceptingState = -1;
+ QString lastAcceptingLexem;
+ int lastAcceptingPos = -1;
+ for (int i = 0; i < input.length(); ++i) {
+ QChar ch = input.at(i);
+ QChar chForInput = ch;
+ if (cfg->caseSensitivity == Qt::CaseInsensitive)
+ chForInput = chForInput.toLower();
+ int next = dfa.at(state).transitions.value(chForInput.unicode());
+ if (cfg->debug)
+ qDebug() << "input" << input.at(i) << "leads to state" << next;
+ if (next) {
+ lastSymbol.lexem.append(input.at(i));
+ lastSymbol.token = dfa.at(next).symbol;
+ if (!lastSymbol.token.isEmpty()) {
+ lastAcceptingState = next;
+ lastAcceptingLexem = lastSymbol.lexem;
+ lastAcceptingPos = i;
+ }
+ state = next;
+ } else {
+ if (lastAcceptingState != -1) {
+ if (cfg->debug)
+ qDebug() << "adding" << dfa.at(lastAcceptingState).symbol << "and backtracking to" << lastAcceptingPos;
+ Symbol s;
+ s.token = dfa.at(lastAcceptingState).symbol;
+ s.lexem = lastAcceptingLexem;
+ symbols << s;
+ lastSymbol = Symbol();
+ state = 0;
+ i = lastAcceptingPos;
+ lastAcceptingPos = -1;
+ lastAcceptingState = -1;
+ continue;
+ }
+ if (state == 0 || lastSymbol.token.isEmpty()) {
+ if (cfg->debug)
+ qDebug() << "invalid input";
+ if (ok)
+ *ok = false;
+ return symbols;
+ }
+ if (cfg->debug)
+ qDebug() << "appending symbol with token" << lastSymbol.token;
+ symbols << lastSymbol;
+ lastSymbol = Symbol();
+ state = 0;
+ lastAcceptingState = -1;
+ --i;
+ }
+ }
+ if (!lastSymbol.token.isEmpty()) {
+ if (cfg->debug)
+ qDebug() << "appending (last) symbol with token" << lastSymbol.token;
+ symbols << lastSymbol;
+ } else if (lastAcceptingState != -1) {
+ if (cfg->debug)
+ qDebug() << "appending last accepting state with token" << dfa.at(lastAcceptingState).symbol;
+ Symbol s;
+ s.lexem = lastAcceptingLexem;
+ s.token = dfa.at(lastAcceptingState).symbol;
+ symbols << s;
+ }
+ if (ok)
+ *ok = true;
+ return symbols;
+}
+
+static QSet<InputType> determineMaxInputSet(const ConfigFile::Section &section)
+{
+ QSet<InputType> set;
+
+ QString inputTypeName;
+
+ foreach (const ConfigFile::Entry &entry, section)
+ if (entry.key == QLatin1String("InputType")) {
+ if (!inputTypeName.isEmpty()) {
+ qWarning("Error: InputType field specified multiple times in config file");
+ return QSet<InputType>();
+ }
+ inputTypeName = entry.value;
+ }
+
+ if (inputTypeName.isEmpty())
+ inputTypeName = "quint8";
+
+ if (inputTypeName == "quint8") {
+ for (int i = 1; i < 256; ++i)
+ set.insert(i);
+ } /* else if ### */
+ else {
+ qWarning("Error: Unknown input type '%s'", qPrintable(inputTypeName));
+ return QSet<InputType>();
+ }
+
+ return set;
+}
+
+static bool loadConfig(const QString &ruleFile, Config *cfg)
+{
+ ConfigFile::SectionMap sections = ConfigFile::parse(ruleFile);
+ if (sections.isEmpty()) {
+ qWarning("Error parsing %s", qPrintable(ruleFile));
+ return false;
+ }
+
+ QSet<InputType> maxInputSet = determineMaxInputSet(sections.value("Options"));
+ if (maxInputSet.isEmpty())
+ return false;
+
+ Qt::CaseSensitivity cs = Qt::CaseInsensitive;
+ if (sections.value("Options").contains("case-sensitive"))
+ cs = Qt::CaseSensitive;
+
+ cfg->configSections = sections;
+ cfg->caseSensitivity = cs;
+ cfg->className = sections.value("Options").value("classname", "Scanner");
+ cfg->maxInputSet = maxInputSet;
+ cfg->ruleFile = ruleFile;
+ return true;
+}
+
+static DFA generateMachine(const Config &cfg)
+{
+ if (cfg.cache) {
+ QFileInfo ruleInfo(cfg.ruleFile);
+ QFileInfo cacheInfo(ruleInfo.baseName() + ".dfa");
+ if (cacheInfo.exists()
+ && cacheInfo.lastModified() > ruleInfo.lastModified()) {
+ QFile f(cacheInfo.absoluteFilePath());
+ f.open(QIODevice::ReadOnly);
+ QDataStream stream(&f);
+ DFA machine;
+ stream >> machine;
+ return machine;
+ }
+ }
+
+ QMap<QString, NFA> macros;
+ foreach (ConfigFile::Entry e, cfg.configSections.value("Macros")) {
+ int errCol = 0;
+ if (cfg.debug)
+ qDebug() << "parsing" << e.value;
+ NFA nfa = RE2NFA(macros, cfg.maxInputSet, cfg.caseSensitivity).parse(e.value, &errCol);
+ if (nfa.isEmpty()) {
+ qWarning("Parse error in line %d column %d", e.lineNumber, errCol);
+ return DFA();
+ }
+ macros.insert(e.key, nfa);
+ }
+
+ if (!cfg.configSections.contains("Tokens")) {
+ qWarning("Rule file does not contain a [Tokens] section!");
+ return DFA();
+ }
+
+ QVector<NFA> tokens;
+
+ foreach (ConfigFile::Entry e, cfg.configSections.value("Tokens")) {
+ int errCol = 0;
+ if (cfg.debug)
+ qDebug() << "parsing" << e.value;
+ NFA tok = RE2NFA(macros, cfg.maxInputSet, cfg.caseSensitivity).parse(e.value, &errCol);
+ if (tok.isEmpty()) {
+ qWarning("Parse error in line %d column %d while parsing token %s", e.lineNumber, errCol, e.key.toLocal8Bit().constData());
+ return DFA();
+ }
+ tok.setTerminationSymbol(e.key);
+ tokens.append(tok);
+ }
+
+ NFA giganticStateMachine;
+ foreach (NFA nfa, tokens)
+ if (giganticStateMachine.isEmpty())
+ giganticStateMachine = nfa;
+ else
+ giganticStateMachine = NFA::createAlternatingNFA(giganticStateMachine, nfa);
+
+ DFA result = giganticStateMachine.toDFA().minimize();
+ if (cfg.cache) {
+ QFileInfo ruleInfo(cfg.ruleFile);
+ QFileInfo cacheInfo(ruleInfo.baseName() + ".dfa");
+ QFile f(cacheInfo.absoluteFilePath());
+ f.open(QIODevice::WriteOnly | QIODevice::Truncate);
+ QDataStream stream(&f);
+ stream << result;
+ }
+ return result;
+}
+
+#if !defined(AUTOTEST)
+int main(int argc, char **argv)
+{
+ QCoreApplication app(argc, argv);
+ QString ruleFile;
+ Config cfg;
+
+ const QStringList arguments = app.arguments().mid(1);
+ cfg.debug = arguments.contains("-debug");
+ const bool testRules = arguments.contains("-test");
+ cfg.cache = arguments.contains("-cache");
+
+ foreach (const QString &arg, arguments)
+ if (!arg.startsWith(QLatin1Char('-'))) {
+ ruleFile = arg;
+ break;
+ }
+
+ if (ruleFile.isEmpty()) {
+ qWarning("usage: lexgen [-test rulefile");
+ qWarning(" ");
+ qWarning(" the -test option will cause lexgen to interpret standard input");
+ qWarning(" according to the specified rules and print out pairs of token and");
+ qWarning(" lexical element");
+ return 1;
+ }
+
+ if (!loadConfig(ruleFile, &cfg))
+ return 1;
+
+ DFA machine = generateMachine(cfg);
+ if (machine.isEmpty())
+ return 1;
+
+ if (testRules) {
+ qWarning("Testing:");
+ QString input = QTextStream(stdin).readAll();
+ /*
+ qDebug() << "NFA has" << machine.stateCount() << "states";
+ qDebug() << "Converting to DFA... (this may take a while)";
+ DFA dfa = machine.toDFA();
+ qDebug() << "DFA has" << dfa.count() << "states";
+ qDebug() << "Minimizing...";
+ dfa = dfa.minimize();
+ qDebug() << "Minimized DFA has" << dfa.count() << "states";
+ */
+ DFA dfa = machine;
+ if (cfg.debug)
+ qDebug() << "tokenizing" << input;
+ bool ok = false;
+ QList<Symbol> symbols = tokenize(dfa, input, &cfg, &ok);
+ if (symbols.isEmpty()) {
+ qWarning("No tokens produced!");
+ } else {
+ foreach (Symbol s, symbols)
+ qDebug() << s.token << ":" << s.lexem;
+ }
+ if (ok)
+ qDebug() << symbols.count() << "tokens produced.";
+ else
+ qDebug() << "Error while tokenizing!";
+ } else {
+ Generator gen(machine, cfg);
+ QTextStream(stdout)
+ << gen.generate();
+ }
+
+ return 0;
+}
+#endif
+
diff --git a/util/lexgen/nfa.cpp b/util/lexgen/nfa.cpp
new file mode 100644
index 0000000..701a5e0
--- /dev/null
+++ b/util/lexgen/nfa.cpp
@@ -0,0 +1,508 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+#include "nfa.h"
+#include <QSet>
+#include <limits.h>
+
+NFA NFA::createSingleInputNFA(InputType input)
+{
+ NFA result;
+ result.initialize(2);
+ result.addTransition(result.initialState, input, result.finalState);
+ return result;
+}
+
+NFA NFA::createSymbolNFA(const QString &symbol)
+{
+ NFA result = NFA::createSingleInputNFA(Epsilon);
+ result.states[result.finalState].symbol = symbol;
+ return result;
+}
+
+void NFA::initialize(int size)
+{
+ states.resize(size);
+ states.fill(State());
+ initialState = 0;
+ finalState = size - 1;
+}
+
+void NFA::addTransition(int from, InputType input, int to)
+{
+ assertValidState(from);
+ assertValidState(to);
+
+ states[from].transitions.insertMulti(input, to);
+}
+
+void NFA::copyFrom(const NFA &other, int baseState)
+{
+ assertValidState(baseState);
+ assertValidState(baseState + other.states.count() - 1);
+
+ for (int i = 0; i < other.states.count(); ++i) {
+ State s = other.states.at(i);
+
+ for (TransitionMap::Iterator it = s.transitions.begin(),
+ end = s.transitions.end(); it != end; ++it)
+ *it += baseState;
+
+ states[baseState + i] = s;
+ }
+}
+
+void NFA::initializeFromPair(const NFA &a, const NFA &b,
+ int *initialA, int *finalA,
+ int *initialB, int *finalB)
+{
+ initialize(a.states.count() + b.states.count() + 2);
+
+ int baseIdxA = 1;
+ int baseIdxB = 1 + a.states.count();
+
+ *initialA = a.initialState + baseIdxA;
+ *finalA = a.finalState + baseIdxA;
+
+ *initialB = b.initialState + baseIdxB;
+ *finalB = b.finalState + baseIdxB;
+
+ copyFrom(a, baseIdxA);
+ copyFrom(b, baseIdxB);
+}
+
+NFA NFA::createAlternatingNFA(const NFA &a, const NFA &b)
+{
+ NFA result;
+
+ int newInitialA, newFinalA,
+ newInitialB, newFinalB;
+
+ result.initializeFromPair(a, b, &newInitialA, &newFinalA,
+ &newInitialB, &newFinalB);
+
+ result.addTransition(result.initialState, Epsilon, newInitialA);
+ result.addTransition(result.initialState, Epsilon, newInitialB);
+
+ result.addTransition(newFinalA, Epsilon, result.finalState);
+ result.addTransition(newFinalB, Epsilon, result.finalState);
+
+ return result;
+}
+
+NFA NFA::createConcatenatingNFA(const NFA &a, const NFA &b)
+{
+ NFA result;
+
+ int initialA, finalA,
+ initialB, finalB;
+
+ result.initializeFromPair(a, b, &initialA, &finalA, &initialB, &finalB);
+
+ result.addTransition(result.initialState, Epsilon, initialA);
+ result.addTransition(finalA, Epsilon, initialB);
+ result.addTransition(finalB, Epsilon, result.finalState);
+ return result;
+}
+
+NFA NFA::createOptionalNFA(const NFA &a)
+{
+ NFA result;
+
+ result.initialize(a.states.count() + 2);
+
+ int baseIdxA = 1;
+ int initialA = a.initialState + baseIdxA;
+ int finalA = a.finalState + baseIdxA;
+
+ result.copyFrom(a, baseIdxA);
+
+ result.addTransition(result.initialState, Epsilon, initialA);
+ result.addTransition(result.initialState, Epsilon, result.finalState);
+
+ result.addTransition(finalA, Epsilon, initialA);
+ result.addTransition(finalA, Epsilon, result.finalState);
+
+ return result;
+}
+
+NFA NFA::createStringNFA(const QByteArray &str)
+{
+ NFA result;
+ foreach (char c, str) {
+ NFA ch = NFA::createSingleInputNFA(c);
+ if (result.isEmpty())
+ result = ch;
+ else
+ result = NFA::createConcatenatingNFA(result, ch);
+ }
+ return result;
+}
+
+NFA NFA::createSetNFA(const QSet<InputType> &set)
+{
+ NFA result;
+ result.initialize(set.count() + 2);
+
+ int state = 1;
+ for (QSet<InputType>::ConstIterator it = set.constBegin(), end = set.constEnd();
+ it != end; ++it, ++state) {
+ result.addTransition(result.initialState, Epsilon, state);
+ result.addTransition(state, *it, result.finalState);
+ }
+
+ /*
+ foreach (InputType input, set) {
+ NFA ch = NFA::createSingleInputNFA(input);
+ if (result.isEmpty())
+ result = ch;
+ else
+ result = NFA::createAlternatingNFA(result, ch);
+ }
+ */
+ return result;
+}
+
+NFA NFA::createZeroOrOneNFA(const NFA &a)
+{
+ NFA epsilonNFA = createSingleInputNFA(Epsilon);
+ return NFA::createAlternatingNFA(a, epsilonNFA);
+}
+
+NFA NFA::applyQuantity(const NFA &a, int minOccurrences, int maxOccurrences)
+{
+ NFA result = a;
+ NFA epsilonNFA = createSingleInputNFA(Epsilon);
+
+ if (minOccurrences == 0) {
+ result = NFA::createAlternatingNFA(result, epsilonNFA);
+ } else {
+ minOccurrences--;
+ }
+ maxOccurrences--;
+
+ for (int i = 0; i < minOccurrences; ++i)
+ result = NFA::createConcatenatingNFA(result, a);
+
+ for (int i = minOccurrences; i < maxOccurrences; ++i)
+ result = NFA::createConcatenatingNFA(result, NFA::createAlternatingNFA(a, epsilonNFA));
+
+ return result;
+}
+
+void NFA::debug()
+{
+ qDebug() << "NFA has" << states.count() << "states";
+ qDebug() << "initial state is" << initialState;
+ qDebug() << "final state is" << finalState;
+
+ for (int i = 0; i < states.count(); ++i) {
+ const State &s = states.at(i);
+ for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
+ end = s.transitions.constEnd(); it != end; ++it)
+ qDebug() << "transition from state" << i << "to" << it.value() << "through"
+ << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
+ if (!s.symbol.isEmpty())
+ qDebug() << "State" << i << "leads to symbol" << s.symbol;
+ }
+}
+
+// helper
+typedef QSet<int> DFAState;
+
+// that's a bad hash, but it's good enough for us
+// and it allows us to use the nice QHash API :)
+inline uint qHash(const DFAState &state)
+{
+ uint val = 0;
+ foreach (int s, state)
+ val |= qHash(s);
+ return val;
+}
+
+DFA NFA::toDFA() const
+{
+ DFA result;
+ result.reserve(states.count());
+
+ QHash<QString, int> symbolReferenceCounts;
+ {
+ QSet<int> symbolStates;
+ for (int i = 0; i < states.count(); ++i)
+ if (!states.at(i).symbol.isEmpty())
+ symbolStates.insert(i);
+
+ QHash<int, QString> epsilonStates;
+ for (int i = 0; i < states.count(); ++i) {
+ const State &s = states.at(i);
+ for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
+ transition != end; ++transition)
+ if (transition.key() == Epsilon && symbolStates.contains(transition.value()))
+ epsilonStates.insert(i, states.at(transition.value()).symbol);
+ }
+
+ int lastCount;
+ do {
+ lastCount = epsilonStates.count();
+ for (int i = 0; i < states.count(); ++i) {
+ const State &s = states.at(i);
+ for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
+ transition != end; ++transition)
+ if (transition.key() == Epsilon && epsilonStates.contains(transition.value()))
+ epsilonStates.insert(i, epsilonStates.value(transition.value()));
+ }
+
+ } while (lastCount != epsilonStates.count());
+
+ for (int i = 0; i < states.count(); ++i) {
+ const State &s = states.at(i);
+ for (TransitionMap::ConstIterator transition = s.transitions.constBegin(), end = s.transitions.constEnd();
+ transition != end; ++transition) {
+ if (transition.key() == Epsilon)
+ continue;
+ if (symbolStates.contains(transition.value())) {
+ const QString symbol = states.at(transition.value()).symbol;
+ symbolReferenceCounts[symbol]++;
+ } else if (epsilonStates.contains(transition.value())) {
+ const QString symbol = epsilonStates.value(transition.value());
+ symbolReferenceCounts[symbol]++;
+ }
+ }
+ }
+ /*
+ for (QHash<QString, int>::ConstIterator symIt = symbolReferenceCounts.constBegin(), symEnd = symbolReferenceCounts.constEnd();
+ symIt != symEnd; ++symIt)
+ qDebug() << "symbol" << symIt.key() << "is reached" << symIt.value() << "times";
+ */
+ }
+
+
+ QSet<InputType> validInput;
+ foreach (const State &s, states)
+ for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
+ end = s.transitions.constEnd(); it != end; ++it)
+ if (it.key() != Epsilon)
+ validInput.insert(it.key());
+
+ // A DFA state can consist of multiple NFA states.
+ // the dfaStateMap maps from these to the actual
+ // state index within the resulting DFA vector
+ QHash<DFAState, int> dfaStateMap;
+ QStack<DFAState> pendingDFAStates;
+
+ DFAState startState = epsilonClosure(QSet<int>() << initialState);
+
+ result.resize(1);
+ dfaStateMap.insert(startState, 0);
+
+ pendingDFAStates.push(startState);
+
+ while (!pendingDFAStates.isEmpty()) {
+ DFAState state = pendingDFAStates.pop();
+// qDebug() << "processing" << state << "from the stack of pending states";
+
+ foreach (InputType input, validInput) {
+
+ QSet<int> reachableStates;
+
+ foreach (int nfaState, state) {
+ const TransitionMap &transitions = states.at(nfaState).transitions;
+ TransitionMap::ConstIterator it = transitions.find(input);
+ while (it != transitions.constEnd() && it.key() == input) {
+ reachableStates.insert(it.value());
+ ++it;
+ }
+ }
+
+ if (reachableStates.isEmpty())
+ continue;
+
+// qDebug() << "can reach" << reachableStates << "from input" << char(input);
+
+ QSet<int> closure = epsilonClosure(reachableStates);
+
+// qDebug() << "closure is" << closure;
+
+ if (!dfaStateMap.contains(closure)) {
+ int dfaState = result.count();
+ result.append(State());
+
+ QString symbol;
+ int refCount = INT_MAX;
+ foreach (int nfaState, closure)
+ if (!states.at(nfaState).symbol.isEmpty()) {
+// qDebug() << "closure also contains symbol" << states.at(nfaState).symbol;
+ QString candidate = states.at(nfaState).symbol;
+ int candidateRefCount =symbolReferenceCounts.value(candidate, INT_MAX);
+ if (candidateRefCount < refCount) {
+ refCount = candidateRefCount;
+ symbol = candidate;
+ }
+ }
+ if (!symbol.isEmpty())
+ result.last().symbol = symbol;
+
+ dfaStateMap.insert(closure, dfaState);
+
+ Q_ASSERT(!pendingDFAStates.contains(closure));
+ pendingDFAStates.prepend(closure);
+ }
+
+ result[dfaStateMap.value(state)].transitions.insert(input, dfaStateMap.value(closure));
+ }
+ }
+
+ return result;
+}
+
+QSet<int> NFA::epsilonClosure(const QSet<int> &initialClosure) const
+{
+ QSet<int> closure = initialClosure;
+ closure.reserve(closure.count() * 4);
+
+ QStack<int> stateStack;
+ stateStack.resize(closure.count());
+ qCopy(closure.constBegin(), closure.constEnd(), stateStack.begin());
+
+ while (!stateStack.isEmpty()) {
+ int t = stateStack.pop();
+ const TransitionMap &transitions = states.at(t).transitions;
+ TransitionMap::ConstIterator it = transitions.find(Epsilon);
+ while (it != transitions.constEnd() && it.key() == Epsilon) {
+ const int u = it.value();
+ if (!closure.contains(u)) {
+ closure.insert(u);
+ stateStack.push(u);
+ }
+ ++it;
+ }
+ }
+
+ return closure;
+}
+
+void NFA::setTerminationSymbol(const QString &symbol)
+{
+ states[finalState].symbol = symbol;
+}
+
+void DFA::debug() const
+{
+ qDebug() << "DFA has" << count() << "states";
+
+ for (int i = 0; i < count(); ++i) {
+ const State &s = at(i);
+ if (s.transitions.isEmpty()) {
+ qDebug() << "State" << i << "has no transitions";
+ } else {
+ for (TransitionMap::ConstIterator it = s.transitions.constBegin(),
+ end = s.transitions.constEnd(); it != end; ++it)
+ qDebug() << "transition from state" << i << "to" << it.value() << "through"
+ << (it.key() == Epsilon ? QString("Epsilon") : QString(char(it.key())));
+ }
+ if (!s.symbol.isEmpty())
+ qDebug() << "State" << i << "leads to symbol" << s.symbol;
+ }
+
+}
+
+DFA DFA::minimize() const
+{
+ QVector<bool> inequivalentStates(count() * count());
+ inequivalentStates.fill(false);
+
+ for (int i = 0; i < count(); ++i)
+ for (int j = 0; j < i; ++j) {
+ if (i != j && at(i).symbol != at(j).symbol)
+ inequivalentStates[i * count() + j] = true;
+ }
+
+ bool done;
+ do {
+ done = true;
+ for (int i = 0; i < count(); ++i)
+ for (int j = 0; j < count(); ++j) {
+ if (i == j)
+ continue;
+
+ if (inequivalentStates[i * count() + j])
+ continue;
+
+ if (at(i).transitions.keys() != at(j).transitions.keys()) {
+ inequivalentStates[i * count() + j] = true;
+ done = false;
+ continue;
+ }
+
+ foreach (InputType a, at(i).transitions.keys()) {
+ int r = at(i).transitions.value(a, -1);
+ if (r == -1)
+ continue;
+ int s = at(j).transitions.value(a, -1);
+ if (s == -1)
+ continue;
+
+ if (inequivalentStates[r * count() + s]
+ || r == s) {
+ inequivalentStates[i * count() + j] = true;
+ done = false;
+ break;
+ }
+ }
+ }
+ } while (!done);
+
+ QHash<int, int> statesToEliminate;
+ for (int i = 0; i < count(); ++i)
+ for (int j = 0; j < i; ++j)
+ if (!inequivalentStates[i * count() + j]) {
+ statesToEliminate.insertMulti(i, j);
+ }
+
+ /*
+ qDebug() << "states to eliminiate:" << statesToEliminate.count();;
+ qDebug() << "merging" << statesToEliminate;
+ debug();
+ */
+
+ return *this;
+}
+
+
diff --git a/util/lexgen/nfa.h b/util/lexgen/nfa.h
new file mode 100644
index 0000000..1f25071
--- /dev/null
+++ b/util/lexgen/nfa.h
@@ -0,0 +1,127 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef NFA_H
+#define NFA_H
+
+#include <QMap>
+#include <QHash>
+#include <QString>
+#include <QVector>
+#include <QDebug>
+#include <QStack>
+#include <QByteArray>
+
+#include "global.h"
+
+typedef QHash<InputType, int> TransitionMap;
+
+struct State
+{
+ QString symbol;
+ TransitionMap transitions;
+};
+
+inline QDataStream &operator<<(QDataStream &stream, const State &state)
+{
+ return stream << state.symbol << state.transitions;
+}
+
+inline QDataStream &operator>>(QDataStream &stream, State &state)
+{
+ return stream >> state.symbol >> state.transitions;
+}
+
+struct DFA : public QVector<State>
+{
+ void debug() const;
+ DFA minimize() const;
+};
+
+class NFA
+{
+public:
+ static NFA createSingleInputNFA(InputType input);
+ static NFA createSymbolNFA(const QString &symbol);
+ static NFA createAlternatingNFA(const NFA &a, const NFA &b);
+ static NFA createConcatenatingNFA(const NFA &a, const NFA &b);
+ static NFA createOptionalNFA(const NFA &a);
+
+ // convenience
+ static NFA createStringNFA(const QByteArray &str);
+ static NFA createSetNFA(const QSet<InputType> &set);
+ static NFA createZeroOrOneNFA(const NFA &a);
+ static NFA applyQuantity(const NFA &a, int minOccurrences, int maxOccurrences);
+
+ void setTerminationSymbol(const QString &symbol);
+
+ DFA toDFA() const;
+
+ inline bool isEmpty() const { return states.isEmpty(); }
+ inline int stateCount() const { return states.count(); }
+
+ void debug();
+
+private:
+ void initialize(int size);
+ void addTransition(int from, InputType input, int to);
+ void copyFrom(const NFA &other, int baseState);
+
+ void initializeFromPair(const NFA &a, const NFA &b,
+ int *initialA, int *finalA,
+ int *initialB, int *finalB);
+
+ QSet<int> epsilonClosure(const QSet<int> &initialClosure) const;
+
+ inline void assertValidState(int state)
+ { Q_UNUSED(state); Q_ASSERT(state >= 0); Q_ASSERT(state < states.count()); }
+
+#if defined(AUTOTEST)
+public:
+#endif
+ int initialState;
+ int finalState;
+
+ QVector<State> states;
+};
+
+#endif // NFA_H
+
diff --git a/util/lexgen/re2nfa.cpp b/util/lexgen/re2nfa.cpp
new file mode 100644
index 0000000..ed58d50
--- /dev/null
+++ b/util/lexgen/re2nfa.cpp
@@ -0,0 +1,547 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include "re2nfa.h"
+#include "tokenizer.cpp"
+
+RE2NFA::RE2NFA(const QMap<QString, NFA> &macros, const QSet<InputType> &maxInputSet, Qt::CaseSensitivity cs)
+ : macros(macros), index(0), errorColumn(-1), maxInputSet(maxInputSet), caseSensitivity(cs)
+{
+}
+
+NFA RE2NFA::parse(const QString &expression, int *errCol)
+{
+ tokenize(expression);
+
+ if (symbols.isEmpty())
+ return NFA();
+
+ index = 0;
+
+ NFA result = parseExpr();
+ if (result.isEmpty()) {
+ if (errCol)
+ *errCol = errorColumn;
+ }
+ return result;
+}
+
+NFA RE2NFA::parseExpr()
+{
+ NFA value = parseBranch();
+ while (test(TOK_OR)) {
+ NFA rhs = parseBranch();
+ value = NFA::createAlternatingNFA(value, rhs);
+ }
+ return value;
+}
+
+NFA RE2NFA::parseBranch()
+{
+ NFA value = parsePiece();
+ if (!hasNext())
+ return value;
+ NFA next;
+ do {
+ next = parsePiece();
+ if (!next.isEmpty())
+ value = NFA::createConcatenatingNFA(value, next);
+ } while (!next.isEmpty() && hasNext());
+ return value;
+}
+
+NFA RE2NFA::parsePiece()
+{
+ NFA atom = parseAtom();
+ if (atom.isEmpty() || !hasNext())
+ return atom;
+ return parseMaybeQuantifier(atom);
+}
+
+NFA RE2NFA::parseAtom()
+{
+ // ####
+ switch (next()) {
+ case TOK_STRING:
+ return createCharNFA();
+ case TOK_LPAREN: {
+ NFA subExpr = parseExpr();
+ next(TOK_RPAREN);
+ return subExpr;
+ }
+ case TOK_LBRACE: {
+ QString macroName = lexemUntil(TOK_RBRACE);
+ QMap<QString, NFA>::ConstIterator macro = macros.find(macroName);
+ if (macro == macros.end()) {
+ qWarning("Unknown macro '%s' - probably used before defined", qPrintable(macroName));
+ return NFA();
+ }
+ return *macro;
+ }
+ case TOK_LBRACKET: {
+ NFA set = parseSet();
+ next(TOK_RBRACKET);
+ return set;
+ }
+ case TOK_SEQUENCE:
+ return parseSet2();
+ case TOK_DOT:
+ return NFA::createSetNFA(maxInputSet);
+ default:
+ prev();
+ return NFA();
+ }
+}
+
+NFA RE2NFA::parseMaybeQuantifier(const NFA &nfa)
+{
+ // ####
+ switch (next()) {
+ case TOK_STAR:
+ return NFA::createOptionalNFA(nfa);
+ case TOK_QUESTION:
+ return NFA::createZeroOrOneNFA(nfa);
+ case TOK_PLUS:
+ return NFA::createConcatenatingNFA(nfa, NFA::createOptionalNFA(nfa));
+ case TOK_LBRACE: {
+ const int rewind = index - 1;
+
+ QString lexemBeforeComma;
+ QString lexemAfterComma;
+ bool seenComma = false;
+ forever {
+ if (test(TOK_COMMA)) {
+ if (seenComma) {
+ errorColumn = symbol().column;
+ return NFA();
+ }
+ seenComma = true;
+ } else if (test(TOK_RBRACE)) {
+ break;
+ } else {
+ next(TOK_STRING);
+ if (seenComma)
+ lexemAfterComma += symbol().lexem;
+ else
+ lexemBeforeComma += symbol().lexem;
+ }
+ }
+ bool isNumber = false;
+ int min = lexemBeforeComma.toInt(&isNumber);
+ if (!isNumber) {
+ index = rewind;
+ return nfa;
+ }
+ int max = min;
+ if (seenComma) {
+ max = lexemAfterComma.toInt(&isNumber);
+ if (!isNumber) {
+ errorColumn = symbol().column;
+ return NFA();
+ }
+ }
+ return NFA::applyQuantity(nfa, min, max);
+ }
+ default:
+ prev();
+ return nfa;
+ }
+}
+
+NFA RE2NFA::parseSet()
+{
+ QSet<InputType> set;
+ bool negate = false;
+
+ next(TOK_STRING);
+
+ do {
+ Q_ASSERT(symbol().lexem.length() == 1);
+ // ###
+ QChar ch = symbol().lexem.at(0);
+ if (set.isEmpty() && ch == QLatin1Char('^')) {
+ negate = true;
+ continue;
+ }
+
+ // look ahead for ranges like a-z
+ bool rangeFound = false;
+ if (test(TOK_STRING)) {
+ if (symbol().lexem.length() == 1
+ && symbol().lexem.at(0) == QLatin1Char('-')) {
+ next(TOK_STRING);
+ Q_ASSERT(symbol().lexem.length() == 1);
+ QChar last = symbol().lexem.at(0);
+
+ if (ch.unicode() > last.unicode())
+ qSwap(ch, last);
+
+ for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
+ if (caseSensitivity == Qt::CaseInsensitive) {
+ set.insert(QChar(i).toLower().unicode());
+ } else {
+ set.insert(i);
+ }
+ }
+
+ rangeFound = true;
+ } else {
+ prev();
+ }
+ }
+
+ if (!rangeFound) {
+ if (caseSensitivity == Qt::CaseInsensitive) {
+ set.insert(ch.toLower().unicode());
+ } else {
+ set.insert(ch.unicode());
+ }
+ }
+ } while (test(TOK_STRING));
+
+ if (negate) {
+ QSet<InputType> negatedSet = maxInputSet;
+ negatedSet.subtract(set);
+ set = negatedSet;
+ }
+
+ return NFA::createSetNFA(set);
+}
+
+NFA RE2NFA::parseSet2()
+{
+ QSet<InputType> set;
+ bool negate = false;
+
+ QString str = symbol().lexem;
+ // strip off brackets
+ str.chop(1);
+ str.remove(0, 1);
+
+ int i = 0;
+ while (i < str.length()) {
+ // ###
+ QChar ch = str.at(i++);
+ if (set.isEmpty() && ch == QLatin1Char('^')) {
+ negate = true;
+ continue;
+ }
+
+ // look ahead for ranges like a-z
+ bool rangeFound = false;
+ if (i < str.length() - 1 && str.at(i) == QLatin1Char('-')) {
+ ++i;
+ QChar last = str.at(i++);
+
+ if (ch.unicode() > last.unicode())
+ qSwap(ch, last);
+
+ for (ushort i = ch.unicode(); i <= last.unicode(); ++i) {
+ if (caseSensitivity == Qt::CaseInsensitive) {
+ set.insert(QChar(i).toLower().unicode());
+ } else {
+ set.insert(i);
+ }
+ }
+
+ rangeFound = true;
+ }
+
+ if (!rangeFound) {
+ if (caseSensitivity == Qt::CaseInsensitive) {
+ set.insert(ch.toLower().unicode());
+ } else {
+ set.insert(ch.unicode());
+ }
+ }
+ }
+
+ if (negate) {
+ QSet<InputType> negatedSet = maxInputSet;
+ negatedSet.subtract(set);
+ set = negatedSet;
+ }
+
+ return NFA::createSetNFA(set);
+}
+NFA RE2NFA::createCharNFA()
+{
+ NFA nfa;
+ // ####
+ if (caseSensitivity == Qt::CaseInsensitive) {
+ nfa = NFA::createStringNFA(symbol().lexem.toLower().toLatin1());
+ } else {
+ nfa = NFA::createStringNFA(symbol().lexem.toLatin1());
+ }
+ return nfa;
+}
+
+static inline int skipQuote(const QString &str, int pos)
+{
+ while (pos < str.length()
+ && str.at(pos) != QLatin1Char('"')) {
+ if (str.at(pos) == QLatin1Char('\\')) {
+ ++pos;
+ if (pos >= str.length())
+ break;
+ }
+ ++pos;
+ }
+ if (pos < str.length())
+ ++pos;
+ return pos;
+}
+
+#if 0
+static const char*tokStr(Token t)
+{
+ switch (t) {
+ case TOK_INVALID: return "TOK_INVALID";
+ case TOK_STRING: return "TOK_STRING";
+ case TOK_LBRACE: return "TOK_LBRACE";
+ case TOK_RBRACE: return "TOK_RBRACE";
+ case TOK_LBRACKET: return "TOK_LBRACKET";
+ case TOK_RBRACKET: return "TOK_RBRACKET";
+ case TOK_LPAREN: return "TOK_LPAREN";
+ case TOK_RPAREN: return "TOK_RPAREN";
+ case TOK_COMMA: return "TOK_COMMA";
+ case TOK_STAR: return "TOK_STAR";
+ case TOK_OR: return "TOK_OR";
+ case TOK_QUESTION: return "TOK_QUESTION";
+ case TOK_DOT: return "TOK_DOT";
+ case TOK_PLUS: return "TOK_PLUS";
+ case TOK_SEQUENCE: return "TOK_SEQUENCE";
+ case TOK_QUOTED_STRING: return "TOK_QUOTED_STRING";
+ }
+ return "";
+}
+#endif
+
+void RE2NFA::tokenize(const QString &input)
+{
+ symbols.clear();
+#if 1
+ RegExpTokenizer tokenizer(input);
+ Symbol sym;
+ int tok = tokenizer.lex();
+ while (tok != -1) {
+ Symbol sym;
+ sym.token = static_cast<Token>(tok);
+ sym.lexem = input.mid(tokenizer.lexemStart, tokenizer.lexemLength);
+
+ if (sym.token == TOK_QUOTED_STRING) {
+ sym.lexem.chop(1);
+ sym.lexem.remove(0, 1);
+ sym.token = TOK_STRING;
+ }
+
+ if (sym.token == TOK_STRING || sym.token == TOK_SEQUENCE) {
+ for (int i = 0; i < sym.lexem.length(); ++i) {
+ if (sym.lexem.at(i) == '\\') {
+ if (i >= sym.lexem.length() - 1)
+ break;
+ QChar ch = sym.lexem.at(i + 1);
+ if (ch == QLatin1Char('n')) {
+ ch = '\n';
+ } else if (ch == QLatin1Char('r')) {
+ ch = '\r';
+ } else if (ch == QLatin1Char('t')) {
+ ch = '\t';
+ } else if (ch == QLatin1Char('f')) {
+ ch = '\f';
+ }
+ sym.lexem.replace(i, 2, ch);
+ }
+ }
+ }
+
+ /*
+ if (sym.token == TOK_SEQUENCE) {
+ Symbol s;
+ s.token = TOK_LBRACKET;
+ s.lexem = "[";
+ symbols.append(s);
+
+ for (int i = 1; i < sym.lexem.length() - 1; ++i) {
+ s.token = TOK_STRING;
+ s.lexem = sym.lexem.at(i);
+ symbols.append(s);
+ }
+
+ s.token = TOK_RBRACKET;
+ s.lexem = "]";
+ symbols.append(s);
+
+ tok = tokenizer.lex();
+ continue;
+ }
+ */
+
+ symbols.append(sym);
+ tok = tokenizer.lex();
+ }
+#else
+ int pos = 0;
+ bool insideSet = false;
+ while (pos < input.length()) {
+ QChar ch = input.at(pos);
+
+ Symbol sym;
+ sym.column = pos;
+ sym.token = TOK_INVALID;
+ sym.lexem = QString(ch);
+ switch (ch.toLatin1()) {
+ case '"': {
+ if (insideSet) {
+ sym.token = TOK_STRING;
+ sym.lexem = QString(ch);
+ symbols += sym;
+ ++pos;
+ continue;
+ }
+ if (pos + 1 >= input.length())
+ return;
+ int quoteEnd = skipQuote(input, pos + 1);
+ sym.token = TOK_STRING;
+ sym.lexem = input.mid(pos + 1, quoteEnd - pos - 2);
+ symbols += sym;
+ pos = quoteEnd;
+ continue;
+ }
+ case '{':
+ sym.token = (insideSet ? TOK_STRING : TOK_LBRACE);
+ break;
+ case '}':
+ sym.token = (insideSet ? TOK_STRING : TOK_RBRACE);
+ break;
+ case '[':
+ insideSet = true;
+ sym.token = TOK_LBRACKET;
+ break;
+ case ']':
+ insideSet = false;
+ sym.token = TOK_RBRACKET;
+ break;
+ case '(':
+ sym.token = (insideSet ? TOK_STRING : TOK_LPAREN);
+ break;
+ case ')':
+ sym.token = (insideSet ? TOK_STRING : TOK_RPAREN);
+ break;
+ case ',':
+ sym.token = (insideSet ? TOK_STRING : TOK_COMMA);
+ break;
+ case '*':
+ sym.token = (insideSet ? TOK_STRING : TOK_STAR);
+ break;
+ case '|':
+ sym.token = (insideSet ? TOK_STRING : TOK_OR);
+ break;
+ case '?':
+ sym.token = (insideSet ? TOK_STRING : TOK_QUESTION);
+ break;
+ case '.':
+ sym.token = (insideSet ? TOK_STRING : TOK_DOT);
+ break;
+ case '+':
+ sym.token = (insideSet ? TOK_STRING : TOK_PLUS);
+ break;
+ case '\\':
+ ++pos;
+ if (pos >= input.length())
+ return;
+ ch = input.at(pos);
+ if (ch == QLatin1Char('n')) {
+ ch = '\n';
+ } else if (ch == QLatin1Char('r')) {
+ ch = '\r';
+ } else if (ch == QLatin1Char('t')) {
+ ch = '\t';
+ } else if (ch == QLatin1Char('f')) {
+ ch = '\f';
+ }
+ // fall through
+ default:
+ sym.token = TOK_STRING;
+ sym.lexem = QString(ch);
+ symbols += sym;
+ ++pos;
+ continue;
+ }
+ symbols += sym;
+ ++pos;
+ }
+#endif
+#if 0
+ foreach (Symbol s, symbols) {
+ qDebug() << "Tok" << tokStr(s.token) << "lexem" << s.lexem;
+ }
+#endif
+}
+
+bool RE2NFA::next(Token t)
+{
+ if (hasNext() && next() == t)
+ return true;
+ errorColumn = symbol().column;
+ Q_ASSERT(false);
+ return false;
+}
+
+bool RE2NFA::test(Token t)
+{
+ if (index >= symbols.count())
+ return false;
+ if (symbols.at(index).token == t) {
+ ++index;
+ return true;
+ }
+ return false;
+}
+
+QString RE2NFA::lexemUntil(Token t)
+{
+ QString lexem;
+ while (hasNext() && next() != t)
+ lexem += symbol().lexem;
+ return lexem;
+}
+
diff --git a/util/lexgen/re2nfa.h b/util/lexgen/re2nfa.h
new file mode 100644
index 0000000..fc10bea
--- /dev/null
+++ b/util/lexgen/re2nfa.h
@@ -0,0 +1,116 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#ifndef RE2NFA_H
+#define RE2NFA_H
+
+#include "nfa.h"
+#include <QSet>
+
+class RE2NFA
+{
+public:
+ RE2NFA(const QMap<QString, NFA> &macros, const QSet<InputType> &maxInputSet, Qt::CaseSensitivity cs);
+
+ NFA parse(const QString &expression, int *errorColumn = 0);
+
+private:
+ NFA parseExpr();
+ NFA parseBranch();
+ NFA parsePiece();
+ NFA parseAtom();
+ NFA parseMaybeQuantifier(const NFA &nfa);
+ NFA parseSet();
+ NFA parseSet2();
+
+ NFA createCharNFA();
+
+private:
+ friend class RegExpTokenizer;
+
+ enum Token {
+ TOK_INVALID,
+ TOK_STRING,
+ TOK_LBRACE, // {
+ TOK_RBRACE, // }
+ TOK_LBRACKET, // [
+ TOK_RBRACKET, // ]
+ TOK_LPAREN, // (
+ TOK_RPAREN, // )
+ TOK_COMMA,
+ TOK_STAR,
+ TOK_OR,
+ TOK_QUESTION,
+ TOK_DOT,
+ TOK_PLUS,
+ TOK_SEQUENCE,
+ TOK_QUOTED_STRING
+ };
+
+ struct Symbol
+ {
+ inline Symbol() : token(TOK_INVALID), column(-1) {}
+ inline Symbol(Token t, const QString &l = QString()) : token(t), lexem(l), column(-1) {}
+ Token token;
+ QString lexem;
+ int column;
+ };
+
+ inline bool hasNext() const { return index < symbols.count(); }
+ inline Token next() { return symbols.at(index++).token; }
+ bool next(Token t);
+ bool test(Token t);
+ inline void prev() { index--; }
+ inline const Symbol &symbol() const { return symbols.at(index - 1); }
+ QString lexemUntil(Token t);
+
+ void tokenize(const QString &input);
+
+ QMap<QString, NFA> macros;
+ QVector<Symbol> symbols;
+ int index;
+ int errorColumn;
+ const QSet<InputType> maxInputSet;
+ Qt::CaseSensitivity caseSensitivity;
+};
+
+#endif // RE2NFA_H
+
diff --git a/util/lexgen/test.lexgen b/util/lexgen/test.lexgen
new file mode 100644
index 0000000..fd532fd
--- /dev/null
+++ b/util/lexgen/test.lexgen
@@ -0,0 +1,9 @@
+[Options]
+case-insensitive
+classname = TestScanner
+
+[Tokens]
+TOK_C = [abcd]
+TOK_B = [bc]
+TOK_A = a
+
diff --git a/util/lexgen/tests/testdata/backtrack1/input b/util/lexgen/tests/testdata/backtrack1/input
new file mode 100644
index 0000000..f5099b5
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack1/input
@@ -0,0 +1 @@
+LETX
diff --git a/util/lexgen/tests/testdata/backtrack1/output b/util/lexgen/tests/testdata/backtrack1/output
new file mode 100644
index 0000000..6893deb
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack1/output
@@ -0,0 +1 @@
+TOK_LET|LET
diff --git a/util/lexgen/tests/testdata/backtrack1/rules.lexgen b/util/lexgen/tests/testdata/backtrack1/rules.lexgen
new file mode 100644
index 0000000..ade8a15
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack1/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_LET = LET
+TOK_LETXX = LETXX
diff --git a/util/lexgen/tests/testdata/backtrack2/input b/util/lexgen/tests/testdata/backtrack2/input
new file mode 100644
index 0000000..59ff5b7
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack2/input
@@ -0,0 +1 @@
+LETXTRA
diff --git a/util/lexgen/tests/testdata/backtrack2/output b/util/lexgen/tests/testdata/backtrack2/output
new file mode 100644
index 0000000..348b382
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack2/output
@@ -0,0 +1,2 @@
+TOK_LET|LET
+TOK_XTRA|XTRA
diff --git a/util/lexgen/tests/testdata/backtrack2/rules.lexgen b/util/lexgen/tests/testdata/backtrack2/rules.lexgen
new file mode 100644
index 0000000..6f16986
--- /dev/null
+++ b/util/lexgen/tests/testdata/backtrack2/rules.lexgen
@@ -0,0 +1,4 @@
+[Tokens]
+TOK_LET = LET
+TOK_LETXX = LETXX
+TOK_XTRA = XTRA
diff --git a/util/lexgen/tests/testdata/casesensitivity/input b/util/lexgen/tests/testdata/casesensitivity/input
new file mode 100644
index 0000000..72b7f48
--- /dev/null
+++ b/util/lexgen/tests/testdata/casesensitivity/input
@@ -0,0 +1 @@
+abcdAbcDABCDeFgEFGefgEfghiHIHihI
diff --git a/util/lexgen/tests/testdata/casesensitivity/output b/util/lexgen/tests/testdata/casesensitivity/output
new file mode 100644
index 0000000..3a4e819
--- /dev/null
+++ b/util/lexgen/tests/testdata/casesensitivity/output
@@ -0,0 +1,14 @@
+TOK_AB|ab
+TOK_CD|cd
+TOK_AB|Ab
+TOK_CD|cD
+TOK_AB|AB
+TOK_CD|CD
+TOK_EFG|eFg
+TOK_EFG|EFG
+TOK_EFG|efg
+TOK_EFG|Efg
+TOK_HI|hi
+TOK_HI|HI
+TOK_HI|Hi
+TOK_HI|hI
diff --git a/util/lexgen/tests/testdata/casesensitivity/rules.lexgen b/util/lexgen/tests/testdata/casesensitivity/rules.lexgen
new file mode 100644
index 0000000..3347587
--- /dev/null
+++ b/util/lexgen/tests/testdata/casesensitivity/rules.lexgen
@@ -0,0 +1,7 @@
+[Options]
+case-insensitive
+[Tokens]
+TOK_AB = ab
+TOK_CD = cd
+TOK_EFG = [e-g]{3}
+TOK_HI = [hi]{2}
diff --git a/util/lexgen/tests/testdata/comments/input b/util/lexgen/tests/testdata/comments/input
new file mode 100644
index 0000000..03873e0
--- /dev/null
+++ b/util/lexgen/tests/testdata/comments/input
@@ -0,0 +1 @@
+/* comment with stuff *//*another comment with * stars * inside*/
diff --git a/util/lexgen/tests/testdata/comments/output b/util/lexgen/tests/testdata/comments/output
new file mode 100644
index 0000000..2395ad1
--- /dev/null
+++ b/util/lexgen/tests/testdata/comments/output
@@ -0,0 +1,2 @@
+TOK_COMMENT|/* comment with stuff */
+TOK_COMMENT|/*another comment with * stars * inside*/
diff --git a/util/lexgen/tests/testdata/comments/rules.lexgen b/util/lexgen/tests/testdata/comments/rules.lexgen
new file mode 100644
index 0000000..490c759
--- /dev/null
+++ b/util/lexgen/tests/testdata/comments/rules.lexgen
@@ -0,0 +1,2 @@
+[Tokens]
+TOK_COMMENT = \/\*[^*]*\*+([^/*][^*]*\*+)*\/
diff --git a/util/lexgen/tests/testdata/dot/input b/util/lexgen/tests/testdata/dot/input
new file mode 100644
index 0000000..e5b0ad6
--- /dev/null
+++ b/util/lexgen/tests/testdata/dot/input
@@ -0,0 +1 @@
+afbcxd
diff --git a/util/lexgen/tests/testdata/dot/output b/util/lexgen/tests/testdata/dot/output
new file mode 100644
index 0000000..6a9afd4
--- /dev/null
+++ b/util/lexgen/tests/testdata/dot/output
@@ -0,0 +1,2 @@
+TOK_AB|afb
+TOK_CD|cxd
diff --git a/util/lexgen/tests/testdata/dot/rules.lexgen b/util/lexgen/tests/testdata/dot/rules.lexgen
new file mode 100644
index 0000000..03873a7
--- /dev/null
+++ b/util/lexgen/tests/testdata/dot/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_AB = a.b
+TOK_CD = c.d
diff --git a/util/lexgen/tests/testdata/negation/input b/util/lexgen/tests/testdata/negation/input
new file mode 100644
index 0000000..9447b80
--- /dev/null
+++ b/util/lexgen/tests/testdata/negation/input
@@ -0,0 +1 @@
+aycabd
diff --git a/util/lexgen/tests/testdata/negation/output b/util/lexgen/tests/testdata/negation/output
new file mode 100644
index 0000000..0b73263
--- /dev/null
+++ b/util/lexgen/tests/testdata/negation/output
@@ -0,0 +1,2 @@
+TOK_A|ayc
+TOK_B|abd
diff --git a/util/lexgen/tests/testdata/negation/rules.lexgen b/util/lexgen/tests/testdata/negation/rules.lexgen
new file mode 100644
index 0000000..179810b
--- /dev/null
+++ b/util/lexgen/tests/testdata/negation/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_A = a[^b]c
+TOK_B = abd
diff --git a/util/lexgen/tests/testdata/quoteinset/input b/util/lexgen/tests/testdata/quoteinset/input
new file mode 100644
index 0000000..5a9b680
--- /dev/null
+++ b/util/lexgen/tests/testdata/quoteinset/input
@@ -0,0 +1 @@
+"a
diff --git a/util/lexgen/tests/testdata/quoteinset/output b/util/lexgen/tests/testdata/quoteinset/output
new file mode 100644
index 0000000..7ba8890
--- /dev/null
+++ b/util/lexgen/tests/testdata/quoteinset/output
@@ -0,0 +1 @@
+TOK_QUOTEA|"a
diff --git a/util/lexgen/tests/testdata/quoteinset/rules.lexgen b/util/lexgen/tests/testdata/quoteinset/rules.lexgen
new file mode 100644
index 0000000..9838276
--- /dev/null
+++ b/util/lexgen/tests/testdata/quoteinset/rules.lexgen
@@ -0,0 +1,2 @@
+[Tokens]
+TOK_QUOTEA = ["]a
diff --git a/util/lexgen/tests/testdata/quotes/input b/util/lexgen/tests/testdata/quotes/input
new file mode 100644
index 0000000..ac54450
--- /dev/null
+++ b/util/lexgen/tests/testdata/quotes/input
@@ -0,0 +1 @@
+quotedstring
diff --git a/util/lexgen/tests/testdata/quotes/output b/util/lexgen/tests/testdata/quotes/output
new file mode 100644
index 0000000..c538e32
--- /dev/null
+++ b/util/lexgen/tests/testdata/quotes/output
@@ -0,0 +1 @@
+TOK_STR|quotedstring
diff --git a/util/lexgen/tests/testdata/quotes/rules.lexgen b/util/lexgen/tests/testdata/quotes/rules.lexgen
new file mode 100644
index 0000000..d528cdd
--- /dev/null
+++ b/util/lexgen/tests/testdata/quotes/rules.lexgen
@@ -0,0 +1,2 @@
+[Tokens]
+TOK_STR = "quotedstring"
diff --git a/util/lexgen/tests/testdata/simple/input b/util/lexgen/tests/testdata/simple/input
new file mode 100644
index 0000000..acbe86c
--- /dev/null
+++ b/util/lexgen/tests/testdata/simple/input
@@ -0,0 +1 @@
+abcd
diff --git a/util/lexgen/tests/testdata/simple/output b/util/lexgen/tests/testdata/simple/output
new file mode 100644
index 0000000..a37a58e
--- /dev/null
+++ b/util/lexgen/tests/testdata/simple/output
@@ -0,0 +1,2 @@
+TOK_AB|ab
+TOK_CD|cd
diff --git a/util/lexgen/tests/testdata/simple/rules.lexgen b/util/lexgen/tests/testdata/simple/rules.lexgen
new file mode 100644
index 0000000..5d958c4
--- /dev/null
+++ b/util/lexgen/tests/testdata/simple/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_AB = ab
+TOK_CD = cd
diff --git a/util/lexgen/tests/testdata/subsets1/input b/util/lexgen/tests/testdata/subsets1/input
new file mode 100644
index 0000000..47f66d1
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets1/input
@@ -0,0 +1 @@
+abaf
diff --git a/util/lexgen/tests/testdata/subsets1/output b/util/lexgen/tests/testdata/subsets1/output
new file mode 100644
index 0000000..75dd936
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets1/output
@@ -0,0 +1,2 @@
+TOK_AB|ab
+TOK_AZ|af
diff --git a/util/lexgen/tests/testdata/subsets1/rules.lexgen b/util/lexgen/tests/testdata/subsets1/rules.lexgen
new file mode 100644
index 0000000..94f51a9
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets1/rules.lexgen
@@ -0,0 +1,3 @@
+[Tokens]
+TOK_AB = ab
+TOK_AZ = a[a-z]
diff --git a/util/lexgen/tests/testdata/subsets2/input b/util/lexgen/tests/testdata/subsets2/input
new file mode 100644
index 0000000..4d0dc3a
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets2/input
@@ -0,0 +1 @@
+abd
diff --git a/util/lexgen/tests/testdata/subsets2/output b/util/lexgen/tests/testdata/subsets2/output
new file mode 100644
index 0000000..d5a7bc5
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets2/output
@@ -0,0 +1,3 @@
+TOK_A|a
+TOK_B|b
+TOK_D|d
diff --git a/util/lexgen/tests/testdata/subsets2/rules.lexgen b/util/lexgen/tests/testdata/subsets2/rules.lexgen
new file mode 100644
index 0000000..e0a7629
--- /dev/null
+++ b/util/lexgen/tests/testdata/subsets2/rules.lexgen
@@ -0,0 +1,4 @@
+[Tokens]
+TOK_D = [abcd]
+TOK_B = [bc]
+TOK_A = a
diff --git a/util/lexgen/tests/tests.pro b/util/lexgen/tests/tests.pro
new file mode 100644
index 0000000..eb04439
--- /dev/null
+++ b/util/lexgen/tests/tests.pro
@@ -0,0 +1,6 @@
+CONFIG += qtestlib
+SOURCES += tst_lexgen.cpp
+TARGET = tst_lexgen
+include(../lexgen.pri)
+QT = core
+DEFINES += SRCDIR=\\\"$$PWD\\\"
diff --git a/util/lexgen/tests/tst_lexgen.cpp b/util/lexgen/tests/tst_lexgen.cpp
new file mode 100644
index 0000000..6e50b15
--- /dev/null
+++ b/util/lexgen/tests/tst_lexgen.cpp
@@ -0,0 +1,285 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+#include <QtTest/QtTest>
+#define AUTOTEST
+#include "../main.cpp"
+
+class tst_LexGen : public QObject
+{
+ Q_OBJECT
+private slots:
+ void nfa_singleInput();
+ void nfa_alternating();
+ void nfa_concatenating();
+ void nfa_optional();
+ void nfa_toDFA_data();
+ void nfa_toDFA();
+ void lexgen_data();
+ void lexgen();
+};
+
+void tst_LexGen::nfa_singleInput()
+{
+ NFA nfa = NFA::createSingleInputNFA('a');
+
+ QCOMPARE(nfa.initialState, 0);
+ QCOMPARE(nfa.finalState, 1);
+
+ QCOMPARE(nfa.states.count(), 2);
+
+ QCOMPARE(nfa.states.at(0).transitions.count(), 1);
+ QVERIFY(nfa.states.at(0).transitions.contains('a'));
+ QCOMPARE(nfa.states.at(0).transitions.values('a').count(), 1);
+ QCOMPARE(nfa.states.at(0).transitions.value('a'), nfa.finalState);
+
+ QVERIFY(nfa.states.at(1).transitions.isEmpty());
+}
+
+void tst_LexGen::nfa_alternating()
+{
+ NFA a = NFA::createSingleInputNFA('a');
+ NFA b = NFA::createSingleInputNFA('b');
+ NFA nfa = NFA::createAlternatingNFA(a, b);
+
+ const int initialA = 1;
+ const int finalA = 2;
+
+ const int initialB = 3;
+ const int finalB = 4;
+
+ QCOMPARE(nfa.states.count(), 6);
+
+ QCOMPARE(nfa.initialState, 0);
+ QCOMPARE(nfa.finalState, 5);
+
+ QList<int> initialTransitions = nfa.states.at(0).transitions.values(Epsilon);
+ QCOMPARE(initialTransitions.count(), 2);
+ QVERIFY(initialTransitions.contains(initialA));
+ QVERIFY(initialTransitions.contains(initialB));
+
+ // no need to test the individual a and b NFAs, the other
+ // autotest already takes care of that. Just check whether
+ // the epsilon transitions to the final state exist.
+
+ QCOMPARE(nfa.states.at(finalA).transitions.count(), 1);
+ QCOMPARE(nfa.states.at(finalA).transitions.values(Epsilon).count(), 1);
+ QCOMPARE(nfa.states.at(finalA).transitions.value(Epsilon), nfa.finalState);
+
+ QCOMPARE(nfa.states.at(finalB).transitions.count(), 1);
+ QCOMPARE(nfa.states.at(finalB).transitions.values(Epsilon).count(), 1);
+ QCOMPARE(nfa.states.at(finalB).transitions.value(Epsilon), nfa.finalState);
+}
+
+void tst_LexGen::nfa_concatenating()
+{
+ NFA a = NFA::createSingleInputNFA('a');
+ NFA b = NFA::createSingleInputNFA('b');
+ NFA nfa = NFA::createConcatenatingNFA(a, b);
+
+ const int initialA = 1;
+ const int finalA = 2;
+
+ const int initialB = 3;
+ const int finalB = 4;
+
+ QCOMPARE(nfa.states.count(), 6);
+
+ QCOMPARE(nfa.initialState, 0);
+ QCOMPARE(nfa.finalState, 5);
+
+ QCOMPARE(nfa.states.at(0).transitions.count(), 1);
+ QCOMPARE(nfa.states.at(0).transitions.values(Epsilon).count(), 1);
+ QCOMPARE(nfa.states.at(0).transitions.value(Epsilon), initialA);
+
+ QCOMPARE(nfa.states.at(finalA).transitions.values(Epsilon).count(), 1);
+ QCOMPARE(nfa.states.at(finalA).transitions.value(Epsilon), initialB);
+
+ QCOMPARE(nfa.states.at(finalB).transitions.values(Epsilon).count(), 1);
+ QCOMPARE(nfa.states.at(finalB).transitions.value(Epsilon), nfa.finalState);
+}
+
+void tst_LexGen::nfa_optional()
+{
+ NFA a = NFA::createSingleInputNFA('a');
+ NFA nfa = NFA::createOptionalNFA(a);
+
+ const int initialA = 1;
+ const int finalA = 2;
+
+ QCOMPARE(nfa.states.count(), 4);
+
+ QCOMPARE(nfa.initialState, 0);
+ QCOMPARE(nfa.finalState, 3);
+
+ QCOMPARE(nfa.states.at(0).transitions.count(), 2);
+ QList<int> initialTransitions = nfa.states.at(0).transitions.values(Epsilon);
+ QVERIFY(initialTransitions.contains(nfa.finalState));
+ QVERIFY(initialTransitions.contains(initialA));
+
+ QList<int> finalEpsilonATransitions = nfa.states.at(finalA).transitions.values(Epsilon);
+ QVERIFY(finalEpsilonATransitions.contains(initialA));
+ QVERIFY(finalEpsilonATransitions.contains(nfa.finalState));
+}
+
+Q_DECLARE_METATYPE(NFA);
+Q_DECLARE_METATYPE(DFA);
+
+void tst_LexGen::nfa_toDFA_data()
+{
+ QTest::addColumn<NFA>("nfa");
+ QTest::addColumn<DFA>("expectedDFA");
+
+ NFA a = NFA::createSingleInputNFA('a');
+ NFA b = NFA::createSingleInputNFA('b');
+ NFA c = NFA::createSingleInputNFA('c');
+
+ NFA nfa;
+ DFA dfa;
+
+ dfa.clear();
+ dfa.resize(3);
+ dfa[0].transitions.insert('a', 1);
+ dfa[1].transitions.insert('b', 2);
+
+ nfa = NFA::createConcatenatingNFA(a, b);
+
+ QTest::newRow("simple concat") << nfa << dfa;
+
+ dfa.clear();
+ dfa.resize(3);
+ dfa[0].transitions.insert('a', 1);
+ dfa[0].transitions.insert('b', 2);
+
+ nfa = NFA::createAlternatingNFA(a, b);
+
+ QTest::newRow("simple alternate") << nfa << dfa;
+
+}
+
+void tst_LexGen::nfa_toDFA()
+{
+ QFETCH(NFA, nfa);
+ QFETCH(DFA, expectedDFA);
+
+ DFA dfa = nfa.toDFA();
+
+ QCOMPARE(dfa.count(), expectedDFA.count());
+ for (int i = 0; i < dfa.count(); ++i) {
+ if (dfa.at(i).transitions != expectedDFA.at(i).transitions) {
+ qDebug() << "DFAs differ in state" << i;
+ qDebug() << "NFA:";
+ nfa.debug();
+ qDebug() << "Actual DFA:";
+ dfa.debug();
+ qDebug() << "Expected DFA:";
+ expectedDFA.debug();
+ QVERIFY(false);
+ }
+ }
+}
+
+void tst_LexGen::lexgen_data()
+{
+ QTest::addColumn<QString>("ruleFile");
+ QTest::addColumn<QString>("input");
+ QTest::addColumn<QString>("expectedOutput");
+
+ QDir d(QString(SRCDIR));
+ d.cd("testdata");
+ foreach (QString test, d.entryList(QDir::Dirs | QDir::NoDotAndDotDot)) {
+ QString dir = d.absoluteFilePath(test) + '/';
+ QTest::newRow(qPrintable(test))
+ << dir + "rules.lexgen"
+ << dir + "input"
+ << dir + "output"
+ ;
+ }
+}
+
+void tst_LexGen::lexgen()
+{
+ QFETCH(QString, ruleFile);
+ QFETCH(QString, input);
+ QFETCH(QString, expectedOutput);
+
+ Config conf;
+ QVERIFY(loadConfig(ruleFile, &conf));
+ DFA dfa = generateMachine(conf);
+ QVERIFY(!dfa.isEmpty());
+ conf.debug = true;
+
+ QFile f(input);
+ QVERIFY(f.open(QIODevice::ReadOnly));
+ input = QString::fromUtf8(f.readAll());
+ f.close();
+ if (input.endsWith(QLatin1Char('\n')))
+ input.chop(1);
+// machine.debug();
+ bool ok = false;
+ QList<Symbol> symbols = tokenize(dfa, input, &conf, &ok);
+ QVERIFY(ok);
+ f.setFileName(expectedOutput);
+ QVERIFY(f.open(QIODevice::ReadOnly));
+ QStringList lines;
+ while (!f.atEnd()) {
+ QString line = QString::fromUtf8(f.readLine());
+ if (line.endsWith(QLatin1Char('\n')))
+ line.chop(1);
+ lines << line;
+ }
+ f.close();
+
+// dfa.debug();
+ QCOMPARE(lines.count(), symbols.count());
+
+ for (int i = 0; i < lines.count(); ++i) {
+ QStringList l = lines.at(i).split(QChar::fromLatin1('|'));
+ QCOMPARE(l.count(), 2);
+ QString expectedToken = l.at(0);
+ QString expectedLexem = l.at(1);
+ QCOMPARE(symbols.at(i).token, expectedToken);
+ QCOMPARE(symbols.at(i).lexem, expectedLexem);
+ }
+}
+
+QTEST_MAIN(tst_LexGen)
+#include "tst_lexgen.moc"
diff --git a/util/lexgen/tokenizer.cpp b/util/lexgen/tokenizer.cpp
new file mode 100644
index 0000000..401fa92
--- /dev/null
+++ b/util/lexgen/tokenizer.cpp
@@ -0,0 +1,237 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the utils of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+// auto generated. DO NOT EDIT.
+class RegExpTokenizer
+{
+public:
+ RegExpTokenizer(const QString &inp);
+
+ inline QChar next() {
+ return (pos < input.length()) ? input.at(pos++) : QChar();
+ }
+ int lex();
+
+ QString input;
+ int pos;
+ int lexemStart;
+ int lexemLength;
+};
+
+RegExpTokenizer::RegExpTokenizer(const QString &inp)
+{
+ input = inp;
+ pos = 0;
+ lexemStart = 0;
+ lexemLength = 0;
+}
+
+
+int RegExpTokenizer::lex()
+{
+ lexemStart = pos;
+ lexemLength = 0;
+ int lastAcceptingPos = -1;
+ int token = -1;
+ QChar ch;
+
+ // initial state
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 33)
+ goto state_1;
+ if (ch.unicode() == 34)
+ goto state_2;
+ if (ch.unicode() >= 35 && ch.unicode() <= 39)
+ goto state_1;
+ if (ch.unicode() == 40) {
+ token = RE2NFA::TOK_LPAREN;
+ goto found;
+ }
+ if (ch.unicode() == 41) {
+ token = RE2NFA::TOK_RPAREN;
+ goto found;
+ }
+ if (ch.unicode() == 42) {
+ token = RE2NFA::TOK_STAR;
+ goto found;
+ }
+ if (ch.unicode() == 43) {
+ token = RE2NFA::TOK_PLUS;
+ goto found;
+ }
+ if (ch.unicode() == 44) {
+ token = RE2NFA::TOK_COMMA;
+ goto found;
+ }
+ if (ch.unicode() == 45)
+ goto state_1;
+ if (ch.unicode() == 46) {
+ token = RE2NFA::TOK_DOT;
+ goto found;
+ }
+ if (ch.unicode() >= 47 && ch.unicode() <= 62)
+ goto state_1;
+ if (ch.unicode() == 63) {
+ token = RE2NFA::TOK_QUESTION;
+ goto found;
+ }
+ if (ch.unicode() >= 64 && ch.unicode() <= 90)
+ goto state_1;
+ if (ch.unicode() == 91)
+ goto state_10;
+ if (ch.unicode() == 92)
+ goto state_11;
+ if (ch.unicode() >= 93 && ch.unicode() <= 122)
+ goto state_1;
+ if (ch.unicode() == 123) {
+ token = RE2NFA::TOK_LBRACE;
+ goto found;
+ }
+ if (ch.unicode() == 124) {
+ token = RE2NFA::TOK_OR;
+ goto found;
+ }
+ if (ch.unicode() == 125) {
+ token = RE2NFA::TOK_RBRACE;
+ goto found;
+ }
+ if (ch.unicode() >= 126)
+ goto state_1;
+ goto out;
+ state_1:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_STRING;
+ goto out;
+ state_2:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_STRING;
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 33)
+ goto state_15;
+ if (ch.unicode() == 34)
+ goto state_16;
+ if (ch.unicode() >= 35)
+ goto state_15;
+ goto out;
+ state_10:
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 91)
+ goto state_17;
+ if (ch.unicode() == 92)
+ goto state_18;
+ if (ch.unicode() == 93)
+ goto state_19;
+ if (ch.unicode() >= 94)
+ goto state_17;
+ goto out;
+ state_11:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_STRING;
+ ch = next();
+ if (ch.unicode() >= 1)
+ goto state_20;
+ goto out;
+ state_15:
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 33)
+ goto state_15;
+ if (ch.unicode() == 34)
+ goto state_16;
+ if (ch.unicode() >= 35)
+ goto state_15;
+ goto out;
+ state_16:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_QUOTED_STRING;
+ goto out;
+ state_17:
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 91)
+ goto state_17;
+ if (ch.unicode() == 92)
+ goto state_18;
+ if (ch.unicode() == 93)
+ goto state_19;
+ if (ch.unicode() >= 94)
+ goto state_17;
+ goto out;
+ state_18:
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 91)
+ goto state_17;
+ if (ch.unicode() == 92)
+ goto state_18;
+ if (ch.unicode() == 93)
+ goto state_21;
+ if (ch.unicode() >= 94)
+ goto state_17;
+ goto out;
+ state_19:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_SEQUENCE;
+ goto out;
+ state_20:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_STRING;
+ goto out;
+ state_21:
+ lastAcceptingPos = pos;
+ token = RE2NFA::TOK_SEQUENCE;
+ ch = next();
+ if (ch.unicode() >= 1 && ch.unicode() <= 91)
+ goto state_17;
+ if (ch.unicode() == 92)
+ goto state_18;
+ if (ch.unicode() == 93)
+ goto state_19;
+ if (ch.unicode() >= 94)
+ goto state_17;
+ goto out;
+ found:
+ lastAcceptingPos = pos;
+
+ out:
+ if (lastAcceptingPos != -1) {
+ lexemLength = lastAcceptingPos - lexemStart;
+ pos = lastAcceptingPos;
+ }
+ return token;
+}
+