summaryrefslogtreecommitdiffstats
path: root/src/xmlpatterns/parser/qxquerytokenizer_p.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/xmlpatterns/parser/qxquerytokenizer_p.h')
-rw-r--r--src/xmlpatterns/parser/qxquerytokenizer_p.h332
1 files changed, 332 insertions, 0 deletions
diff --git a/src/xmlpatterns/parser/qxquerytokenizer_p.h b/src/xmlpatterns/parser/qxquerytokenizer_p.h
new file mode 100644
index 0000000..4ef7a5f
--- /dev/null
+++ b/src/xmlpatterns/parser/qxquerytokenizer_p.h
@@ -0,0 +1,332 @@
+/****************************************************************************
+**
+** Copyright (C) 2009 Nokia Corporation and/or its subsidiary(-ies).
+** Contact: Qt Software Information (qt-info@nokia.com)
+**
+** This file is part of the QtXmlPatterns module of the Qt Toolkit.
+**
+** $QT_BEGIN_LICENSE:LGPL$
+** No Commercial Usage
+** This file contains pre-release code and may not be distributed.
+** You may use this file in accordance with the terms and conditions
+** contained in the either Technology Preview License Agreement or the
+** Beta Release License Agreement.
+**
+** GNU Lesser General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU Lesser
+** General Public License version 2.1 as published by the Free Software
+** Foundation and appearing in the file LICENSE.LGPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU Lesser General Public License version 2.1 requirements
+** will be met: http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
+**
+** In addition, as a special exception, Nokia gives you certain
+** additional rights. These rights are described in the Nokia Qt LGPL
+** Exception version 1.0, included in the file LGPL_EXCEPTION.txt in this
+** package.
+**
+** GNU General Public License Usage
+** Alternatively, this file may be used under the terms of the GNU
+** General Public License version 3.0 as published by the Free Software
+** Foundation and appearing in the file LICENSE.GPL included in the
+** packaging of this file. Please review the following information to
+** ensure the GNU General Public License version 3.0 requirements will be
+** met: http://www.gnu.org/copyleft/gpl.html.
+**
+** If you are unsure which license is appropriate for your use, please
+** contact the sales department at qt-sales@nokia.com.
+** $QT_END_LICENSE$
+**
+****************************************************************************/
+
+//
+// W A R N I N G
+// -------------
+//
+// This file is not part of the Qt API. It exists purely as an
+// implementation detail. This header file may change from version to
+// version without notice, or even be removed.
+//
+// We mean it.
+#ifndef Patternist_XQueryTokenizer_H
+#define Patternist_XQueryTokenizer_H
+
+#include <QHash>
+#include <QSet>
+#include <QStack>
+#include <QString>
+#include <QUrl>
+
+#include "qtokenizer_p.h"
+
+QT_BEGIN_HEADER
+
+QT_BEGIN_NAMESPACE
+
+namespace QPatternist
+{
+ struct TokenMap;
+
+ /**
+ * @short A hand-written tokenizer which tokenizes XQuery 1.0 & XPath 2.0,
+ * and delivers tokens to the Bison generated parser.
+ *
+ * The tokenizer keeps a current State together with a stack of saved
+ * states, see pushState() and popState().
+ *
+ * @author Frans Englich <fenglich@trolltech.com>
+ */
+ class XQueryTokenizer : public Tokenizer
+ {
+ public:
+ /**
+ * Tokenizer states. Organized alphabetically; keep the list sorted
+ * when adding a state.
+ */
+ enum State
+ {
+ AfterAxisSeparator,
+ AposAttributeContent,
+ Axis,
+ Default,
+ ElementContent,
+ EndTag,
+ ItemType,
+ KindTest,
+ KindTestForPI,
+ NamespaceDecl,
+ NamespaceKeyword,
+ OccurrenceIndicator,
+ Operator,
+ Pragma,
+ PragmaContent,
+ ProcessingInstructionContent,
+ ProcessingInstructionName,
+ QuotAttributeContent,
+ StartTag,
+ VarName,
+ XMLComment,
+ XMLSpaceDecl,
+ XQueryVersion
+ };
+
+ XQueryTokenizer(const QString &query,
+ const QUrl &location,
+ const State startingState = Default);
+
+ virtual Token nextToken(YYLTYPE *const sourceLocator);
+ virtual int commenceScanOnly();
+ virtual void resumeTokenizationFrom(const int position);
+
+ /**
+ * Does nothing.
+ *
+ * NOTE(review): the implementation is not part of this header;
+ * presumably the function only exists because the Tokenizer base
+ * class requires it -- confirm.
+ */
+ virtual void setParserContext(const ParserContext::Ptr &parseInfo);
+
+ private:
+
+ /**
+ * Returns the character corresponding to the built-in reference @p
+ * reference. For instance, passing @c gt will give you '>' in return.
+ *
+ * If @p reference is an invalid character reference, a null QChar is
+ * returned.
+ *
+ * @see QChar::isNull()
+ */
+ QChar charForReference(const QString &reference);
+
+ inline Token tokenAndChangeState(const TokenType code,
+ const State state,
+ const int advance = 1);
+ inline Token tokenAndChangeState(const TokenType code,
+ const QString &value,
+ const State state);
+ inline Token tokenAndAdvance(const TokenType code,
+ const int advance = 1);
+ QString tokenizeCharacterReference();
+
+ inline Token tokenizeStringLiteral();
+ inline Token tokenizeNumberLiteral();
+
+ /**
+ * @returns the character @p length characters from the current
+ * position. A @p length of 0 refers to the current position itself,
+ * see current().
+ */
+ inline char peekAhead(const int length = 1) const;
+
+ /**
+ * @returns whether the stream, starting from @p offset from the
+ * current position, matches @p chs. The length of @p chs is @p len.
+ *
+ * With the default @p offset of 1, the comparison starts at the
+ * character following the current one.
+ */
+ inline bool aheadEquals(const char *const chs,
+ const int len,
+ const int offset = 1) const;
+
+ inline Token tokenizeNCName();
+ static inline bool isOperatorKeyword(const TokenType);
+
+ static inline bool isDigit(const char ch);
+ static inline Token error();
+ inline TokenType consumeWhitespace();
+
+ /**
+ * @short Returns the character at the current position, converted to
+ * @c ASCII.
+ *
+ * Equivalent to calling:
+ *
+ * @code
+ * current().toAscii();
+ * @endcode
+ */
+ inline char peekCurrent() const;
+
+ /**
+ * @short Returns the character at the current position.
+ *
+ * Disregarding encoding conversion, equivalent to calling:
+ *
+ * @code
+ * peekAhead(0);
+ * @endcode
+ */
+ inline const QChar current() const;
+
+ /**
+ * Looks ahead for "::". Being a @c const function, it does not move
+ * the current position.
+ *
+ * @returns the length of whitespace scanned before reaching "::", or
+ * -1 if something else was found.
+ */
+ int peekForColonColon() const;
+
+ static inline bool isNCNameStart(const QChar ch);
+ static inline bool isNCNameBody(const QChar ch);
+ static inline const TokenMap *lookupKeyword(const QString &keyword);
+ inline void popState();
+ inline void pushState(const State state);
+ inline State state() const;
+ inline void setState(const State s);
+ static bool isTypeToken(const TokenType t);
+
+ inline Token tokenizeNCNameOrQName();
+ /**
+ * Advances m_pos until @p content is encountered.
+ *
+ * Returned is the length stretching from m_pos when starting, until
+ * @p content is encountered. @p content is not included in the length.
+ */
+ int scanUntil(const char *const content);
+
+ /**
+ * Same as calling:
+ * @code
+ * pushState(state());
+ * @endcode
+ */
+ inline void pushState();
+
+ /**
+ * Consumes only whitespace, in the traditional sense. The function exits
+ * if non-whitespace is encountered, such as the start of a comment.
+ *
+ * @returns @c true if the end was reached, otherwise @c false
+ */
+ inline bool consumeRawWhitespace();
+
+ /**
+ * @short Parses comments: <tt>(: comment content :)</tt>. It recurses for
+ * parsing nested comments.
+ *
+ * It is assumed that the start token for the comment, "(:", has
+ * already been parsed.
+ *
+ * Typically, don't call this function directly, but consumeWhitespace().
+ *
+ * NOTE(review): this comment used to name ignoreWhitespace(), which is
+ * not declared in this class; consumeWhitespace() is presumably the
+ * function that was meant -- confirm.
+ *
+ * @see <a href="http://www.w3.org/TR/xpath20/#comments">XML Path Language (XPath)
+ * 2.0, 2.6 Comments</a>
+ * @returns
+ * - SUCCESS if everything went ok
+ * - ERROR if there was an error in parsing one or more comments
+ * - END_OF_FILE if the end was reached
+ */
+ Tokenizer::TokenType consumeComment();
+
+ /**
+ * Determines whether @p code is a keyword
+ * that is followed by a second keyword. For instance <tt>declare
+ * function</tt>.
+ */
+ static inline bool isPhraseKeyword(const TokenType code);
+
+ /**
+ * A set of indexes into the QString passed to normalizeEOL(). The
+ * characters at these indexes shouldn't be normalized.
+ */
+ typedef QSet<int> CharacterSkips;
+
+ /**
+ * Returns @p input, normalized according to
+ * <a href="http://www.w3.org/TR/xquery/#id-eol-handling">XQuery 1.0:
+ * An XML Query Language, A.2.3 End-of-Line Handling</a>
+ *
+ * @param input the string to normalize
+ * @param characterSkips indexes into @p input whose characters
+ * shouldn't be normalized, see CharacterSkips
+ */
+ static QString normalizeEOL(const QString &input,
+ const CharacterSkips &characterSkips);
+
+ inline bool atEnd() const
+ {
+ return m_pos == m_length;
+ }
+
+ Token nextToken();
+ /**
+ * Instead of recognizing and tokenizing embedded expressions in
+ * direct attribute constructors, this function is essentially a mini
+ * recursive-descent parser that has the necessary logic to recognize
+ * embedded expressions and their potentially interfering string literals, in
+ * order to scan to the very end of the attribute value, and return the
+ * whole as a string.
+ *
+ * There are of course syntax errors this function will not detect, but
+ * that is ok since the attributes will be parsed once more.
+ *
+ * An inelegant solution, but one which gets the job done.
+ *
+ * @see commenceScanOnly(), resumeTokenizationFrom()
+ */
+ Token attributeAsRaw(const QChar separator,
+ int &stack,
+ const int startPos,
+ const bool inLiteral,
+ QString &result);
+
+ const QString m_data;
+ const int m_length;
+ State m_state;
+ QStack<State> m_stateStack;
+ int m_pos;
+
+ /**
+ * The current line number.
+ *
+ * The line number and column number both start at 1.
+ */
+ int m_line;
+
+ /**
+ * The offset at which the current line starts, presumably an index
+ * into m_data just like m_pos.
+ *
+ * NOTE(review): this comment used to state that the current column is
+ * m_length - m_columnOffset. Since m_length is a constant,
+ * m_pos - m_columnOffset is presumably what was meant -- confirm
+ * against the implementation.
+ *
+ * The line number and column number both start at 1.
+ */
+ int m_columnOffset;
+
+ const NamePool::Ptr m_namePool;
+ QStack<Token> m_tokenStack;
+ QHash<QString, QChar> m_charRefs;
+ bool m_scanOnly;
+
+ Q_DISABLE_COPY(XQueryTokenizer)
+ };
+}
+
+QT_END_NAMESPACE
+
+QT_END_HEADER
+
+#endif