summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp')
-rw-r--r--src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp637
1 files changed, 637 insertions, 0 deletions
diff --git a/src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp b/src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp
new file mode 100644
index 0000000..be42a34
--- /dev/null
+++ b/src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp
@@ -0,0 +1,637 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "WRECParser.h"
+
+#if ENABLE(WREC)
+
+#include "CharacterClassConstructor.h"
+#include "WRECFunctors.h"
+
+using namespace WTF;
+
+namespace JSC { namespace WREC {
+
+// These error messages match the error messages used by PCRE.
+const char* Parser::QuantifierOutOfOrder = "numbers out of order in {} quantifier";
+const char* Parser::QuantifierWithoutAtom = "nothing to repeat";
+const char* Parser::ParenthesesUnmatched = "unmatched parentheses";
+const char* Parser::ParenthesesTypeInvalid = "unrecognized character after (?";
+const char* Parser::ParenthesesNotSupported = ""; // Not a user-visible syntax error -- just signals a syntax that WREC doesn't support yet.
+const char* Parser::CharacterClassUnmatched = "missing terminating ] for character class";
+const char* Parser::CharacterClassOutOfOrder = "range out of order in character class";
+const char* Parser::EscapeUnterminated = "\\ at end of pattern";
+
+class PatternCharacterSequence {
+typedef Generator::JumpList JumpList;
+
+public:
+ PatternCharacterSequence(Generator& generator, JumpList& failures)
+ : m_generator(generator)
+ , m_failures(failures)
+ {
+ }
+
+ size_t size() { return m_sequence.size(); }
+
+ void append(int ch)
+ {
+ m_sequence.append(ch);
+ }
+
+ void flush()
+ {
+ if (!m_sequence.size())
+ return;
+
+ m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size());
+ m_sequence.clear();
+ }
+
+ void flush(const Quantifier& quantifier)
+ {
+ if (!m_sequence.size())
+ return;
+
+ m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size() - 1);
+
+ switch (quantifier.type) {
+ case Quantifier::None:
+ case Quantifier::Error:
+ ASSERT_NOT_REACHED();
+ break;
+
+ case Quantifier::Greedy: {
+ GeneratePatternCharacterFunctor functor(m_sequence.last());
+ m_generator.generateGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max);
+ break;
+ }
+
+ case Quantifier::NonGreedy: {
+ GeneratePatternCharacterFunctor functor(m_sequence.last());
+ m_generator.generateNonGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max);
+ break;
+ }
+ }
+
+ m_sequence.clear();
+ }
+
+private:
+ Generator& m_generator;
+ JumpList& m_failures;
+ Vector<int, 8> m_sequence;
+};
+
+ALWAYS_INLINE Quantifier Parser::consumeGreedyQuantifier()
+{
+ switch (peek()) {
+ case '?':
+ consume();
+ return Quantifier(Quantifier::Greedy, 0, 1);
+
+ case '*':
+ consume();
+ return Quantifier(Quantifier::Greedy, 0);
+
+ case '+':
+ consume();
+ return Quantifier(Quantifier::Greedy, 1);
+
+ case '{': {
+ SavedState state(*this);
+ consume();
+
+ // Accept: {n}, {n,}, {n,m}.
+ // Reject: {n,m} where n > m.
+ // Ignore: Anything else, such as {n, m}.
+
+ if (!peekIsDigit()) {
+ state.restore();
+ return Quantifier();
+ }
+
+ unsigned min = consumeNumber();
+ unsigned max = min;
+
+ if (peek() == ',') {
+ consume();
+ max = peekIsDigit() ? consumeNumber() : Quantifier::Infinity;
+ }
+
+ if (peek() != '}') {
+ state.restore();
+ return Quantifier();
+ }
+ consume();
+
+ if (min > max) {
+ setError(QuantifierOutOfOrder);
+ return Quantifier(Quantifier::Error);
+ }
+
+ return Quantifier(Quantifier::Greedy, min, max);
+ }
+
+ default:
+ return Quantifier(); // No quantifier.
+ }
+}
+
+Quantifier Parser::consumeQuantifier()
+{
+ Quantifier q = consumeGreedyQuantifier();
+
+ if ((q.type == Quantifier::Greedy) && (peek() == '?')) {
+ consume();
+ q.type = Quantifier::NonGreedy;
+ }
+
+ return q;
+}
+
+bool Parser::parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert)
+{
+ Quantifier q = consumeQuantifier();
+
+ switch (q.type) {
+ case Quantifier::None: {
+ m_generator.generateCharacterClass(failures, charClass, invert);
+ break;
+ }
+
+ case Quantifier::Greedy: {
+ GenerateCharacterClassFunctor functor(&charClass, invert);
+ m_generator.generateGreedyQuantifier(failures, functor, q.min, q.max);
+ break;
+ }
+
+ case Quantifier::NonGreedy: {
+ GenerateCharacterClassFunctor functor(&charClass, invert);
+ m_generator.generateNonGreedyQuantifier(failures, functor, q.min, q.max);
+ break;
+ }
+
+ case Quantifier::Error:
+ return false;
+ }
+
+ return true;
+}
+
+bool Parser::parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId)
+{
+ Quantifier q = consumeQuantifier();
+
+ switch (q.type) {
+ case Quantifier::None: {
+ m_generator.generateBackreference(failures, subpatternId);
+ break;
+ }
+
+ case Quantifier::Greedy:
+ case Quantifier::NonGreedy:
+ m_generator.generateBackreferenceQuantifier(failures, q.type, subpatternId, q.min, q.max);
+ return true;
+
+ case Quantifier::Error:
+ return false;
+ }
+
+ return true;
+}
+
+bool Parser::parseParentheses(JumpList& failures)
+{
+ ParenthesesType type = consumeParenthesesType();
+
+ // FIXME: WREC originally failed to backtrack correctly in cases such as
+ // "c".match(/(.*)c/). Now, most parentheses handling is disabled. For
+ // unsupported parentheses, we fall back on PCRE.
+
+ switch (type) {
+ case Generator::Assertion:
+ m_generator.generateParenthesesAssertion(failures);
+
+ if (consume() != ')') {
+ setError(ParenthesesUnmatched);
+ return false;
+ }
+
+ // A quantifier after an assertion is meaningless, since assertions
+ // don't move index forward. So, we discard it.
+ consumeQuantifier();
+ break;
+
+ case Generator::InvertedAssertion:
+ m_generator.generateParenthesesInvertedAssertion(failures);
+
+ if (consume() != ')') {
+ setError(ParenthesesUnmatched);
+ return false;
+ }
+
+ // A quantifier after an assertion is meaningless, since assertions
+ // don't move index forward. So, we discard it.
+ consumeQuantifier();
+ break;
+
+ default:
+ setError(ParenthesesNotSupported);
+ return false;
+ }
+
+ return true;
+}
+
+bool Parser::parseCharacterClass(JumpList& failures)
+{
+ bool invert = false;
+ if (peek() == '^') {
+ consume();
+ invert = true;
+ }
+
+ CharacterClassConstructor constructor(m_ignoreCase);
+
+ int ch;
+ while ((ch = peek()) != ']') {
+ switch (ch) {
+ case EndOfPattern:
+ setError(CharacterClassUnmatched);
+ return false;
+
+ case '\\': {
+ consume();
+ Escape escape = consumeEscape(true);
+
+ switch (escape.type()) {
+ case Escape::PatternCharacter: {
+ int character = PatternCharacterEscape::cast(escape).character();
+ if (character == '-')
+ constructor.flushBeforeEscapedHyphen();
+ constructor.put(character);
+ break;
+ }
+ case Escape::CharacterClass: {
+ const CharacterClassEscape& characterClassEscape = CharacterClassEscape::cast(escape);
+ ASSERT(!characterClassEscape.invert());
+ constructor.append(characterClassEscape.characterClass());
+ break;
+ }
+ case Escape::Error:
+ return false;
+ case Escape::Backreference:
+ case Escape::WordBoundaryAssertion: {
+ ASSERT_NOT_REACHED();
+ break;
+ }
+ }
+ break;
+ }
+
+ default:
+ consume();
+ constructor.put(ch);
+ }
+ }
+ consume();
+
+ // lazily catch reversed ranges ([z-a])in character classes
+ if (constructor.isUpsideDown()) {
+ setError(CharacterClassOutOfOrder);
+ return false;
+ }
+
+ constructor.flush();
+ CharacterClass charClass = constructor.charClass();
+ return parseCharacterClassQuantifier(failures, charClass, invert);
+}
+
+bool Parser::parseNonCharacterEscape(JumpList& failures, const Escape& escape)
+{
+ switch (escape.type()) {
+ case Escape::PatternCharacter:
+ ASSERT_NOT_REACHED();
+ return false;
+
+ case Escape::CharacterClass:
+ return parseCharacterClassQuantifier(failures, CharacterClassEscape::cast(escape).characterClass(), CharacterClassEscape::cast(escape).invert());
+
+ case Escape::Backreference:
+ return parseBackreferenceQuantifier(failures, BackreferenceEscape::cast(escape).subpatternId());
+
+ case Escape::WordBoundaryAssertion:
+ m_generator.generateAssertionWordBoundary(failures, WordBoundaryAssertionEscape::cast(escape).invert());
+ return true;
+
+ case Escape::Error:
+ return false;
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+}
+
+Escape Parser::consumeEscape(bool inCharacterClass)
+{
+ switch (peek()) {
+ case EndOfPattern:
+ setError(EscapeUnterminated);
+ return Escape(Escape::Error);
+
+ // Assertions
+ case 'b':
+ consume();
+ if (inCharacterClass)
+ return PatternCharacterEscape('\b');
+ return WordBoundaryAssertionEscape(false); // do not invert
+ case 'B':
+ consume();
+ if (inCharacterClass)
+ return PatternCharacterEscape('B');
+ return WordBoundaryAssertionEscape(true); // invert
+
+ // CharacterClassEscape
+ case 'd':
+ consume();
+ return CharacterClassEscape(CharacterClass::digits(), false);
+ case 's':
+ consume();
+ return CharacterClassEscape(CharacterClass::spaces(), false);
+ case 'w':
+ consume();
+ return CharacterClassEscape(CharacterClass::wordchar(), false);
+ case 'D':
+ consume();
+ return inCharacterClass
+ ? CharacterClassEscape(CharacterClass::nondigits(), false)
+ : CharacterClassEscape(CharacterClass::digits(), true);
+ case 'S':
+ consume();
+ return inCharacterClass
+ ? CharacterClassEscape(CharacterClass::nonspaces(), false)
+ : CharacterClassEscape(CharacterClass::spaces(), true);
+ case 'W':
+ consume();
+ return inCharacterClass
+ ? CharacterClassEscape(CharacterClass::nonwordchar(), false)
+ : CharacterClassEscape(CharacterClass::wordchar(), true);
+
+ // DecimalEscape
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
+ if (peekDigit() > m_numSubpatterns || inCharacterClass) {
+ // To match Firefox, we parse an invalid backreference in the range [1-7]
+ // as an octal escape.
+ return peekDigit() > 7 ? PatternCharacterEscape('\\') : PatternCharacterEscape(consumeOctal());
+ }
+
+ int value = 0;
+ do {
+ unsigned newValue = value * 10 + peekDigit();
+ if (newValue > m_numSubpatterns)
+ break;
+ value = newValue;
+ consume();
+ } while (peekIsDigit());
+
+ return BackreferenceEscape(value);
+ }
+
+ // Octal escape
+ case '0':
+ consume();
+ return PatternCharacterEscape(consumeOctal());
+
+ // ControlEscape
+ case 'f':
+ consume();
+ return PatternCharacterEscape('\f');
+ case 'n':
+ consume();
+ return PatternCharacterEscape('\n');
+ case 'r':
+ consume();
+ return PatternCharacterEscape('\r');
+ case 't':
+ consume();
+ return PatternCharacterEscape('\t');
+ case 'v':
+ consume();
+ return PatternCharacterEscape('\v');
+
+ // ControlLetter
+ case 'c': {
+ SavedState state(*this);
+ consume();
+
+ int control = consume();
+ if (!isASCIIAlpha(control)) {
+ state.restore();
+ return PatternCharacterEscape('\\');
+ }
+ return PatternCharacterEscape(control & 31);
+ }
+
+ // HexEscape
+ case 'x': {
+ consume();
+
+ SavedState state(*this);
+ int x = consumeHex(2);
+ if (x == -1) {
+ state.restore();
+ return PatternCharacterEscape('x');
+ }
+ return PatternCharacterEscape(x);
+ }
+
+ // UnicodeEscape
+ case 'u': {
+ consume();
+
+ SavedState state(*this);
+ int x = consumeHex(4);
+ if (x == -1) {
+ state.restore();
+ return PatternCharacterEscape('u');
+ }
+ return PatternCharacterEscape(x);
+ }
+
+ // IdentityEscape
+ default:
+ return PatternCharacterEscape(consume());
+ }
+}
+
+void Parser::parseAlternative(JumpList& failures)
+{
+ PatternCharacterSequence sequence(m_generator, failures);
+
+ while (1) {
+ switch (peek()) {
+ case EndOfPattern:
+ case '|':
+ case ')':
+ sequence.flush();
+ return;
+
+ case '*':
+ case '+':
+ case '?':
+ case '{': {
+ Quantifier q = consumeQuantifier();
+
+ if (q.type == Quantifier::None) {
+ sequence.append(consume());
+ continue;
+ }
+
+ if (q.type == Quantifier::Error)
+ return;
+
+ if (!sequence.size()) {
+ setError(QuantifierWithoutAtom);
+ return;
+ }
+
+ sequence.flush(q);
+ continue;
+ }
+
+ case '^':
+ consume();
+
+ sequence.flush();
+ m_generator.generateAssertionBOL(failures);
+ continue;
+
+ case '$':
+ consume();
+
+ sequence.flush();
+ m_generator.generateAssertionEOL(failures);
+ continue;
+
+ case '.':
+ consume();
+
+ sequence.flush();
+ if (!parseCharacterClassQuantifier(failures, CharacterClass::newline(), true))
+ return;
+ continue;
+
+ case '[':
+ consume();
+
+ sequence.flush();
+ if (!parseCharacterClass(failures))
+ return;
+ continue;
+
+ case '(':
+ consume();
+
+ sequence.flush();
+ if (!parseParentheses(failures))
+ return;
+ continue;
+
+ case '\\': {
+ consume();
+
+ Escape escape = consumeEscape(false);
+ if (escape.type() == Escape::PatternCharacter) {
+ sequence.append(PatternCharacterEscape::cast(escape).character());
+ continue;
+ }
+
+ sequence.flush();
+ if (!parseNonCharacterEscape(failures, escape))
+ return;
+ continue;
+ }
+
+ default:
+ sequence.append(consume());
+ continue;
+ }
+ }
+}
+
+/*
+ TOS holds index.
+*/
+void Parser::parseDisjunction(JumpList& failures)
+{
+ parseAlternative(failures);
+ if (peek() != '|')
+ return;
+
+ JumpList successes;
+ do {
+ consume();
+ m_generator.terminateAlternative(successes, failures);
+ parseAlternative(failures);
+ } while (peek() == '|');
+
+ m_generator.terminateDisjunction(successes);
+}
+
+Generator::ParenthesesType Parser::consumeParenthesesType()
+{
+ if (peek() != '?')
+ return Generator::Capturing;
+ consume();
+
+ switch (consume()) {
+ case ':':
+ return Generator::NonCapturing;
+
+ case '=':
+ return Generator::Assertion;
+
+ case '!':
+ return Generator::InvertedAssertion;
+
+ default:
+ setError(ParenthesesTypeInvalid);
+ return Generator::Error;
+ }
+}
+
+} } // namespace JSC::WREC
+
+#endif // ENABLE(WREC)