diff options
Diffstat (limited to 'src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp')
-rw-r--r-- | src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp | 637 |
1 files changed, 637 insertions, 0 deletions
diff --git a/src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp b/src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp new file mode 100644 index 0000000..be42a34 --- /dev/null +++ b/src/3rdparty/webkit/JavaScriptCore/wrec/WRECParser.cpp @@ -0,0 +1,637 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "WRECParser.h" + +#if ENABLE(WREC) + +#include "CharacterClassConstructor.h" +#include "WRECFunctors.h" + +using namespace WTF; + +namespace JSC { namespace WREC { + +// These error messages match the error messages used by PCRE. +const char* Parser::QuantifierOutOfOrder = "numbers out of order in {} quantifier"; +const char* Parser::QuantifierWithoutAtom = "nothing to repeat"; +const char* Parser::ParenthesesUnmatched = "unmatched parentheses"; +const char* Parser::ParenthesesTypeInvalid = "unrecognized character after (?"; +const char* Parser::ParenthesesNotSupported = ""; // Not a user-visible syntax error -- just signals a syntax that WREC doesn't support yet. +const char* Parser::CharacterClassUnmatched = "missing terminating ] for character class"; +const char* Parser::CharacterClassOutOfOrder = "range out of order in character class"; +const char* Parser::EscapeUnterminated = "\\ at end of pattern"; + +class PatternCharacterSequence { +typedef Generator::JumpList JumpList; + +public: + PatternCharacterSequence(Generator& generator, JumpList& failures) + : m_generator(generator) + , m_failures(failures) + { + } + + size_t size() { return m_sequence.size(); } + + void append(int ch) + { + m_sequence.append(ch); + } + + void flush() + { + if (!m_sequence.size()) + return; + + m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size()); + m_sequence.clear(); + } + + void flush(const Quantifier& quantifier) + { + if (!m_sequence.size()) + return; + + m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size() - 1); + + switch (quantifier.type) { + case Quantifier::None: + case Quantifier::Error: + ASSERT_NOT_REACHED(); + break; + + case Quantifier::Greedy: { + GeneratePatternCharacterFunctor functor(m_sequence.last()); + m_generator.generateGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max); + break; + } + + case Quantifier::NonGreedy: { + GeneratePatternCharacterFunctor functor(m_sequence.last()); + m_generator.generateNonGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max); + break; + } + } + + m_sequence.clear(); + } + +private: + Generator& m_generator; + JumpList& m_failures; + Vector<int, 8> m_sequence; +}; + +ALWAYS_INLINE Quantifier Parser::consumeGreedyQuantifier() +{ + switch (peek()) { + case '?': + consume(); + return Quantifier(Quantifier::Greedy, 0, 1); + + case '*': + consume(); + return Quantifier(Quantifier::Greedy, 0); + + case '+': + consume(); + return Quantifier(Quantifier::Greedy, 1); + + case '{': { + SavedState state(*this); + consume(); + + // Accept: {n}, {n,}, {n,m}. + // Reject: {n,m} where n > m. + // Ignore: Anything else, such as {n, m}. + + if (!peekIsDigit()) { + state.restore(); + return Quantifier(); + } + + unsigned min = consumeNumber(); + unsigned max = min; + + if (peek() == ',') { + consume(); + max = peekIsDigit() ? consumeNumber() : Quantifier::Infinity; + } + + if (peek() != '}') { + state.restore(); + return Quantifier(); + } + consume(); + + if (min > max) { + setError(QuantifierOutOfOrder); + return Quantifier(Quantifier::Error); + } + + return Quantifier(Quantifier::Greedy, min, max); + } + + default: + return Quantifier(); // No quantifier. + } +} + +Quantifier Parser::consumeQuantifier() +{ + Quantifier q = consumeGreedyQuantifier(); + + if ((q.type == Quantifier::Greedy) && (peek() == '?')) { + consume(); + q.type = Quantifier::NonGreedy; + } + + return q; +} + +bool Parser::parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert) +{ + Quantifier q = consumeQuantifier(); + + switch (q.type) { + case Quantifier::None: { + m_generator.generateCharacterClass(failures, charClass, invert); + break; + } + + case Quantifier::Greedy: { + GenerateCharacterClassFunctor functor(&charClass, invert); + m_generator.generateGreedyQuantifier(failures, functor, q.min, q.max); + break; + } + + case Quantifier::NonGreedy: { + GenerateCharacterClassFunctor functor(&charClass, invert); + m_generator.generateNonGreedyQuantifier(failures, functor, q.min, q.max); + break; + } + + case Quantifier::Error: + return false; + } + + return true; +} + +bool Parser::parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId) +{ + Quantifier q = consumeQuantifier(); + + switch (q.type) { + case Quantifier::None: { + m_generator.generateBackreference(failures, subpatternId); + break; + } + + case Quantifier::Greedy: + case Quantifier::NonGreedy: + m_generator.generateBackreferenceQuantifier(failures, q.type, subpatternId, q.min, q.max); + return true; + + case Quantifier::Error: + return false; + } + + return true; +} + +bool Parser::parseParentheses(JumpList& failures) +{ + ParenthesesType type = consumeParenthesesType(); + + // FIXME: WREC originally failed to backtrack correctly in cases such as + // "c".match(/(.*)c/). Now, most parentheses handling is disabled. For + // unsupported parentheses, we fall back on PCRE. + + switch (type) { + case Generator::Assertion: + m_generator.generateParenthesesAssertion(failures); + + if (consume() != ')') { + setError(ParenthesesUnmatched); + return false; + } + + // A quantifier after an assertion is meaningless, since assertions + // don't move index forward. So, we discard it. + consumeQuantifier(); + break; + + case Generator::InvertedAssertion: + m_generator.generateParenthesesInvertedAssertion(failures); + + if (consume() != ')') { + setError(ParenthesesUnmatched); + return false; + } + + // A quantifier after an assertion is meaningless, since assertions + // don't move index forward. So, we discard it. + consumeQuantifier(); + break; + + default: + setError(ParenthesesNotSupported); + return false; + } + + return true; +} + +bool Parser::parseCharacterClass(JumpList& failures) +{ + bool invert = false; + if (peek() == '^') { + consume(); + invert = true; + } + + CharacterClassConstructor constructor(m_ignoreCase); + + int ch; + while ((ch = peek()) != ']') { + switch (ch) { + case EndOfPattern: + setError(CharacterClassUnmatched); + return false; + + case '\\': { + consume(); + Escape escape = consumeEscape(true); + + switch (escape.type()) { + case Escape::PatternCharacter: { + int character = PatternCharacterEscape::cast(escape).character(); + if (character == '-') + constructor.flushBeforeEscapedHyphen(); + constructor.put(character); + break; + } + case Escape::CharacterClass: { + const CharacterClassEscape& characterClassEscape = CharacterClassEscape::cast(escape); + ASSERT(!characterClassEscape.invert()); + constructor.append(characterClassEscape.characterClass()); + break; + } + case Escape::Error: + return false; + case Escape::Backreference: + case Escape::WordBoundaryAssertion: { + ASSERT_NOT_REACHED(); + break; + } + } + break; + } + + default: + consume(); + constructor.put(ch); + } + } + consume(); + + // lazily catch reversed ranges ([z-a])in character classes + if (constructor.isUpsideDown()) { + setError(CharacterClassOutOfOrder); + return false; + } + + constructor.flush(); + CharacterClass charClass = constructor.charClass(); + return parseCharacterClassQuantifier(failures, charClass, invert); +} + +bool Parser::parseNonCharacterEscape(JumpList& failures, const Escape& escape) +{ + switch (escape.type()) { + case Escape::PatternCharacter: + ASSERT_NOT_REACHED(); + return false; + + case Escape::CharacterClass: + return parseCharacterClassQuantifier(failures, CharacterClassEscape::cast(escape).characterClass(), CharacterClassEscape::cast(escape).invert()); + + case Escape::Backreference: + return parseBackreferenceQuantifier(failures, BackreferenceEscape::cast(escape).subpatternId()); + + case Escape::WordBoundaryAssertion: + m_generator.generateAssertionWordBoundary(failures, WordBoundaryAssertionEscape::cast(escape).invert()); + return true; + + case Escape::Error: + return false; + } + + ASSERT_NOT_REACHED(); + return false; +} + +Escape Parser::consumeEscape(bool inCharacterClass) +{ + switch (peek()) { + case EndOfPattern: + setError(EscapeUnterminated); + return Escape(Escape::Error); + + // Assertions + case 'b': + consume(); + if (inCharacterClass) + return PatternCharacterEscape('\b'); + return WordBoundaryAssertionEscape(false); // do not invert + case 'B': + consume(); + if (inCharacterClass) + return PatternCharacterEscape('B'); + return WordBoundaryAssertionEscape(true); // invert + + // CharacterClassEscape + case 'd': + consume(); + return CharacterClassEscape(CharacterClass::digits(), false); + case 's': + consume(); + return CharacterClassEscape(CharacterClass::spaces(), false); + case 'w': + consume(); + return CharacterClassEscape(CharacterClass::wordchar(), false); + case 'D': + consume(); + return inCharacterClass + ? CharacterClassEscape(CharacterClass::nondigits(), false) + : CharacterClassEscape(CharacterClass::digits(), true); + case 'S': + consume(); + return inCharacterClass + ? CharacterClassEscape(CharacterClass::nonspaces(), false) + : CharacterClassEscape(CharacterClass::spaces(), true); + case 'W': + consume(); + return inCharacterClass + ? CharacterClassEscape(CharacterClass::nonwordchar(), false) + : CharacterClassEscape(CharacterClass::wordchar(), true); + + // DecimalEscape + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (peekDigit() > m_numSubpatterns || inCharacterClass) { + // To match Firefox, we parse an invalid backreference in the range [1-7] + // as an octal escape. + return peekDigit() > 7 ? PatternCharacterEscape('\\') : PatternCharacterEscape(consumeOctal()); + } + + int value = 0; + do { + unsigned newValue = value * 10 + peekDigit(); + if (newValue > m_numSubpatterns) + break; + value = newValue; + consume(); + } while (peekIsDigit()); + + return BackreferenceEscape(value); + } + + // Octal escape + case '0': + consume(); + return PatternCharacterEscape(consumeOctal()); + + // ControlEscape + case 'f': + consume(); + return PatternCharacterEscape('\f'); + case 'n': + consume(); + return PatternCharacterEscape('\n'); + case 'r': + consume(); + return PatternCharacterEscape('\r'); + case 't': + consume(); + return PatternCharacterEscape('\t'); + case 'v': + consume(); + return PatternCharacterEscape('\v'); + + // ControlLetter + case 'c': { + SavedState state(*this); + consume(); + + int control = consume(); + if (!isASCIIAlpha(control)) { + state.restore(); + return PatternCharacterEscape('\\'); + } + return PatternCharacterEscape(control & 31); + } + + // HexEscape + case 'x': { + consume(); + + SavedState state(*this); + int x = consumeHex(2); + if (x == -1) { + state.restore(); + return PatternCharacterEscape('x'); + } + return PatternCharacterEscape(x); + } + + // UnicodeEscape + case 'u': { + consume(); + + SavedState state(*this); + int x = consumeHex(4); + if (x == -1) { + state.restore(); + return PatternCharacterEscape('u'); + } + return PatternCharacterEscape(x); + } + + // IdentityEscape + default: + return PatternCharacterEscape(consume()); + } +} + +void Parser::parseAlternative(JumpList& failures) +{ + PatternCharacterSequence sequence(m_generator, failures); + + while (1) { + switch (peek()) { + case EndOfPattern: + case '|': + case ')': + sequence.flush(); + return; + + case '*': + case '+': + case '?': + case '{': { + Quantifier q = consumeQuantifier(); + + if (q.type == Quantifier::None) { + sequence.append(consume()); + continue; + } + + if (q.type == Quantifier::Error) + return; + + if (!sequence.size()) { + setError(QuantifierWithoutAtom); + return; + } + + sequence.flush(q); + continue; + } + + case '^': + consume(); + + sequence.flush(); + m_generator.generateAssertionBOL(failures); + continue; + + case '$': + consume(); + + sequence.flush(); + m_generator.generateAssertionEOL(failures); + continue; + + case '.': + consume(); + + sequence.flush(); + if (!parseCharacterClassQuantifier(failures, CharacterClass::newline(), true)) + return; + continue; + + case '[': + consume(); + + sequence.flush(); + if (!parseCharacterClass(failures)) + return; + continue; + + case '(': + consume(); + + sequence.flush(); + if (!parseParentheses(failures)) + return; + continue; + + case '\\': { + consume(); + + Escape escape = consumeEscape(false); + if (escape.type() == Escape::PatternCharacter) { + sequence.append(PatternCharacterEscape::cast(escape).character()); + continue; + } + + sequence.flush(); + if (!parseNonCharacterEscape(failures, escape)) + return; + continue; + } + + default: + sequence.append(consume()); + continue; + } + } +} + +/* + TOS holds index. +*/ +void Parser::parseDisjunction(JumpList& failures) +{ + parseAlternative(failures); + if (peek() != '|') + return; + + JumpList successes; + do { + consume(); + m_generator.terminateAlternative(successes, failures); + parseAlternative(failures); + } while (peek() == '|'); + + m_generator.terminateDisjunction(successes); +} + +Generator::ParenthesesType Parser::consumeParenthesesType() +{ + if (peek() != '?') + return Generator::Capturing; + consume(); + + switch (consume()) { + case ':': + return Generator::NonCapturing; + + case '=': + return Generator::Assertion; + + case '!': + return Generator::InvertedAssertion; + + default: + setError(ParenthesesTypeInvalid); + return Generator::Error; + } +} + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) |