summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/javascriptcore/JavaScriptCore/yarr
diff options
context:
space:
mode:
authorSimon Hausmann <simon.hausmann@nokia.com>2009-09-16 07:51:00 (GMT)
committerSimon Hausmann <simon.hausmann@nokia.com>2009-09-16 07:58:16 (GMT)
commit120329adb47dba60f532c1c2fd2ad0f37b812437 (patch)
treefc2311cef4b69fe7294e8f5aab27fe6af2546123 /src/3rdparty/javascriptcore/JavaScriptCore/yarr
parentce17ae5a6159d8ce3a5d2cc98f804a2debb860e5 (diff)
downloadQt-120329adb47dba60f532c1c2fd2ad0f37b812437.zip
Qt-120329adb47dba60f532c1c2fd2ad0f37b812437.tar.gz
Qt-120329adb47dba60f532c1c2fd2ad0f37b812437.tar.bz2
Separate the copy of JavaScriptCore that QtScript uses from the copy that
QtWebKit uses. This is needed to decouple QtScript from QtWebKit, as discussed in the WebKit team. Reviewed-by: Kent Hansen
Diffstat (limited to 'src/3rdparty/javascriptcore/JavaScriptCore/yarr')
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.cpp728
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.h45
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.cpp1638
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.h337
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.cpp1418
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.h91
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexParser.h854
-rw-r--r--src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexPattern.h356
8 files changed, 5467 insertions, 0 deletions
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.cpp
new file mode 100644
index 0000000..c7b3c81
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.cpp
@@ -0,0 +1,728 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "RegexCompiler.h"
+
+#include "RegexInterpreter.h"
+#include "RegexPattern.h"
+#include <wtf/Vector.h>
+
+#if ENABLE(YARR)
+
+using namespace WTF;
+
+namespace JSC { namespace Yarr {
+
+class CharacterClassConstructor {
+public:
+ CharacterClassConstructor(bool isCaseInsensitive = false)
+ : m_isCaseInsensitive(isCaseInsensitive)
+ {
+ }
+
+ void reset()
+ {
+ m_matches.clear();
+ m_ranges.clear();
+ m_matchesUnicode.clear();
+ m_rangesUnicode.clear();
+ }
+
+ void append(const CharacterClass* other)
+ {
+ for (size_t i = 0; i < other->m_matches.size(); ++i)
+ addSorted(m_matches, other->m_matches[i]);
+ for (size_t i = 0; i < other->m_ranges.size(); ++i)
+ addSortedRange(m_ranges, other->m_ranges[i].begin, other->m_ranges[i].end);
+ for (size_t i = 0; i < other->m_matchesUnicode.size(); ++i)
+ addSorted(m_matchesUnicode, other->m_matchesUnicode[i]);
+ for (size_t i = 0; i < other->m_rangesUnicode.size(); ++i)
+ addSortedRange(m_rangesUnicode, other->m_rangesUnicode[i].begin, other->m_rangesUnicode[i].end);
+ }
+
+ void putChar(UChar ch)
+ {
+ if (ch <= 0x7f) {
+ if (m_isCaseInsensitive && isASCIIAlpha(ch)) {
+ addSorted(m_matches, toASCIIUpper(ch));
+ addSorted(m_matches, toASCIILower(ch));
+ } else
+ addSorted(m_matches, ch);
+ } else {
+ UChar upper, lower;
+ if (m_isCaseInsensitive && ((upper = Unicode::toUpper(ch)) != (lower = Unicode::toLower(ch)))) {
+ addSorted(m_matchesUnicode, upper);
+ addSorted(m_matchesUnicode, lower);
+ } else
+ addSorted(m_matchesUnicode, ch);
+ }
+ }
+
+ // returns true if this character has another case, and 'ch' is the upper case form.
+ static inline bool isUnicodeUpper(UChar ch)
+ {
+ return ch != Unicode::toLower(ch);
+ }
+
+ // returns true if this character has another case, and 'ch' is the lower case form.
+ static inline bool isUnicodeLower(UChar ch)
+ {
+ return ch != Unicode::toUpper(ch);
+ }
+
+ void putRange(UChar lo, UChar hi)
+ {
+ if (lo <= 0x7f) {
+ char asciiLo = lo;
+ char asciiHi = std::min(hi, (UChar)0x7f);
+ addSortedRange(m_ranges, lo, asciiHi);
+
+ if (m_isCaseInsensitive) {
+ if ((asciiLo <= 'Z') && (asciiHi >= 'A'))
+ addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A'));
+ if ((asciiLo <= 'z') && (asciiHi >= 'a'))
+ addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a'));
+ }
+ }
+ if (hi >= 0x80) {
+ uint32_t unicodeCurr = std::max(lo, (UChar)0x80);
+ addSortedRange(m_rangesUnicode, unicodeCurr, hi);
+
+ if (m_isCaseInsensitive) {
+ while (unicodeCurr <= hi) {
+ // If the upper bound of the range (hi) is 0xffff, the increments to
+ // unicodeCurr in this loop may take it to 0x10000. This is fine
+ // (if so we won't re-enter the loop, since the loop condition above
+ // will definitely fail) - but this does mean we cannot use a UChar
+ // to represent unicodeCurr, we must use a 32-bit value instead.
+ ASSERT(unicodeCurr <= 0xffff);
+
+ if (isUnicodeUpper(unicodeCurr)) {
+ UChar lowerCaseRangeBegin = Unicode::toLower(unicodeCurr);
+ UChar lowerCaseRangeEnd = lowerCaseRangeBegin;
+ while ((++unicodeCurr <= hi) && isUnicodeUpper(unicodeCurr) && (Unicode::toLower(unicodeCurr) == (lowerCaseRangeEnd + 1)))
+ lowerCaseRangeEnd++;
+ addSortedRange(m_rangesUnicode, lowerCaseRangeBegin, lowerCaseRangeEnd);
+ } else if (isUnicodeLower(unicodeCurr)) {
+ UChar upperCaseRangeBegin = Unicode::toUpper(unicodeCurr);
+ UChar upperCaseRangeEnd = upperCaseRangeBegin;
+ while ((++unicodeCurr <= hi) && isUnicodeLower(unicodeCurr) && (Unicode::toUpper(unicodeCurr) == (upperCaseRangeEnd + 1)))
+ upperCaseRangeEnd++;
+ addSortedRange(m_rangesUnicode, upperCaseRangeBegin, upperCaseRangeEnd);
+ } else
+ ++unicodeCurr;
+ }
+ }
+ }
+ }
+
+ CharacterClass* charClass()
+ {
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_matches.append(m_matches);
+ characterClass->m_ranges.append(m_ranges);
+ characterClass->m_matchesUnicode.append(m_matchesUnicode);
+ characterClass->m_rangesUnicode.append(m_rangesUnicode);
+
+ reset();
+
+ return characterClass;
+ }
+
+private:
+ void addSorted(Vector<UChar>& matches, UChar ch)
+ {
+ unsigned pos = 0;
+ unsigned range = matches.size();
+
+ // binary chop, find position to insert char.
+ while (range) {
+ unsigned index = range >> 1;
+
+ int val = matches[pos+index] - ch;
+ if (!val)
+ return;
+ else if (val > 0)
+ range = index;
+ else {
+ pos += (index+1);
+ range -= (index+1);
+ }
+ }
+
+ if (pos == matches.size())
+ matches.append(ch);
+ else
+ matches.insert(pos, ch);
+ }
+
+ void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi)
+ {
+ unsigned end = ranges.size();
+
+ // Simple linear scan - I doubt there are that many ranges anyway...
+ // feel free to fix this with something faster (eg binary chop).
+ for (unsigned i = 0; i < end; ++i) {
+ // does the new range fall before the current position in the array
+ if (hi < ranges[i].begin) {
+ // optional optimization: concatenate appending ranges? - may not be worthwhile.
+ if (hi == (ranges[i].begin - 1)) {
+ ranges[i].begin = lo;
+ return;
+ }
+ ranges.insert(i, CharacterRange(lo, hi));
+ return;
+ }
+ // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining
+ // If the new range start at or before the end of the last range, then the overlap (if it starts one after the
+ // end of the last range they concatenate, which is just as good.
+ if (lo <= (ranges[i].end + 1)) {
+ // found an intersect! we'll replace this entry in the array.
+ ranges[i].begin = std::min(ranges[i].begin, lo);
+ ranges[i].end = std::max(ranges[i].end, hi);
+
+ // now check if the new range can subsume any subsequent ranges.
+ unsigned next = i+1;
+ // each iteration of the loop we will either remove something from the list, or break the loop.
+ while (next < ranges.size()) {
+ if (ranges[next].begin <= (ranges[i].end + 1)) {
+ // the next entry now overlaps / concatenates this one.
+ ranges[i].end = std::max(ranges[i].end, ranges[next].end);
+ ranges.remove(next);
+ } else
+ break;
+ }
+
+ return;
+ }
+ }
+
+ // CharacterRange comes after all existing ranges.
+ ranges.append(CharacterRange(lo, hi));
+ }
+
+ bool m_isCaseInsensitive;
+
+ Vector<UChar> m_matches;
+ Vector<CharacterRange> m_ranges;
+ Vector<UChar> m_matchesUnicode;
+ Vector<CharacterRange> m_rangesUnicode;
+};
+
+
+CharacterClass* newlineCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_matches.append('\n');
+ characterClass->m_matches.append('\r');
+ characterClass->m_matchesUnicode.append(0x2028);
+ characterClass->m_matchesUnicode.append(0x2029);
+
+ return characterClass;
+}
+
+CharacterClass* digitsCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_ranges.append(CharacterRange('0', '9'));
+
+ return characterClass;
+}
+
+CharacterClass* spacesCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_matches.append(' ');
+ characterClass->m_ranges.append(CharacterRange('\t', '\r'));
+ characterClass->m_matchesUnicode.append(0x00a0);
+ characterClass->m_matchesUnicode.append(0x1680);
+ characterClass->m_matchesUnicode.append(0x180e);
+ characterClass->m_matchesUnicode.append(0x2028);
+ characterClass->m_matchesUnicode.append(0x2029);
+ characterClass->m_matchesUnicode.append(0x202f);
+ characterClass->m_matchesUnicode.append(0x205f);
+ characterClass->m_matchesUnicode.append(0x3000);
+ characterClass->m_rangesUnicode.append(CharacterRange(0x2000, 0x200a));
+
+ return characterClass;
+}
+
+CharacterClass* wordcharCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_matches.append('_');
+ characterClass->m_ranges.append(CharacterRange('0', '9'));
+ characterClass->m_ranges.append(CharacterRange('A', 'Z'));
+ characterClass->m_ranges.append(CharacterRange('a', 'z'));
+
+ return characterClass;
+}
+
+CharacterClass* nondigitsCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
+ characterClass->m_ranges.append(CharacterRange('9' + 1, 0x7f));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
+
+ return characterClass;
+}
+
+CharacterClass* nonspacesCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_ranges.append(CharacterRange(0, '\t' - 1));
+ characterClass->m_ranges.append(CharacterRange('\r' + 1, ' ' - 1));
+ characterClass->m_ranges.append(CharacterRange(' ' + 1, 0x7f));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x0080, 0x009f));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x00a1, 0x167f));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x1681, 0x180d));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x180f, 0x1fff));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x200b, 0x2027));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x202a, 0x202e));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x2030, 0x205e));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x2060, 0x2fff));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x3001, 0xffff));
+
+ return characterClass;
+}
+
+CharacterClass* nonwordcharCreate()
+{
+ CharacterClass* characterClass = new CharacterClass();
+
+ characterClass->m_matches.append('`');
+ characterClass->m_ranges.append(CharacterRange(0, '0' - 1));
+ characterClass->m_ranges.append(CharacterRange('9' + 1, 'A' - 1));
+ characterClass->m_ranges.append(CharacterRange('Z' + 1, '_' - 1));
+ characterClass->m_ranges.append(CharacterRange('z' + 1, 0x7f));
+ characterClass->m_rangesUnicode.append(CharacterRange(0x80, 0xffff));
+
+ return characterClass;
+}
+
+
+class RegexPatternConstructor {
+public:
+ RegexPatternConstructor(RegexPattern& pattern)
+ : m_pattern(pattern)
+ , m_characterClassConstructor(pattern.m_ignoreCase)
+ {
+ }
+
+ ~RegexPatternConstructor()
+ {
+ }
+
+ void reset()
+ {
+ m_pattern.reset();
+ m_characterClassConstructor.reset();
+ }
+
+ void assertionBOL()
+ {
+ m_alternative->m_terms.append(PatternTerm::BOL());
+ }
+ void assertionEOL()
+ {
+ m_alternative->m_terms.append(PatternTerm::EOL());
+ }
+ void assertionWordBoundary(bool invert)
+ {
+ m_alternative->m_terms.append(PatternTerm::WordBoundary(invert));
+ }
+
+ void atomPatternCharacter(UChar ch)
+ {
+ // We handle case-insensitive checking of unicode characters which do have both
+ // cases by handling them as if they were defined using a CharacterClass.
+ if (m_pattern.m_ignoreCase && !isASCII(ch) && (Unicode::toUpper(ch) != Unicode::toLower(ch))) {
+ atomCharacterClassBegin();
+ atomCharacterClassAtom(ch);
+ atomCharacterClassEnd();
+ } else
+ m_alternative->m_terms.append(PatternTerm(ch));
+ }
+
+ void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
+ {
+ switch (classID) {
+ case DigitClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.digitsCharacterClass(), invert));
+ break;
+ case SpaceClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.spacesCharacterClass(), invert));
+ break;
+ case WordClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.wordcharCharacterClass(), invert));
+ break;
+ case NewlineClassID:
+ m_alternative->m_terms.append(PatternTerm(m_pattern.newlineCharacterClass(), invert));
+ break;
+ }
+ }
+
+ void atomCharacterClassBegin(bool invert = false)
+ {
+ m_invertCharacterClass = invert;
+ }
+
+ void atomCharacterClassAtom(UChar ch)
+ {
+ m_characterClassConstructor.putChar(ch);
+ }
+
+ void atomCharacterClassRange(UChar begin, UChar end)
+ {
+ m_characterClassConstructor.putRange(begin, end);
+ }
+
+ void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
+ {
+ ASSERT(classID != NewlineClassID);
+
+ switch (classID) {
+ case DigitClassID:
+ m_characterClassConstructor.append(invert ? m_pattern.nondigitsCharacterClass() : m_pattern.digitsCharacterClass());
+ break;
+
+ case SpaceClassID:
+ m_characterClassConstructor.append(invert ? m_pattern.nonspacesCharacterClass() : m_pattern.spacesCharacterClass());
+ break;
+
+ case WordClassID:
+ m_characterClassConstructor.append(invert ? m_pattern.nonwordcharCharacterClass() : m_pattern.wordcharCharacterClass());
+ break;
+
+ default:
+ ASSERT_NOT_REACHED();
+ }
+ }
+
+ void atomCharacterClassEnd()
+ {
+ CharacterClass* newCharacterClass = m_characterClassConstructor.charClass();
+ m_pattern.m_userCharacterClasses.append(newCharacterClass);
+ m_alternative->m_terms.append(PatternTerm(newCharacterClass, m_invertCharacterClass));
+ }
+
+ void atomParenthesesSubpatternBegin(bool capture = true)
+ {
+ unsigned subpatternId = m_pattern.m_numSubpatterns + 1;
+ if (capture)
+ m_pattern.m_numSubpatterns++;
+
+ PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
+ m_pattern.m_disjunctions.append(parenthesesDisjunction);
+ m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, capture));
+ m_alternative = parenthesesDisjunction->addNewAlternative();
+ }
+
+ void atomParentheticalAssertionBegin(bool invert = false)
+ {
+ PatternDisjunction* parenthesesDisjunction = new PatternDisjunction(m_alternative);
+ m_pattern.m_disjunctions.append(parenthesesDisjunction);
+ m_alternative->m_terms.append(PatternTerm(PatternTerm::TypeParentheticalAssertion, m_pattern.m_numSubpatterns + 1, parenthesesDisjunction, invert));
+ m_alternative = parenthesesDisjunction->addNewAlternative();
+ }
+
+ void atomParenthesesEnd()
+ {
+ ASSERT(m_alternative->m_parent);
+ ASSERT(m_alternative->m_parent->m_parent);
+ m_alternative = m_alternative->m_parent->m_parent;
+
+ m_alternative->lastTerm().parentheses.lastSubpatternId = m_pattern.m_numSubpatterns;
+ }
+
+ void atomBackReference(unsigned subpatternId)
+ {
+ ASSERT(subpatternId);
+ m_pattern.m_maxBackReference = std::max(m_pattern.m_maxBackReference, subpatternId);
+
+ if (subpatternId > m_pattern.m_numSubpatterns) {
+ m_alternative->m_terms.append(PatternTerm::ForwardReference());
+ return;
+ }
+
+ PatternAlternative* currentAlternative = m_alternative;
+ ASSERT(currentAlternative);
+
+ // Note to self: if we waited until the AST was baked, we could also remove forwards refs
+ while ((currentAlternative = currentAlternative->m_parent->m_parent)) {
+ PatternTerm& term = currentAlternative->lastTerm();
+ ASSERT((term.type == PatternTerm::TypeParenthesesSubpattern) || (term.type == PatternTerm::TypeParentheticalAssertion));
+
+ if ((term.type == PatternTerm::TypeParenthesesSubpattern) && term.invertOrCapture && (subpatternId == term.subpatternId)) {
+ m_alternative->m_terms.append(PatternTerm::ForwardReference());
+ return;
+ }
+ }
+
+ m_alternative->m_terms.append(PatternTerm(subpatternId));
+ }
+
+ PatternDisjunction* copyDisjunction(PatternDisjunction* disjunction)
+ {
+ PatternDisjunction* newDisjunction = new PatternDisjunction();
+
+ newDisjunction->m_parent = disjunction->m_parent;
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+ PatternAlternative* alternative = disjunction->m_alternatives[alt];
+ PatternAlternative* newAlternative = newDisjunction->addNewAlternative();
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i)
+ newAlternative->m_terms.append(copyTerm(alternative->m_terms[i]));
+ }
+
+ m_pattern.m_disjunctions.append(newDisjunction);
+ return newDisjunction;
+ }
+
+ PatternTerm copyTerm(PatternTerm& term)
+ {
+ if ((term.type != PatternTerm::TypeParenthesesSubpattern) && (term.type != PatternTerm::TypeParentheticalAssertion))
+ return PatternTerm(term);
+
+ PatternTerm termCopy = term;
+ termCopy.parentheses.disjunction = copyDisjunction(termCopy.parentheses.disjunction);
+ return termCopy;
+ }
+
+ void quantifyAtom(unsigned min, unsigned max, bool greedy)
+ {
+ ASSERT(min <= max);
+ ASSERT(m_alternative->m_terms.size());
+
+ if (!max) {
+ m_alternative->removeLastTerm();
+ return;
+ }
+
+ PatternTerm& term = m_alternative->lastTerm();
+ ASSERT(term.type > PatternTerm::TypeAssertionWordBoundary);
+ ASSERT((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount));
+
+ // For any assertion with a zero minimum, not matching is valid and has no effect,
+ // remove it. Otherwise, we need to match as least once, but there is no point
+ // matching more than once, so remove the quantifier. It is not entirely clear
+ // from the spec whether or not this behavior is correct, but I believe this
+ // matches Firefox. :-/
+ if (term.type == PatternTerm::TypeParentheticalAssertion) {
+ if (!min)
+ m_alternative->removeLastTerm();
+ return;
+ }
+
+ if (min == 0)
+ term.quantify(max, greedy ? QuantifierGreedy : QuantifierNonGreedy);
+ else if (min == max)
+ term.quantify(min, QuantifierFixedCount);
+ else {
+ term.quantify(min, QuantifierFixedCount);
+ m_alternative->m_terms.append(copyTerm(term));
+ // NOTE: this term is interesting from an analysis perspective, in that it can be ignored.....
+ m_alternative->lastTerm().quantify((max == UINT_MAX) ? max : max - min, greedy ? QuantifierGreedy : QuantifierNonGreedy);
+ if (m_alternative->lastTerm().type == PatternTerm::TypeParenthesesSubpattern)
+ m_alternative->lastTerm().parentheses.isCopy = true;
+ }
+ }
+
+ void disjunction()
+ {
+ m_alternative = m_alternative->m_parent->addNewAlternative();
+ }
+
+ void regexBegin()
+ {
+ m_pattern.m_body = new PatternDisjunction();
+ m_alternative = m_pattern.m_body->addNewAlternative();
+ m_pattern.m_disjunctions.append(m_pattern.m_body);
+ }
+ void regexEnd()
+ {
+ }
+ void regexError()
+ {
+ }
+
+ unsigned setupAlternativeOffsets(PatternAlternative* alternative, unsigned currentCallFrameSize, unsigned initialInputPosition)
+ {
+ alternative->m_hasFixedSize = true;
+ unsigned currentInputPosition = initialInputPosition;
+
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
+ PatternTerm& term = alternative->m_terms[i];
+
+ switch (term.type) {
+ case PatternTerm::TypeAssertionBOL:
+ case PatternTerm::TypeAssertionEOL:
+ case PatternTerm::TypeAssertionWordBoundary:
+ term.inputPosition = currentInputPosition;
+ break;
+
+ case PatternTerm::TypeBackReference:
+ term.inputPosition = currentInputPosition;
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize += RegexStackSpaceForBackTrackInfoBackReference;
+ alternative->m_hasFixedSize = false;
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypePatternCharacter:
+ term.inputPosition = currentInputPosition;
+ if (term.quantityType != QuantifierFixedCount) {
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize += RegexStackSpaceForBackTrackInfoPatternCharacter;
+ alternative->m_hasFixedSize = false;
+ } else
+ currentInputPosition += term.quantityCount;
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ term.inputPosition = currentInputPosition;
+ if (term.quantityType != QuantifierFixedCount) {
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize += RegexStackSpaceForBackTrackInfoCharacterClass;
+ alternative->m_hasFixedSize = false;
+ } else
+ currentInputPosition += term.quantityCount;
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern:
+ // Note: for fixed once parentheses we will ensure at least the minimum is available; others are on their own.
+ term.frameLocation = currentCallFrameSize;
+ if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
+ if (term.quantityType == QuantifierFixedCount) {
+ currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
+ currentInputPosition += term.parentheses.disjunction->m_minimumSize;
+ } else {
+ currentCallFrameSize += RegexStackSpaceForBackTrackInfoParenthesesOnce;
+ currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize, currentInputPosition);
+ }
+ term.inputPosition = currentInputPosition;
+ } else {
+ term.inputPosition = currentInputPosition;
+ setupDisjunctionOffsets(term.parentheses.disjunction, 0, currentInputPosition);
+ currentCallFrameSize += RegexStackSpaceForBackTrackInfoParentheses;
+ }
+ // Fixed count of 1 could be accepted, if they have a fixed size *AND* if all alternatives are of the same length.
+ alternative->m_hasFixedSize = false;
+ break;
+
+ case PatternTerm::TypeParentheticalAssertion:
+ term.inputPosition = currentInputPosition;
+ term.frameLocation = currentCallFrameSize;
+ currentCallFrameSize = setupDisjunctionOffsets(term.parentheses.disjunction, currentCallFrameSize + RegexStackSpaceForBackTrackInfoParentheticalAssertion, currentInputPosition);
+ break;
+ }
+ }
+
+ alternative->m_minimumSize = currentInputPosition - initialInputPosition;
+ return currentCallFrameSize;
+ }
+
+ unsigned setupDisjunctionOffsets(PatternDisjunction* disjunction, unsigned initialCallFrameSize, unsigned initialInputPosition)
+ {
+ if ((disjunction != m_pattern.m_body) && (disjunction->m_alternatives.size() > 1))
+ initialCallFrameSize += RegexStackSpaceForBackTrackInfoAlternative;
+
+ unsigned minimumInputSize = UINT_MAX;
+ unsigned maximumCallFrameSize = 0;
+ bool hasFixedSize = true;
+
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+ PatternAlternative* alternative = disjunction->m_alternatives[alt];
+ unsigned currentAlternativeCallFrameSize = setupAlternativeOffsets(alternative, initialCallFrameSize, initialInputPosition);
+ minimumInputSize = min(minimumInputSize, alternative->m_minimumSize);
+ maximumCallFrameSize = max(maximumCallFrameSize, currentAlternativeCallFrameSize);
+ hasFixedSize &= alternative->m_hasFixedSize;
+ }
+
+ ASSERT(minimumInputSize != UINT_MAX);
+ ASSERT(maximumCallFrameSize >= initialCallFrameSize);
+
+ disjunction->m_hasFixedSize = hasFixedSize;
+ disjunction->m_minimumSize = minimumInputSize;
+ disjunction->m_callFrameSize = maximumCallFrameSize;
+ return maximumCallFrameSize;
+ }
+
+ void setupOffsets()
+ {
+ setupDisjunctionOffsets(m_pattern.m_body, 0, 0);
+ }
+
+private:
+ RegexPattern& m_pattern;
+ PatternAlternative* m_alternative;
+ CharacterClassConstructor m_characterClassConstructor;
+ bool m_invertCharacterClass;
+};
+
+
+const char* compileRegex(const UString& patternString, RegexPattern& pattern)
+{
+ RegexPatternConstructor constructor(pattern);
+
+ if (const char* error = parse(constructor, patternString))
+ return error;
+
+ // If the pattern contains illegal backreferences reset & reparse.
+ // Quoting Netscape's "What's new in JavaScript 1.2",
+ // "Note: if the number of left parentheses is less than the number specified
+ // in \#, the \# is taken as an octal escape as described in the next row."
+ if (pattern.containsIllegalBackReference()) {
+ unsigned numSubpatterns = pattern.m_numSubpatterns;
+
+ constructor.reset();
+#ifndef NDEBUG
+ const char* error =
+#endif
+ parse(constructor, patternString, numSubpatterns);
+
+ ASSERT(!error);
+ ASSERT(numSubpatterns == pattern.m_numSubpatterns);
+ }
+
+ constructor.setupOffsets();
+
+ return false;
+};
+
+
+} }
+
+#endif
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.h b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.h
new file mode 100644
index 0000000..3ed2be9
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexCompiler.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexCompiler_h
+#define RegexCompiler_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR)
+
+#include <wtf/unicode/Unicode.h>
+#include "RegexParser.h"
+#include "RegexPattern.h"
+
+namespace JSC { namespace Yarr {
+
+const char* compileRegex(const UString& patternString, RegexPattern& pattern);
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexCompiler_h
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.cpp
new file mode 100644
index 0000000..b0aae65
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.cpp
@@ -0,0 +1,1638 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "RegexInterpreter.h"
+
+#include "RegexCompiler.h"
+#include "RegexPattern.h"
+
+#ifndef NDEBUG
+#include <stdio.h>
+#endif
+
+#if ENABLE(YARR)
+
+using namespace WTF;
+
+namespace JSC { namespace Yarr {
+
+class Interpreter {
+public:
+ struct ParenthesesDisjunctionContext;
+
+ struct BackTrackInfoPatternCharacter {
+ uintptr_t matchAmount;
+ };
+ struct BackTrackInfoCharacterClass {
+ uintptr_t matchAmount;
+ };
+ struct BackTrackInfoBackReference {
+ uintptr_t begin; // Not really needed for greedy quantifiers.
+ uintptr_t matchAmount; // Not really needed for fixed quantifiers.
+ };
+ struct BackTrackInfoAlternative {
+ uintptr_t offset;
+ };
+ struct BackTrackInfoParentheticalAssertion {
+ uintptr_t begin;
+ };
+ struct BackTrackInfoParenthesesOnce {
+ uintptr_t inParentheses;
+ };
+ struct BackTrackInfoParentheses {
+ uintptr_t matchAmount;
+ ParenthesesDisjunctionContext* lastContext;
+ uintptr_t prevBegin;
+ uintptr_t prevEnd;
+ };
+
+ static inline void appendParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack, ParenthesesDisjunctionContext* context)
+ {
+ context->next = backTrack->lastContext;
+ backTrack->lastContext = context;
+ ++backTrack->matchAmount;
+ }
+
+ static inline void popParenthesesDisjunctionContext(BackTrackInfoParentheses* backTrack)
+ {
+ ASSERT(backTrack->matchAmount);
+ ASSERT(backTrack->lastContext);
+ backTrack->lastContext = backTrack->lastContext->next;
+ --backTrack->matchAmount;
+ }
+
+ struct DisjunctionContext
+ {
+ DisjunctionContext()
+ : term(0)
+ {
+ }
+
+ void* operator new(size_t, void* where)
+ {
+ return where;
+ }
+
+ int term;
+ unsigned matchBegin;
+ unsigned matchEnd;
+ uintptr_t frame[1];
+ };
+
+ DisjunctionContext* allocDisjunctionContext(ByteDisjunction* disjunction)
+ {
+ return new(malloc(sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) DisjunctionContext();
+ }
+
+ void freeDisjunctionContext(DisjunctionContext* context)
+ {
+ free(context);
+ }
+
+ struct ParenthesesDisjunctionContext
+ {
+ ParenthesesDisjunctionContext(int* output, ByteTerm& term)
+ : next(0)
+ {
+ unsigned firstSubpatternId = term.atom.subpatternId;
+ unsigned numNestedSubpatterns = term.atom.parenthesesDisjunction->m_numSubpatterns;
+
+ for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i) {
+ subpatternBackup[i] = output[(firstSubpatternId << 1) + i];
+ output[(firstSubpatternId << 1) + i] = -1;
+ }
+
+ new(getDisjunctionContext(term)) DisjunctionContext();
+ }
+
+ void* operator new(size_t, void* where)
+ {
+ return where;
+ }
+
+ void restoreOutput(int* output, unsigned firstSubpatternId, unsigned numNestedSubpatterns)
+ {
+ for (unsigned i = 0; i < (numNestedSubpatterns << 1); ++i)
+ output[(firstSubpatternId << 1) + i] = subpatternBackup[i];
+ }
+
+ DisjunctionContext* getDisjunctionContext(ByteTerm& term)
+ {
+ return reinterpret_cast<DisjunctionContext*>(&(subpatternBackup[term.atom.parenthesesDisjunction->m_numSubpatterns << 1]));
+ }
+
+ ParenthesesDisjunctionContext* next;
+ int subpatternBackup[1];
+ };
+
+ ParenthesesDisjunctionContext* allocParenthesesDisjunctionContext(ByteDisjunction* disjunction, int* output, ByteTerm& term)
+ {
+ return new(malloc(sizeof(ParenthesesDisjunctionContext) + (((term.atom.parenthesesDisjunction->m_numSubpatterns << 1) - 1) * sizeof(int)) + sizeof(DisjunctionContext) + (disjunction->m_frameSize - 1) * sizeof(uintptr_t))) ParenthesesDisjunctionContext(output, term);
+ }
+
+ void freeParenthesesDisjunctionContext(ParenthesesDisjunctionContext* context)
+ {
+ free(context);
+ }
+
+ class InputStream {
+ public:
+ InputStream(const UChar* input, unsigned start, unsigned length)
+ : input(input)
+ , pos(start)
+ , length(length)
+ {
+ }
+
+ void next()
+ {
+ ++pos;
+ }
+
+ void rewind(unsigned amount)
+ {
+ ASSERT(pos >= amount);
+ pos -= amount;
+ }
+
+ int read()
+ {
+ ASSERT(pos < length);
+ if (pos < length)
+ return input[pos];
+ return -1;
+ }
+
+ int readChecked(int position)
+ {
+ ASSERT(position < 0);
+ ASSERT((unsigned)-position <= pos);
+ unsigned p = pos + position;
+ ASSERT(p < length);
+ return input[p];
+ }
+
+ int reread(unsigned from)
+ {
+ ASSERT(from < length);
+ return input[from];
+ }
+
+ int prev()
+ {
+ ASSERT(!(pos > length));
+ if (pos && length)
+ return input[pos - 1];
+ return -1;
+ }
+
+ unsigned getPos()
+ {
+ return pos;
+ }
+
+ void setPos(unsigned p)
+ {
+ pos = p;
+ }
+
+ bool atStart()
+ {
+ return pos == 0;
+ }
+
+ bool atEnd()
+ {
+ return pos == length;
+ }
+
+ bool checkInput(int count)
+ {
+ if ((pos + count) <= length) {
+ pos += count;
+ return true;
+ } else
+ return false;
+ }
+
+ void uncheckInput(int count)
+ {
+ pos -= count;
+ }
+
+ bool atStart(int position)
+ {
+ return (pos + position) == 0;
+ }
+
+ bool atEnd(int position)
+ {
+ return (pos + position) == length;
+ }
+
+ private:
+ const UChar* input;
+ unsigned pos;
+ unsigned length;
+ };
+
+ bool testCharacterClass(CharacterClass* characterClass, int ch)
+ {
+ if (ch & 0xFF80) {
+ for (unsigned i = 0; i < characterClass->m_matchesUnicode.size(); ++i)
+ if (ch == characterClass->m_matchesUnicode[i])
+ return true;
+ for (unsigned i = 0; i < characterClass->m_rangesUnicode.size(); ++i)
+ if ((ch >= characterClass->m_rangesUnicode[i].begin) && (ch <= characterClass->m_rangesUnicode[i].end))
+ return true;
+ } else {
+ for (unsigned i = 0; i < characterClass->m_matches.size(); ++i)
+ if (ch == characterClass->m_matches[i])
+ return true;
+ for (unsigned i = 0; i < characterClass->m_ranges.size(); ++i)
+ if ((ch >= characterClass->m_ranges[i].begin) && (ch <= characterClass->m_ranges[i].end))
+ return true;
+ }
+
+ return false;
+ }
+
+ bool tryConsumeCharacter(int testChar)
+ {
+ if (input.atEnd())
+ return false;
+
+ int ch = input.read();
+
+ if (pattern->m_ignoreCase ? ((Unicode::toLower(testChar) == ch) || (Unicode::toUpper(testChar) == ch)) : (testChar == ch)) {
+ input.next();
+ return true;
+ }
+ return false;
+ }
+
+ bool checkCharacter(int testChar, int inputPosition)
+ {
+ return testChar == input.readChecked(inputPosition);
+ }
+
+ bool checkCasedCharacter(int loChar, int hiChar, int inputPosition)
+ {
+ int ch = input.readChecked(inputPosition);
+ return (loChar == ch) || (hiChar == ch);
+ }
+
+ bool tryConsumeCharacterClass(CharacterClass* characterClass, bool invert)
+ {
+ if (input.atEnd())
+ return false;
+
+ bool match = testCharacterClass(characterClass, input.read());
+
+ if (invert)
+ match = !match;
+
+ if (match) {
+ input.next();
+ return true;
+ }
+ return false;
+ }
+
+ bool checkCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition)
+ {
+ bool match = testCharacterClass(characterClass, input.readChecked(inputPosition));
+ return invert ? !match : match;
+ }
+
+ bool tryConsumeBackReference(int matchBegin, int matchEnd, int inputOffset)
+ {
+ int matchSize = matchEnd - matchBegin;
+
+ if (!input.checkInput(matchSize))
+ return false;
+
+ for (int i = 0; i < matchSize; ++i) {
+ if (!checkCharacter(input.reread(matchBegin + i), inputOffset - matchSize + i)) {
+ input.uncheckInput(matchSize);
+ return false;
+ }
+ }
+
+ return true;
+ }
+
+ bool matchAssertionBOL(ByteTerm& term)
+ {
+ return (input.atStart(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition - 1)));
+ }
+
+ bool matchAssertionEOL(ByteTerm& term)
+ {
+ if (term.inputPosition)
+ return (input.atEnd(term.inputPosition)) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.readChecked(term.inputPosition)));
+ else
+ return (input.atEnd()) || (pattern->m_multiline && testCharacterClass(pattern->newlineCharacterClass, input.read()));
+ }
+
+ bool matchAssertionWordBoundary(ByteTerm& term)
+ {
+ bool prevIsWordchar = !input.atStart(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition - 1));
+ bool readIsWordchar;
+ if (term.inputPosition)
+ readIsWordchar = !input.atEnd(term.inputPosition) && testCharacterClass(pattern->wordcharCharacterClass, input.readChecked(term.inputPosition));
+ else
+ readIsWordchar = !input.atEnd() && testCharacterClass(pattern->wordcharCharacterClass, input.read());
+
+ bool wordBoundary = prevIsWordchar != readIsWordchar;
+ return term.invert() ? !wordBoundary : wordBoundary;
+ }
+
+ bool backtrackPatternCharacter(ByteTerm& term, DisjunctionContext* context)
+ {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.uncheckInput(1);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ ++backTrack->matchAmount;
+ if (checkCharacter(term.atom.patternCharacter, term.inputPosition - 1))
+ return true;
+ }
+ input.uncheckInput(backTrack->matchAmount);
+ break;
+ }
+
+ return false;
+ }
+
+ bool backtrackPatternCasedCharacter(ByteTerm& term, DisjunctionContext* context)
+ {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.uncheckInput(1);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ ++backTrack->matchAmount;
+ if (checkCasedCharacter(term.atom.casedCharacter.lo, term.atom.casedCharacter.hi, term.inputPosition - 1))
+ return true;
+ }
+ input.uncheckInput(backTrack->matchAmount);
+ break;
+ }
+
+ return false;
+ }
+
+ bool matchCharacterClass(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeCharacterClass);
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
+ if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition + matchAmount))
+ return false;
+ }
+ return true;
+ }
+
+ case QuantifierGreedy: {
+ unsigned matchAmount = 0;
+ while ((matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ if (!checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1)) {
+ input.uncheckInput(1);
+ break;
+ }
+ ++matchAmount;
+ }
+ backTrack->matchAmount = matchAmount;
+
+ return true;
+ }
+
+ case QuantifierNonGreedy:
+ backTrack->matchAmount = 0;
+ return true;
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ bool backtrackCharacterClass(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeCharacterClass);
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.uncheckInput(1);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && input.checkInput(1)) {
+ ++backTrack->matchAmount;
+ if (checkCharacterClass(term.atom.characterClass, term.invert(), term.inputPosition - 1))
+ return true;
+ }
+ input.uncheckInput(backTrack->matchAmount);
+ break;
+ }
+
+ return false;
+ }
+
+ bool matchBackReference(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeBackReference);
+ BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
+
+ int matchBegin = output[(term.atom.subpatternId << 1)];
+ int matchEnd = output[(term.atom.subpatternId << 1) + 1];
+ ASSERT((matchBegin == -1) == (matchEnd == -1));
+ ASSERT(matchBegin <= matchEnd);
+
+ if (matchBegin == matchEnd)
+ return true;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ backTrack->begin = input.getPos();
+ for (unsigned matchAmount = 0; matchAmount < term.atom.quantityCount; ++matchAmount) {
+ if (!tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
+ input.setPos(backTrack->begin);
+ return false;
+ }
+ }
+ return true;
+ }
+
+ case QuantifierGreedy: {
+ unsigned matchAmount = 0;
+ while ((matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition))
+ ++matchAmount;
+ backTrack->matchAmount = matchAmount;
+ return true;
+ }
+
+ case QuantifierNonGreedy:
+ backTrack->begin = input.getPos();
+ backTrack->matchAmount = 0;
+ return true;
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ bool backtrackBackReference(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeBackReference);
+ BackTrackInfoBackReference* backTrack = reinterpret_cast<BackTrackInfoBackReference*>(context->frame + term.frameLocation);
+
+ int matchBegin = output[(term.atom.subpatternId << 1)];
+ int matchEnd = output[(term.atom.subpatternId << 1) + 1];
+ ASSERT((matchBegin == -1) == (matchEnd == -1));
+ ASSERT(matchBegin <= matchEnd);
+
+ if (matchBegin == matchEnd)
+ return false;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount:
+ // for quantityCount == 1, could rewind.
+ input.setPos(backTrack->begin);
+ break;
+
+ case QuantifierGreedy:
+ if (backTrack->matchAmount) {
+ --backTrack->matchAmount;
+ input.rewind(matchEnd - matchBegin);
+ return true;
+ }
+ break;
+
+ case QuantifierNonGreedy:
+ if ((backTrack->matchAmount < term.atom.quantityCount) && tryConsumeBackReference(matchBegin, matchEnd, term.inputPosition)) {
+ ++backTrack->matchAmount;
+ return true;
+ } else
+ input.setPos(backTrack->begin);
+ break;
+ }
+
+ return false;
+ }
+
+ void recordParenthesesMatch(ByteTerm& term, ParenthesesDisjunctionContext* context)
+ {
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = context->getDisjunctionContext(term)->matchBegin + term.inputPosition;
+ output[(subpatternId << 1) + 1] = context->getDisjunctionContext(term)->matchEnd + term.inputPosition;
+ }
+ }
+ void resetMatches(ByteTerm& term, ParenthesesDisjunctionContext* context)
+ {
+ unsigned firstSubpatternId = term.atom.subpatternId;
+ unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
+ context->restoreOutput(output, firstSubpatternId, count);
+ }
+ void resetAssertionMatches(ByteTerm& term)
+ {
+ unsigned firstSubpatternId = term.atom.subpatternId;
+ unsigned count = term.atom.parenthesesDisjunction->m_numSubpatterns;
+ for (unsigned i = 0; i < (count << 1); ++i)
+ output[(firstSubpatternId << 1) + i] = -1;
+ }
+ bool parenthesesDoBacktrack(ByteTerm& term, BackTrackInfoParentheses* backTrack)
+ {
+ while (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+
+ if (matchDisjunction(term.atom.parenthesesDisjunction, context->getDisjunctionContext(term), true))
+ return true;
+
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ popParenthesesDisjunctionContext(backTrack);
+ }
+
+ return false;
+ }
+
+ bool matchParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierGreedy: {
+ // set this speculatively; if we get to the parens end this will be true.
+ backTrack->inParentheses = 1;
+ break;
+ }
+ case QuantifierNonGreedy: {
+ backTrack->inParentheses = 0;
+ context->term += term.atom.parenthesesWidth;
+ return true;
+ }
+ case QuantifierFixedCount:
+ break;
+ }
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = input.getPos() + term.inputPosition;
+ }
+
+ return true;
+ }
+
+ bool matchParenthesesOnceEnd(ByteTerm& term, DisjunctionContext*)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
+ }
+ return true;
+ }
+
+ bool backtrackParenthesesOnceBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = -1;
+ output[(subpatternId << 1) + 1] = -1;
+ }
+
+ switch (term.atom.quantityType) {
+ case QuantifierGreedy:
+ // if we backtrack to this point, there is another chance - try matching nothing.
+ ASSERT(backTrack->inParentheses);
+ backTrack->inParentheses = 0;
+ context->term += term.atom.parenthesesWidth;
+ return true;
+ case QuantifierNonGreedy:
+ ASSERT(backTrack->inParentheses);
+ case QuantifierFixedCount:
+ break;
+ }
+
+ return false;
+ }
+
+ bool backtrackParenthesesOnceEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpatternOnceEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParenthesesOnce* backTrack = reinterpret_cast<BackTrackInfoParenthesesOnce*>(context->frame + term.frameLocation);
+
+ switch (term.atom.quantityType) {
+ case QuantifierGreedy:
+ if (!backTrack->inParentheses) {
+ context->term -= term.atom.parenthesesWidth;
+ return false;
+ }
+ case QuantifierNonGreedy:
+ if (!backTrack->inParentheses) {
+ // now try to match the parens; set this speculatively.
+ backTrack->inParentheses = 1;
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1) + 1] = input.getPos() + term.inputPosition;
+ }
+ context->term -= term.atom.parenthesesWidth;
+ return true;
+ }
+ case QuantifierFixedCount:
+ break;
+ }
+
+ return false;
+ }
+
+ bool matchParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
+
+ backTrack->begin = input.getPos();
+ return true;
+ }
+
+ bool matchParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
+
+ input.setPos(backTrack->begin);
+
+ // We've reached the end of the parens; if they are inverted, this is failure.
+ if (term.invert()) {
+ context->term -= term.atom.parenthesesWidth;
+ return false;
+ }
+
+ return true;
+ }
+
+ bool backtrackParentheticalAssertionBegin(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionBegin);
+ ASSERT(term.atom.quantityCount == 1);
+
+ // We've failed to match parens; if they are inverted, this is win!
+ if (term.invert()) {
+ context->term += term.atom.parenthesesWidth;
+ return true;
+ }
+
+ return false;
+ }
+
+ bool backtrackParentheticalAssertionEnd(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParentheticalAssertionEnd);
+ ASSERT(term.atom.quantityCount == 1);
+
+ BackTrackInfoParentheticalAssertion* backTrack = reinterpret_cast<BackTrackInfoParentheticalAssertion*>(context->frame + term.frameLocation);
+
+ input.setPos(backTrack->begin);
+
+ context->term -= term.atom.parenthesesWidth;
+ return false;
+ }
+
+ bool matchParentheses(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
+
+ BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
+
+ unsigned subpatternId = term.atom.subpatternId;
+ ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
+
+ backTrack->prevBegin = output[(subpatternId << 1)];
+ backTrack->prevEnd = output[(subpatternId << 1) + 1];
+
+ backTrack->matchAmount = 0;
+ backTrack->lastContext = 0;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ // While we haven't yet reached our fixed limit,
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ // Try to do a match, and it it succeeds, add it to the list.
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ // The match failed; try to find an alternate point to carry on from.
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ if (!parenthesesDoBacktrack(term, backTrack))
+ return false;
+ }
+ }
+
+ ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ return true;
+ }
+
+ case QuantifierGreedy: {
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ break;
+ }
+ }
+
+ if (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ }
+ return true;
+ }
+
+ case QuantifierNonGreedy:
+ return true;
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ // Rules for backtracking differ depending on whether this is greedy or non-greedy.
+ //
+ // Greedy matches never should try just adding more - you should already have done
+ // the 'more' cases. Always backtrack, at least a leetle bit. However cases where
+ // you backtrack an item off the list needs checking, since we'll never have matched
+ // the one less case. Tracking forwards, still add as much as possible.
+ //
+ // Non-greedy, we've already done the one less case, so don't match on popping.
+ // We haven't done the one more case, so always try to add that.
+ //
+ bool backtrackParentheses(ByteTerm& term, DisjunctionContext* context)
+ {
+ ASSERT(term.type == ByteTerm::TypeParenthesesSubpattern);
+
+ BackTrackInfoParentheses* backTrack = reinterpret_cast<BackTrackInfoParentheses*>(context->frame + term.frameLocation);
+
+ if (term.capture()) {
+ unsigned subpatternId = term.atom.subpatternId;
+ output[(subpatternId << 1)] = backTrack->prevBegin;
+ output[(subpatternId << 1) + 1] = backTrack->prevEnd;
+ }
+
+ ByteDisjunction* disjunctionBody = term.atom.parenthesesDisjunction;
+
+ switch (term.atom.quantityType) {
+ case QuantifierFixedCount: {
+ ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+
+ ParenthesesDisjunctionContext* context = 0;
+
+ if (!parenthesesDoBacktrack(term, backTrack))
+ return false;
+
+ // While we haven't yet reached our fixed limit,
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ // Try to do a match, and it it succeeds, add it to the list.
+ context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ if (matchDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ // The match failed; try to find an alternate point to carry on from.
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ if (!parenthesesDoBacktrack(term, backTrack))
+ return false;
+ }
+ }
+
+ ASSERT(backTrack->matchAmount == term.atom.quantityCount);
+ context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ return true;
+ }
+
+ case QuantifierGreedy: {
+ if (!backTrack->matchAmount)
+ return false;
+
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
+ while (backTrack->matchAmount < term.atom.quantityCount) {
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term)))
+ appendParenthesesDisjunctionContext(backTrack, context);
+ else {
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ break;
+ }
+ }
+ } else {
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ popParenthesesDisjunctionContext(backTrack);
+ }
+
+ if (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ }
+ return true;
+ }
+
+ case QuantifierNonGreedy: {
+ // If we've not reached the limit, try to add one more match.
+ if (backTrack->matchAmount < term.atom.quantityCount) {
+ ParenthesesDisjunctionContext* context = allocParenthesesDisjunctionContext(disjunctionBody, output, term);
+ if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term))) {
+ appendParenthesesDisjunctionContext(backTrack, context);
+ recordParenthesesMatch(term, context);
+ return true;
+ } else {
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ }
+ }
+
+ // Nope - okay backtrack looking for an alternative.
+ while (backTrack->matchAmount) {
+ ParenthesesDisjunctionContext* context = backTrack->lastContext;
+ if (matchNonZeroDisjunction(disjunctionBody, context->getDisjunctionContext(term), true)) {
+ // successful backtrack! we're back in the game!
+ if (backTrack->matchAmount) {
+ context = backTrack->lastContext;
+ recordParenthesesMatch(term, context);
+ }
+ return true;
+ }
+
+ // pop a match off the stack
+ resetMatches(term, context);
+ freeParenthesesDisjunctionContext(context);
+ popParenthesesDisjunctionContext(backTrack);
+ }
+
+ return false;
+ }
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+#define MATCH_NEXT() { ++context->term; goto matchAgain; }
+#define BACKTRACK() { --context->term; goto backtrack; }
+#define currentTerm() (disjunction->terms[context->term])
+ bool matchDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+ {
+ if (btrack)
+ BACKTRACK();
+
+ context->matchBegin = input.getPos();
+ context->term = 0;
+
+ matchAgain:
+ ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
+
+ switch (currentTerm().type) {
+ case ByteTerm::TypeSubpatternBegin:
+ MATCH_NEXT();
+ case ByteTerm::TypeSubpatternEnd:
+ context->matchEnd = input.getPos();
+ return true;
+
+ case ByteTerm::TypeBodyAlternativeBegin:
+ MATCH_NEXT();
+ case ByteTerm::TypeBodyAlternativeDisjunction:
+ case ByteTerm::TypeBodyAlternativeEnd:
+ context->matchEnd = input.getPos();
+ return true;
+
+ case ByteTerm::TypeAlternativeBegin:
+ MATCH_NEXT();
+ case ByteTerm::TypeAlternativeDisjunction:
+ case ByteTerm::TypeAlternativeEnd: {
+ int offset = currentTerm().alternative.end;
+ BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
+ backTrack->offset = offset;
+ context->term += offset;
+ MATCH_NEXT();
+ }
+
+ case ByteTerm::TypeAssertionBOL:
+ if (matchAssertionBOL(currentTerm()))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeAssertionEOL:
+ if (matchAssertionEOL(currentTerm()))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeAssertionWordBoundary:
+ if (matchAssertionWordBoundary(currentTerm()))
+ MATCH_NEXT();
+ BACKTRACK();
+
+ case ByteTerm::TypePatternCharacterOnce:
+ case ByteTerm::TypePatternCharacterFixed: {
+ for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
+ if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition + matchAmount))
+ BACKTRACK();
+ }
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCharacterGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ unsigned matchAmount = 0;
+ while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
+ if (!checkCharacter(currentTerm().atom.patternCharacter, currentTerm().inputPosition - 1)) {
+ input.uncheckInput(1);
+ break;
+ }
+ ++matchAmount;
+ }
+ backTrack->matchAmount = matchAmount;
+
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCharacterNonGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ backTrack->matchAmount = 0;
+ MATCH_NEXT();
+ }
+
+ case ByteTerm::TypePatternCasedCharacterOnce:
+ case ByteTerm::TypePatternCasedCharacterFixed: {
+ for (unsigned matchAmount = 0; matchAmount < currentTerm().atom.quantityCount; ++matchAmount) {
+ if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition + matchAmount))
+ BACKTRACK();
+ }
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCasedCharacterGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ unsigned matchAmount = 0;
+ while ((matchAmount < currentTerm().atom.quantityCount) && input.checkInput(1)) {
+ if (!checkCasedCharacter(currentTerm().atom.casedCharacter.lo, currentTerm().atom.casedCharacter.hi, currentTerm().inputPosition - 1)) {
+ input.uncheckInput(1);
+ break;
+ }
+ ++matchAmount;
+ }
+ backTrack->matchAmount = matchAmount;
+
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypePatternCasedCharacterNonGreedy: {
+ BackTrackInfoPatternCharacter* backTrack = reinterpret_cast<BackTrackInfoPatternCharacter*>(context->frame + currentTerm().frameLocation);
+ backTrack->matchAmount = 0;
+ MATCH_NEXT();
+ }
+
+ case ByteTerm::TypeCharacterClass:
+ if (matchCharacterClass(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeBackReference:
+ if (matchBackReference(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpattern:
+ if (matchParentheses(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceBegin:
+ if (matchParenthesesOnceBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceEnd:
+ if (matchParenthesesOnceEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionBegin:
+ if (matchParentheticalAssertionBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionEnd:
+ if (matchParentheticalAssertionEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+
+ case ByteTerm::TypeCheckInput:
+ if (input.checkInput(currentTerm().checkInputCount))
+ MATCH_NEXT();
+ BACKTRACK();
+ }
+
+ // We should never fall-through to here.
+ ASSERT_NOT_REACHED();
+
+ backtrack:
+ ASSERT(context->term < static_cast<int>(disjunction->terms.size()));
+
+ switch (currentTerm().type) {
+ case ByteTerm::TypeSubpatternBegin:
+ return false;
+ case ByteTerm::TypeSubpatternEnd:
+ ASSERT_NOT_REACHED();
+
+ case ByteTerm::TypeBodyAlternativeBegin:
+ case ByteTerm::TypeBodyAlternativeDisjunction: {
+ int offset = currentTerm().alternative.next;
+ context->term += offset;
+ if (offset > 0)
+ MATCH_NEXT();
+
+ if (input.atEnd())
+ return false;
+
+ input.next();
+ context->matchBegin = input.getPos();
+ MATCH_NEXT();
+ }
+ case ByteTerm::TypeBodyAlternativeEnd:
+ ASSERT_NOT_REACHED();
+
+ case ByteTerm::TypeAlternativeBegin:
+ case ByteTerm::TypeAlternativeDisjunction: {
+ int offset = currentTerm().alternative.next;
+ context->term += offset;
+ if (offset > 0)
+ MATCH_NEXT();
+ BACKTRACK();
+ }
+ case ByteTerm::TypeAlternativeEnd: {
+ // We should never backtrack back into an alternative of the main body of the regex.
+ BackTrackInfoAlternative* backTrack = reinterpret_cast<BackTrackInfoAlternative*>(context->frame + currentTerm().frameLocation);
+ unsigned offset = backTrack->offset;
+ context->term -= offset;
+ BACKTRACK();
+ }
+
+ case ByteTerm::TypeAssertionBOL:
+ case ByteTerm::TypeAssertionEOL:
+ case ByteTerm::TypeAssertionWordBoundary:
+ BACKTRACK();
+
+ case ByteTerm::TypePatternCharacterOnce:
+ case ByteTerm::TypePatternCharacterFixed:
+ case ByteTerm::TypePatternCharacterGreedy:
+ case ByteTerm::TypePatternCharacterNonGreedy:
+ if (backtrackPatternCharacter(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypePatternCasedCharacterOnce:
+ case ByteTerm::TypePatternCasedCharacterFixed:
+ case ByteTerm::TypePatternCasedCharacterGreedy:
+ case ByteTerm::TypePatternCasedCharacterNonGreedy:
+ if (backtrackPatternCasedCharacter(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeCharacterClass:
+ if (backtrackCharacterClass(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeBackReference:
+ if (backtrackBackReference(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpattern:
+ if (backtrackParentheses(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceBegin:
+ if (backtrackParenthesesOnceBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParenthesesSubpatternOnceEnd:
+ if (backtrackParenthesesOnceEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionBegin:
+ if (backtrackParentheticalAssertionBegin(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+ case ByteTerm::TypeParentheticalAssertionEnd:
+ if (backtrackParentheticalAssertionEnd(currentTerm(), context))
+ MATCH_NEXT();
+ BACKTRACK();
+
+ case ByteTerm::TypeCheckInput:
+ input.uncheckInput(currentTerm().checkInputCount);
+ BACKTRACK();
+ }
+
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+
+ bool matchNonZeroDisjunction(ByteDisjunction* disjunction, DisjunctionContext* context, bool btrack = false)
+ {
+ if (matchDisjunction(disjunction, context, btrack)) {
+ while (context->matchBegin == context->matchEnd) {
+ if (!matchDisjunction(disjunction, context, true))
+ return false;
+ }
+ return true;
+ }
+
+ return false;
+ }
+
+ int interpret()
+ {
+ for (unsigned i = 0; i < ((pattern->m_body->m_numSubpatterns + 1) << 1); ++i)
+ output[i] = -1;
+
+ DisjunctionContext* context = allocDisjunctionContext(pattern->m_body.get());
+
+ if (matchDisjunction(pattern->m_body.get(), context)) {
+ output[0] = context->matchBegin;
+ output[1] = context->matchEnd;
+ }
+
+ freeDisjunctionContext(context);
+
+ return output[0];
+ }
+
+ Interpreter(BytecodePattern* pattern, int* output, const UChar* inputChar, unsigned start, unsigned length)
+ : pattern(pattern)
+ , output(output)
+ , input(inputChar, start, length)
+ {
+ }
+
+private:
+ BytecodePattern *pattern;
+ int* output;
+ InputStream input;
+};
+
+
+
+class ByteCompiler {
+ struct ParenthesesStackEntry {
+ unsigned beginTerm;
+ unsigned savedAlternativeIndex;
+ ParenthesesStackEntry(unsigned beginTerm, unsigned savedAlternativeIndex/*, unsigned subpatternId, bool capture = false*/)
+ : beginTerm(beginTerm)
+ , savedAlternativeIndex(savedAlternativeIndex)
+ {
+ }
+ };
+
+public:
+ ByteCompiler(RegexPattern& pattern)
+ : m_pattern(pattern)
+ {
+ bodyDisjunction = 0;
+ currentAlternativeIndex = 0;
+ }
+
+ BytecodePattern* compile()
+ {
+ regexBegin(m_pattern.m_numSubpatterns, m_pattern.m_body->m_callFrameSize);
+ emitDisjunction(m_pattern.m_body);
+ regexEnd();
+
+ return new BytecodePattern(bodyDisjunction, m_allParenthesesInfo, m_pattern);
+ }
+
+ void checkInput(unsigned count)
+ {
+ bodyDisjunction->terms.append(ByteTerm::CheckInput(count));
+ }
+
+ void assertionBOL(int inputPosition)
+ {
+ bodyDisjunction->terms.append(ByteTerm::BOL(inputPosition));
+ }
+
+ void assertionEOL(int inputPosition)
+ {
+ bodyDisjunction->terms.append(ByteTerm::EOL(inputPosition));
+ }
+
+ void assertionWordBoundary(bool invert, int inputPosition)
+ {
+ bodyDisjunction->terms.append(ByteTerm::WordBoundary(invert, inputPosition));
+ }
+
+ void atomPatternCharacter(UChar ch, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ if (m_pattern.m_ignoreCase) {
+ UChar lo = Unicode::toLower(ch);
+ UChar hi = Unicode::toUpper(ch);
+
+ if (lo != hi) {
+ bodyDisjunction->terms.append(ByteTerm(lo, hi, inputPosition, frameLocation, quantityCount, quantityType));
+ return;
+ }
+ }
+
+ bodyDisjunction->terms.append(ByteTerm(ch, inputPosition, frameLocation, quantityCount, quantityType));
+ }
+
+ void atomCharacterClass(CharacterClass* characterClass, bool invert, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ bodyDisjunction->terms.append(ByteTerm(characterClass, invert, inputPosition));
+
+ bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
+ bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
+ bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ }
+
+ void atomBackReference(unsigned subpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ {
+ ASSERT(subpatternId);
+
+ bodyDisjunction->terms.append(ByteTerm::BackReference(subpatternId, inputPosition));
+
+ bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityCount = quantityCount;
+ bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].atom.quantityType = quantityType;
+ bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ }
+
+ void atomParenthesesSubpatternBegin(unsigned subpatternId, bool capture, int inputPosition, unsigned frameLocation, unsigned alternativeFrameLocation)
+ {
+ int beginTerm = bodyDisjunction->terms.size();
+
+ bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpatternOnceBegin, subpatternId, capture, inputPosition));
+ bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
+ bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
+
+ m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, currentAlternativeIndex));
+ currentAlternativeIndex = beginTerm + 1;
+ }
+
+ void atomParentheticalAssertionBegin(unsigned subpatternId, bool invert, unsigned frameLocation, unsigned alternativeFrameLocation)
+ {
+ int beginTerm = bodyDisjunction->terms.size();
+
+ bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParentheticalAssertionBegin, subpatternId, invert, 0));
+ bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = frameLocation;
+ bodyDisjunction->terms.append(ByteTerm::AlternativeBegin());
+ bodyDisjunction->terms[bodyDisjunction->terms.size() - 1].frameLocation = alternativeFrameLocation;
+
+ m_parenthesesStack.append(ParenthesesStackEntry(beginTerm, currentAlternativeIndex));
+ currentAlternativeIndex = beginTerm + 1;
+ }
+
+ unsigned popParenthesesStack()
+ {
+ ASSERT(m_parenthesesStack.size());
+ int stackEnd = m_parenthesesStack.size() - 1;
+ unsigned beginTerm = m_parenthesesStack[stackEnd].beginTerm;
+ currentAlternativeIndex = m_parenthesesStack[stackEnd].savedAlternativeIndex;
+ m_parenthesesStack.shrink(stackEnd);
+
+ ASSERT(beginTerm < bodyDisjunction->terms.size());
+ ASSERT(currentAlternativeIndex < bodyDisjunction->terms.size());
+
+ return beginTerm;
+ }
+
+#ifndef NDEBUG
+ void dumpDisjunction(ByteDisjunction* disjunction)
+ {
+ printf("ByteDisjunction(%p):\n\t", disjunction);
+ for (unsigned i = 0; i < disjunction->terms.size(); ++i)
+ printf("{ %d } ", disjunction->terms[i].type);
+ printf("\n");
+ }
+#endif
+
+ void closeAlternative(int beginTerm)
+ {
+ int origBeginTerm = beginTerm;
+ ASSERT(bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeBegin);
+ int endIndex = bodyDisjunction->terms.size();
+
+ unsigned frameLocation = bodyDisjunction->terms[beginTerm].frameLocation;
+
+ if (!bodyDisjunction->terms[beginTerm].alternative.next)
+ bodyDisjunction->terms.remove(beginTerm);
+ else {
+ while (bodyDisjunction->terms[beginTerm].alternative.next) {
+ beginTerm += bodyDisjunction->terms[beginTerm].alternative.next;
+ ASSERT(bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeAlternativeDisjunction);
+ bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
+ bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
+ }
+
+ bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
+
+ bodyDisjunction->terms.append(ByteTerm::AlternativeEnd());
+ bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
+ }
+ }
+
+ void closeBodyAlternative()
+ {
+ int beginTerm = 0;
+ int origBeginTerm = 0;
+ ASSERT(bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeBegin);
+ int endIndex = bodyDisjunction->terms.size();
+
+ unsigned frameLocation = bodyDisjunction->terms[beginTerm].frameLocation;
+
+ while (bodyDisjunction->terms[beginTerm].alternative.next) {
+ beginTerm += bodyDisjunction->terms[beginTerm].alternative.next;
+ ASSERT(bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeBodyAlternativeDisjunction);
+ bodyDisjunction->terms[beginTerm].alternative.end = endIndex - beginTerm;
+ bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
+ }
+
+ bodyDisjunction->terms[beginTerm].alternative.next = origBeginTerm - beginTerm;
+
+ bodyDisjunction->terms.append(ByteTerm::BodyAlternativeEnd());
+ bodyDisjunction->terms[endIndex].frameLocation = frameLocation;
+ }
+
+ void atomParenthesesEnd(bool doInline, unsigned lastSubpatternId, int inputPosition, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType, unsigned callFrameSize = 0)
+ {
+ unsigned beginTerm = popParenthesesStack();
+ closeAlternative(beginTerm + 1);
+ unsigned endTerm = bodyDisjunction->terms.size();
+
+ bool isAssertion = bodyDisjunction->terms[beginTerm].type == ByteTerm::TypeParentheticalAssertionBegin;
+ bool invertOrCapture = bodyDisjunction->terms[beginTerm].invertOrCapture;
+ unsigned subpatternId = bodyDisjunction->terms[beginTerm].atom.subpatternId;
+
+ bodyDisjunction->terms.append(ByteTerm(isAssertion ? ByteTerm::TypeParentheticalAssertionEnd : ByteTerm::TypeParenthesesSubpatternOnceEnd, subpatternId, invertOrCapture, inputPosition));
+ bodyDisjunction->terms[beginTerm].atom.parenthesesWidth = endTerm - beginTerm;
+ bodyDisjunction->terms[endTerm].atom.parenthesesWidth = endTerm - beginTerm;
+ bodyDisjunction->terms[endTerm].frameLocation = frameLocation;
+
+ if (doInline) {
+ bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+ bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+ bodyDisjunction->terms[endTerm].atom.quantityCount = quantityCount;
+ bodyDisjunction->terms[endTerm].atom.quantityType = quantityType;
+ } else {
+ ByteTerm& parenthesesBegin = bodyDisjunction->terms[beginTerm];
+ ASSERT(parenthesesBegin.type == ByteTerm::TypeParenthesesSubpatternOnceBegin);
+
+ bool invertOrCapture = parenthesesBegin.invertOrCapture;
+ unsigned subpatternId = parenthesesBegin.atom.subpatternId;
+
+ unsigned numSubpatterns = lastSubpatternId - subpatternId + 1;
+ ByteDisjunction* parenthesesDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
+
+ parenthesesDisjunction->terms.append(ByteTerm::SubpatternBegin());
+ for (unsigned termInParentheses = beginTerm + 1; termInParentheses < endTerm; ++termInParentheses)
+ parenthesesDisjunction->terms.append(bodyDisjunction->terms[termInParentheses]);
+ parenthesesDisjunction->terms.append(ByteTerm::SubpatternEnd());
+
+ bodyDisjunction->terms.shrink(beginTerm);
+
+ m_allParenthesesInfo.append(parenthesesDisjunction);
+ bodyDisjunction->terms.append(ByteTerm(ByteTerm::TypeParenthesesSubpattern, subpatternId, parenthesesDisjunction, invertOrCapture, inputPosition));
+
+ bodyDisjunction->terms[beginTerm].atom.quantityCount = quantityCount;
+ bodyDisjunction->terms[beginTerm].atom.quantityType = quantityType;
+ bodyDisjunction->terms[beginTerm].frameLocation = frameLocation;
+ }
+ }
+
+ void regexBegin(unsigned numSubpatterns, unsigned callFrameSize)
+ {
+ bodyDisjunction = new ByteDisjunction(numSubpatterns, callFrameSize);
+ bodyDisjunction->terms.append(ByteTerm::BodyAlternativeBegin());
+ bodyDisjunction->terms[0].frameLocation = 0;
+ currentAlternativeIndex = 0;
+ }
+
+ void regexEnd()
+ {
+ closeBodyAlternative();
+ }
+
+ void alterantiveBodyDisjunction()
+ {
+ int newAlternativeIndex = bodyDisjunction->terms.size();
+ bodyDisjunction->terms[currentAlternativeIndex].alternative.next = newAlternativeIndex - currentAlternativeIndex;
+ bodyDisjunction->terms.append(ByteTerm::BodyAlternativeDisjunction());
+
+ currentAlternativeIndex = newAlternativeIndex;
+ }
+
+ void alterantiveDisjunction()
+ {
+ int newAlternativeIndex = bodyDisjunction->terms.size();
+ bodyDisjunction->terms[currentAlternativeIndex].alternative.next = newAlternativeIndex - currentAlternativeIndex;
+ bodyDisjunction->terms.append(ByteTerm::AlternativeDisjunction());
+
+ currentAlternativeIndex = newAlternativeIndex;
+ }
+
+ void emitDisjunction(PatternDisjunction* disjunction, unsigned inputCountAlreadyChecked = 0, unsigned parenthesesInputCountAlreadyChecked = 0)
+ {
+ for (unsigned alt = 0; alt < disjunction->m_alternatives.size(); ++alt) {
+ unsigned currentCountAlreadyChecked = inputCountAlreadyChecked;
+
+ if (alt) {
+ if (disjunction == m_pattern.m_body)
+ alterantiveBodyDisjunction();
+ else
+ alterantiveDisjunction();
+ }
+
+ PatternAlternative* alternative = disjunction->m_alternatives[alt];
+ unsigned minimumSize = alternative->m_minimumSize;
+
+ ASSERT(minimumSize >= parenthesesInputCountAlreadyChecked);
+ unsigned countToCheck = minimumSize - parenthesesInputCountAlreadyChecked;
+ if (countToCheck)
+ checkInput(countToCheck);
+ currentCountAlreadyChecked += countToCheck;
+
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
+ PatternTerm& term = alternative->m_terms[i];
+
+ switch (term.type) {
+ case PatternTerm::TypeAssertionBOL:
+ assertionBOL(term.inputPosition - currentCountAlreadyChecked);
+ break;
+
+ case PatternTerm::TypeAssertionEOL:
+ assertionEOL(term.inputPosition - currentCountAlreadyChecked);
+ break;
+
+ case PatternTerm::TypeAssertionWordBoundary:
+ assertionWordBoundary(term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked);
+ break;
+
+ case PatternTerm::TypePatternCharacter:
+ atomPatternCharacter(term.patternCharacter, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ atomCharacterClass(term.characterClass, term.invertOrCapture, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+
+ case PatternTerm::TypeBackReference:
+ atomBackReference(term.subpatternId, term.inputPosition - currentCountAlreadyChecked, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern: {
+ unsigned disjunctionAlreadyCheckedCount = 0;
+ if ((term.quantityCount == 1) && !term.parentheses.isCopy) {
+ if (term.quantityType == QuantifierFixedCount) {
+ disjunctionAlreadyCheckedCount = term.parentheses.disjunction->m_minimumSize;
+ unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+ atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, term.parentheses.disjunction->m_minimumSize);
+ atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
+ } else {
+ unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+ atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, term.frameLocation + RegexStackSpaceForBackTrackInfoParenthesesOnce);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
+ atomParenthesesEnd(true, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
+ }
+ } else {
+ unsigned delegateEndInputOffset = term.inputPosition - currentCountAlreadyChecked;
+ atomParenthesesSubpatternBegin(term.parentheses.subpatternId, term.invertOrCapture, delegateEndInputOffset - disjunctionAlreadyCheckedCount, term.frameLocation, 0);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
+ atomParenthesesEnd(false, term.parentheses.lastSubpatternId, delegateEndInputOffset, term.frameLocation, term.quantityCount, term.quantityType, term.parentheses.disjunction->m_callFrameSize);
+ }
+ break;
+ }
+
+ case PatternTerm::TypeParentheticalAssertion: {
+ unsigned alternativeFrameLocation = term.inputPosition + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
+
+ atomParentheticalAssertionBegin(term.parentheses.subpatternId, term.invertOrCapture, term.frameLocation, alternativeFrameLocation);
+ emitDisjunction(term.parentheses.disjunction, currentCountAlreadyChecked, 0);
+ atomParenthesesEnd(true, term.parentheses.lastSubpatternId, 0, term.frameLocation, term.quantityCount, term.quantityType);
+ break;
+ }
+ }
+ }
+ }
+ }
+
+private:
+ RegexPattern& m_pattern;
+ ByteDisjunction* bodyDisjunction;
+ unsigned currentAlternativeIndex;
+ Vector<ParenthesesStackEntry> m_parenthesesStack;
+ Vector<ByteDisjunction*> m_allParenthesesInfo;
+};
+
+
+BytecodePattern* byteCompileRegex(const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
+{
+ RegexPattern pattern(ignoreCase, multiline);
+
+ if ((error = compileRegex(patternString, pattern)))
+ return 0;
+
+ numSubpatterns = pattern.m_numSubpatterns;
+
+ return ByteCompiler(pattern).compile();
+}
+
+int interpretRegex(BytecodePattern* regex, const UChar* input, unsigned start, unsigned length, int* output)
+{
+ return Interpreter(regex, output, input, start, length).interpret();
+}
+
+
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoPatternCharacter) == (RegexStackSpaceForBackTrackInfoPatternCharacter * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoPatternCharacter);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoCharacterClass) == (RegexStackSpaceForBackTrackInfoCharacterClass * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoCharacterClass);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoBackReference) == (RegexStackSpaceForBackTrackInfoBackReference * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoBackReference);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoAlternative) == (RegexStackSpaceForBackTrackInfoAlternative * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoAlternative);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheticalAssertion) == (RegexStackSpaceForBackTrackInfoParentheticalAssertion * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheticalAssertion);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParenthesesOnce) == (RegexStackSpaceForBackTrackInfoParenthesesOnce * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParenthesesOnce);
+COMPILE_ASSERT(sizeof(Interpreter::BackTrackInfoParentheses) == (RegexStackSpaceForBackTrackInfoParentheses * sizeof(uintptr_t)), CheckRegexStackSpaceForBackTrackInfoParentheses);
+
+
+} }
+
+#endif
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.h b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.h
new file mode 100644
index 0000000..a8c122a
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexInterpreter.h
@@ -0,0 +1,337 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexInterpreter_h
+#define RegexInterpreter_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR)
+
+#include <wtf/unicode/Unicode.h>
+#include "RegexParser.h"
+#include "RegexPattern.h"
+
+namespace JSC { namespace Yarr {
+
+class ByteDisjunction;
+
+struct ByteTerm {
+ enum Type {
+ TypeBodyAlternativeBegin,
+ TypeBodyAlternativeDisjunction,
+ TypeBodyAlternativeEnd,
+ TypeAlternativeBegin,
+ TypeAlternativeDisjunction,
+ TypeAlternativeEnd,
+ TypeSubpatternBegin,
+ TypeSubpatternEnd,
+ TypeAssertionBOL,
+ TypeAssertionEOL,
+ TypeAssertionWordBoundary,
+ TypePatternCharacterOnce,
+ TypePatternCharacterFixed,
+ TypePatternCharacterGreedy,
+ TypePatternCharacterNonGreedy,
+ TypePatternCasedCharacterOnce,
+ TypePatternCasedCharacterFixed,
+ TypePatternCasedCharacterGreedy,
+ TypePatternCasedCharacterNonGreedy,
+ TypeCharacterClass,
+ TypeBackReference,
+ TypeParenthesesSubpattern,
+ TypeParenthesesSubpatternOnceBegin,
+ TypeParenthesesSubpatternOnceEnd,
+ TypeParentheticalAssertionBegin,
+ TypeParentheticalAssertionEnd,
+ TypeCheckInput,
+ } type;
+ bool invertOrCapture;
+ union {
+ struct {
+ union {
+ UChar patternCharacter;
+ struct {
+ UChar lo;
+ UChar hi;
+ } casedCharacter;
+ CharacterClass* characterClass;
+ unsigned subpatternId;
+ };
+ union {
+ ByteDisjunction* parenthesesDisjunction;
+ unsigned parenthesesWidth;
+ };
+ QuantifierType quantityType;
+ unsigned quantityCount;
+ } atom;
+ struct {
+ int next;
+ int end;
+ } alternative;
+ unsigned checkInputCount;
+ };
+ unsigned frameLocation;
+ int inputPosition;
+
+ ByteTerm(UChar ch, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ : frameLocation(frameLocation)
+ {
+ switch (quantityType) {
+ case QuantifierFixedCount:
+ type = (quantityCount == 1) ? ByteTerm::TypePatternCharacterOnce : ByteTerm::TypePatternCharacterFixed;
+ break;
+ case QuantifierGreedy:
+ type = ByteTerm::TypePatternCharacterGreedy;
+ break;
+ case QuantifierNonGreedy:
+ type = ByteTerm::TypePatternCharacterNonGreedy;
+ break;
+ }
+
+ atom.patternCharacter = ch;
+ atom.quantityType = quantityType;
+ atom.quantityCount = quantityCount;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(UChar lo, UChar hi, int inputPos, unsigned frameLocation, unsigned quantityCount, QuantifierType quantityType)
+ : frameLocation(frameLocation)
+ {
+ switch (quantityType) {
+ case QuantifierFixedCount:
+ type = (quantityCount == 1) ? ByteTerm::TypePatternCasedCharacterOnce : ByteTerm::TypePatternCasedCharacterFixed;
+ break;
+ case QuantifierGreedy:
+ type = ByteTerm::TypePatternCasedCharacterGreedy;
+ break;
+ case QuantifierNonGreedy:
+ type = ByteTerm::TypePatternCasedCharacterNonGreedy;
+ break;
+ }
+
+ atom.casedCharacter.lo = lo;
+ atom.casedCharacter.hi = hi;
+ atom.quantityType = quantityType;
+ atom.quantityCount = quantityCount;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(CharacterClass* characterClass, bool invert, int inputPos)
+ : type(ByteTerm::TypeCharacterClass)
+ , invertOrCapture(invert)
+ {
+ atom.characterClass = characterClass;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(Type type, unsigned subpatternId, ByteDisjunction* parenthesesInfo, bool invertOrCapture, int inputPos)
+ : type(type)
+ , invertOrCapture(invertOrCapture)
+ {
+ atom.subpatternId = subpatternId;
+ atom.parenthesesDisjunction = parenthesesInfo;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ ByteTerm(Type type, bool invert = false)
+ : type(type)
+ , invertOrCapture(invert)
+ {
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ }
+
+ ByteTerm(Type type, unsigned subpatternId, bool invertOrCapture, int inputPos)
+ : type(type)
+ , invertOrCapture(invertOrCapture)
+ {
+ atom.subpatternId = subpatternId;
+ atom.quantityType = QuantifierFixedCount;
+ atom.quantityCount = 1;
+ inputPosition = inputPos;
+ }
+
+ static ByteTerm BOL(int inputPos)
+ {
+ ByteTerm term(TypeAssertionBOL);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm CheckInput(unsigned count)
+ {
+ ByteTerm term(TypeCheckInput);
+ term.checkInputCount = count;
+ return term;
+ }
+
+ static ByteTerm EOL(int inputPos)
+ {
+ ByteTerm term(TypeAssertionEOL);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm WordBoundary(bool invert, int inputPos)
+ {
+ ByteTerm term(TypeAssertionWordBoundary, invert);
+ term.inputPosition = inputPos;
+ return term;
+ }
+
+ static ByteTerm BackReference(unsigned subpatternId, int inputPos)
+ {
+ return ByteTerm(TypeBackReference, subpatternId, false, inputPos);
+ }
+
+ static ByteTerm BodyAlternativeBegin()
+ {
+ ByteTerm term(TypeBodyAlternativeBegin);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm BodyAlternativeDisjunction()
+ {
+ ByteTerm term(TypeBodyAlternativeDisjunction);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm BodyAlternativeEnd()
+ {
+ ByteTerm term(TypeBodyAlternativeEnd);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm AlternativeBegin()
+ {
+ ByteTerm term(TypeAlternativeBegin);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm AlternativeDisjunction()
+ {
+ ByteTerm term(TypeAlternativeDisjunction);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm AlternativeEnd()
+ {
+ ByteTerm term(TypeAlternativeEnd);
+ term.alternative.next = 0;
+ term.alternative.end = 0;
+ return term;
+ }
+
+ static ByteTerm SubpatternBegin()
+ {
+ return ByteTerm(TypeSubpatternBegin);
+ }
+
+ static ByteTerm SubpatternEnd()
+ {
+ return ByteTerm(TypeSubpatternEnd);
+ }
+
+ bool invert()
+ {
+ return invertOrCapture;
+ }
+
+ bool capture()
+ {
+ return invertOrCapture;
+ }
+};
+
+class ByteDisjunction {
+public:
+ ByteDisjunction(unsigned numSubpatterns, unsigned frameSize)
+ : m_numSubpatterns(numSubpatterns)
+ , m_frameSize(frameSize)
+ {
+ }
+
+ Vector<ByteTerm> terms;
+ unsigned m_numSubpatterns;
+ unsigned m_frameSize;
+};
+
+struct BytecodePattern {
+ BytecodePattern(ByteDisjunction* body, Vector<ByteDisjunction*> allParenthesesInfo, RegexPattern& pattern)
+ : m_body(body)
+ , m_ignoreCase(pattern.m_ignoreCase)
+ , m_multiline(pattern.m_multiline)
+ {
+ newlineCharacterClass = pattern.newlineCharacterClass();
+ wordcharCharacterClass = pattern.wordcharCharacterClass();
+
+ m_allParenthesesInfo.append(allParenthesesInfo);
+ m_userCharacterClasses.append(pattern.m_userCharacterClasses);
+ // 'Steal' the RegexPattern's CharacterClasses! We clear its
+ // array, so that it won't delete them on destruction. We'll
+ // take responsibility for that.
+ pattern.m_userCharacterClasses.clear();
+ }
+
+ ~BytecodePattern()
+ {
+ deleteAllValues(m_allParenthesesInfo);
+ deleteAllValues(m_userCharacterClasses);
+ }
+
+ OwnPtr<ByteDisjunction> m_body;
+ bool m_ignoreCase;
+ bool m_multiline;
+
+ CharacterClass* newlineCharacterClass;
+ CharacterClass* wordcharCharacterClass;
+private:
+ Vector<ByteDisjunction*> m_allParenthesesInfo;
+ Vector<CharacterClass*> m_userCharacterClasses;
+};
+
+BytecodePattern* byteCompileRegex(const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
+int interpretRegex(BytecodePattern* v_regex, const UChar* input, unsigned start, unsigned length, int* output);
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexInterpreter_h
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.cpp
new file mode 100644
index 0000000..663a524
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.cpp
@@ -0,0 +1,1418 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "RegexJIT.h"
+
+#include "ASCIICType.h"
+#include "JSGlobalData.h"
+#include "LinkBuffer.h"
+#include "MacroAssembler.h"
+#include "RegexCompiler.h"
+
+#include "pcre.h" // temporary, remove when fallback is removed.
+
+#if ENABLE(YARR_JIT)
+
+using namespace WTF;
+
+namespace JSC { namespace Yarr {
+
+
+class RegexGenerator : private MacroAssembler {
+ friend void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline);
+
+#if PLATFORM(ARM)
+ static const RegisterID input = ARM::r0;
+ static const RegisterID index = ARM::r1;
+ static const RegisterID length = ARM::r2;
+ static const RegisterID output = ARM::r4;
+
+ static const RegisterID regT0 = ARM::r5;
+ static const RegisterID regT1 = ARM::r6;
+
+ static const RegisterID returnRegister = ARM::r0;
+#elif PLATFORM(X86)
+ static const RegisterID input = X86::eax;
+ static const RegisterID index = X86::edx;
+ static const RegisterID length = X86::ecx;
+ static const RegisterID output = X86::edi;
+
+ static const RegisterID regT0 = X86::ebx;
+ static const RegisterID regT1 = X86::esi;
+
+ static const RegisterID returnRegister = X86::eax;
+#elif PLATFORM(X86_64)
+ static const RegisterID input = X86::edi;
+ static const RegisterID index = X86::esi;
+ static const RegisterID length = X86::edx;
+ static const RegisterID output = X86::ecx;
+
+ static const RegisterID regT0 = X86::eax;
+ static const RegisterID regT1 = X86::ebx;
+
+ static const RegisterID returnRegister = X86::eax;
+#endif
+
+ void optimizeAlternative(PatternAlternative* alternative)
+ {
+ if (!alternative->m_terms.size())
+ return;
+
+ for (unsigned i = 0; i < alternative->m_terms.size() - 1; ++i) {
+ PatternTerm& term = alternative->m_terms[i];
+ PatternTerm& nextTerm = alternative->m_terms[i + 1];
+
+ if ((term.type == PatternTerm::TypeCharacterClass)
+ && (term.quantityType == QuantifierFixedCount)
+ && (nextTerm.type == PatternTerm::TypePatternCharacter)
+ && (nextTerm.quantityType == QuantifierFixedCount)) {
+ PatternTerm termCopy = term;
+ alternative->m_terms[i] = nextTerm;
+ alternative->m_terms[i + 1] = termCopy;
+ }
+ }
+ }
+
+ void matchCharacterClassRange(RegisterID character, JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount)
+ {
+ do {
+ // pick which range we're going to generate
+ int which = count >> 1;
+ char lo = ranges[which].begin;
+ char hi = ranges[which].end;
+
+ // check if there are any ranges or matches below lo. If not, just jl to failure -
+ // if there is anything else to check, check that first, if it falls through jmp to failure.
+ if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
+ Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
+
+ // generate code for all ranges before this one
+ if (which)
+ matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
+
+ while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) {
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex])));
+ ++*matchIndex;
+ }
+ failures.append(jump());
+
+ loOrAbove.link(this);
+ } else if (which) {
+ Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo));
+
+ matchCharacterClassRange(character, failures, matchDest, ranges, which, matchIndex, matches, matchCount);
+ failures.append(jump());
+
+ loOrAbove.link(this);
+ } else
+ failures.append(branch32(LessThan, character, Imm32((unsigned short)lo)));
+
+ while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi))
+ ++*matchIndex;
+
+ matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi)));
+ // fall through to here, the value is above hi.
+
+ // shuffle along & loop around if there are any more matches to handle.
+ unsigned next = which + 1;
+ ranges += next;
+ count -= next;
+ } while (count);
+ }
+
+ void matchCharacterClass(RegisterID character, JumpList& matchDest, const CharacterClass* charClass)
+ {
+ Jump unicodeFail;
+ if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size()) {
+ Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f));
+
+ if (charClass->m_matchesUnicode.size()) {
+ for (unsigned i = 0; i < charClass->m_matchesUnicode.size(); ++i) {
+ UChar ch = charClass->m_matchesUnicode[i];
+ matchDest.append(branch32(Equal, character, Imm32(ch)));
+ }
+ }
+
+ if (charClass->m_rangesUnicode.size()) {
+ for (unsigned i = 0; i < charClass->m_rangesUnicode.size(); ++i) {
+ UChar lo = charClass->m_rangesUnicode[i].begin;
+ UChar hi = charClass->m_rangesUnicode[i].end;
+
+ Jump below = branch32(LessThan, character, Imm32(lo));
+ matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi)));
+ below.link(this);
+ }
+ }
+
+ unicodeFail = jump();
+ isAscii.link(this);
+ }
+
+ if (charClass->m_ranges.size()) {
+ unsigned matchIndex = 0;
+ JumpList failures;
+ matchCharacterClassRange(character, failures, matchDest, charClass->m_ranges.begin(), charClass->m_ranges.size(), &matchIndex, charClass->m_matches.begin(), charClass->m_matches.size());
+ while (matchIndex < charClass->m_matches.size())
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass->m_matches[matchIndex++])));
+
+ failures.link(this);
+ } else if (charClass->m_matches.size()) {
+ // optimization: gather 'a','A' etc back together, can mask & test once.
+ Vector<char> matchesAZaz;
+
+ for (unsigned i = 0; i < charClass->m_matches.size(); ++i) {
+ char ch = charClass->m_matches[i];
+ if (m_pattern.m_ignoreCase) {
+ if (isASCIILower(ch)) {
+ matchesAZaz.append(ch);
+ continue;
+ }
+ if (isASCIIUpper(ch))
+ continue;
+ }
+ matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch)));
+ }
+
+ if (unsigned countAZaz = matchesAZaz.size()) {
+ or32(Imm32(32), character);
+ for (unsigned i = 0; i < countAZaz; ++i)
+ matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i])));
+ }
+ }
+
+ if (charClass->m_matchesUnicode.size() || charClass->m_rangesUnicode.size())
+ unicodeFail.link(this);
+ }
+
+ // Jumps if input not available; will have (incorrectly) incremented already!
+ Jump jumpIfNoAvailableInput(unsigned countToCheck)
+ {
+ add32(Imm32(countToCheck), index);
+ return branch32(Above, index, length);
+ }
+
+ Jump jumpIfAvailableInput(unsigned countToCheck)
+ {
+ add32(Imm32(countToCheck), index);
+ return branch32(BelowOrEqual, index, length);
+ }
+
+ Jump checkInput()
+ {
+ return branch32(BelowOrEqual, index, length);
+ }
+
+ Jump atEndOfInput()
+ {
+ return branch32(Equal, index, length);
+ }
+
+ Jump notAtEndOfInput()
+ {
+ return branch32(NotEqual, index, length);
+ }
+
+ Jump jumpIfCharEquals(UChar ch, int inputPosition)
+ {
+ return branch16(Equal, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
+ }
+
+ Jump jumpIfCharNotEquals(UChar ch, int inputPosition)
+ {
+ return branch16(NotEqual, BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), Imm32(ch));
+ }
+
+ void readCharacter(int inputPosition, RegisterID reg)
+ {
+ load16(BaseIndex(input, index, TimesTwo, inputPosition * sizeof(UChar)), reg);
+ }
+
+ void storeToFrame(RegisterID reg, unsigned frameLocation)
+ {
+ poke(reg, frameLocation);
+ }
+
+ void storeToFrame(Imm32 imm, unsigned frameLocation)
+ {
+ poke(imm, frameLocation);
+ }
+
+ DataLabelPtr storeToFrameWithPatch(unsigned frameLocation)
+ {
+ return storePtrWithPatch(ImmPtr(0), Address(stackPointerRegister, frameLocation * sizeof(void*)));
+ }
+
+ void loadFromFrame(unsigned frameLocation, RegisterID reg)
+ {
+ peek(reg, frameLocation);
+ }
+
+ void loadFromFrameAndJump(unsigned frameLocation)
+ {
+ jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
+ }
+
+ struct AlternativeBacktrackRecord {
+ DataLabelPtr dataLabel;
+ Label backtrackLocation;
+
+ AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
+ : dataLabel(dataLabel)
+ , backtrackLocation(backtrackLocation)
+ {
+ }
+ };
+
+ struct TermGenerationState {
+ TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
+ : disjunction(disjunction)
+ , checkedTotal(checkedTotal)
+ {
+ }
+
+ void resetAlternative()
+ {
+ isBackTrackGenerated = false;
+ alt = 0;
+ }
+ bool alternativeValid()
+ {
+ return alt < disjunction->m_alternatives.size();
+ }
+ void nextAlternative()
+ {
+ ++alt;
+ }
+ PatternAlternative* alternative()
+ {
+ return disjunction->m_alternatives[alt];
+ }
+
+ void resetTerm()
+ {
+ ASSERT(alternativeValid());
+ t = 0;
+ }
+ bool termValid()
+ {
+ ASSERT(alternativeValid());
+ return t < alternative()->m_terms.size();
+ }
+ void nextTerm()
+ {
+ ASSERT(alternativeValid());
+ ++t;
+ }
+ PatternTerm& term()
+ {
+ ASSERT(alternativeValid());
+ return alternative()->m_terms[t];
+ }
+
+ PatternTerm& lookaheadTerm()
+ {
+ ASSERT(alternativeValid());
+ ASSERT((t + 1) < alternative()->m_terms.size());
+ return alternative()->m_terms[t + 1];
+ }
+ bool isSinglePatternCharacterLookaheadTerm()
+ {
+ ASSERT(alternativeValid());
+ return ((t + 1) < alternative()->m_terms.size())
+ && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
+ && (lookaheadTerm().quantityType == QuantifierFixedCount)
+ && (lookaheadTerm().quantityCount == 1);
+ }
+
+ int inputOffset()
+ {
+ return term().inputPosition - checkedTotal;
+ }
+
+ void jumpToBacktrack(Jump jump, MacroAssembler* masm)
+ {
+ if (isBackTrackGenerated)
+ jump.linkTo(backtrackLabel, masm);
+ else
+ backTrackJumps.append(jump);
+ }
+ void jumpToBacktrack(JumpList& jumps, MacroAssembler* masm)
+ {
+ if (isBackTrackGenerated)
+ jumps.linkTo(backtrackLabel, masm);
+ else
+ backTrackJumps.append(jumps);
+ }
+ bool plantJumpToBacktrackIfExists(MacroAssembler* masm)
+ {
+ if (isBackTrackGenerated) {
+ masm->jump(backtrackLabel);
+ return true;
+ }
+ return false;
+ }
+ void addBacktrackJump(Jump jump)
+ {
+ backTrackJumps.append(jump);
+ }
+ void setBacktrackGenerated(Label label)
+ {
+ isBackTrackGenerated = true;
+ backtrackLabel = label;
+ }
+ void linkAlternativeBacktracks(MacroAssembler* masm)
+ {
+ isBackTrackGenerated = false;
+ backTrackJumps.link(masm);
+ }
+ void linkAlternativeBacktracksTo(Label label, MacroAssembler* masm)
+ {
+ isBackTrackGenerated = false;
+ backTrackJumps.linkTo(label, masm);
+ }
+ void propagateBacktrackingFrom(TermGenerationState& nestedParenthesesState, MacroAssembler* masm)
+ {
+ jumpToBacktrack(nestedParenthesesState.backTrackJumps, masm);
+ if (nestedParenthesesState.isBackTrackGenerated)
+ setBacktrackGenerated(nestedParenthesesState.backtrackLabel);
+ }
+
+ PatternDisjunction* disjunction;
+ int checkedTotal;
+ private:
+ unsigned alt;
+ unsigned t;
+ JumpList backTrackJumps;
+ Label backtrackLabel;
+ bool isBackTrackGenerated;
+ };
+
+ void generateAssertionBOL(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+
+ if (m_pattern.m_multiline) {
+ const RegisterID character = regT0;
+
+ JumpList matchDest;
+ if (!term.inputPosition)
+ matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
+
+ readCharacter(state.inputOffset() - 1, character);
+ matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
+ state.jumpToBacktrack(jump(), this);
+
+ matchDest.link(this);
+ } else {
+ // Erk, really should poison out these alternatives early. :-/
+ if (term.inputPosition)
+ state.jumpToBacktrack(jump(), this);
+ else
+ state.jumpToBacktrack(branch32(NotEqual, index, Imm32(state.checkedTotal)), this);
+ }
+ }
+
+ void generateAssertionEOL(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+
+ if (m_pattern.m_multiline) {
+ const RegisterID character = regT0;
+
+ JumpList matchDest;
+ if (term.inputPosition == state.checkedTotal)
+ matchDest.append(atEndOfInput());
+
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
+ state.jumpToBacktrack(jump(), this);
+
+ matchDest.link(this);
+ } else {
+ if (term.inputPosition == state.checkedTotal)
+ state.jumpToBacktrack(notAtEndOfInput(), this);
+ // Erk, really should poison out these alternatives early. :-/
+ else
+ state.jumpToBacktrack(jump(), this);
+ }
+ }
+
+ // Also falls though on nextIsNotWordChar.
+ void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
+ {
+ const RegisterID character = regT0;
+ PatternTerm& term = state.term();
+
+ if (term.inputPosition == state.checkedTotal)
+ nextIsNotWordChar.append(atEndOfInput());
+
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
+ }
+
+ void generateAssertionWordBoundary(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ PatternTerm& term = state.term();
+
+ Jump atBegin;
+ JumpList matchDest;
+ if (!term.inputPosition)
+ atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
+ readCharacter(state.inputOffset() - 1, character);
+ matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
+ if (!term.inputPosition)
+ atBegin.link(this);
+
+ // We fall through to here if the last character was not a wordchar.
+ JumpList nonWordCharThenWordChar;
+ JumpList nonWordCharThenNonWordChar;
+ if (term.invertOrCapture) {
+ matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
+ nonWordCharThenWordChar.append(jump());
+ } else {
+ matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
+ nonWordCharThenNonWordChar.append(jump());
+ }
+ state.jumpToBacktrack(nonWordCharThenNonWordChar, this);
+
+ // We jump here if the last character was a wordchar.
+ matchDest.link(this);
+ JumpList wordCharThenWordChar;
+ JumpList wordCharThenNonWordChar;
+ if (term.invertOrCapture) {
+ matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
+ wordCharThenWordChar.append(jump());
+ } else {
+ matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
+ // This can fall-though!
+ }
+
+ state.jumpToBacktrack(wordCharThenWordChar, this);
+
+ nonWordCharThenWordChar.link(this);
+ wordCharThenNonWordChar.link(this);
+ }
+
+ void generatePatternCharacterSingle(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ UChar ch = state.term().patternCharacter;
+
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(state.inputOffset(), character);
+ or32(Imm32(32), character);
+ state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ state.jumpToBacktrack(jumpIfCharNotEquals(ch, state.inputOffset()), this);
+ }
+ }
+
+ void generatePatternCharacterPair(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ UChar ch1 = state.term().patternCharacter;
+ UChar ch2 = state.lookaheadTerm().patternCharacter;
+
+ int mask = 0;
+ int chPair = ch1 | (ch2 << 16);
+
+ if (m_pattern.m_ignoreCase) {
+ if (isASCIIAlpha(ch1))
+ mask |= 32;
+ if (isASCIIAlpha(ch2))
+ mask |= 32 << 16;
+ }
+
+ if (mask) {
+ load32(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
+ or32(Imm32(mask), character);
+ state.jumpToBacktrack(branch32(NotEqual, character, Imm32(chPair | mask)), this);
+ } else
+ state.jumpToBacktrack(branch32(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)), this);
+ }
+
+ void generatePatternCharacterFixed(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+ UChar ch = term.patternCharacter;
+
+ move(index, countRegister);
+ sub32(Imm32(term.quantityCount), countRegister);
+
+ Label loop(this);
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
+ or32(Imm32(32), character);
+ state.jumpToBacktrack(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))), this);
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ state.jumpToBacktrack(branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)), this);
+ }
+ add32(Imm32(1), countRegister);
+ branch32(NotEqual, countRegister, index).linkTo(loop, this);
+ }
+
+ void generatePatternCharacterGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+ UChar ch = term.patternCharacter;
+
+ move(Imm32(0), countRegister);
+
+ JumpList failures;
+ Label loop(this);
+ failures.append(atEndOfInput());
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(state.inputOffset(), character);
+ or32(Imm32(32), character);
+ failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
+ }
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+ branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ failures.append(jump());
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+ state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
+ sub32(Imm32(1), countRegister);
+ sub32(Imm32(1), index);
+
+ failures.link(this);
+
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackGenerated(backtrackBegin);
+ }
+
+ void generatePatternCharacterNonGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+ UChar ch = term.patternCharacter;
+
+ move(Imm32(0), countRegister);
+
+ Jump firstTimeDoNothing = jump();
+
+ Label hardFail(this);
+ sub32(countRegister, index);
+ state.jumpToBacktrack(jump(), this);
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+
+ atEndOfInput().linkTo(hardFail, this);
+ branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+ if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
+ readCharacter(state.inputOffset(), character);
+ or32(Imm32(32), character);
+ branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
+ } else {
+ ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
+ jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
+ }
+
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+
+ firstTimeDoNothing.link(this);
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackGenerated(backtrackBegin);
+ }
+
+ void generateCharacterClassSingle(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ PatternTerm& term = state.term();
+
+ JumpList matchDest;
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+
+ if (term.invertOrCapture)
+ state.jumpToBacktrack(matchDest, this);
+ else {
+ state.jumpToBacktrack(jump(), this);
+ matchDest.link(this);
+ }
+ }
+
+ void generateCharacterClassFixed(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+
+ move(index, countRegister);
+ sub32(Imm32(term.quantityCount), countRegister);
+
+ Label loop(this);
+ JumpList matchDest;
+ load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+
+ if (term.invertOrCapture)
+ state.jumpToBacktrack(matchDest, this);
+ else {
+ state.jumpToBacktrack(jump(), this);
+ matchDest.link(this);
+ }
+
+ add32(Imm32(1), countRegister);
+ branch32(NotEqual, countRegister, index).linkTo(loop, this);
+ }
+
+ void generateCharacterClassGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+
+ move(Imm32(0), countRegister);
+
+ JumpList failures;
+ Label loop(this);
+ failures.append(atEndOfInput());
+
+ if (term.invertOrCapture) {
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, failures, term.characterClass);
+ } else {
+ JumpList matchDest;
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+ failures.append(jump());
+ matchDest.link(this);
+ }
+
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+ branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ failures.append(jump());
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+ state.jumpToBacktrack(branchTest32(Zero, countRegister), this);
+ sub32(Imm32(1), countRegister);
+ sub32(Imm32(1), index);
+
+ failures.link(this);
+
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackGenerated(backtrackBegin);
+ }
+
+ void generateCharacterClassNonGreedy(TermGenerationState& state)
+ {
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
+ PatternTerm& term = state.term();
+
+ move(Imm32(0), countRegister);
+
+ Jump firstTimeDoNothing = jump();
+
+ Label hardFail(this);
+ sub32(countRegister, index);
+ state.jumpToBacktrack(jump(), this);
+
+ Label backtrackBegin(this);
+ loadFromFrame(term.frameLocation, countRegister);
+
+ atEndOfInput().linkTo(hardFail, this);
+ branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+
+ JumpList matchDest;
+ readCharacter(state.inputOffset(), character);
+ matchCharacterClass(character, matchDest, term.characterClass);
+
+ if (term.invertOrCapture)
+ matchDest.linkTo(hardFail, this);
+ else {
+ jump(hardFail);
+ matchDest.link(this);
+ }
+
+ add32(Imm32(1), countRegister);
+ add32(Imm32(1), index);
+
+ firstTimeDoNothing.link(this);
+ storeToFrame(countRegister, term.frameLocation);
+
+ state.setBacktrackGenerated(backtrackBegin);
+ }
+
+ void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
+ {
+ ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
+ ASSERT(parenthesesTerm.quantityCount == 1);
+
+ PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
+ unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
+
+ if (disjunction->m_alternatives.size() == 1) {
+ state.resetAlternative();
+ ASSERT(state.alternativeValid());
+ PatternAlternative* alternative = state.alternative();
+ optimizeAlternative(alternative);
+
+ int countToCheck = alternative->m_minimumSize - preCheckedCount;
+ if (countToCheck) {
+ ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
+
+ // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
+ // will be forced to always trampoline into here, just to decrement the index.
+ // Ick.
+ Jump skip = jump();
+
+ Label backtrackBegin(this);
+ sub32(Imm32(countToCheck), index);
+ state.addBacktrackJump(jump());
+
+ skip.link(this);
+
+ state.setBacktrackGenerated(backtrackBegin);
+
+ state.jumpToBacktrack(jumpIfNoAvailableInput(countToCheck), this);
+ state.checkedTotal += countToCheck;
+ }
+
+ for (state.resetTerm(); state.termValid(); state.nextTerm())
+ generateTerm(state);
+
+ state.checkedTotal -= countToCheck;
+ } else {
+ JumpList successes;
+
+ for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative()) {
+
+ PatternAlternative* alternative = state.alternative();
+ optimizeAlternative(alternative);
+
+ ASSERT(alternative->m_minimumSize >= preCheckedCount);
+ int countToCheck = alternative->m_minimumSize - preCheckedCount;
+ if (countToCheck) {
+ state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
+ state.checkedTotal += countToCheck;
+ }
+
+ for (state.resetTerm(); state.termValid(); state.nextTerm())
+ generateTerm(state);
+
+ // Matched an alternative.
+ DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
+ successes.append(jump());
+
+ // Alternative did not match.
+ Label backtrackLocation(this);
+
+ // Can we backtrack the alternative? - if so, do so. If not, just fall through to the next one.
+ state.plantJumpToBacktrackIfExists(this);
+
+ state.linkAlternativeBacktracks(this);
+
+ if (countToCheck) {
+ sub32(Imm32(countToCheck), index);
+ state.checkedTotal -= countToCheck;
+ }
+
+ m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, backtrackLocation));
+ }
+ // We fall through to here when the last alternative fails.
+ // Add a backtrack out of here for the parenthese handling code to link up.
+ state.addBacktrackJump(jump());
+
+ // Generate a trampoline for the parens code to backtrack to, to retry the
+ // next alternative.
+ state.setBacktrackGenerated(label());
+ loadFromFrameAndJump(alternativeFrameLocation);
+
+ // FIXME: both of the above hooks are a little inefficient, in that you
+ // may end up trampolining here, just to trampoline back out to the
+ // parentheses code, or vice versa. We can probably eliminate a jump
+ // by restructuring, but coding this way for now for simplicity during
+ // development.
+
+ successes.link(this);
+ }
+ }
+
+ void generateParenthesesSingle(TermGenerationState& state)
+ {
+ const RegisterID indexTemporary = regT0;
+ PatternTerm& term = state.term();
+ PatternDisjunction* disjunction = term.parentheses.disjunction;
+ ASSERT(term.quantityCount == 1);
+
+ unsigned preCheckedCount = ((term.quantityCount == 1) && (term.quantityType == QuantifierFixedCount)) ? disjunction->m_minimumSize : 0;
+
+ unsigned parenthesesFrameLocation = term.frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation;
+ if (term.quantityType != QuantifierFixedCount)
+ alternativeFrameLocation += RegexStackSpaceForBackTrackInfoParenthesesOnce;
+
+ // optimized case - no capture & no quantifier can be handled in a light-weight manner.
+ if (!term.invertOrCapture && (term.quantityType == QuantifierFixedCount)) {
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ // this expects that any backtracks back out of the parentheses will be in the
+ // parenthesesState's backTrackJumps vector, and that if they need backtracking
+ // they will have set an entry point on the parenthesesState's backtrackLabel.
+ state.propagateBacktrackingFrom(parenthesesState, this);
+ } else {
+ Jump nonGreedySkipParentheses;
+ Label nonGreedyTryParentheses;
+ if (term.quantityType == QuantifierGreedy)
+ storeToFrame(Imm32(1), parenthesesFrameLocation);
+ else if (term.quantityType == QuantifierNonGreedy) {
+ storeToFrame(Imm32(0), parenthesesFrameLocation);
+ nonGreedySkipParentheses = jump();
+ nonGreedyTryParentheses = label();
+ storeToFrame(Imm32(1), parenthesesFrameLocation);
+ }
+
+ // store the match start index
+ if (term.invertOrCapture) {
+ int inputOffset = state.inputOffset() - preCheckedCount;
+ if (inputOffset) {
+ move(index, indexTemporary);
+ add32(Imm32(inputOffset), indexTemporary);
+ store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
+ } else
+ store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
+ }
+
+ // generate the body of the parentheses
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+
+ // store the match end index
+ if (term.invertOrCapture) {
+ int inputOffset = state.inputOffset();
+ if (inputOffset) {
+ move(index, indexTemporary);
+ add32(Imm32(state.inputOffset()), indexTemporary);
+ store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+ } else
+ store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+ }
+ Jump success = jump();
+
+ // A failure AFTER the parens jumps here
+ Label backtrackFromAfterParens(this);
+
+ if (term.quantityType == QuantifierGreedy) {
+ // If this is zero we have now tested with both with and without the parens.
+ loadFromFrame(parenthesesFrameLocation, indexTemporary);
+ state.jumpToBacktrack(branchTest32(Zero, indexTemporary), this);
+ } else if (term.quantityType == QuantifierNonGreedy) {
+ // If this is zero we have now tested with both with and without the parens.
+ loadFromFrame(parenthesesFrameLocation, indexTemporary);
+ branchTest32(Zero, indexTemporary).linkTo(nonGreedyTryParentheses, this);
+ }
+
+ parenthesesState.plantJumpToBacktrackIfExists(this);
+ // A failure WITHIN the parens jumps here
+ parenthesesState.linkAlternativeBacktracks(this);
+ if (term.invertOrCapture) {
+ store32(Imm32(-1), Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
+ store32(Imm32(-1), Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+ }
+
+ if (term.quantityType == QuantifierGreedy)
+ storeToFrame(Imm32(0), parenthesesFrameLocation);
+ else
+ state.jumpToBacktrack(jump(), this);
+
+ state.setBacktrackGenerated(backtrackFromAfterParens);
+ if (term.quantityType == QuantifierNonGreedy)
+ nonGreedySkipParentheses.link(this);
+ success.link(this);
+ }
+ }
+
+ void generateParentheticalAssertion(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+ PatternDisjunction* disjunction = term.parentheses.disjunction;
+ ASSERT(term.quantityCount == 1);
+ ASSERT(term.quantityType == QuantifierFixedCount);
+
+ unsigned parenthesesFrameLocation = term.frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation + RegexStackSpaceForBackTrackInfoParentheticalAssertion;
+
+ int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
+
+ if (term.invertOrCapture) {
+ // Inverted case
+ storeToFrame(index, parenthesesFrameLocation);
+
+ state.checkedTotal -= countCheckedAfterAssertion;
+ if (countCheckedAfterAssertion)
+ sub32(Imm32(countCheckedAfterAssertion), index);
+
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ // Success! - which means - Fail!
+ loadFromFrame(parenthesesFrameLocation, index);
+ state.jumpToBacktrack(jump(), this);
+
+ // And fail means success.
+ parenthesesState.linkAlternativeBacktracks(this);
+ loadFromFrame(parenthesesFrameLocation, index);
+
+ state.checkedTotal += countCheckedAfterAssertion;
+ } else {
+ // Normal case
+ storeToFrame(index, parenthesesFrameLocation);
+
+ state.checkedTotal -= countCheckedAfterAssertion;
+ if (countCheckedAfterAssertion)
+ sub32(Imm32(countCheckedAfterAssertion), index);
+
+ TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ // Success! - which means - Success!
+ loadFromFrame(parenthesesFrameLocation, index);
+ Jump success = jump();
+
+ parenthesesState.linkAlternativeBacktracks(this);
+ loadFromFrame(parenthesesFrameLocation, index);
+ state.jumpToBacktrack(jump(), this);
+
+ success.link(this);
+
+ state.checkedTotal += countCheckedAfterAssertion;
+ }
+ }
+
+ void generateTerm(TermGenerationState& state)
+ {
+ PatternTerm& term = state.term();
+
+ switch (term.type) {
+ case PatternTerm::TypeAssertionBOL:
+ generateAssertionBOL(state);
+ break;
+
+ case PatternTerm::TypeAssertionEOL:
+ generateAssertionEOL(state);
+ break;
+
+ case PatternTerm::TypeAssertionWordBoundary:
+ generateAssertionWordBoundary(state);
+ break;
+
+ case PatternTerm::TypePatternCharacter:
+ switch (term.quantityType) {
+ case QuantifierFixedCount:
+ if (term.quantityCount == 1) {
+ if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
+ generatePatternCharacterPair(state);
+ state.nextTerm();
+ } else
+ generatePatternCharacterSingle(state);
+ } else
+ generatePatternCharacterFixed(state);
+ break;
+ case QuantifierGreedy:
+ generatePatternCharacterGreedy(state);
+ break;
+ case QuantifierNonGreedy:
+ generatePatternCharacterNonGreedy(state);
+ break;
+ }
+ break;
+
+ case PatternTerm::TypeCharacterClass:
+ switch (term.quantityType) {
+ case QuantifierFixedCount:
+ if (term.quantityCount == 1)
+ generateCharacterClassSingle(state);
+ else
+ generateCharacterClassFixed(state);
+ break;
+ case QuantifierGreedy:
+ generateCharacterClassGreedy(state);
+ break;
+ case QuantifierNonGreedy:
+ generateCharacterClassNonGreedy(state);
+ break;
+ }
+ break;
+
+ case PatternTerm::TypeBackReference:
+ m_generationFailed = true;
+ break;
+
+ case PatternTerm::TypeForwardReference:
+ break;
+
+ case PatternTerm::TypeParenthesesSubpattern:
+ if ((term.quantityCount == 1) && !term.parentheses.isCopy)
+ generateParenthesesSingle(state);
+ else
+ m_generationFailed = true;
+ break;
+
+ case PatternTerm::TypeParentheticalAssertion:
+ generateParentheticalAssertion(state);
+ break;
+ }
+ }
+
+ void generateDisjunction(PatternDisjunction* disjunction)
+ {
+ TermGenerationState state(disjunction, 0);
+ state.resetAlternative();
+
+ // Plant a check to see if there is sufficient input available to run the first alternative.
+ // Jumping back to the label 'firstAlternative' will get to this check, jumping to
+ // 'firstAlternativeInputChecked' will jump directly to matching the alternative having
+ // skipped this check.
+
+ Label firstAlternative(this);
+
+ // check availability for the next alternative
+ int countCheckedForCurrentAlternative = 0;
+ int countToCheckForFirstAlternative = 0;
+ bool hasShorterAlternatives = false;
+ JumpList notEnoughInputForPreviousAlternative;
+
+ if (state.alternativeValid()) {
+ PatternAlternative* alternative = state.alternative();
+ countToCheckForFirstAlternative = alternative->m_minimumSize;
+ state.checkedTotal += countToCheckForFirstAlternative;
+ if (countToCheckForFirstAlternative)
+ notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
+ countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
+ }
+
+ Label firstAlternativeInputChecked(this);
+
+ while (state.alternativeValid()) {
+ // Track whether any alternatives are shorter than the first one.
+ hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
+
+ PatternAlternative* alternative = state.alternative();
+ optimizeAlternative(alternative);
+
+ for (state.resetTerm(); state.termValid(); state.nextTerm())
+ generateTerm(state);
+
+ // If we get here, the alternative matched.
+ if (m_pattern.m_body->m_callFrameSize)
+ addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+
+ ASSERT(index != returnRegister);
+ if (m_pattern.m_body->m_hasFixedSize) {
+ move(index, returnRegister);
+ if (alternative->m_minimumSize)
+ sub32(Imm32(alternative->m_minimumSize), returnRegister);
+ } else
+ pop(returnRegister);
+ store32(index, Address(output, 4));
+ store32(returnRegister, output);
+
+ generateReturn();
+
+ state.nextAlternative();
+
+ // if there are any more alternatives, plant the check for input before looping.
+ if (state.alternativeValid()) {
+ PatternAlternative* nextAlternative = state.alternative();
+ int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
+
+ if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
+ // If we get here, there the last input checked failed.
+ notEnoughInputForPreviousAlternative.link(this);
+
+ // Check if sufficent input available to run the next alternative
+ notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
+ // We are now in the correct state to enter the next alternative; this add is only required
+ // to mirror and revert operation of the sub32, just below.
+ add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
+
+ // If we get here, there the last input checked passed.
+ state.linkAlternativeBacktracks(this);
+ // No need to check if we can run the next alternative, since it is shorter -
+ // just update index.
+ sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
+ } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
+ // If we get here, there the last input checked failed.
+ // If there is insufficient input to run the current alternative, and the next alternative is longer,
+ // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
+ // we had checked.
+ notEnoughInputForPreviousAlternative.link(this);
+ add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
+ notEnoughInputForPreviousAlternative.append(jump());
+
+ // The next alternative is longer than the current one; check the difference.
+ state.linkAlternativeBacktracks(this);
+ notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
+ } else { // CASE 3: Both alternatives are the same length.
+ ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
+
+ // If the next alterative is the same length as this one, then no need to check the input -
+ // if there was sufficent input to run the current alternative then there is sufficient
+ // input to run the next one; if not, there isn't.
+ state.linkAlternativeBacktracks(this);
+ }
+
+ state.checkedTotal -= countCheckedForCurrentAlternative;
+ countCheckedForCurrentAlternative = countToCheckForNextAlternative;
+ state.checkedTotal += countCheckedForCurrentAlternative;
+ }
+ }
+
+ // If we get here, all Alternatives failed...
+
+ state.checkedTotal -= countCheckedForCurrentAlternative;
+
+ // How much more input need there be to be able to retry from the first alternative?
+ // examples:
+ // /yarr_jit/ or /wrec|pcre/
+ // In these examples we need check for one more input before looping.
+ // /yarr_jit|pcre/
+ // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
+ // being four longer than the last alternative checked, and another +1 to effectively move
+ // the start position along by one).
+ // /yarr|rules/ or /wrec|notsomuch/
+ // In these examples, provided that there was sufficient input to have just been matching for
+ // the second alternative we can loop without checking for available input (since the second
+ // alternative is longer than the first). In the latter example we need to decrement index
+ // (by 4) so the start position is only progressed by 1 from the last iteration.
+ int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
+
+ // First, deal with the cases where there was sufficient input to try the last alternative.
+ if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
+ state.linkAlternativeBacktracks(this);
+ else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
+ state.linkAlternativeBacktracksTo(firstAlternativeInputChecked, this);
+ else { // no need to check the input, but we do have some bookkeeping to do first.
+ state.linkAlternativeBacktracks(this);
+
+ // Where necessary update our preserved start position.
+ if (!m_pattern.m_body->m_hasFixedSize) {
+ move(index, regT0);
+ sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
+ poke(regT0, m_pattern.m_body->m_callFrameSize);
+ }
+
+ // Update index if necessary, and loop (without checking).
+ if (incrementForNextIter)
+ add32(Imm32(incrementForNextIter), index);
+ jump().linkTo(firstAlternativeInputChecked, this);
+ }
+
+ notEnoughInputForPreviousAlternative.link(this);
+ // Update our idea of the start position, if we're tracking this.
+ if (!m_pattern.m_body->m_hasFixedSize) {
+ if (countCheckedForCurrentAlternative - 1) {
+ move(index, regT0);
+ sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
+ poke(regT0, m_pattern.m_body->m_callFrameSize);
+ } else
+ poke(index, m_pattern.m_body->m_callFrameSize);
+ }
+ // Check if there is sufficent input to run the first alternative again.
+ jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
+ // No - insufficent input to run the first alteranative, are there any other alternatives we
+ // might need to check? If so, the last check will have left the index incremented by
+ // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
+ // LESS input is available, to have the effect of just progressing the start position by 1
+ // from the last iteration. If this check passes we can just jump up to the check associated
+ // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
+ // first alternative again, and this check will fail (otherwise the check planted just above
+ // here would have passed). This is a bit sad, however it saves trying to do something more
+ // complex here in compilation, and in the common case we should end up coallescing the checks.
+ //
+ // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
+ // of the minimum-alterantive-lengths. E.g. if I have two alternatives of length 200 and 150,
+ // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
+ // is sufficient input to run either alternative (constantly failing). If there had been only
+ // one alternative, or if the shorter alternative had come first, we would have terminated
+ // immediately. :-/
+ if (hasShorterAlternatives)
+ jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
+ // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
+ // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
+ // but since we're about to return a failure this doesn't really matter!)
+
+ unsigned frameSize = m_pattern.m_body->m_callFrameSize;
+ if (!m_pattern.m_body->m_hasFixedSize)
+ ++frameSize;
+ if (frameSize)
+ addPtr(Imm32(frameSize * sizeof(void*)), stackPointerRegister);
+
+ move(Imm32(-1), returnRegister);
+
+ generateReturn();
+ }
+
+ void generateEnter()
+ {
+#if PLATFORM(X86_64)
+ push(X86::ebp);
+ move(stackPointerRegister, X86::ebp);
+ push(X86::ebx);
+#elif PLATFORM(X86)
+ push(X86::ebp);
+ move(stackPointerRegister, X86::ebp);
+ // TODO: do we need spill registers to fill the output pointer if there are no sub captures?
+ push(X86::ebx);
+ push(X86::edi);
+ push(X86::esi);
+ // load output into edi (2 = saved ebp + return address).
+ #if COMPILER(MSVC)
+ loadPtr(Address(X86::ebp, 2 * sizeof(void*)), input);
+ loadPtr(Address(X86::ebp, 3 * sizeof(void*)), index);
+ loadPtr(Address(X86::ebp, 4 * sizeof(void*)), length);
+ loadPtr(Address(X86::ebp, 5 * sizeof(void*)), output);
+ #else
+ loadPtr(Address(X86::ebp, 2 * sizeof(void*)), output);
+ #endif
+#elif PLATFORM(ARM)
+#if !PLATFORM_ARM_ARCH(7)
+ push(ARM::lr);
+#endif
+ push(ARM::r4);
+ push(ARM::r5);
+ push(ARM::r6);
+ move(ARM::r3, output);
+#endif
+ }
+
+ void generateReturn()
+ {
+#if PLATFORM(X86_64)
+ pop(X86::ebx);
+ pop(X86::ebp);
+#elif PLATFORM(X86)
+ pop(X86::esi);
+ pop(X86::edi);
+ pop(X86::ebx);
+ pop(X86::ebp);
+#elif PLATFORM(ARM)
+ pop(ARM::r6);
+ pop(ARM::r5);
+ pop(ARM::r4);
+#endif
+ ret();
+ }
+
+public:
+ RegexGenerator(RegexPattern& pattern)
+ : m_pattern(pattern)
+ , m_generationFailed(false)
+ {
+ }
+
+ void generate()
+ {
+ generateEnter();
+
+ // TODO: do I really want this on the stack?
+ if (!m_pattern.m_body->m_hasFixedSize)
+ push(index);
+
+ if (m_pattern.m_body->m_callFrameSize)
+ subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+
+ generateDisjunction(m_pattern.m_body);
+ }
+
+ void compile(JSGlobalData* globalData, RegexCodeBlock& jitObject)
+ {
+ generate();
+
+ LinkBuffer patchBuffer(this, globalData->executableAllocator.poolForSize(size()));
+
+ for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
+ patchBuffer.patch(m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_backtrackRecords[i].backtrackLocation));
+
+ jitObject.set(patchBuffer.finalizeCode());
+ }
+
+ bool generationFailed()
+ {
+ return m_generationFailed;
+ }
+
+private:
+ RegexPattern& m_pattern;
+ Vector<AlternativeBacktrackRecord> m_backtrackRecords;
+ bool m_generationFailed;
+};
+
+void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& patternString, unsigned& numSubpatterns, const char*& error, bool ignoreCase, bool multiline)
+{
+ RegexPattern pattern(ignoreCase, multiline);
+
+ if ((error = compileRegex(patternString, pattern)))
+ return;
+
+ numSubpatterns = pattern.m_numSubpatterns;
+
+ RegexGenerator generator(pattern);
+ generator.compile(globalData, jitObject);
+
+ if (generator.generationFailed()) {
+ JSRegExpIgnoreCaseOption ignoreCaseOption = ignoreCase ? JSRegExpIgnoreCase : JSRegExpDoNotIgnoreCase;
+ JSRegExpMultilineOption multilineOption = multiline ? JSRegExpMultiline : JSRegExpSingleLine;
+ jitObject.setFallback(jsRegExpCompile(reinterpret_cast<const UChar*>(patternString.data()), patternString.size(), ignoreCaseOption, multilineOption, &numSubpatterns, &error));
+ }
+}
+
+int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output, int outputArraySize)
+{
+ if (JSRegExp* fallback = jitObject.getFallback())
+ return (jsRegExpExecute(fallback, input, length, start, output, outputArraySize) < 0) ? -1 : output[0];
+
+ return jitObject.execute(input, start, length, output);
+}
+
+}}
+
+#endif
+
+
+
+
+
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.h b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.h
new file mode 100644
index 0000000..5b0df9d
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexJIT.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexJIT_h
+#define RegexJIT_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR_JIT)
+
+#include "MacroAssembler.h"
+#include "RegexPattern.h"
+#include <UString.h>
+
+#include <pcre.h>
+struct JSRegExp; // temporary, remove when fallback is removed.
+
+#if PLATFORM(X86) && !COMPILER(MSVC)
+#define YARR_CALL __attribute__ ((regparm (3)))
+#else
+#define YARR_CALL
+#endif
+
+namespace JSC {
+
+class JSGlobalData;
+class ExecutablePool;
+
+namespace Yarr {
+
+class RegexCodeBlock {
+ typedef int (*RegexJITCode)(const UChar* input, unsigned start, unsigned length, int* output) YARR_CALL;
+
+public:
+ RegexCodeBlock()
+ : m_fallback(0)
+ {
+ }
+
+ ~RegexCodeBlock()
+ {
+ if (m_fallback)
+ jsRegExpFree(m_fallback);
+ }
+
+ JSRegExp* getFallback() { return m_fallback; }
+ void setFallback(JSRegExp* fallback) { m_fallback = fallback; }
+
+ bool operator!() { return !m_ref.m_code.executableAddress(); }
+ void set(MacroAssembler::CodeRef ref) { m_ref = ref; }
+
+ int execute(const UChar* input, unsigned start, unsigned length, int* output)
+ {
+ return reinterpret_cast<RegexJITCode>(m_ref.m_code.executableAddress())(input, start, length, output);
+ }
+
+private:
+ MacroAssembler::CodeRef m_ref;
+ JSRegExp* m_fallback;
+};
+
+void jitCompileRegex(JSGlobalData* globalData, RegexCodeBlock& jitObject, const UString& pattern, unsigned& numSubpatterns, const char*& error, bool ignoreCase = false, bool multiline = false);
+int executeRegex(RegexCodeBlock& jitObject, const UChar* input, unsigned start, unsigned length, int* output, int outputArraySize);
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexJIT_h
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexParser.h b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexParser.h
new file mode 100644
index 0000000..64e8463
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexParser.h
@@ -0,0 +1,854 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexParser_h
+#define RegexParser_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR)
+
+#include <UString.h>
+#include <wtf/ASCIICType.h>
+#include <wtf/unicode/Unicode.h>
+#include <limits.h>
+
+namespace JSC { namespace Yarr {
+
+enum BuiltInCharacterClassID {
+ DigitClassID,
+ SpaceClassID,
+ WordClassID,
+ NewlineClassID,
+};
+
+// The Parser class should not be used directly - only via the Yarr::parse() method.
+template<class Delegate>
+class Parser {
+private:
+ template<class FriendDelegate>
+ friend const char* parse(FriendDelegate& delegate, const UString& pattern, unsigned backReferenceLimit);
+
+ enum ErrorCode {
+ NoError,
+ PatternTooLarge,
+ QuantifierOutOfOrder,
+ QuantifierWithoutAtom,
+ MissingParentheses,
+ ParenthesesUnmatched,
+ ParenthesesTypeInvalid,
+ CharacterClassUnmatched,
+ CharacterClassOutOfOrder,
+ EscapeUnterminated,
+ NumberOfErrorCodes
+ };
+
+ /*
+ * CharacterClassParserDelegate:
+ *
+ * The class CharacterClassParserDelegate is used in the parsing of character
+ * classes. This class handles detection of character ranges. This class
+ * implements enough of the delegate interface such that it can be passed to
+ * parseEscape() as an EscapeDelegate. This allows parseEscape() to be reused
+ * to perform the parsing of escape characters in character sets.
+ */
+ class CharacterClassParserDelegate {
+ public:
+ CharacterClassParserDelegate(Delegate& delegate, ErrorCode& err)
+ : m_delegate(delegate)
+ , m_err(err)
+ , m_state(empty)
+ {
+ }
+
+ /*
+ * begin():
+ *
+ * Called at beginning of construction.
+ */
+ void begin(bool invert)
+ {
+ m_delegate.atomCharacterClassBegin(invert);
+ }
+
+ /*
+ * atomPatternCharacterUnescaped():
+ *
+ * This method is called directly from parseCharacterClass(), to report a new
+ * pattern character token. This method differs from atomPatternCharacter(),
+ * which will be called from parseEscape(), since a hypen provided via this
+ * method may be indicating a character range, but a hyphen parsed by
+ * parseEscape() cannot be interpreted as doing so.
+ */
+ void atomPatternCharacterUnescaped(UChar ch)
+ {
+ switch (m_state) {
+ case empty:
+ m_character = ch;
+ m_state = cachedCharacter;
+ break;
+
+ case cachedCharacter:
+ if (ch == '-')
+ m_state = cachedCharacterHyphen;
+ else {
+ m_delegate.atomCharacterClassAtom(m_character);
+ m_character = ch;
+ }
+ break;
+
+ case cachedCharacterHyphen:
+ if (ch >= m_character)
+ m_delegate.atomCharacterClassRange(m_character, ch);
+ else
+ m_err = CharacterClassOutOfOrder;
+ m_state = empty;
+ }
+ }
+
+ /*
+ * atomPatternCharacter():
+ *
+ * Adds a pattern character, called by parseEscape(), as such will not
+ * interpret a hyphen as indicating a character range.
+ */
+ void atomPatternCharacter(UChar ch)
+ {
+ // Flush if a character is already pending to prevent the
+ // hyphen from begin interpreted as indicating a range.
+ if((ch == '-') && (m_state == cachedCharacter))
+ flush();
+
+ atomPatternCharacterUnescaped(ch);
+ }
+
+ /*
+ * atomBuiltInCharacterClass():
+ *
+ * Adds a built-in character class, called by parseEscape().
+ */
+ void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert)
+ {
+ flush();
+ m_delegate.atomCharacterClassBuiltIn(classID, invert);
+ }
+
+ /*
+ * end():
+ *
+ * Called at end of construction.
+ */
+ void end()
+ {
+ flush();
+ m_delegate.atomCharacterClassEnd();
+ }
+
+ // parseEscape() should never call these delegate methods when
+ // invoked with inCharacterClass set.
+ void assertionWordBoundary(bool) { ASSERT_NOT_REACHED(); }
+ void atomBackReference(unsigned) { ASSERT_NOT_REACHED(); }
+
+ private:
+ void flush()
+ {
+ if (m_state != empty) // either cachedCharacter or cachedCharacterHyphen
+ m_delegate.atomCharacterClassAtom(m_character);
+ if (m_state == cachedCharacterHyphen)
+ m_delegate.atomCharacterClassAtom('-');
+ m_state = empty;
+ }
+
+ Delegate& m_delegate;
+ ErrorCode& m_err;
+ enum CharacterClassConstructionState {
+ empty,
+ cachedCharacter,
+ cachedCharacterHyphen,
+ } m_state;
+ UChar m_character;
+ };
+
+ Parser(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit)
+ : m_delegate(delegate)
+ , m_backReferenceLimit(backReferenceLimit)
+ , m_err(NoError)
+ , m_data(pattern.data())
+ , m_size(pattern.size())
+ , m_index(0)
+ , m_parenthesesNestingDepth(0)
+ {
+ }
+
+ /*
+ * parseEscape():
+ *
+ * Helper for parseTokens() AND parseCharacterClass().
+ * Unlike the other parser methods, this function does not report tokens
+ * directly to the member delegate (m_delegate), instead tokens are
+ * emitted to the delegate provided as an argument. In the case of atom
+ * escapes, parseTokens() will call parseEscape() passing m_delegate as
+ * an argument, and as such the escape will be reported to the delegate.
+ *
+ * However this method may also be used by parseCharacterClass(), in which
+ * case a CharacterClassParserDelegate will be passed as the delegate that
+ * tokens should be added to. A boolean flag is also provided to indicate
+ * whether that an escape in a CharacterClass is being parsed (some parsing
+ * rules change in this context).
+ *
+ * The boolean value returned by this method indicates whether the token
+ * parsed was an atom (outside of a characted class \b and \B will be
+ * interpreted as assertions).
+ */
+ template<bool inCharacterClass, class EscapeDelegate>
+ bool parseEscape(EscapeDelegate& delegate)
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == '\\');
+ consume();
+
+ if (atEndOfPattern()) {
+ m_err = EscapeUnterminated;
+ return false;
+ }
+
+ switch (peek()) {
+ // Assertions
+ case 'b':
+ consume();
+ if (inCharacterClass)
+ delegate.atomPatternCharacter('\b');
+ else {
+ delegate.assertionWordBoundary(false);
+ return false;
+ }
+ break;
+ case 'B':
+ consume();
+ if (inCharacterClass)
+ delegate.atomPatternCharacter('B');
+ else {
+ delegate.assertionWordBoundary(true);
+ return false;
+ }
+ break;
+
+ // CharacterClassEscape
+ case 'd':
+ consume();
+ delegate.atomBuiltInCharacterClass(DigitClassID, false);
+ break;
+ case 's':
+ consume();
+ delegate.atomBuiltInCharacterClass(SpaceClassID, false);
+ break;
+ case 'w':
+ consume();
+ delegate.atomBuiltInCharacterClass(WordClassID, false);
+ break;
+ case 'D':
+ consume();
+ delegate.atomBuiltInCharacterClass(DigitClassID, true);
+ break;
+ case 'S':
+ consume();
+ delegate.atomBuiltInCharacterClass(SpaceClassID, true);
+ break;
+ case 'W':
+ consume();
+ delegate.atomBuiltInCharacterClass(WordClassID, true);
+ break;
+
+ // DecimalEscape
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9': {
+ // To match Firefox, we parse an invalid backreference in the range [1-7] as an octal escape.
+ // First, try to parse this as backreference.
+ if (!inCharacterClass) {
+ ParseState state = saveState();
+
+ unsigned backReference = consumeNumber();
+ if (backReference <= m_backReferenceLimit) {
+ delegate.atomBackReference(backReference);
+ break;
+ }
+
+ restoreState(state);
+ }
+
+ // Not a backreference, and not octal.
+ if (peek() >= '8') {
+ delegate.atomPatternCharacter('\\');
+ break;
+ }
+
+ // Fall-through to handle this as an octal escape.
+ }
+
+ // Octal escape
+ case '0':
+ delegate.atomPatternCharacter(consumeOctal());
+ break;
+
+ // ControlEscape
+ case 'f':
+ consume();
+ delegate.atomPatternCharacter('\f');
+ break;
+ case 'n':
+ consume();
+ delegate.atomPatternCharacter('\n');
+ break;
+ case 'r':
+ consume();
+ delegate.atomPatternCharacter('\r');
+ break;
+ case 't':
+ consume();
+ delegate.atomPatternCharacter('\t');
+ break;
+ case 'v':
+ consume();
+ delegate.atomPatternCharacter('\v');
+ break;
+
+ // ControlLetter
+ case 'c': {
+ ParseState state = saveState();
+ consume();
+ if (!atEndOfPattern()) {
+ int control = consume();
+
+ // To match Firefox, inside a character class, we also accept numbers and '_' as control characters.
+ if (inCharacterClass ? WTF::isASCIIAlphanumeric(control) || (control == '_') : WTF::isASCIIAlpha(control)) {
+ delegate.atomPatternCharacter(control & 0x1f);
+ break;
+ }
+ }
+ restoreState(state);
+ delegate.atomPatternCharacter('\\');
+ break;
+ }
+
+ // HexEscape
+ case 'x': {
+ consume();
+ int x = tryConsumeHex(2);
+ if (x == -1)
+ delegate.atomPatternCharacter('x');
+ else
+ delegate.atomPatternCharacter(x);
+ break;
+ }
+
+ // UnicodeEscape
+ case 'u': {
+ consume();
+ int u = tryConsumeHex(4);
+ if (u == -1)
+ delegate.atomPatternCharacter('u');
+ else
+ delegate.atomPatternCharacter(u);
+ break;
+ }
+
+ // IdentityEscape
+ default:
+ delegate.atomPatternCharacter(consume());
+ }
+
+ return true;
+ }
+
+ /*
+ * parseAtomEscape(), parseCharacterClassEscape():
+ *
+ * These methods alias to parseEscape().
+ */
+ bool parseAtomEscape()
+ {
+ return parseEscape<false>(m_delegate);
+ }
+ void parseCharacterClassEscape(CharacterClassParserDelegate& delegate)
+ {
+ parseEscape<true>(delegate);
+ }
+
+ /*
+ * parseCharacterClass():
+ *
+ * Helper for parseTokens(); calls dirctly and indirectly (via parseCharacterClassEscape)
+ * to an instance of CharacterClassParserDelegate, to describe the character class to the
+ * delegate.
+ */
+ void parseCharacterClass()
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == '[');
+ consume();
+
+ CharacterClassParserDelegate characterClassConstructor(m_delegate, m_err);
+
+ characterClassConstructor.begin(tryConsume('^'));
+
+ while (!atEndOfPattern()) {
+ switch (peek()) {
+ case ']':
+ consume();
+ characterClassConstructor.end();
+ return;
+
+ case '\\':
+ parseCharacterClassEscape(characterClassConstructor);
+ break;
+
+ default:
+ characterClassConstructor.atomPatternCharacterUnescaped(consume());
+ }
+
+ if (m_err)
+ return;
+ }
+
+ m_err = CharacterClassUnmatched;
+ }
+
+ /*
+ * parseParenthesesBegin():
+ *
+ * Helper for parseTokens(); checks for parentheses types other than regular capturing subpatterns.
+ */
+ void parseParenthesesBegin()
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == '(');
+ consume();
+
+ if (tryConsume('?')) {
+ if (atEndOfPattern()) {
+ m_err = ParenthesesTypeInvalid;
+ return;
+ }
+
+ switch (consume()) {
+ case ':':
+ m_delegate.atomParenthesesSubpatternBegin(false);
+ break;
+
+ case '=':
+ m_delegate.atomParentheticalAssertionBegin();
+ break;
+
+ case '!':
+ m_delegate.atomParentheticalAssertionBegin(true);
+ break;
+
+ default:
+ m_err = ParenthesesTypeInvalid;
+ }
+ } else
+ m_delegate.atomParenthesesSubpatternBegin();
+
+ ++m_parenthesesNestingDepth;
+ }
+
+ /*
+ * parseParenthesesEnd():
+ *
+ * Helper for parseTokens(); checks for parse errors (due to unmatched parentheses).
+ */
+ void parseParenthesesEnd()
+ {
+ ASSERT(!m_err);
+ ASSERT(peek() == ')');
+ consume();
+
+ if (m_parenthesesNestingDepth > 0)
+ m_delegate.atomParenthesesEnd();
+ else
+ m_err = ParenthesesUnmatched;
+
+ --m_parenthesesNestingDepth;
+ }
+
+ /*
+ * parseQuantifier():
+ *
+ * Helper for parseTokens(); checks for parse errors and non-greedy quantifiers.
+ */
+ void parseQuantifier(bool lastTokenWasAnAtom, unsigned min, unsigned max)
+ {
+ ASSERT(!m_err);
+ ASSERT(min <= max);
+
+ if (lastTokenWasAnAtom)
+ m_delegate.quantifyAtom(min, max, !tryConsume('?'));
+ else
+ m_err = QuantifierWithoutAtom;
+ }
+
+ /*
+ * parseTokens():
+ *
+ * This method loops over the input pattern reporting tokens to the delegate.
+ * The method returns when a parse error is detected, or the end of the pattern
+ * is reached. One piece of state is tracked around the loop, which is whether
+ * the last token passed to the delegate was an atom (this is necessary to detect
+ * a parse error when a quantifier provided without an atom to quantify).
+ */
+ void parseTokens()
+ {
+ bool lastTokenWasAnAtom = false;
+
+ while (!atEndOfPattern()) {
+ switch (peek()) {
+ case '|':
+ consume();
+ m_delegate.disjunction();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '(':
+ parseParenthesesBegin();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case ')':
+ parseParenthesesEnd();
+ lastTokenWasAnAtom = true;
+ break;
+
+ case '^':
+ consume();
+ m_delegate.assertionBOL();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '$':
+ consume();
+ m_delegate.assertionEOL();
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '.':
+ consume();
+ m_delegate.atomBuiltInCharacterClass(NewlineClassID, true);
+ lastTokenWasAnAtom = true;
+ break;
+
+ case '[':
+ parseCharacterClass();
+ lastTokenWasAnAtom = true;
+ break;
+
+ case '\\':
+ lastTokenWasAnAtom = parseAtomEscape();
+ break;
+
+ case '*':
+ consume();
+ parseQuantifier(lastTokenWasAnAtom, 0, UINT_MAX);
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '+':
+ consume();
+ parseQuantifier(lastTokenWasAnAtom, 1, UINT_MAX);
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '?':
+ consume();
+ parseQuantifier(lastTokenWasAnAtom, 0, 1);
+ lastTokenWasAnAtom = false;
+ break;
+
+ case '{': {
+ ParseState state = saveState();
+
+ consume();
+ if (peekIsDigit()) {
+ unsigned min = consumeNumber();
+ unsigned max = min;
+
+ if (tryConsume(','))
+ max = peekIsDigit() ? consumeNumber() : UINT_MAX;
+
+ if (tryConsume('}')) {
+ if (min <= max)
+ parseQuantifier(lastTokenWasAnAtom, min, max);
+ else
+ m_err = QuantifierOutOfOrder;
+ lastTokenWasAnAtom = false;
+ break;
+ }
+ }
+
+ restoreState(state);
+ } // if we did not find a complete quantifer, fall through to the default case.
+
+ default:
+ m_delegate.atomPatternCharacter(consume());
+ lastTokenWasAnAtom = true;
+ }
+
+ if (m_err)
+ return;
+ }
+
+ if (m_parenthesesNestingDepth > 0)
+ m_err = MissingParentheses;
+ }
+
+ /*
+ * parse():
+ *
+ * This method calls regexBegin(), calls parseTokens() to parse over the input
+ * patterns, calls regexEnd() or regexError() as appropriate, and converts any
+ * error code to a const char* for a result.
+ */
+ const char* parse()
+ {
+ m_delegate.regexBegin();
+
+ if (m_size > MAX_PATTERN_SIZE)
+ m_err = PatternTooLarge;
+ else
+ parseTokens();
+ ASSERT(atEndOfPattern() || m_err);
+
+ if (m_err)
+ m_delegate.regexError();
+ else
+ m_delegate.regexEnd();
+
+ // The order of this array must match the ErrorCode enum.
+ static const char* errorMessages[NumberOfErrorCodes] = {
+ 0, // NoError
+ "regular expression too large",
+ "numbers out of order in {} quantifier",
+ "nothing to repeat",
+ "missing )",
+ "unmatched parentheses",
+ "unrecognized character after (?",
+ "missing terminating ] for character class",
+ "range out of order in character class",
+ "\\ at end of pattern"
+ };
+
+ return errorMessages[m_err];
+ }
+
+
+ // Misc helper functions:
+
+ typedef unsigned ParseState;
+
+ ParseState saveState()
+ {
+ return m_index;
+ }
+
+ void restoreState(ParseState state)
+ {
+ m_index = state;
+ }
+
+ bool atEndOfPattern()
+ {
+ ASSERT(m_index <= m_size);
+ return m_index == m_size;
+ }
+
+ int peek()
+ {
+ ASSERT(m_index < m_size);
+ return m_data[m_index];
+ }
+
+ bool peekIsDigit()
+ {
+ return !atEndOfPattern() && WTF::isASCIIDigit(peek());
+ }
+
+ unsigned peekDigit()
+ {
+ ASSERT(peekIsDigit());
+ return peek() - '0';
+ }
+
+ int consume()
+ {
+ ASSERT(m_index < m_size);
+ return m_data[m_index++];
+ }
+
+ unsigned consumeDigit()
+ {
+ ASSERT(peekIsDigit());
+ return consume() - '0';
+ }
+
+ unsigned consumeNumber()
+ {
+ unsigned n = consumeDigit();
+ // check for overflow.
+ for (unsigned newValue; peekIsDigit() && ((newValue = n * 10 + peekDigit()) >= n); ) {
+ n = newValue;
+ consume();
+ }
+ return n;
+ }
+
+ unsigned consumeOctal()
+ {
+ ASSERT(WTF::isASCIIOctalDigit(peek()));
+
+ unsigned n = consumeDigit();
+ while (n < 32 && !atEndOfPattern() && WTF::isASCIIOctalDigit(peek()))
+ n = n * 8 + consumeDigit();
+ return n;
+ }
+
+ bool tryConsume(UChar ch)
+ {
+ if (atEndOfPattern() || (m_data[m_index] != ch))
+ return false;
+ ++m_index;
+ return true;
+ }
+
+ int tryConsumeHex(int count)
+ {
+ ParseState state = saveState();
+
+ int n = 0;
+ while (count--) {
+ if (atEndOfPattern() || !WTF::isASCIIHexDigit(peek())) {
+ restoreState(state);
+ return -1;
+ }
+ n = (n << 4) | WTF::toASCIIHexValue(consume());
+ }
+ return n;
+ }
+
+ Delegate& m_delegate;
+ unsigned m_backReferenceLimit;
+ ErrorCode m_err;
+ const UChar* m_data;
+ unsigned m_size;
+ unsigned m_index;
+ unsigned m_parenthesesNestingDepth;
+
+ // Derived by empirical testing of compile time in PCRE and WREC.
+ static const unsigned MAX_PATTERN_SIZE = 1024 * 1024;
+};
+
+/*
+ * Yarr::parse():
+ *
+ * The parse method is passed a pattern to be parsed and a delegate upon which
+ * callbacks will be made to record the parsed tokens forming the regex.
+ * Yarr::parse() returns null on success, or a const C string providing an error
+ * message where a parse error occurs.
+ *
+ * The Delegate must implement the following interface:
+ *
+ * void assertionBOL();
+ * void assertionEOL();
+ * void assertionWordBoundary(bool invert);
+ *
+ * void atomPatternCharacter(UChar ch);
+ * void atomBuiltInCharacterClass(BuiltInCharacterClassID classID, bool invert);
+ * void atomCharacterClassBegin(bool invert)
+ * void atomCharacterClassAtom(UChar ch)
+ * void atomCharacterClassRange(UChar begin, UChar end)
+ * void atomCharacterClassBuiltIn(BuiltInCharacterClassID classID, bool invert)
+ * void atomCharacterClassEnd()
+ * void atomParenthesesSubpatternBegin(bool capture = true);
+ * void atomParentheticalAssertionBegin(bool invert = false);
+ * void atomParenthesesEnd();
+ * void atomBackReference(unsigned subpatternId);
+ *
+ * void quantifyAtom(unsigned min, unsigned max, bool greedy);
+ *
+ * void disjunction();
+ *
+ * void regexBegin();
+ * void regexEnd();
+ * void regexError();
+ *
+ * Before any call recording tokens are made, regexBegin() will be called on the
+ * delegate once. Once parsing is complete either regexEnd() or regexError() will
+ * be called, as appropriate.
+ *
+ * The regular expression is described by a sequence of assertion*() and atom*()
+ * callbacks to the delegate, describing the terms in the regular expression.
+ * Following an atom a quantifyAtom() call may occur to indicate that the previous
+ * atom should be quantified. In the case of atoms described across multiple
+ * calls (parentheses and character classes) the call to quantifyAtom() will come
+ * after the call to the atom*End() method, never after atom*Begin().
+ *
+ * Character classes may either be described by a single call to
+ * atomBuiltInCharacterClass(), or by a sequence of atomCharacterClass*() calls.
+ * In the latter case, ...Begin() will be called, followed by a sequence of
+ * calls to ...Atom(), ...Range(), and ...BuiltIn(), followed by a call to ...End().
+ *
+ * Sequences of atoms and assertions are broken into alternatives via calls to
+ * disjunction(). Assertions, atoms, and disjunctions emitted between calls to
+ * atomParenthesesBegin() and atomParenthesesEnd() form the body of a subpattern.
+ * atomParenthesesBegin() is passed a subpatternId. In the case of a regular
+ * capturing subpattern, this will be the subpatternId associated with these
+ * parentheses, and will also by definition be the lowest subpatternId of these
+ * parentheses and of any nested paretheses. The atomParenthesesEnd() method
+ * is passed the subpatternId of the last capturing subexpression nested within
+ * these paretheses. In the case of a capturing subpattern with no nested
+ * capturing subpatterns, the same subpatternId will be passed to the begin and
+ * end functions. In the case of non-capturing subpatterns the subpatternId
+ * passed to the begin method is also the first possible subpatternId that might
+ * be nested within these paretheses. If a set of non-capturing parentheses does
+ * not contain any capturing subpatterns, then the subpatternId passed to begin
+ * will be greater than the subpatternId passed to end.
+ */
+
+template<class Delegate>
+const char* parse(Delegate& delegate, const UString& pattern, unsigned backReferenceLimit = UINT_MAX)
+{
+ return Parser<Delegate>(delegate, pattern, backReferenceLimit).parse();
+}
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexParser_h
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexPattern.h b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexPattern.h
new file mode 100644
index 0000000..a451131
--- /dev/null
+++ b/src/3rdparty/javascriptcore/JavaScriptCore/yarr/RegexPattern.h
@@ -0,0 +1,356 @@
+/*
+ * Copyright (C) 2009 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef RegexPattern_h
+#define RegexPattern_h
+
+#include <wtf/Platform.h>
+
+#if ENABLE(YARR)
+
+#include <wtf/Vector.h>
+#include <wtf/unicode/Unicode.h>
+
+
+namespace JSC { namespace Yarr {
+
+#define RegexStackSpaceForBackTrackInfoPatternCharacter 1 // Only for !fixed quantifiers.
+#define RegexStackSpaceForBackTrackInfoCharacterClass 1 // Only for !fixed quantifiers.
+#define RegexStackSpaceForBackTrackInfoBackReference 2
+#define RegexStackSpaceForBackTrackInfoAlternative 1 // One per alternative.
+#define RegexStackSpaceForBackTrackInfoParentheticalAssertion 1
+#define RegexStackSpaceForBackTrackInfoParenthesesOnce 1 // Only for !fixed quantifiers.
+#define RegexStackSpaceForBackTrackInfoParentheses 4
+
+struct PatternDisjunction;
+
+struct CharacterRange {
+ UChar begin;
+ UChar end;
+
+ CharacterRange(UChar begin, UChar end)
+ : begin(begin)
+ , end(end)
+ {
+ }
+};
+
+struct CharacterClass : FastAllocBase {
+ Vector<UChar> m_matches;
+ Vector<CharacterRange> m_ranges;
+ Vector<UChar> m_matchesUnicode;
+ Vector<CharacterRange> m_rangesUnicode;
+};
+
+enum QuantifierType {
+ QuantifierFixedCount,
+ QuantifierGreedy,
+ QuantifierNonGreedy,
+};
+
+struct PatternTerm {
+ enum Type {
+ TypeAssertionBOL,
+ TypeAssertionEOL,
+ TypeAssertionWordBoundary,
+ TypePatternCharacter,
+ TypeCharacterClass,
+ TypeBackReference,
+ TypeForwardReference,
+ TypeParenthesesSubpattern,
+ TypeParentheticalAssertion,
+ } type;
+ bool invertOrCapture;
+ union {
+ UChar patternCharacter;
+ CharacterClass* characterClass;
+ unsigned subpatternId;
+ struct {
+ PatternDisjunction* disjunction;
+ unsigned subpatternId;
+ unsigned lastSubpatternId;
+ bool isCopy;
+ } parentheses;
+ };
+ QuantifierType quantityType;
+ unsigned quantityCount;
+ int inputPosition;
+ unsigned frameLocation;
+
+ PatternTerm(UChar ch)
+ : type(PatternTerm::TypePatternCharacter)
+ {
+ patternCharacter = ch;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(CharacterClass* charClass, bool invert)
+ : type(PatternTerm::TypeCharacterClass)
+ , invertOrCapture(invert)
+ {
+ characterClass = charClass;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(Type type, unsigned subpatternId, PatternDisjunction* disjunction, bool invertOrCapture)
+ : type(type)
+ , invertOrCapture(invertOrCapture)
+ {
+ parentheses.disjunction = disjunction;
+ parentheses.subpatternId = subpatternId;
+ parentheses.isCopy = false;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(Type type, bool invert = false)
+ : type(type)
+ , invertOrCapture(invert)
+ {
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ PatternTerm(unsigned spatternId)
+ : type(TypeBackReference)
+ , invertOrCapture(invertOrCapture)
+ {
+ subpatternId = spatternId;
+ quantityType = QuantifierFixedCount;
+ quantityCount = 1;
+ }
+
+ static PatternTerm ForwardReference()
+ {
+ return PatternTerm(TypeForwardReference);
+ }
+
+ static PatternTerm BOL()
+ {
+ return PatternTerm(TypeAssertionBOL);
+ }
+
+ static PatternTerm EOL()
+ {
+ return PatternTerm(TypeAssertionEOL);
+ }
+
+ static PatternTerm WordBoundary(bool invert)
+ {
+ return PatternTerm(TypeAssertionWordBoundary, invert);
+ }
+
+ bool invert()
+ {
+ return invertOrCapture;
+ }
+
+ bool capture()
+ {
+ return invertOrCapture;
+ }
+
+ void quantify(unsigned count, QuantifierType type)
+ {
+ quantityCount = count;
+ quantityType = type;
+ }
+};
+
+struct PatternAlternative : FastAllocBase {
+ PatternAlternative(PatternDisjunction* disjunction)
+ : m_parent(disjunction)
+ {
+ }
+
+ PatternTerm& lastTerm()
+ {
+ ASSERT(m_terms.size());
+ return m_terms[m_terms.size() - 1];
+ }
+
+ void removeLastTerm()
+ {
+ ASSERT(m_terms.size());
+ m_terms.shrink(m_terms.size() - 1);
+ }
+
+ Vector<PatternTerm> m_terms;
+ PatternDisjunction* m_parent;
+ unsigned m_minimumSize;
+ bool m_hasFixedSize;
+};
+
+struct PatternDisjunction : FastAllocBase {
+ PatternDisjunction(PatternAlternative* parent = 0)
+ : m_parent(parent)
+ {
+ }
+
+ ~PatternDisjunction()
+ {
+ deleteAllValues(m_alternatives);
+ }
+
+ PatternAlternative* addNewAlternative()
+ {
+ PatternAlternative* alternative = new PatternAlternative(this);
+ m_alternatives.append(alternative);
+ return alternative;
+ }
+
+ Vector<PatternAlternative*> m_alternatives;
+ PatternAlternative* m_parent;
+ unsigned m_minimumSize;
+ unsigned m_callFrameSize;
+ bool m_hasFixedSize;
+};
+
+// You probably don't want to be calling these functions directly
+// (please to be calling newlineCharacterClass() et al on your
+// friendly neighborhood RegexPattern instance to get nicely
+// cached copies).
+CharacterClass* newlineCreate();
+CharacterClass* digitsCreate();
+CharacterClass* spacesCreate();
+CharacterClass* wordcharCreate();
+CharacterClass* nondigitsCreate();
+CharacterClass* nonspacesCreate();
+CharacterClass* nonwordcharCreate();
+
+struct RegexPattern {
+ RegexPattern(bool ignoreCase, bool multiline)
+ : m_ignoreCase(ignoreCase)
+ , m_multiline(multiline)
+ , m_numSubpatterns(0)
+ , m_maxBackReference(0)
+ , newlineCached(0)
+ , digitsCached(0)
+ , spacesCached(0)
+ , wordcharCached(0)
+ , nondigitsCached(0)
+ , nonspacesCached(0)
+ , nonwordcharCached(0)
+ {
+ }
+
+ ~RegexPattern()
+ {
+ deleteAllValues(m_disjunctions);
+ deleteAllValues(m_userCharacterClasses);
+ }
+
+ void reset()
+ {
+ m_numSubpatterns = 0;
+ m_maxBackReference = 0;
+
+ newlineCached = 0;
+ digitsCached = 0;
+ spacesCached = 0;
+ wordcharCached = 0;
+ nondigitsCached = 0;
+ nonspacesCached = 0;
+ nonwordcharCached = 0;
+
+ deleteAllValues(m_disjunctions);
+ m_disjunctions.clear();
+ deleteAllValues(m_userCharacterClasses);
+ m_userCharacterClasses.clear();
+ }
+
+ bool containsIllegalBackReference()
+ {
+ return m_maxBackReference > m_numSubpatterns;
+ }
+
+ CharacterClass* newlineCharacterClass()
+ {
+ if (!newlineCached)
+ m_userCharacterClasses.append(newlineCached = newlineCreate());
+ return newlineCached;
+ }
+ CharacterClass* digitsCharacterClass()
+ {
+ if (!digitsCached)
+ m_userCharacterClasses.append(digitsCached = digitsCreate());
+ return digitsCached;
+ }
+ CharacterClass* spacesCharacterClass()
+ {
+ if (!spacesCached)
+ m_userCharacterClasses.append(spacesCached = spacesCreate());
+ return spacesCached;
+ }
+ CharacterClass* wordcharCharacterClass()
+ {
+ if (!wordcharCached)
+ m_userCharacterClasses.append(wordcharCached = wordcharCreate());
+ return wordcharCached;
+ }
+ CharacterClass* nondigitsCharacterClass()
+ {
+ if (!nondigitsCached)
+ m_userCharacterClasses.append(nondigitsCached = nondigitsCreate());
+ return nondigitsCached;
+ }
+ CharacterClass* nonspacesCharacterClass()
+ {
+ if (!nonspacesCached)
+ m_userCharacterClasses.append(nonspacesCached = nonspacesCreate());
+ return nonspacesCached;
+ }
+ CharacterClass* nonwordcharCharacterClass()
+ {
+ if (!nonwordcharCached)
+ m_userCharacterClasses.append(nonwordcharCached = nonwordcharCreate());
+ return nonwordcharCached;
+ }
+
+ bool m_ignoreCase;
+ bool m_multiline;
+ unsigned m_numSubpatterns;
+ unsigned m_maxBackReference;
+ PatternDisjunction* m_body;
+ Vector<PatternDisjunction*, 4> m_disjunctions;
+ Vector<CharacterClass*> m_userCharacterClasses;
+
+private:
+ CharacterClass* newlineCached;
+ CharacterClass* digitsCached;
+ CharacterClass* spacesCached;
+ CharacterClass* wordcharCached;
+ CharacterClass* nondigitsCached;
+ CharacterClass* nonspacesCached;
+ CharacterClass* nonwordcharCached;
+};
+
+} } // namespace JSC::Yarr
+
+#endif
+
+#endif // RegexPattern_h