diff options
author | Simon Hausmann <simon.hausmann@nokia.com> | 2009-09-16 07:51:00 (GMT) |
---|---|---|
committer | Simon Hausmann <simon.hausmann@nokia.com> | 2009-09-16 07:58:16 (GMT) |
commit | 120329adb47dba60f532c1c2fd2ad0f37b812437 (patch) | |
tree | fc2311cef4b69fe7294e8f5aab27fe6af2546123 /src/3rdparty/javascriptcore/JavaScriptCore/wrec | |
parent | ce17ae5a6159d8ce3a5d2cc98f804a2debb860e5 (diff) | |
download | Qt-120329adb47dba60f532c1c2fd2ad0f37b812437.zip Qt-120329adb47dba60f532c1c2fd2ad0f37b812437.tar.gz Qt-120329adb47dba60f532c1c2fd2ad0f37b812437.tar.bz2 |
Separate the copy of JavaScriptCore that QtScript uses from the copy that
QtWebKit uses.
This is needed to decouple QtScript from QtWebKit, as discussed within the
WebKit team.
Reviewed-by: Kent Hansen
Diffstat (limited to 'src/3rdparty/javascriptcore/JavaScriptCore/wrec')
14 files changed, 2747 insertions, 0 deletions
diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClass.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClass.cpp new file mode 100644 index 0000000..e3f12f2 --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClass.cpp @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "CharacterClass.h" + +#if ENABLE(WREC) + +using namespace WTF; + +namespace JSC { namespace WREC { + +const CharacterClass& CharacterClass::newline() { + static const UChar asciiNewlines[2] = { '\n', '\r' }; + static const UChar unicodeNewlines[2] = { 0x2028, 0x2029 }; + static const CharacterClass charClass = { + asciiNewlines, 2, + 0, 0, + unicodeNewlines, 2, + 0, 0, + }; + + return charClass; +} + +const CharacterClass& CharacterClass::digits() { + static const CharacterRange asciiDigitsRange[1] = { { '0', '9' } }; + static const CharacterClass charClass = { + 0, 0, + asciiDigitsRange, 1, + 0, 0, + 0, 0, + }; + + return charClass; +} + +const CharacterClass& CharacterClass::spaces() { + static const UChar asciiSpaces[1] = { ' ' }; + static const CharacterRange asciiSpacesRange[1] = { { '\t', '\r' } }; + static const UChar unicodeSpaces[8] = { 0x00a0, 0x1680, 0x180e, 0x2028, 0x2029, 0x202f, 0x205f, 0x3000 }; + static const CharacterRange unicodeSpacesRange[1] = { { 0x2000, 0x200a } }; + static const CharacterClass charClass = { + asciiSpaces, 1, + asciiSpacesRange, 1, + unicodeSpaces, 8, + unicodeSpacesRange, 1, + }; + + return charClass; +} + +const CharacterClass& CharacterClass::wordchar() { + static const UChar asciiWordchar[1] = { '_' }; + static const CharacterRange asciiWordcharRange[3] = { { '0', '9' }, { 'A', 'Z' }, { 'a', 'z' } }; + static const CharacterClass charClass = { + asciiWordchar, 1, + asciiWordcharRange, 3, + 0, 0, + 0, 0, + }; + + return charClass; +} + +const CharacterClass& CharacterClass::nondigits() { + static const CharacterRange asciiNondigitsRange[2] = { { 0, '0' - 1 }, { '9' + 1, 0x7f } }; + static const CharacterRange unicodeNondigitsRange[1] = { { 0x0080, 0xffff } }; + static const CharacterClass charClass = { + 0, 0, + asciiNondigitsRange, 2, + 0, 0, + unicodeNondigitsRange, 1, + }; + + return charClass; +} + +const CharacterClass& CharacterClass::nonspaces() { + static const CharacterRange 
asciiNonspacesRange[3] = { { 0, '\t' - 1 }, { '\r' + 1, ' ' - 1 }, { ' ' + 1, 0x7f } }; + static const CharacterRange unicodeNonspacesRange[9] = { + { 0x0080, 0x009f }, + { 0x00a1, 0x167f }, + { 0x1681, 0x180d }, + { 0x180f, 0x1fff }, + { 0x200b, 0x2027 }, + { 0x202a, 0x202e }, + { 0x2030, 0x205e }, + { 0x2060, 0x2fff }, + { 0x3001, 0xffff } + }; + static const CharacterClass charClass = { + 0, 0, + asciiNonspacesRange, 3, + 0, 0, + unicodeNonspacesRange, 9, + }; + + return charClass; +} + +const CharacterClass& CharacterClass::nonwordchar() { + static const UChar asciiNonwordchar[1] = { '`' }; + static const CharacterRange asciiNonwordcharRange[4] = { { 0, '0' - 1 }, { '9' + 1, 'A' - 1 }, { 'Z' + 1, '_' - 1 }, { 'z' + 1, 0x7f } }; + static const CharacterRange unicodeNonwordcharRange[1] = { { 0x0080, 0xffff } }; + static const CharacterClass charClass = { + asciiNonwordchar, 1, + asciiNonwordcharRange, 4, + 0, 0, + unicodeNonwordcharRange, 1, + }; + + return charClass; +} + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClass.h b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClass.h new file mode 100644 index 0000000..8a9d2fc --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClass.h @@ -0,0 +1,68 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CharacterClass_h +#define CharacterClass_h + +#include <wtf/Platform.h> + +#if ENABLE(WREC) + +#include <wtf/unicode/Unicode.h> + +namespace JSC { namespace WREC { + + struct CharacterRange { + UChar begin; + UChar end; + }; + + struct CharacterClass { + static const CharacterClass& newline(); + static const CharacterClass& digits(); + static const CharacterClass& spaces(); + static const CharacterClass& wordchar(); + static const CharacterClass& nondigits(); + static const CharacterClass& nonspaces(); + static const CharacterClass& nonwordchar(); + + const UChar* matches; + unsigned numMatches; + + const CharacterRange* ranges; + unsigned numRanges; + + const UChar* matchesUnicode; + unsigned numMatchesUnicode; + + const CharacterRange* rangesUnicode; + unsigned numRangesUnicode; + }; + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) + +#endif // CharacterClass_h diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClassConstructor.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClassConstructor.cpp new file mode 100644 index 0000000..06f4262 --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClassConstructor.cpp @@ -0,0 +1,257 @@ +/* + * Copyright (C) 2008, 2009 Apple Inc. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "CharacterClassConstructor.h" + +#if ENABLE(WREC) + +#include "pcre_internal.h" +#include <wtf/ASCIICType.h> + +using namespace WTF; + +namespace JSC { namespace WREC { + +void CharacterClassConstructor::addSorted(Vector<UChar>& matches, UChar ch) +{ + unsigned pos = 0; + unsigned range = matches.size(); + + // binary chop, find position to insert char. 
+ while (range) { + unsigned index = range >> 1; + + int val = matches[pos+index] - ch; + if (!val) + return; + else if (val > 0) + range = index; + else { + pos += (index+1); + range -= (index+1); + } + } + + if (pos == matches.size()) + matches.append(ch); + else + matches.insert(pos, ch); +} + +void CharacterClassConstructor::addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi) +{ + unsigned end = ranges.size(); + + // Simple linear scan - I doubt there are that many ranges anyway... + // feel free to fix this with something faster (eg binary chop). + for (unsigned i = 0; i < end; ++i) { + // does the new range fall before the current position in the array + if (hi < ranges[i].begin) { + // optional optimization: concatenate appending ranges? - may not be worthwhile. + if (hi == (ranges[i].begin - 1)) { + ranges[i].begin = lo; + return; + } + CharacterRange r = {lo, hi}; + ranges.insert(i, r); + return; + } + // Okay, since we didn't hit the last case, the end of the new range is definitely at or after the begining + // If the new range start at or before the end of the last range, then the overlap (if it starts one after the + // end of the last range they concatenate, which is just as good. + if (lo <= (ranges[i].end + 1)) { + // found an intersect! we'll replace this entry in the array. + ranges[i].begin = std::min(ranges[i].begin, lo); + ranges[i].end = std::max(ranges[i].end, hi); + + // now check if the new range can subsume any subsequent ranges. + unsigned next = i+1; + // each iteration of the loop we will either remove something from the list, or break the loop. + while (next < ranges.size()) { + if (ranges[next].begin <= (ranges[i].end + 1)) { + // the next entry now overlaps / concatenates this one. + ranges[i].end = std::max(ranges[i].end, ranges[next].end); + ranges.remove(next); + } else + break; + } + + return; + } + } + + // CharacterRange comes after all existing ranges. 
+ CharacterRange r = {lo, hi}; + ranges.append(r); +} + +void CharacterClassConstructor::put(UChar ch) +{ + // Parsing a regular expression like [a-z], we start in an initial empty state: + // ((m_charBuffer == -1) && !m_isPendingDash) + // When buffer the 'a' sice it may be (and is in this case) part of a range: + // ((m_charBuffer != -1) && !m_isPendingDash) + // Having parsed the hyphen we then record that the dash is also pending: + // ((m_charBuffer != -1) && m_isPendingDash) + // The next change will always take us back to the initial state - either because + // a complete range has been parsed (such as [a-z]), or because a flush is forced, + // due to an early end in the regexp ([a-]), or a character class escape being added + // ([a-\s]). The fourth permutation of m_charBuffer and m_isPendingDash is not permitted. + ASSERT(!((m_charBuffer == -1) && m_isPendingDash)); + + if (m_charBuffer != -1) { + if (m_isPendingDash) { + // EXAMPLE: parsing [-a-c], the 'c' reaches this case - we have buffered a previous character and seen a hyphen, so this is a range. + UChar lo = m_charBuffer; + UChar hi = ch; + // Reset back to the inital state. + m_charBuffer = -1; + m_isPendingDash = false; + + // This is an error, detected lazily. Do not proceed. 
+ if (lo > hi) { + m_isUpsideDown = true; + return; + } + + if (lo <= 0x7f) { + char asciiLo = lo; + char asciiHi = std::min(hi, (UChar)0x7f); + addSortedRange(m_ranges, lo, asciiHi); + + if (m_isCaseInsensitive) { + if ((asciiLo <= 'Z') && (asciiHi >= 'A')) + addSortedRange(m_ranges, std::max(asciiLo, 'A')+('a'-'A'), std::min(asciiHi, 'Z')+('a'-'A')); + if ((asciiLo <= 'z') && (asciiHi >= 'a')) + addSortedRange(m_ranges, std::max(asciiLo, 'a')+('A'-'a'), std::min(asciiHi, 'z')+('A'-'a')); + } + } + if (hi >= 0x80) { + UChar unicodeCurr = std::max(lo, (UChar)0x80); + addSortedRange(m_rangesUnicode, unicodeCurr, hi); + + if (m_isCaseInsensitive) { + // we're going to scan along, updating the start of the range + while (unicodeCurr <= hi) { + // Spin forwards over any characters that don't have two cases. + for (; jsc_pcre_ucp_othercase(unicodeCurr) == -1; ++unicodeCurr) { + // if this was the last character in the range, we're done. + if (unicodeCurr == hi) + return; + } + // if we fall through to here, unicodeCurr <= hi & has another case. Get the other case. + UChar rangeStart = unicodeCurr; + UChar otherCurr = jsc_pcre_ucp_othercase(unicodeCurr); + + // If unicodeCurr is not yet hi, check the next char in the range. If it also has another case, + // and if it's other case value is one greater then the othercase value for the current last + // character included in the range, we can include next into the range. + while ((unicodeCurr < hi) && (jsc_pcre_ucp_othercase(unicodeCurr + 1) == (otherCurr + 1))) { + // increment unicodeCurr; it points to the end of the range. + // increment otherCurr, due to the check above other for next must be 1 greater than the currrent other value. + ++unicodeCurr; + ++otherCurr; + } + + // otherChar is the last in the range of other case chars, calculate offset to get back to the start. + addSortedRange(m_rangesUnicode, otherCurr-(unicodeCurr-rangeStart), otherCurr); + + // unicodeCurr has been added, move on to the next char. 
+ ++unicodeCurr; + } + } + } + } else if (ch == '-') + // EXAMPLE: parsing [-a-c], the second '-' reaches this case - the hyphen is treated as potentially indicating a range. + m_isPendingDash = true; + else { + // EXAMPLE: Parsing [-a-c], the 'a' reaches this case - we repace the previously buffered char with the 'a'. + flush(); + m_charBuffer = ch; + } + } else + // EXAMPLE: Parsing [-a-c], the first hyphen reaches this case - there is no buffered character + // (the hyphen not treated as a special character in this case, same handling for any char). + m_charBuffer = ch; +} + +// When a character is added to the set we do not immediately add it to the arrays, in case it is actually defining a range. +// When we have determined the character is not used in specifing a range it is added, in a sorted fashion, to the appropriate +// array (either ascii or unicode). +// If the pattern is case insensitive we add entries for both cases. +void CharacterClassConstructor::flush() +{ + if (m_charBuffer != -1) { + if (m_charBuffer <= 0x7f) { + if (m_isCaseInsensitive && isASCIILower(m_charBuffer)) + addSorted(m_matches, toASCIIUpper(m_charBuffer)); + addSorted(m_matches, m_charBuffer); + if (m_isCaseInsensitive && isASCIIUpper(m_charBuffer)) + addSorted(m_matches, toASCIILower(m_charBuffer)); + } else { + addSorted(m_matchesUnicode, m_charBuffer); + if (m_isCaseInsensitive) { + int other = jsc_pcre_ucp_othercase(m_charBuffer); + if (other != -1) + addSorted(m_matchesUnicode, other); + } + } + m_charBuffer = -1; + } + + if (m_isPendingDash) { + addSorted(m_matches, '-'); + m_isPendingDash = false; + } +} + +void CharacterClassConstructor::append(const CharacterClass& other) +{ + // [x-\s] will add, 'x', '-', and all unicode spaces to new class (same as [x\s-]). + // Need to check the spec, really, but think this matches PCRE behaviour. 
+ flush(); + + if (other.numMatches) { + for (size_t i = 0; i < other.numMatches; ++i) + addSorted(m_matches, other.matches[i]); + } + if (other.numRanges) { + for (size_t i = 0; i < other.numRanges; ++i) + addSortedRange(m_ranges, other.ranges[i].begin, other.ranges[i].end); + } + if (other.numMatchesUnicode) { + for (size_t i = 0; i < other.numMatchesUnicode; ++i) + addSorted(m_matchesUnicode, other.matchesUnicode[i]); + } + if (other.numRangesUnicode) { + for (size_t i = 0; i < other.numRangesUnicode; ++i) + addSortedRange(m_rangesUnicode, other.rangesUnicode[i].begin, other.rangesUnicode[i].end); + } +} + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClassConstructor.h b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClassConstructor.h new file mode 100644 index 0000000..581733d --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/CharacterClassConstructor.h @@ -0,0 +1,99 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CharacterClassConstructor_h +#define CharacterClassConstructor_h + +#include <wtf/Platform.h> + +#if ENABLE(WREC) + +#include "CharacterClass.h" +#include <wtf/AlwaysInline.h> +#include <wtf/Vector.h> +#include <wtf/unicode/Unicode.h> + +namespace JSC { namespace WREC { + + class CharacterClassConstructor { + public: + CharacterClassConstructor(bool isCaseInsensitive) + : m_charBuffer(-1) + , m_isPendingDash(false) + , m_isCaseInsensitive(isCaseInsensitive) + , m_isUpsideDown(false) + { + } + + void flush(); + + // We need to flush prior to an escaped hyphen to prevent it as being treated as indicating + // a range, e.g. [a\-c] we flush prior to adding the hyphen so that this is not treated as + // [a-c]. However, we do not want to flush if we have already seen a non escaped hyphen - + // e.g. [+-\-] should be treated the same as [+--], producing a range that will also match + // a comma. 
+ void flushBeforeEscapedHyphen() + { + if (!m_isPendingDash) + flush(); + } + + void put(UChar ch); + void append(const CharacterClass& other); + + bool isUpsideDown() { return m_isUpsideDown; } + + ALWAYS_INLINE CharacterClass charClass() + { + CharacterClass newCharClass = { + m_matches.begin(), m_matches.size(), + m_ranges.begin(), m_ranges.size(), + m_matchesUnicode.begin(), m_matchesUnicode.size(), + m_rangesUnicode.begin(), m_rangesUnicode.size(), + }; + + return newCharClass; + } + + private: + void addSorted(Vector<UChar>& matches, UChar ch); + void addSortedRange(Vector<CharacterRange>& ranges, UChar lo, UChar hi); + + int m_charBuffer; + bool m_isPendingDash; + bool m_isCaseInsensitive; + bool m_isUpsideDown; + + Vector<UChar> m_matches; + Vector<CharacterRange> m_ranges; + Vector<UChar> m_matchesUnicode; + Vector<CharacterRange> m_rangesUnicode; + }; + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) + +#endif // CharacterClassConstructor_h diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/Escapes.h b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/Escapes.h new file mode 100644 index 0000000..16c1d6f --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/Escapes.h @@ -0,0 +1,150 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef Escapes_h +#define Escapes_h + +#include <wtf/Platform.h> + +#if ENABLE(WREC) + +#include <wtf/Assertions.h> + +namespace JSC { namespace WREC { + + class CharacterClass; + + class Escape { + public: + enum Type { + PatternCharacter, + CharacterClass, + Backreference, + WordBoundaryAssertion, + Error, + }; + + Escape(Type type) + : m_type(type) + { + } + + Type type() const { return m_type; } + + private: + Type m_type; + + protected: + // Used by subclasses to store data. 
+ union { + int i; + const WREC::CharacterClass* c; + } m_u; + bool m_invert; + }; + + class PatternCharacterEscape : public Escape { + public: + static const PatternCharacterEscape& cast(const Escape& escape) + { + ASSERT(escape.type() == PatternCharacter); + return static_cast<const PatternCharacterEscape&>(escape); + } + + PatternCharacterEscape(int character) + : Escape(PatternCharacter) + { + m_u.i = character; + } + + operator Escape() const { return *this; } + + int character() const { return m_u.i; } + }; + + class CharacterClassEscape : public Escape { + public: + static const CharacterClassEscape& cast(const Escape& escape) + { + ASSERT(escape.type() == CharacterClass); + return static_cast<const CharacterClassEscape&>(escape); + } + + CharacterClassEscape(const WREC::CharacterClass& characterClass, bool invert) + : Escape(CharacterClass) + { + m_u.c = &characterClass; + m_invert = invert; + } + + operator Escape() { return *this; } + + const WREC::CharacterClass& characterClass() const { return *m_u.c; } + bool invert() const { return m_invert; } + }; + + class BackreferenceEscape : public Escape { + public: + static const BackreferenceEscape& cast(const Escape& escape) + { + ASSERT(escape.type() == Backreference); + return static_cast<const BackreferenceEscape&>(escape); + } + + BackreferenceEscape(int subpatternId) + : Escape(Backreference) + { + m_u.i = subpatternId; + } + + operator Escape() const { return *this; } + + int subpatternId() const { return m_u.i; } + }; + + class WordBoundaryAssertionEscape : public Escape { + public: + static const WordBoundaryAssertionEscape& cast(const Escape& escape) + { + ASSERT(escape.type() == WordBoundaryAssertion); + return static_cast<const WordBoundaryAssertionEscape&>(escape); + } + + WordBoundaryAssertionEscape(bool invert) + : Escape(WordBoundaryAssertion) + { + m_invert = invert; + } + + operator Escape() const { return *this; } + + bool invert() const { return m_invert; } + }; + +} } // namespace 
JSC::WREC + +#endif // ENABLE(WREC) + +#endif // Escapes_h diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/Quantifier.h b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/Quantifier.h new file mode 100644 index 0000000..3da74cd --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/Quantifier.h @@ -0,0 +1,66 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef Quantifier_h +#define Quantifier_h + +#include <wtf/Platform.h> + +#if ENABLE(WREC) + +#include <wtf/Assertions.h> +#include <limits.h> + +namespace JSC { namespace WREC { + + struct Quantifier { + enum Type { + None, + Greedy, + NonGreedy, + Error, + }; + + Quantifier(Type type = None, unsigned min = 0, unsigned max = Infinity) + : type(type) + , min(min) + , max(max) + { + ASSERT(min <= max); + } + + Type type; + + unsigned min; + unsigned max; + + static const unsigned Infinity = UINT_MAX; + }; + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) + +#endif // Quantifier_h diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WREC.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WREC.cpp new file mode 100644 index 0000000..145a1ce --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WREC.cpp @@ -0,0 +1,86 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "WREC.h" + +#if ENABLE(WREC) + +#include "CharacterClassConstructor.h" +#include "Interpreter.h" +#include "JSGlobalObject.h" +#include "RegisterFile.h" +#include "WRECFunctors.h" +#include "WRECParser.h" +#include "pcre_internal.h" + +using namespace WTF; + +namespace JSC { namespace WREC { + +CompiledRegExp Generator::compileRegExp(JSGlobalData* globalData, const UString& pattern, unsigned* numSubpatterns_ptr, const char** error_ptr, RefPtr<ExecutablePool>& pool, bool ignoreCase, bool multiline) +{ + if (pattern.size() > MAX_PATTERN_SIZE) { + *error_ptr = "regular expression too large"; + return 0; + } + + Parser parser(pattern, ignoreCase, multiline); + Generator& generator = parser.generator(); + MacroAssembler::JumpList failures; + MacroAssembler::Jump endOfInput; + + generator.generateEnter(); + generator.generateSaveIndex(); + + Label beginPattern(&generator); + parser.parsePattern(failures); + generator.generateReturnSuccess(); + + failures.link(&generator); + generator.generateIncrementIndex(&endOfInput); + parser.parsePattern(failures); + generator.generateReturnSuccess(); + + failures.link(&generator); + generator.generateIncrementIndex(); + generator.generateJumpIfNotEndOfInput(beginPattern); + + endOfInput.link(&generator); + generator.generateReturnFailure(); + + if (parser.error()) { + *error_ptr = parser.syntaxError(); // NULL in the case of patterns that WREC doesn't support yet. 
+ return 0; + } + + *numSubpatterns_ptr = parser.numSubpatterns(); + pool = globalData->executableAllocator.poolForSize(generator.size()); + return reinterpret_cast<CompiledRegExp>(generator.copyCode(pool.get())); +} + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WREC.h b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WREC.h new file mode 100644 index 0000000..483dce0 --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WREC.h @@ -0,0 +1,54 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef WREC_h +#define WREC_h + +#include <wtf/Platform.h> + +#if ENABLE(WREC) + +#include <wtf/unicode/Unicode.h> + +#if COMPILER(GCC) && PLATFORM(X86) +#define WREC_CALL __attribute__ ((regparm (3))) +#else +#define WREC_CALL +#endif + +namespace JSC { + class Interpreter; + class UString; +} + +namespace JSC { namespace WREC { + + typedef int (*CompiledRegExp)(const UChar* input, unsigned start, unsigned length, int* output) WREC_CALL; + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) + +#endif // WREC_h diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECFunctors.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECFunctors.cpp new file mode 100644 index 0000000..5f1674e --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECFunctors.cpp @@ -0,0 +1,80 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "WRECFunctors.h" + +#if ENABLE(WREC) + +#include "WRECGenerator.h" + +using namespace WTF; + +namespace JSC { namespace WREC { + +void GeneratePatternCharacterFunctor::generateAtom(Generator* generator, Generator::JumpList& failures) +{ + generator->generatePatternCharacter(failures, m_ch); +} + +void GeneratePatternCharacterFunctor::backtrack(Generator* generator) +{ + generator->generateBacktrack1(); +} + +void GenerateCharacterClassFunctor::generateAtom(Generator* generator, Generator::JumpList& failures) +{ + generator->generateCharacterClass(failures, *m_charClass, m_invert); +} + +void GenerateCharacterClassFunctor::backtrack(Generator* generator) +{ + generator->generateBacktrack1(); +} + +void GenerateBackreferenceFunctor::generateAtom(Generator* generator, Generator::JumpList& failures) +{ + generator->generateBackreference(failures, m_subpatternId); +} + +void GenerateBackreferenceFunctor::backtrack(Generator* generator) +{ + generator->generateBacktrackBackreference(m_subpatternId); +} + +void GenerateParenthesesNonGreedyFunctor::generateAtom(Generator* generator, Generator::JumpList& failures) +{ + generator->generateParenthesesNonGreedy(failures, m_start, m_success, m_fail); +} + +void GenerateParenthesesNonGreedyFunctor::backtrack(Generator*) +{ + // FIXME: do something about this. 
+ CRASH(); +} + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECFunctors.h b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECFunctors.h new file mode 100644 index 0000000..610ce55 --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECFunctors.h @@ -0,0 +1,109 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <wtf/Platform.h> + +#if ENABLE(WREC) + +#include "WRECGenerator.h" +#include <wtf/unicode/Unicode.h> + +namespace JSC { namespace WREC { + + struct CharacterClass; + + class GenerateAtomFunctor { + public: + virtual ~GenerateAtomFunctor() {} + + virtual void generateAtom(Generator*, Generator::JumpList&) = 0; + virtual void backtrack(Generator*) = 0; + }; + + class GeneratePatternCharacterFunctor : public GenerateAtomFunctor { + public: + GeneratePatternCharacterFunctor(const UChar ch) + : m_ch(ch) + { + } + + virtual void generateAtom(Generator*, Generator::JumpList&); + virtual void backtrack(Generator*); + + private: + const UChar m_ch; + }; + + class GenerateCharacterClassFunctor : public GenerateAtomFunctor { + public: + GenerateCharacterClassFunctor(const CharacterClass* charClass, bool invert) + : m_charClass(charClass) + , m_invert(invert) + { + } + + virtual void generateAtom(Generator*, Generator::JumpList&); + virtual void backtrack(Generator*); + + private: + const CharacterClass* m_charClass; + bool m_invert; + }; + + class GenerateBackreferenceFunctor : public GenerateAtomFunctor { + public: + GenerateBackreferenceFunctor(unsigned subpatternId) + : m_subpatternId(subpatternId) + { + } + + virtual void generateAtom(Generator*, Generator::JumpList&); + virtual void backtrack(Generator*); + + private: + unsigned m_subpatternId; + }; + + class GenerateParenthesesNonGreedyFunctor : public GenerateAtomFunctor { + public: + GenerateParenthesesNonGreedyFunctor(Generator::Label start, Generator::Jump success, Generator::Jump fail) + : m_start(start) + , m_success(success) + , m_fail(fail) + { + } + + virtual void generateAtom(Generator*, Generator::JumpList&); + virtual void backtrack(Generator*); + + private: + Generator::Label m_start; + Generator::Jump m_success; + Generator::Jump m_fail; + }; + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECGenerator.cpp 
b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECGenerator.cpp new file mode 100644 index 0000000..e2e8aba --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECGenerator.cpp @@ -0,0 +1,653 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "WREC.h" + +#if ENABLE(WREC) + +#include "CharacterClassConstructor.h" +#include "Interpreter.h" +#include "WRECFunctors.h" +#include "WRECParser.h" +#include "pcre_internal.h" + +using namespace WTF; + +namespace JSC { namespace WREC { + +void Generator::generateEnter() +{ +#if PLATFORM(X86) + // On x86 edi & esi are callee preserved registers. 
+ push(X86::edi); + push(X86::esi); + +#if COMPILER(MSVC) + // Move the arguments into registers. + peek(input, 3); + peek(index, 4); + peek(length, 5); + peek(output, 6); +#else + // On gcc the function is regparm(3), so the input, index, and length registers + // (eax, edx, and ecx respectively) already contain the appropriate values. + // Just load the fourth argument (output) into edi + peek(output, 3); +#endif +#endif +} + +void Generator::generateReturnSuccess() +{ + ASSERT(returnRegister != index); + ASSERT(returnRegister != output); + + // Set return value. + pop(returnRegister); // match begin + store32(returnRegister, output); + store32(index, Address(output, 4)); // match end + + // Restore callee save registers. +#if PLATFORM(X86) + pop(X86::esi); + pop(X86::edi); +#endif + ret(); +} + +void Generator::generateSaveIndex() +{ + push(index); +} + +void Generator::generateIncrementIndex(Jump* failure) +{ + peek(index); + if (failure) + *failure = branch32(Equal, length, index); + add32(Imm32(1), index); + poke(index); +} + +void Generator::generateLoadCharacter(JumpList& failures) +{ + failures.append(branch32(Equal, length, index)); + load16(BaseIndex(input, index, TimesTwo), character); +} + +// For the sake of end-of-line assertions, we treat one-past-the-end as if it +// were part of the input string. 
+void Generator::generateJumpIfNotEndOfInput(Label target) +{ + branch32(LessThanOrEqual, index, length, target); +} + +void Generator::generateReturnFailure() +{ + pop(); + move(Imm32(-1), returnRegister); + +#if PLATFORM(X86) + pop(X86::esi); + pop(X86::edi); +#endif + ret(); +} + +void Generator::generateBacktrack1() +{ + sub32(Imm32(1), index); +} + +void Generator::generateBacktrackBackreference(unsigned subpatternId) +{ + sub32(Address(output, (2 * subpatternId + 1) * sizeof(int)), index); + add32(Address(output, (2 * subpatternId) * sizeof(int)), index); +} + +void Generator::generateBackreferenceQuantifier(JumpList& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max) +{ + GenerateBackreferenceFunctor functor(subpatternId); + + load32(Address(output, (2 * subpatternId) * sizeof(int)), character); + Jump skipIfEmpty = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), character); + + ASSERT(quantifierType == Quantifier::Greedy || quantifierType == Quantifier::NonGreedy); + if (quantifierType == Quantifier::Greedy) + generateGreedyQuantifier(failures, functor, min, max); + else + generateNonGreedyQuantifier(failures, functor, min, max); + + skipIfEmpty.link(this); +} + +void Generator::generateNonGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max) +{ + JumpList atomFailedList; + JumpList alternativeFailedList; + + // (0) Setup: Save, then init repeatCount. + push(repeatCount); + move(Imm32(0), repeatCount); + Jump start = jump(); + + // (4) Quantifier failed: No more atom reading possible. + Label quantifierFailed(this); + pop(repeatCount); + failures.append(jump()); + + // (3) Alternative failed: If we can, read another atom, then fall through to (2) to try again. + Label alternativeFailed(this); + pop(index); + if (max != Quantifier::Infinity) + branch32(Equal, repeatCount, Imm32(max), quantifierFailed); + + // (1) Read an atom. 
+ if (min) + start.link(this); + Label readAtom(this); + functor.generateAtom(this, atomFailedList); + atomFailedList.linkTo(quantifierFailed, this); + add32(Imm32(1), repeatCount); + + // (2) Keep reading if we're under the minimum. + if (min > 1) + branch32(LessThan, repeatCount, Imm32(min), readAtom); + + // (3) Test the rest of the alternative. + if (!min) + start.link(this); + push(index); + m_parser.parseAlternative(alternativeFailedList); + alternativeFailedList.linkTo(alternativeFailed, this); + + pop(); + pop(repeatCount); +} + +void Generator::generateGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max) +{ + if (!max) + return; + + JumpList doneReadingAtomsList; + JumpList alternativeFailedList; + + // (0) Setup: Save, then init repeatCount. + push(repeatCount); + move(Imm32(0), repeatCount); + + // (1) Greedily read as many copies of the atom as possible, then jump to (2). + Label readAtom(this); + functor.generateAtom(this, doneReadingAtomsList); + add32(Imm32(1), repeatCount); + if (max == Quantifier::Infinity) + jump(readAtom); + else if (max == 1) + doneReadingAtomsList.append(jump()); + else { + branch32(NotEqual, repeatCount, Imm32(max), readAtom); + doneReadingAtomsList.append(jump()); + } + + // (5) Quantifier failed: No more backtracking possible. + Label quantifierFailed(this); + pop(repeatCount); + failures.append(jump()); + + // (4) Alternative failed: Backtrack, then fall through to (2) to try again. + Label alternativeFailed(this); + pop(index); + functor.backtrack(this); + sub32(Imm32(1), repeatCount); + + // (2) Verify that we have enough atoms. + doneReadingAtomsList.link(this); + branch32(LessThan, repeatCount, Imm32(min), quantifierFailed); + + // (3) Test the rest of the alternative. 
+ push(index); + m_parser.parseAlternative(alternativeFailedList); + alternativeFailedList.linkTo(alternativeFailed, this); + + pop(); + pop(repeatCount); +} + +void Generator::generatePatternCharacterSequence(JumpList& failures, int* sequence, size_t count) +{ + for (size_t i = 0; i < count;) { + if (i < count - 1) { + if (generatePatternCharacterPair(failures, sequence[i], sequence[i + 1])) { + i += 2; + continue; + } + } + + generatePatternCharacter(failures, sequence[i]); + ++i; + } +} + +bool Generator::generatePatternCharacterPair(JumpList& failures, int ch1, int ch2) +{ + if (m_parser.ignoreCase()) { + // Non-trivial case folding requires more than one test, so we can't + // test as a pair with an adjacent character. + if (!isASCII(ch1) && Unicode::toLower(ch1) != Unicode::toUpper(ch1)) + return false; + if (!isASCII(ch2) && Unicode::toLower(ch2) != Unicode::toUpper(ch2)) + return false; + } + + // Optimistically consume 2 characters. + add32(Imm32(2), index); + failures.append(branch32(GreaterThan, index, length)); + + // Load the characters we just consumed, offset -2 characters from index. + load32(BaseIndex(input, index, TimesTwo, -2 * 2), character); + + if (m_parser.ignoreCase()) { + // Convert ASCII alphabet characters to upper case before testing for + // equality. (ASCII non-alphabet characters don't require upper-casing + // because they have no uppercase equivalents. Unicode characters don't + // require upper-casing because we only handle Unicode characters whose + // upper and lower cases are equal.) 
+ int ch1Mask = 0; + if (isASCIIAlpha(ch1)) { + ch1 |= 32; + ch1Mask = 32; + } + + int ch2Mask = 0; + if (isASCIIAlpha(ch2)) { + ch2 |= 32; + ch2Mask = 32; + } + + int mask = ch1Mask | (ch2Mask << 16); + if (mask) + or32(Imm32(mask), character); + } + int pair = ch1 | (ch2 << 16); + + failures.append(branch32(NotEqual, character, Imm32(pair))); + return true; +} + +void Generator::generatePatternCharacter(JumpList& failures, int ch) +{ + generateLoadCharacter(failures); + + // used for unicode case insensitive + bool hasUpper = false; + Jump isUpper; + + // if case insensitive match + if (m_parser.ignoreCase()) { + UChar lower, upper; + + // check for ascii case sensitive characters + if (isASCIIAlpha(ch)) { + or32(Imm32(32), character); + ch |= 32; + } else if (!isASCII(ch) && ((lower = Unicode::toLower(ch)) != (upper = Unicode::toUpper(ch)))) { + // handle unicode case sentitive characters - branch to success on upper + isUpper = branch32(Equal, character, Imm32(upper)); + hasUpper = true; + ch = lower; + } + } + + // checks for ch, or lower case version of ch, if insensitive + failures.append(branch32(NotEqual, character, Imm32((unsigned short)ch))); + + if (m_parser.ignoreCase() && hasUpper) { + // for unicode case insensitive matches, branch here if upper matches. + isUpper.link(this); + } + + // on success consume the char + add32(Imm32(1), index); +} + +void Generator::generateCharacterClassInvertedRange(JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount) +{ + do { + // pick which range we're going to generate + int which = count >> 1; + char lo = ranges[which].begin; + char hi = ranges[which].end; + + // check if there are any ranges or matches below lo. If not, just jl to failure - + // if there is anything else to check, check that first, if it falls through jmp to failure. 
+ if ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { + Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); + + // generate code for all ranges before this one + if (which) + generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount); + + while ((*matchIndex < matchCount) && (matches[*matchIndex] < lo)) { + matchDest.append(branch32(Equal, character, Imm32((unsigned short)matches[*matchIndex]))); + ++*matchIndex; + } + failures.append(jump()); + + loOrAbove.link(this); + } else if (which) { + Jump loOrAbove = branch32(GreaterThanOrEqual, character, Imm32((unsigned short)lo)); + + generateCharacterClassInvertedRange(failures, matchDest, ranges, which, matchIndex, matches, matchCount); + failures.append(jump()); + + loOrAbove.link(this); + } else + failures.append(branch32(LessThan, character, Imm32((unsigned short)lo))); + + while ((*matchIndex < matchCount) && (matches[*matchIndex] <= hi)) + ++*matchIndex; + + matchDest.append(branch32(LessThanOrEqual, character, Imm32((unsigned short)hi))); + // fall through to here, the value is above hi. + + // shuffle along & loop around if there are any more matches to handle. 
+ unsigned next = which + 1; + ranges += next; + count -= next; + } while (count); +} + +void Generator::generateCharacterClassInverted(JumpList& matchDest, const CharacterClass& charClass) +{ + Jump unicodeFail; + if (charClass.numMatchesUnicode || charClass.numRangesUnicode) { + Jump isAscii = branch32(LessThanOrEqual, character, Imm32(0x7f)); + + if (charClass.numMatchesUnicode) { + for (unsigned i = 0; i < charClass.numMatchesUnicode; ++i) { + UChar ch = charClass.matchesUnicode[i]; + matchDest.append(branch32(Equal, character, Imm32(ch))); + } + } + + if (charClass.numRangesUnicode) { + for (unsigned i = 0; i < charClass.numRangesUnicode; ++i) { + UChar lo = charClass.rangesUnicode[i].begin; + UChar hi = charClass.rangesUnicode[i].end; + + Jump below = branch32(LessThan, character, Imm32(lo)); + matchDest.append(branch32(LessThanOrEqual, character, Imm32(hi))); + below.link(this); + } + } + + unicodeFail = jump(); + isAscii.link(this); + } + + if (charClass.numRanges) { + unsigned matchIndex = 0; + JumpList failures; + generateCharacterClassInvertedRange(failures, matchDest, charClass.ranges, charClass.numRanges, &matchIndex, charClass.matches, charClass.numMatches); + while (matchIndex < charClass.numMatches) + matchDest.append(branch32(Equal, character, Imm32((unsigned short)charClass.matches[matchIndex++]))); + + failures.link(this); + } else if (charClass.numMatches) { + // optimization: gather 'a','A' etc back together, can mask & test once. 
+ Vector<char> matchesAZaz; + + for (unsigned i = 0; i < charClass.numMatches; ++i) { + char ch = charClass.matches[i]; + if (m_parser.ignoreCase()) { + if (isASCIILower(ch)) { + matchesAZaz.append(ch); + continue; + } + if (isASCIIUpper(ch)) + continue; + } + matchDest.append(branch32(Equal, character, Imm32((unsigned short)ch))); + } + + if (unsigned countAZaz = matchesAZaz.size()) { + or32(Imm32(32), character); + for (unsigned i = 0; i < countAZaz; ++i) + matchDest.append(branch32(Equal, character, Imm32(matchesAZaz[i]))); + } + } + + if (charClass.numMatchesUnicode || charClass.numRangesUnicode) + unicodeFail.link(this); +} + +void Generator::generateCharacterClass(JumpList& failures, const CharacterClass& charClass, bool invert) +{ + generateLoadCharacter(failures); + + if (invert) + generateCharacterClassInverted(failures, charClass); + else { + JumpList successes; + generateCharacterClassInverted(successes, charClass); + failures.append(jump()); + successes.link(this); + } + + add32(Imm32(1), index); +} + +void Generator::generateParenthesesAssertion(JumpList& failures) +{ + JumpList disjunctionFailed; + + push(index); + m_parser.parseDisjunction(disjunctionFailed); + Jump success = jump(); + + disjunctionFailed.link(this); + pop(index); + failures.append(jump()); + + success.link(this); + pop(index); +} + +void Generator::generateParenthesesInvertedAssertion(JumpList& failures) +{ + JumpList disjunctionFailed; + + push(index); + m_parser.parseDisjunction(disjunctionFailed); + + // If the disjunction succeeded, the inverted assertion failed. + pop(index); + failures.append(jump()); + + // If the disjunction failed, the inverted assertion succeeded. 
+ disjunctionFailed.link(this); + pop(index); +} + +void Generator::generateParenthesesNonGreedy(JumpList& failures, Label start, Jump success, Jump fail) +{ + jump(start); + success.link(this); + failures.append(fail); +} + +Generator::Jump Generator::generateParenthesesResetTrampoline(JumpList& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter) +{ + Jump skip = jump(); + newFailures.link(this); + for (unsigned i = subpatternIdBefore + 1; i <= subpatternIdAfter; ++i) { + store32(Imm32(-1), Address(output, (2 * i) * sizeof(int))); + store32(Imm32(-1), Address(output, (2 * i + 1) * sizeof(int))); + } + + Jump newFailJump = jump(); + skip.link(this); + + return newFailJump; +} + +void Generator::generateAssertionBOL(JumpList& failures) +{ + if (m_parser.multiline()) { + JumpList previousIsNewline; + + // begin of input == success + previousIsNewline.append(branch32(Equal, index, Imm32(0))); + + // now check prev char against newline characters. + load16(BaseIndex(input, index, TimesTwo, -2), character); + generateCharacterClassInverted(previousIsNewline, CharacterClass::newline()); + + failures.append(jump()); + + previousIsNewline.link(this); + } else + failures.append(branch32(NotEqual, index, Imm32(0))); +} + +void Generator::generateAssertionEOL(JumpList& failures) +{ + if (m_parser.multiline()) { + JumpList nextIsNewline; + + generateLoadCharacter(nextIsNewline); // end of input == success + generateCharacterClassInverted(nextIsNewline, CharacterClass::newline()); + failures.append(jump()); + nextIsNewline.link(this); + } else { + failures.append(branch32(NotEqual, length, index)); + } +} + +void Generator::generateAssertionWordBoundary(JumpList& failures, bool invert) +{ + JumpList wordBoundary; + JumpList notWordBoundary; + + // (1) Check if the previous value was a word char + + // (1.1) check for begin of input + Jump atBegin = branch32(Equal, index, Imm32(0)); + // (1.2) load the last char, and chck if is word character + 
load16(BaseIndex(input, index, TimesTwo, -2), character); + JumpList previousIsWord; + generateCharacterClassInverted(previousIsWord, CharacterClass::wordchar()); + // (1.3) if we get here, previous is not a word char + atBegin.link(this); + + // (2) Handle situation where previous was NOT a \w + + generateLoadCharacter(notWordBoundary); + generateCharacterClassInverted(wordBoundary, CharacterClass::wordchar()); + // (2.1) If we get here, neither chars are word chars + notWordBoundary.append(jump()); + + // (3) Handle situation where previous was a \w + + // (3.0) link success in first match to here + previousIsWord.link(this); + generateLoadCharacter(wordBoundary); + generateCharacterClassInverted(notWordBoundary, CharacterClass::wordchar()); + // (3.1) If we get here, this is an end of a word, within the input. + + // (4) Link everything up + + if (invert) { + // handle the fall through case + wordBoundary.append(jump()); + + // looking for non word boundaries, so link boundary fails to here. + notWordBoundary.link(this); + + failures.append(wordBoundary); + } else { + // looking for word boundaries, so link successes here. + wordBoundary.link(this); + + failures.append(notWordBoundary); + } +} + +void Generator::generateBackreference(JumpList& failures, unsigned subpatternId) +{ + push(index); + push(repeatCount); + + // get the start pos of the backref into repeatCount (multipurpose!) + load32(Address(output, (2 * subpatternId) * sizeof(int)), repeatCount); + + Jump skipIncrement = jump(); + Label topOfLoop(this); + + add32(Imm32(1), index); + add32(Imm32(1), repeatCount); + skipIncrement.link(this); + + // check if we're at the end of backref (if we are, success!) 
+ Jump endOfBackRef = branch32(Equal, Address(output, ((2 * subpatternId) + 1) * sizeof(int)), repeatCount); + + load16(BaseIndex(input, repeatCount, MacroAssembler::TimesTwo), character); + + // check if we've run out of input (this would be a can o'fail) + Jump endOfInput = branch32(Equal, length, index); + + branch16(Equal, BaseIndex(input, index, TimesTwo), character, topOfLoop); + + endOfInput.link(this); + + // Failure + pop(repeatCount); + pop(index); + failures.append(jump()); + + // Success + endOfBackRef.link(this); + pop(repeatCount); + pop(); +} + +void Generator::terminateAlternative(JumpList& successes, JumpList& failures) +{ + successes.append(jump()); + + failures.link(this); + peek(index); +} + +void Generator::terminateDisjunction(JumpList& successes) +{ + successes.link(this); +} + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECGenerator.h b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECGenerator.h new file mode 100644 index 0000000..8562cac --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECGenerator.h @@ -0,0 +1,128 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef WRECGenerator_h +#define WRECGenerator_h + +#include <wtf/Platform.h> + +#if ENABLE(WREC) + +#include "Quantifier.h" +#include "MacroAssembler.h" +#include <wtf/ASCIICType.h> +#include <wtf/unicode/Unicode.h> +#include "WREC.h" + +namespace JSC { + + class JSGlobalData; + + namespace WREC { + + class CharacterRange; + class GenerateAtomFunctor; + class Parser; + struct CharacterClass; + + class Generator : private MacroAssembler { + public: + using MacroAssembler::Jump; + using MacroAssembler::JumpList; + using MacroAssembler::Label; + + enum ParenthesesType { Capturing, NonCapturing, Assertion, InvertedAssertion, Error }; + + static CompiledRegExp compileRegExp(JSGlobalData*, const UString& pattern, unsigned* numSubpatterns_ptr, const char** error_ptr, RefPtr<ExecutablePool>& pool, bool ignoreCase = false, bool multiline = false); + + Generator(Parser& parser) + : m_parser(parser) + { + } + +#if PLATFORM(X86) + static const RegisterID input = X86::eax; + static const RegisterID index = X86::edx; + static const RegisterID length = X86::ecx; + static const RegisterID output = X86::edi; + + static const RegisterID character = X86::esi; + static const RegisterID repeatCount = X86::ebx; // How many times the current atom repeats in the current match. 
+ + static const RegisterID returnRegister = X86::eax; +#endif +#if PLATFORM(X86_64) + static const RegisterID input = X86::edi; + static const RegisterID index = X86::esi; + static const RegisterID length = X86::edx; + static const RegisterID output = X86::ecx; + + static const RegisterID character = X86::eax; + static const RegisterID repeatCount = X86::ebx; // How many times the current atom repeats in the current match. + + static const RegisterID returnRegister = X86::eax; +#endif + + void generateEnter(); + void generateSaveIndex(); + void generateIncrementIndex(Jump* failure = 0); + void generateLoadCharacter(JumpList& failures); + void generateJumpIfNotEndOfInput(Label); + void generateReturnSuccess(); + void generateReturnFailure(); + + void generateGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max); + void generateNonGreedyQuantifier(JumpList& failures, GenerateAtomFunctor& functor, unsigned min, unsigned max); + void generateBacktrack1(); + void generateBacktrackBackreference(unsigned subpatternId); + void generateCharacterClass(JumpList& failures, const CharacterClass& charClass, bool invert); + void generateCharacterClassInverted(JumpList& failures, const CharacterClass& charClass); + void generateCharacterClassInvertedRange(JumpList& failures, JumpList& matchDest, const CharacterRange* ranges, unsigned count, unsigned* matchIndex, const UChar* matches, unsigned matchCount); + void generatePatternCharacter(JumpList& failures, int ch); + void generatePatternCharacterSequence(JumpList& failures, int* sequence, size_t count); + void generateAssertionWordBoundary(JumpList& failures, bool invert); + void generateAssertionBOL(JumpList& failures); + void generateAssertionEOL(JumpList& failures); + void generateBackreference(JumpList& failures, unsigned subpatternID); + void generateBackreferenceQuantifier(JumpList& failures, Quantifier::Type quantifierType, unsigned subpatternId, unsigned min, unsigned max); + void 
generateParenthesesAssertion(JumpList& failures); + void generateParenthesesInvertedAssertion(JumpList& failures); + Jump generateParenthesesResetTrampoline(JumpList& newFailures, unsigned subpatternIdBefore, unsigned subpatternIdAfter); + void generateParenthesesNonGreedy(JumpList& failures, Label start, Jump success, Jump fail); + + void terminateAlternative(JumpList& successes, JumpList& failures); + void terminateDisjunction(JumpList& successes); + + private: + bool generatePatternCharacterPair(JumpList& failures, int ch1, int ch2); + + Parser& m_parser; + }; + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) + +#endif // WRECGenerator_h diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp new file mode 100644 index 0000000..1709bf9 --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.cpp @@ -0,0 +1,643 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "WRECParser.h" + +#if ENABLE(WREC) + +#include "CharacterClassConstructor.h" +#include "WRECFunctors.h" + +using namespace WTF; + +namespace JSC { namespace WREC { + +// These error messages match the error messages used by PCRE. +const char* Parser::QuantifierOutOfOrder = "numbers out of order in {} quantifier"; +const char* Parser::QuantifierWithoutAtom = "nothing to repeat"; +const char* Parser::ParenthesesUnmatched = "unmatched parentheses"; +const char* Parser::ParenthesesTypeInvalid = "unrecognized character after (?"; +const char* Parser::ParenthesesNotSupported = ""; // Not a user-visible syntax error -- just signals a syntax that WREC doesn't support yet. 
+const char* Parser::CharacterClassUnmatched = "missing terminating ] for character class"; +const char* Parser::CharacterClassOutOfOrder = "range out of order in character class"; +const char* Parser::EscapeUnterminated = "\\ at end of pattern"; + +class PatternCharacterSequence { +typedef Generator::JumpList JumpList; + +public: + PatternCharacterSequence(Generator& generator, JumpList& failures) + : m_generator(generator) + , m_failures(failures) + { + } + + size_t size() { return m_sequence.size(); } + + void append(int ch) + { + m_sequence.append(ch); + } + + void flush() + { + if (!m_sequence.size()) + return; + + m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size()); + m_sequence.clear(); + } + + void flush(const Quantifier& quantifier) + { + if (!m_sequence.size()) + return; + + m_generator.generatePatternCharacterSequence(m_failures, m_sequence.begin(), m_sequence.size() - 1); + + switch (quantifier.type) { + case Quantifier::None: + case Quantifier::Error: + ASSERT_NOT_REACHED(); + break; + + case Quantifier::Greedy: { + GeneratePatternCharacterFunctor functor(m_sequence.last()); + m_generator.generateGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max); + break; + } + + case Quantifier::NonGreedy: { + GeneratePatternCharacterFunctor functor(m_sequence.last()); + m_generator.generateNonGreedyQuantifier(m_failures, functor, quantifier.min, quantifier.max); + break; + } + } + + m_sequence.clear(); + } + +private: + Generator& m_generator; + JumpList& m_failures; + Vector<int, 8> m_sequence; +}; + +ALWAYS_INLINE Quantifier Parser::consumeGreedyQuantifier() +{ + switch (peek()) { + case '?': + consume(); + return Quantifier(Quantifier::Greedy, 0, 1); + + case '*': + consume(); + return Quantifier(Quantifier::Greedy, 0); + + case '+': + consume(); + return Quantifier(Quantifier::Greedy, 1); + + case '{': { + SavedState state(*this); + consume(); + + // Accept: {n}, {n,}, {n,m}. 
+ // Reject: {n,m} where n > m. + // Ignore: Anything else, such as {n, m}. + + if (!peekIsDigit()) { + state.restore(); + return Quantifier(); + } + + unsigned min = consumeNumber(); + unsigned max = min; + + if (peek() == ',') { + consume(); + max = peekIsDigit() ? consumeNumber() : Quantifier::Infinity; + } + + if (peek() != '}') { + state.restore(); + return Quantifier(); + } + consume(); + + if (min > max) { + setError(QuantifierOutOfOrder); + return Quantifier(Quantifier::Error); + } + + return Quantifier(Quantifier::Greedy, min, max); + } + + default: + return Quantifier(); // No quantifier. + } +} + +Quantifier Parser::consumeQuantifier() +{ + Quantifier q = consumeGreedyQuantifier(); + + if ((q.type == Quantifier::Greedy) && (peek() == '?')) { + consume(); + q.type = Quantifier::NonGreedy; + } + + return q; +} + +bool Parser::parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert) +{ + Quantifier q = consumeQuantifier(); + + switch (q.type) { + case Quantifier::None: { + m_generator.generateCharacterClass(failures, charClass, invert); + break; + } + + case Quantifier::Greedy: { + GenerateCharacterClassFunctor functor(&charClass, invert); + m_generator.generateGreedyQuantifier(failures, functor, q.min, q.max); + break; + } + + case Quantifier::NonGreedy: { + GenerateCharacterClassFunctor functor(&charClass, invert); + m_generator.generateNonGreedyQuantifier(failures, functor, q.min, q.max); + break; + } + + case Quantifier::Error: + return false; + } + + return true; +} + +bool Parser::parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId) +{ + Quantifier q = consumeQuantifier(); + + switch (q.type) { + case Quantifier::None: { + m_generator.generateBackreference(failures, subpatternId); + break; + } + + case Quantifier::Greedy: + case Quantifier::NonGreedy: + m_generator.generateBackreferenceQuantifier(failures, q.type, subpatternId, q.min, q.max); + return true; + + case Quantifier::Error: + return 
false; + } + + return true; +} + +bool Parser::parseParentheses(JumpList& failures) +{ + ParenthesesType type = consumeParenthesesType(); + + // FIXME: WREC originally failed to backtrack correctly in cases such as + // "c".match(/(.*)c/). Now, most parentheses handling is disabled. For + // unsupported parentheses, we fall back on PCRE. + + switch (type) { + case Generator::Assertion: { + m_generator.generateParenthesesAssertion(failures); + + if (consume() != ')') { + setError(ParenthesesUnmatched); + return false; + } + + Quantifier quantifier = consumeQuantifier(); + if (quantifier.type != Quantifier::None && quantifier.min == 0) { + setError(ParenthesesNotSupported); + return false; + } + + return true; + } + case Generator::InvertedAssertion: { + m_generator.generateParenthesesInvertedAssertion(failures); + + if (consume() != ')') { + setError(ParenthesesUnmatched); + return false; + } + + Quantifier quantifier = consumeQuantifier(); + if (quantifier.type != Quantifier::None && quantifier.min == 0) { + setError(ParenthesesNotSupported); + return false; + } + + return true; + } + default: + setError(ParenthesesNotSupported); + return false; + } +} + +bool Parser::parseCharacterClass(JumpList& failures) +{ + bool invert = false; + if (peek() == '^') { + consume(); + invert = true; + } + + CharacterClassConstructor constructor(m_ignoreCase); + + int ch; + while ((ch = peek()) != ']') { + switch (ch) { + case EndOfPattern: + setError(CharacterClassUnmatched); + return false; + + case '\\': { + consume(); + Escape escape = consumeEscape(true); + + switch (escape.type()) { + case Escape::PatternCharacter: { + int character = PatternCharacterEscape::cast(escape).character(); + if (character == '-') + constructor.flushBeforeEscapedHyphen(); + constructor.put(character); + break; + } + case Escape::CharacterClass: { + const CharacterClassEscape& characterClassEscape = CharacterClassEscape::cast(escape); + ASSERT(!characterClassEscape.invert()); + 
constructor.append(characterClassEscape.characterClass()); + break; + } + case Escape::Error: + return false; + case Escape::Backreference: + case Escape::WordBoundaryAssertion: { + ASSERT_NOT_REACHED(); + break; + } + } + break; + } + + default: + consume(); + constructor.put(ch); + } + } + consume(); + + // lazily catch reversed ranges ([z-a])in character classes + if (constructor.isUpsideDown()) { + setError(CharacterClassOutOfOrder); + return false; + } + + constructor.flush(); + CharacterClass charClass = constructor.charClass(); + return parseCharacterClassQuantifier(failures, charClass, invert); +} + +bool Parser::parseNonCharacterEscape(JumpList& failures, const Escape& escape) +{ + switch (escape.type()) { + case Escape::PatternCharacter: + ASSERT_NOT_REACHED(); + return false; + + case Escape::CharacterClass: + return parseCharacterClassQuantifier(failures, CharacterClassEscape::cast(escape).characterClass(), CharacterClassEscape::cast(escape).invert()); + + case Escape::Backreference: + return parseBackreferenceQuantifier(failures, BackreferenceEscape::cast(escape).subpatternId()); + + case Escape::WordBoundaryAssertion: + m_generator.generateAssertionWordBoundary(failures, WordBoundaryAssertionEscape::cast(escape).invert()); + return true; + + case Escape::Error: + return false; + } + + ASSERT_NOT_REACHED(); + return false; +} + +Escape Parser::consumeEscape(bool inCharacterClass) +{ + switch (peek()) { + case EndOfPattern: + setError(EscapeUnterminated); + return Escape(Escape::Error); + + // Assertions + case 'b': + consume(); + if (inCharacterClass) + return PatternCharacterEscape('\b'); + return WordBoundaryAssertionEscape(false); // do not invert + case 'B': + consume(); + if (inCharacterClass) + return PatternCharacterEscape('B'); + return WordBoundaryAssertionEscape(true); // invert + + // CharacterClassEscape + case 'd': + consume(); + return CharacterClassEscape(CharacterClass::digits(), false); + case 's': + consume(); + return 
CharacterClassEscape(CharacterClass::spaces(), false); + case 'w': + consume(); + return CharacterClassEscape(CharacterClass::wordchar(), false); + case 'D': + consume(); + return inCharacterClass + ? CharacterClassEscape(CharacterClass::nondigits(), false) + : CharacterClassEscape(CharacterClass::digits(), true); + case 'S': + consume(); + return inCharacterClass + ? CharacterClassEscape(CharacterClass::nonspaces(), false) + : CharacterClassEscape(CharacterClass::spaces(), true); + case 'W': + consume(); + return inCharacterClass + ? CharacterClassEscape(CharacterClass::nonwordchar(), false) + : CharacterClassEscape(CharacterClass::wordchar(), true); + + // DecimalEscape + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + if (peekDigit() > m_numSubpatterns || inCharacterClass) { + // To match Firefox, we parse an invalid backreference in the range [1-7] + // as an octal escape. + return peekDigit() > 7 ? PatternCharacterEscape('\\') : PatternCharacterEscape(consumeOctal()); + } + + int value = 0; + do { + unsigned newValue = value * 10 + peekDigit(); + if (newValue > m_numSubpatterns) + break; + value = newValue; + consume(); + } while (peekIsDigit()); + + return BackreferenceEscape(value); + } + + // Octal escape + case '0': + consume(); + return PatternCharacterEscape(consumeOctal()); + + // ControlEscape + case 'f': + consume(); + return PatternCharacterEscape('\f'); + case 'n': + consume(); + return PatternCharacterEscape('\n'); + case 'r': + consume(); + return PatternCharacterEscape('\r'); + case 't': + consume(); + return PatternCharacterEscape('\t'); + case 'v': + consume(); + return PatternCharacterEscape('\v'); + + // ControlLetter + case 'c': { + SavedState state(*this); + consume(); + + int control = consume(); + // To match Firefox, inside a character class, we also accept numbers + // and '_' as control characters. 
+ if ((!inCharacterClass && !isASCIIAlpha(control)) || (!isASCIIAlphanumeric(control) && control != '_')) { + state.restore(); + return PatternCharacterEscape('\\'); + } + return PatternCharacterEscape(control & 31); + } + + // HexEscape + case 'x': { + consume(); + + SavedState state(*this); + int x = consumeHex(2); + if (x == -1) { + state.restore(); + return PatternCharacterEscape('x'); + } + return PatternCharacterEscape(x); + } + + // UnicodeEscape + case 'u': { + consume(); + + SavedState state(*this); + int x = consumeHex(4); + if (x == -1) { + state.restore(); + return PatternCharacterEscape('u'); + } + return PatternCharacterEscape(x); + } + + // IdentityEscape + default: + return PatternCharacterEscape(consume()); + } +} + +void Parser::parseAlternative(JumpList& failures) +{ + PatternCharacterSequence sequence(m_generator, failures); + + while (1) { + switch (peek()) { + case EndOfPattern: + case '|': + case ')': + sequence.flush(); + return; + + case '*': + case '+': + case '?': + case '{': { + Quantifier q = consumeQuantifier(); + + if (q.type == Quantifier::None) { + sequence.append(consume()); + continue; + } + + if (q.type == Quantifier::Error) + return; + + if (!sequence.size()) { + setError(QuantifierWithoutAtom); + return; + } + + sequence.flush(q); + continue; + } + + case '^': + consume(); + + sequence.flush(); + m_generator.generateAssertionBOL(failures); + continue; + + case '$': + consume(); + + sequence.flush(); + m_generator.generateAssertionEOL(failures); + continue; + + case '.': + consume(); + + sequence.flush(); + if (!parseCharacterClassQuantifier(failures, CharacterClass::newline(), true)) + return; + continue; + + case '[': + consume(); + + sequence.flush(); + if (!parseCharacterClass(failures)) + return; + continue; + + case '(': + consume(); + + sequence.flush(); + if (!parseParentheses(failures)) + return; + continue; + + case '\\': { + consume(); + + Escape escape = consumeEscape(false); + if (escape.type() == 
Escape::PatternCharacter) { + sequence.append(PatternCharacterEscape::cast(escape).character()); + continue; + } + + sequence.flush(); + if (!parseNonCharacterEscape(failures, escape)) + return; + continue; + } + + default: + sequence.append(consume()); + continue; + } + } +} + +/* + TOS holds index. +*/ +void Parser::parseDisjunction(JumpList& failures) +{ + parseAlternative(failures); + if (peek() != '|') + return; + + JumpList successes; + do { + consume(); + m_generator.terminateAlternative(successes, failures); + parseAlternative(failures); + } while (peek() == '|'); + + m_generator.terminateDisjunction(successes); +} + +Generator::ParenthesesType Parser::consumeParenthesesType() +{ + if (peek() != '?') + return Generator::Capturing; + consume(); + + switch (consume()) { + case ':': + return Generator::NonCapturing; + + case '=': + return Generator::Assertion; + + case '!': + return Generator::InvertedAssertion; + + default: + setError(ParenthesesTypeInvalid); + return Generator::Error; + } +} + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) diff --git a/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.h b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.h new file mode 100644 index 0000000..a3e151b --- /dev/null +++ b/src/3rdparty/javascriptcore/JavaScriptCore/wrec/WRECParser.h @@ -0,0 +1,214 @@ +/* + * Copyright (C) 2008 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef Parser_h +#define Parser_h + +#include <wtf/Platform.h> + +#if ENABLE(WREC) + +#include "Escapes.h" +#include "Quantifier.h" +#include "UString.h" +#include "WRECGenerator.h" +#include <wtf/ASCIICType.h> + +namespace JSC { namespace WREC { + + struct CharacterClass; + + class Parser { + typedef Generator::JumpList JumpList; + typedef Generator::ParenthesesType ParenthesesType; + + friend class SavedState; + + public: + Parser(const UString& pattern, bool ignoreCase, bool multiline) + : m_generator(*this) + , m_data(pattern.data()) + , m_size(pattern.size()) + , m_ignoreCase(ignoreCase) + , m_multiline(multiline) + { + reset(); + } + + Generator& generator() { return m_generator; } + + bool ignoreCase() const { return m_ignoreCase; } + bool multiline() const { return m_multiline; } + + void recordSubpattern() { ++m_numSubpatterns; } + unsigned numSubpatterns() const { return m_numSubpatterns; } + + const char* error() const { return m_error; } + const char* syntaxError() const { return m_error == ParenthesesNotSupported ? 0 : m_error; } + + void parsePattern(JumpList& failures) + { + reset(); + + parseDisjunction(failures); + + if (peek() != EndOfPattern) + setError(ParenthesesUnmatched); // Parsing the pattern should fully consume it. 
+ } + + void parseDisjunction(JumpList& failures); + void parseAlternative(JumpList& failures); + bool parseTerm(JumpList& failures); + bool parseNonCharacterEscape(JumpList& failures, const Escape&); + bool parseParentheses(JumpList& failures); + bool parseCharacterClass(JumpList& failures); + bool parseCharacterClassQuantifier(JumpList& failures, const CharacterClass& charClass, bool invert); + bool parseBackreferenceQuantifier(JumpList& failures, unsigned subpatternId); + + private: + class SavedState { + public: + SavedState(Parser& parser) + : m_parser(parser) + , m_index(parser.m_index) + { + } + + void restore() + { + m_parser.m_index = m_index; + } + + private: + Parser& m_parser; + unsigned m_index; + }; + + void reset() + { + m_index = 0; + m_numSubpatterns = 0; + m_error = 0; + } + + void setError(const char* error) + { + if (m_error) + return; + m_error = error; + } + + int peek() + { + if (m_index >= m_size) + return EndOfPattern; + return m_data[m_index]; + } + + int consume() + { + if (m_index >= m_size) + return EndOfPattern; + return m_data[m_index++]; + } + + bool peekIsDigit() + { + return WTF::isASCIIDigit(peek()); + } + + unsigned peekDigit() + { + ASSERT(peekIsDigit()); + return peek() - '0'; + } + + unsigned consumeDigit() + { + ASSERT(peekIsDigit()); + return consume() - '0'; + } + + unsigned consumeNumber() + { + int n = consumeDigit(); + while (peekIsDigit()) { + n *= 10; + n += consumeDigit(); + } + return n; + } + + int consumeHex(int count) + { + int n = 0; + while (count--) { + if (!WTF::isASCIIHexDigit(peek())) + return -1; + n = (n << 4) | WTF::toASCIIHexValue(consume()); + } + return n; + } + + unsigned consumeOctal() + { + unsigned n = 0; + while (n < 32 && WTF::isASCIIOctalDigit(peek())) + n = n * 8 + consumeDigit(); + return n; + } + + ALWAYS_INLINE Quantifier consumeGreedyQuantifier(); + Quantifier consumeQuantifier(); + Escape consumeEscape(bool inCharacterClass); + ParenthesesType consumeParenthesesType(); + + static const int 
EndOfPattern = -1; + + // Error messages. + static const char* QuantifierOutOfOrder; + static const char* QuantifierWithoutAtom; + static const char* ParenthesesUnmatched; + static const char* ParenthesesTypeInvalid; + static const char* ParenthesesNotSupported; + static const char* CharacterClassUnmatched; + static const char* CharacterClassOutOfOrder; + static const char* EscapeUnterminated; + + Generator m_generator; + const UChar* m_data; + unsigned m_size; + unsigned m_index; + bool m_ignoreCase; + bool m_multiline; + unsigned m_numSubpatterns; + const char* m_error; + }; + +} } // namespace JSC::WREC + +#endif // ENABLE(WREC) + +#endif // Parser_h |