summaryrefslogtreecommitdiffstats
path: root/src/3rdparty/webkit/Source/JavaScriptCore/yarr/YarrJIT.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/webkit/Source/JavaScriptCore/yarr/YarrJIT.cpp')
-rw-r--r--src/3rdparty/webkit/Source/JavaScriptCore/yarr/YarrJIT.cpp3098
1 files changed, 1609 insertions, 1489 deletions
diff --git a/src/3rdparty/webkit/Source/JavaScriptCore/yarr/YarrJIT.cpp b/src/3rdparty/webkit/Source/JavaScriptCore/yarr/YarrJIT.cpp
index ece2379..f3356b1 100644
--- a/src/3rdparty/webkit/Source/JavaScriptCore/yarr/YarrJIT.cpp
+++ b/src/3rdparty/webkit/Source/JavaScriptCore/yarr/YarrJIT.cpp
@@ -228,9 +228,10 @@ class YarrGenerator : private MacroAssembler {
}
// Jumps if input not available; will have (incorrectly) incremented already!
- Jump jumpIfNoAvailableInput(unsigned countToCheck)
+ Jump jumpIfNoAvailableInput(unsigned countToCheck = 0)
{
- add32(Imm32(countToCheck), index);
+ if (countToCheck)
+ add32(Imm32(countToCheck), index);
return branch32(Above, index, length);
}
@@ -295,1056 +296,455 @@ class YarrGenerator : private MacroAssembler {
jump(Address(stackPointerRegister, frameLocation * sizeof(void*)));
}
- struct IndirectJumpEntry {
- IndirectJumpEntry(int32_t stackOffset)
- : m_stackOffset(stackOffset)
- {
- }
-
- IndirectJumpEntry(int32_t stackOffset, Jump jump)
- : m_stackOffset(stackOffset)
- {
- addJump(jump);
- }
-
- IndirectJumpEntry(int32_t stackOffset, DataLabelPtr dataLabel)
- : m_stackOffset(stackOffset)
- {
- addDataLabel(dataLabel);
- }
-
- void addJump(Jump jump)
- {
- m_relJumps.append(jump);
- }
-
- void addDataLabel(DataLabelPtr dataLabel)
- {
- m_dataLabelPtrVector.append(dataLabel);
- }
-
- int32_t m_stackOffset;
- JumpList m_relJumps;
- Vector<DataLabelPtr, 16> m_dataLabelPtrVector;
+ enum YarrOpCode {
+ // These nodes wrap body alternatives - those in the main disjunction,
+ // rather than subpatterns or assertions. These are chained together in
+ // a doubly linked list, with a 'begin' node for the first alternative,
+ // a 'next' node for each subsequent alternative, and an 'end' node at
+ // the end. In the case of repeating alternatives, the 'end' node also
+ // has a reference back to 'begin'.
+ OpBodyAlternativeBegin,
+ OpBodyAlternativeNext,
+ OpBodyAlternativeEnd,
+ // Similar to the body alternatives, but used for subpatterns with two
+ // or more alternatives.
+ OpNestedAlternativeBegin,
+ OpNestedAlternativeNext,
+ OpNestedAlternativeEnd,
+ // Used for alternatives in subpatterns where there is only a single
+ // alternative (backtrackingis easier in these cases), or for alternatives
+ // which never need to be backtracked (those in parenthetical assertions,
+ // terminal subpatterns).
+ OpSimpleNestedAlternativeBegin,
+ OpSimpleNestedAlternativeNext,
+ OpSimpleNestedAlternativeEnd,
+ // Used to wrap 'Once' subpattern matches (quantityCount == 1).
+ OpParenthesesSubpatternOnceBegin,
+ OpParenthesesSubpatternOnceEnd,
+ // Used to wrap 'Terminal' subpattern matches (at the end of the regexp).
+ OpParenthesesSubpatternTerminalBegin,
+ OpParenthesesSubpatternTerminalEnd,
+ // Used to wrap parenthetical assertions.
+ OpParentheticalAssertionBegin,
+ OpParentheticalAssertionEnd,
+ // Wraps all simple terms (pattern characters, character classes).
+ OpTerm,
+ // Where an expression contains only 'once through' body alternatives
+ // and no repeating ones, this op is used to return match failure.
+ OpMatchFailed
};
- struct AlternativeBacktrackRecord {
- DataLabelPtr dataLabel;
- Label backtrackLocation;
-
- AlternativeBacktrackRecord(DataLabelPtr dataLabel, Label backtrackLocation)
- : dataLabel(dataLabel)
- , backtrackLocation(backtrackLocation)
- {
- }
- };
-
- struct ParenthesesTail;
- struct TermGenerationState;
-
- struct GenerationState {
- typedef HashMap<int, IndirectJumpEntry*, WTF::IntHash<uint32_t>, UnsignedWithZeroKeyHashTraits<uint32_t> > IndirectJumpHashMap;
-
- GenerationState()
- : m_parenNestingLevel(0)
- {
- }
-
- void addIndirectJumpEntry(int32_t stackOffset, Jump jump)
- {
- IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset);
-
- ASSERT(stackOffset >= 0);
-
- uint32_t offset = static_cast<uint32_t>(stackOffset);
-
- if (result == m_indirectJumpMap.end())
- m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, jump));
- else
- result->second->addJump(jump);
- }
-
- void addIndirectJumpEntry(int32_t stackOffset, JumpList jumps)
- {
- JumpList::JumpVector jumpVector = jumps.jumps();
- size_t size = jumpVector.size();
- for (size_t i = 0; i < size; ++i)
- addIndirectJumpEntry(stackOffset, jumpVector[i]);
-
- jumps.empty();
- }
-
- void addIndirectJumpEntry(int32_t stackOffset, DataLabelPtr dataLabel)
- {
- IndirectJumpHashMap::iterator result = m_indirectJumpMap.find(stackOffset);
-
- ASSERT(stackOffset >= 0);
-
- uint32_t offset = static_cast<uint32_t>(stackOffset);
-
- if (result == m_indirectJumpMap.end())
- m_indirectJumpMap.add(offset, new IndirectJumpEntry(stackOffset, dataLabel));
- else
- result->second->addDataLabel(dataLabel);
- }
-
- void emitIndirectJumpTable(MacroAssembler* masm)
+ // This structure is used to hold the compiled opcode information,
+ // including reference back to the original PatternTerm/PatternAlternatives,
+ // and JIT compilation data structures.
+ struct YarrOp {
+ explicit YarrOp(PatternTerm* term)
+ : m_op(OpTerm)
+ , m_term(term)
+ , m_isDeadCode(false)
{
- for (IndirectJumpHashMap::iterator iter = m_indirectJumpMap.begin(); iter != m_indirectJumpMap.end(); ++iter) {
- IndirectJumpEntry* indJumpEntry = iter->second;
- size_t size = indJumpEntry->m_dataLabelPtrVector.size();
- if (size) {
- // Link any associated DataLabelPtr's with indirect jump via label
- Label hereLabel = masm->label();
- for (size_t i = 0; i < size; ++i)
- m_backtrackRecords.append(AlternativeBacktrackRecord(indJumpEntry->m_dataLabelPtrVector[i], hereLabel));
- }
- indJumpEntry->m_relJumps.link(masm);
- masm->jump(Address(stackPointerRegister, indJumpEntry->m_stackOffset));
- delete indJumpEntry;
- }
- }
-
- void incrementParenNestingLevel()
- {
- ++m_parenNestingLevel;
- }
-
- void decrementParenNestingLevel()
- {
- --m_parenNestingLevel;
- }
-
- ParenthesesTail* addParenthesesTail(PatternTerm& term, JumpList* jumpListToPriorParen)
- {
- OwnPtr<ParenthesesTail> tail = adoptPtr(new ParenthesesTail(term, m_parenNestingLevel, jumpListToPriorParen));
- ParenthesesTail* rawTail = tail.get();
-
- m_parenTails.append(tail.release());
- m_parenTailsForIteration.append(rawTail);
-
- return rawTail;
}
- void emitParenthesesTail(YarrGenerator* generator)
+ explicit YarrOp(YarrOpCode op)
+ : m_op(op)
+ , m_isDeadCode(false)
{
- unsigned vectorSize = m_parenTails.size();
- bool priorBacktrackFallThrough = false;
-
- // Emit in reverse order so parentTail N can fall through to N-1
- for (unsigned index = vectorSize; index > 0; --index) {
- JumpList jumpsToNext;
- priorBacktrackFallThrough = m_parenTails[index-1].get()->generateCode(generator, jumpsToNext, priorBacktrackFallThrough, index > 1);
- if (index > 1)
- jumpsToNext.linkTo(generator->label(), generator);
- else
- addJumpsToNextInteration(jumpsToNext);
- }
- m_parenTails.clear();
}
- void addJumpToNextInteration(Jump jump)
- {
- m_jumpsToNextInteration.append(jump);
- }
+ // The operation, as a YarrOpCode, and also a reference to the PatternTerm.
+ YarrOpCode m_op;
+ PatternTerm* m_term;
- void addJumpsToNextInteration(JumpList jumps)
- {
- m_jumpsToNextInteration.append(jumps);
- }
+ // For alternatives, this holds the PatternAlternative and doubly linked
+ // references to this alternative's siblings. In the case of the
+ // OpBodyAlternativeEnd node at the end of a section of repeating nodes,
+ // m_nextOp will reference the OpBodyAlternativeBegin node of the first
+ // repeating alternative.
+ PatternAlternative* m_alternative;
+ size_t m_previousOp;
+ size_t m_nextOp;
- void addDataLabelToNextIteration(DataLabelPtr dataLabel)
- {
- m_dataPtrsToNextIteration.append(dataLabel);
- }
+ // Used to record a set of Jumps out of the generated code, typically
+ // used for jumps out to backtracking code, and a single reentry back
+ // into the code for a node (likely where a backtrack will trigger
+ // rematching).
+ Label m_reentry;
+ JumpList m_jumps;
- void linkToNextIteration(Label label)
- {
- m_nextIteration = label;
+ // This flag is used to null out the second pattern character, when
+ // two are fused to match a pair together.
+ bool m_isDeadCode;
- for (unsigned i = 0; i < m_dataPtrsToNextIteration.size(); ++i)
- m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataPtrsToNextIteration[i], m_nextIteration));
+ // Currently used in the case of some of the more complex management of
+ // 'm_checked', to cache the offset used in this alternative, to avoid
+ // recalculating it.
+ int m_checkAdjust;
- m_dataPtrsToNextIteration.clear();
-
- for (unsigned i = 0; i < m_parenTailsForIteration.size(); ++i)
- m_parenTailsForIteration[i]->setNextIteration(m_nextIteration);
-
- m_parenTailsForIteration.clear();
- }
-
- void linkToNextIteration(YarrGenerator* generator)
- {
- m_jumpsToNextInteration.linkTo(m_nextIteration, generator);
- }
-
- int m_parenNestingLevel;
- Vector<AlternativeBacktrackRecord> m_backtrackRecords;
- IndirectJumpHashMap m_indirectJumpMap;
- Label m_nextIteration;
- Vector<OwnPtr<ParenthesesTail> > m_parenTails;
- JumpList m_jumpsToNextInteration;
- Vector<DataLabelPtr> m_dataPtrsToNextIteration;
- Vector<ParenthesesTail*> m_parenTailsForIteration;
+ // Used by OpNestedAlternativeNext/End to hold the pointer to the
+ // value that will be pushed into the pattern's frame to return to,
+ // upon backtracking back into the disjunction.
+ DataLabelPtr m_returnAddress;
};
- struct BacktrackDestination {
- typedef enum {
- NoBacktrack,
- BacktrackLabel,
- BacktrackStackOffset,
- BacktrackJumpList,
- BacktrackLinked
- } BacktrackType;
-
- BacktrackDestination()
- : m_backtrackType(NoBacktrack)
- , m_backtrackToLabel(0)
- , m_subDataLabelPtr(0)
- , m_nextBacktrack(0)
- , m_backtrackSourceLabel(0)
- , m_backtrackSourceJumps(0)
+ // BacktrackingState
+ // This class encapsulates information about the state of code generation
+ // whilst generating the code for backtracking, when a term fails to match.
+ // Upon entry to code generation of the backtracking code for a given node,
+ // the Backtracking state will hold references to all control flow sources
+ // that are outputs in need of further backtracking from the prior node
+ // generated (which is the subsequent operation in the regular expression,
+ // and in the m_ops Vector, since we generated backtracking backwards).
+ // These references to control flow take the form of:
+ // - A jump list of jumps, to be linked to code that will backtrack them
+ // further.
+ // - A set of DataLabelPtr values, to be populated with values to be
+ // treated effectively as return addresses backtracking into complex
+ // subpatterns.
+ // - A flag indicating that the current sequence of generated code up to
+ // this point requires backtracking.
+ class BacktrackingState {
+ public:
+ BacktrackingState()
+ : m_pendingFallthrough(false)
{
}
- BacktrackDestination(int32_t stackOffset)
- : m_backtrackType(BacktrackStackOffset)
- , m_backtrackStackOffset(stackOffset)
- , m_backtrackToLabel(0)
- , m_subDataLabelPtr(0)
- , m_nextBacktrack(0)
- , m_backtrackSourceLabel(0)
- , m_backtrackSourceJumps(0)
+ // Add a jump or jumps, a return address, or set the flag indicating
+ // that the current 'fallthrough' control flow requires backtracking.
+ void append(const Jump& jump)
{
+ m_laterFailures.append(jump);
}
-
- BacktrackDestination(Label label)
- : m_backtrackType(BacktrackLabel)
- , m_backtrackLabel(label)
- , m_backtrackToLabel(0)
- , m_subDataLabelPtr(0)
- , m_nextBacktrack(0)
- , m_backtrackSourceLabel(0)
- , m_backtrackSourceJumps(0)
+ void append(JumpList& jumpList)
{
+ m_laterFailures.append(jumpList);
}
-
- void clear(bool doDataLabelClear = true)
+ void append(const DataLabelPtr& returnAddress)
{
- m_backtrackType = NoBacktrack;
- if (doDataLabelClear)
- clearDataLabel();
- m_nextBacktrack = 0;
+ m_pendingReturns.append(returnAddress);
}
-
- void clearDataLabel()
+ void fallthrough()
{
- m_dataLabelPtr = DataLabelPtr();
+ ASSERT(!m_pendingFallthrough);
+ m_pendingFallthrough = true;
}
- bool hasDestination()
+ // These methods clear the backtracking state, either linking to the
+ // current location, a provided label, or copying the backtracking out
+ // to a JumpList. All actions may require code generation to take place,
+ // and as such are passed a pointer to the assembler.
+ void link(MacroAssembler* assembler)
{
- return (m_backtrackType != NoBacktrack);
- }
-
- bool isStackOffset()
- {
- return (m_backtrackType == BacktrackStackOffset);
- }
-
- bool isLabel()
- {
- return (m_backtrackType == BacktrackLabel);
- }
-
- bool isJumpList()
- {
- return (m_backtrackType == BacktrackJumpList);
- }
-
- bool hasDataLabel()
- {
- return m_dataLabelPtr.isSet();
- }
-
- void copyTarget(BacktrackDestination& rhs, bool copyDataLabel = true)
- {
- m_backtrackType = rhs.m_backtrackType;
- if (m_backtrackType == BacktrackStackOffset)
- m_backtrackStackOffset = rhs.m_backtrackStackOffset;
- else if (m_backtrackType == BacktrackLabel)
- m_backtrackLabel = rhs.m_backtrackLabel;
- if (copyDataLabel)
- m_dataLabelPtr = rhs.m_dataLabelPtr;
- m_backtrackSourceJumps = rhs.m_backtrackSourceJumps;
- m_backtrackSourceLabel = rhs.m_backtrackSourceLabel;
- }
-
- void copyTo(BacktrackDestination& lhs)
- {
- lhs.m_backtrackType = m_backtrackType;
- if (m_backtrackType == BacktrackStackOffset)
- lhs.m_backtrackStackOffset = m_backtrackStackOffset;
- else if (m_backtrackType == BacktrackLabel)
- lhs.m_backtrackLabel = m_backtrackLabel;
- lhs.m_backtrackSourceJumps = m_backtrackSourceJumps;
- lhs.m_backtrackSourceLabel = m_backtrackSourceLabel;
- lhs.m_dataLabelPtr = m_dataLabelPtr;
- lhs.m_backTrackJumps = m_backTrackJumps;
- }
-
- void addBacktrackJump(Jump jump)
- {
- m_backTrackJumps.append(jump);
- }
-
- void setStackOffset(int32_t stackOffset)
- {
- m_backtrackType = BacktrackStackOffset;
- m_backtrackStackOffset = stackOffset;
- }
-
- void setLabel(Label label)
- {
- m_backtrackType = BacktrackLabel;
- m_backtrackLabel = label;
- }
-
- void setNextBacktrackLabel(Label label)
- {
- if (m_nextBacktrack)
- m_nextBacktrack->setLabel(label);
- }
-
- void propagateBacktrackToLabel(const BacktrackDestination& rhs)
- {
- if (!m_backtrackToLabel && rhs.m_backtrackToLabel)
- m_backtrackToLabel = rhs.m_backtrackToLabel;
- }
-
- void setBacktrackToLabel(Label* backtrackToLabel)
- {
- if (!m_backtrackToLabel)
- m_backtrackToLabel = backtrackToLabel;
- }
-
- bool hasBacktrackToLabel()
- {
- return m_backtrackToLabel;
- }
-
- void setBacktrackJumpList(JumpList* jumpList)
- {
- m_backtrackType = BacktrackJumpList;
- m_backtrackSourceJumps = jumpList;
- }
-
- void setBacktrackSourceLabel(Label* backtrackSourceLabel)
- {
- m_backtrackSourceLabel = backtrackSourceLabel;
- }
-
- void setDataLabel(DataLabelPtr dp)
- {
- if (m_subDataLabelPtr) {
- *m_subDataLabelPtr = dp;
- m_subDataLabelPtr = 0;
- } else {
- ASSERT(!hasDataLabel());
- m_dataLabelPtr = dp;
+ if (m_pendingReturns.size()) {
+ Label here(assembler);
+ for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
+ m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
+ m_pendingReturns.clear();
}
+ m_laterFailures.link(assembler);
+ m_laterFailures.clear();
+ m_pendingFallthrough = false;
}
-
- void clearSubDataLabelPtr()
- {
- m_subDataLabelPtr = 0;
- }
-
- void setSubDataLabelPtr(DataLabelPtr* subDataLabelPtr)
+ void linkTo(Label label, MacroAssembler* assembler)
{
- m_subDataLabelPtr = subDataLabelPtr;
- }
-
- void linkToNextBacktrack(BacktrackDestination* nextBacktrack)
- {
- m_nextBacktrack = nextBacktrack;
- }
-
- int32_t getStackOffset()
- {
- ASSERT(m_backtrackType == BacktrackStackOffset);
- return m_backtrackStackOffset;
- }
-
- Label getLabel()
- {
- ASSERT(m_backtrackType == BacktrackLabel);
- return m_backtrackLabel;
- }
-
- JumpList& getBacktrackJumps()
- {
- return m_backTrackJumps;
- }
-
- DataLabelPtr& getDataLabel()
- {
- return m_dataLabelPtr;
- }
-
- void jumpToBacktrack(MacroAssembler* masm)
- {
- if (isJumpList()) {
- if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
- masm->jump().linkTo(*m_backtrackSourceLabel, masm);
- else
- m_backtrackSourceJumps->append(masm->jump());
- } else if (isStackOffset())
- masm->jump(Address(stackPointerRegister, m_backtrackStackOffset));
- else if (isLabel())
- masm->jump().linkTo(m_backtrackLabel, masm);
- else
- m_backTrackJumps.append(masm->jump());
- }
-
- void jumpToBacktrack(YarrGenerator* generator, Jump jump)
- {
- if (isJumpList()) {
- if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
- jump.linkTo(*m_backtrackSourceLabel, generator);
- else
- m_backtrackSourceJumps->append(jump);
- } else if (isStackOffset())
- generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jump);
- else if (isLabel())
- jump.linkTo(getLabel(), generator);
- else
- m_backTrackJumps.append(jump);
- }
-
- void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps)
- {
- if (isJumpList()) {
- if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
- jumps.linkTo(*m_backtrackSourceLabel, generator);
- else
- m_backtrackSourceJumps->append(jumps);
- } else if (isStackOffset())
- generator->m_expressionState.addIndirectJumpEntry(getStackOffset(), jumps);
- else if (isLabel())
- jumps.linkTo(getLabel(), generator);
- else
- m_backTrackJumps.append(jumps);
- }
-
- bool plantJumpToBacktrackIfExists(YarrGenerator* generator)
- {
- if (isJumpList()) {
- if (m_backtrackSourceLabel && (m_backtrackSourceLabel->isSet()))
- generator->jump(*m_backtrackSourceLabel);
- else
- m_backtrackSourceJumps->append(generator->jump());
-
- return true;
- }
-
- if (isStackOffset()) {
- generator->jump(Address(stackPointerRegister, getStackOffset()));
- return true;
+ if (m_pendingReturns.size()) {
+ for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
+ m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], label));
+ m_pendingReturns.clear();
}
-
- if (isLabel()) {
- generator->jump(getLabel());
- if (hasDataLabel()) {
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), getLabel()));
- clearDataLabel();
- }
- return true;
+ if (m_pendingFallthrough)
+ assembler->jump(label);
+ m_laterFailures.linkTo(label, assembler);
+ m_laterFailures.clear();
+ m_pendingFallthrough = false;
+ }
+ void takeBacktracksToJumpList(JumpList& jumpList, MacroAssembler* assembler)
+ {
+ if (m_pendingReturns.size()) {
+ Label here(assembler);
+ for (unsigned i = 0; i < m_pendingReturns.size(); ++i)
+ m_backtrackRecords.append(ReturnAddressRecord(m_pendingReturns[i], here));
+ m_pendingReturns.clear();
+ m_pendingFallthrough = true;
}
-
- return false;
+ if (m_pendingFallthrough)
+ jumpList.append(assembler->jump());
+ jumpList.append(m_laterFailures);
+ m_laterFailures.clear();
+ m_pendingFallthrough = false;
}
- void linkBacktrackToLabel(Label backtrackLabel)
+ bool isEmpty()
{
- if (m_backtrackToLabel)
- *m_backtrackToLabel = backtrackLabel;
+ return m_laterFailures.empty() && m_pendingReturns.isEmpty() && !m_pendingFallthrough;
}
- void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false)
+ // Called at the end of code generation to link all return addresses.
+ void linkDataLabels(LinkBuffer& linkBuffer)
{
- Label hereLabel = generator->label();
-
- if (m_backtrackToLabel) {
- *m_backtrackToLabel = hereLabel;
- m_backtrackToLabel = 0;
- }
-
- m_backTrackJumps.link(generator);
-
- if (nextIteration)
- generator->m_expressionState.linkToNextIteration(hereLabel);
-
- if (hasDataLabel()) {
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), hereLabel));
- // data label cleared as a result of the clear() below
- }
-
- clear();
- }
-
- void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false)
- {
- m_backTrackJumps.linkTo(label, generator);
-
- if (nextIteration)
- generator->m_expressionState.linkToNextIteration(label);
-
- if (hasDataLabel()) {
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(getDataLabel(), label));
- clearDataLabel();
- }
+ ASSERT(isEmpty());
+ for (unsigned i = 0; i < m_backtrackRecords.size(); ++i)
+ linkBuffer.patch(m_backtrackRecords[i].m_dataLabel, linkBuffer.locationOf(m_backtrackRecords[i].m_backtrackLocation));
}
private:
- BacktrackType m_backtrackType;
- int32_t m_backtrackStackOffset;
- Label m_backtrackLabel;
- DataLabelPtr m_dataLabelPtr;
- Label* m_backtrackToLabel;
- DataLabelPtr* m_subDataLabelPtr;
- BacktrackDestination* m_nextBacktrack;
- Label* m_backtrackSourceLabel;
- JumpList* m_backtrackSourceJumps;
- JumpList m_backTrackJumps;
- };
-
- struct TermGenerationState {
- TermGenerationState(PatternDisjunction* disjunction, unsigned checkedTotal)
- : disjunction(disjunction)
- , checkedTotal(checkedTotal)
- , m_subParenNum(0)
- , m_linkedBacktrack(0)
- , m_jumpList(0)
- {
- }
-
- void resetAlternative()
- {
- m_backtrack.clear();
- alt = 0;
- }
- bool alternativeValid()
- {
- return alt < disjunction->m_alternatives.size();
- }
- void nextAlternative()
- {
- ++alt;
- }
- PatternAlternative* alternative()
- {
- return disjunction->m_alternatives[alt];
- }
- bool isLastAlternative()
- {
- return (alt + 1) == disjunction->m_alternatives.size();
- }
-
- void resetTerm()
- {
- ASSERT(alternativeValid());
- t = 0;
- m_subParenNum = 0;
- }
- bool termValid()
- {
- ASSERT(alternativeValid());
- return t < alternative()->m_terms.size();
- }
- void nextTerm()
- {
- ASSERT(alternativeValid());
- ++t;
- }
- PatternTerm& term()
- {
- ASSERT(alternativeValid());
- return alternative()->m_terms[t];
- }
- bool isLastTerm()
- {
- ASSERT(alternativeValid());
- return (t + 1) == alternative()->m_terms.size();
- }
- unsigned getSubParenNum()
- {
- return m_subParenNum++;
- }
- bool isMainDisjunction()
- {
- return !disjunction->m_parent;
- }
-
- void setJumpListToPriorParen(JumpList* jumpList)
- {
- m_jumpList = jumpList;
- }
-
- JumpList* getJumpListToPriorParen()
- {
- return m_jumpList;
- }
-
- PatternTerm& lookaheadTerm()
- {
- ASSERT(alternativeValid());
- ASSERT((t + 1) < alternative()->m_terms.size());
- return alternative()->m_terms[t + 1];
- }
- bool isSinglePatternCharacterLookaheadTerm()
- {
- ASSERT(alternativeValid());
- return ((t + 1) < alternative()->m_terms.size())
- && (lookaheadTerm().type == PatternTerm::TypePatternCharacter)
- && (lookaheadTerm().quantityType == QuantifierFixedCount)
- && (lookaheadTerm().quantityCount == 1);
- }
-
- int inputOffset()
- {
- return term().inputPosition - checkedTotal;
- }
-
- void clearBacktrack()
- {
- m_backtrack.clear(false);
- m_linkedBacktrack = 0;
- }
-
- void jumpToBacktrack(MacroAssembler* masm)
- {
- m_backtrack.jumpToBacktrack(masm);
- }
-
- void jumpToBacktrack(YarrGenerator* generator, Jump jump)
- {
- m_backtrack.jumpToBacktrack(generator, jump);
- }
-
- void jumpToBacktrack(YarrGenerator* generator, JumpList& jumps)
- {
- m_backtrack.jumpToBacktrack(generator, jumps);
- }
-
- bool plantJumpToBacktrackIfExists(YarrGenerator* generator)
- {
- return m_backtrack.plantJumpToBacktrackIfExists(generator);
- }
-
- void linkDataLabelToBacktrackIfExists(YarrGenerator* generator, DataLabelPtr dataLabel)
- {
- // If we have a stack offset backtrack destination, use it directly
- if (m_backtrack.isStackOffset()) {
- generator->m_expressionState.addIndirectJumpEntry(m_backtrack.getStackOffset(), dataLabel);
- m_backtrack.clearSubDataLabelPtr();
- } else {
- // If we have a backtrack label, connect the datalabel to it directly.
- if (m_backtrack.isLabel()) {
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(dataLabel, m_backtrack.getLabel()));
- m_backtrack.clearSubDataLabelPtr();
- } else
- setBacktrackDataLabel(dataLabel);
+ struct ReturnAddressRecord {
+ ReturnAddressRecord(DataLabelPtr dataLabel, Label backtrackLocation)
+ : m_dataLabel(dataLabel)
+ , m_backtrackLocation(backtrackLocation)
+ {
}
- }
-
- void addBacktrackJump(Jump jump)
- {
- m_backtrack.addBacktrackJump(jump);
- }
-
- void setBacktrackDataLabel(DataLabelPtr dp)
- {
- m_backtrack.setDataLabel(dp);
- }
-
- void setBackTrackStackOffset(int32_t stackOffset)
- {
- m_backtrack.setStackOffset(stackOffset);
- }
-
- void setBacktrackLabel(Label label)
- {
- m_backtrack.setLabel(label);
- }
-
- void linkAlternativeBacktracks(YarrGenerator* generator, bool nextIteration = false)
- {
- m_backtrack.linkAlternativeBacktracks(generator, nextIteration);
- m_linkedBacktrack = 0;
- }
-
- void linkAlternativeBacktracksTo(YarrGenerator* generator, Label label, bool nextIteration = false)
- {
- m_backtrack.linkAlternativeBacktracksTo(generator, label, nextIteration);
- }
-
- void setBacktrackLink(BacktrackDestination* linkedBacktrack)
- {
- m_linkedBacktrack = linkedBacktrack;
- }
-
- void chainBacktracks(BacktrackDestination* followonBacktrack)
- {
- if (m_linkedBacktrack)
- m_linkedBacktrack->linkToNextBacktrack(followonBacktrack);
- }
- BacktrackDestination& getBacktrackDestination()
- {
- return m_backtrack;
- }
-
- void propagateBacktrackingFrom(YarrGenerator* generator, BacktrackDestination& backtrack, bool doJump = true)
- {
- if (doJump)
- m_backtrack.jumpToBacktrack(generator, backtrack.getBacktrackJumps());
-
- if (m_backtrack.isLabel() && backtrack.hasBacktrackToLabel())
- backtrack.linkBacktrackToLabel(m_backtrack.getLabel());
-
- if (backtrack.hasDestination()) {
- if (m_backtrack.hasDataLabel())
- generator->m_expressionState.addDataLabelToNextIteration(m_backtrack.getDataLabel());
+ DataLabelPtr m_dataLabel;
+ Label m_backtrackLocation;
+ };
- m_backtrack.copyTarget(backtrack, doJump);
- }
- }
-
- PatternDisjunction* disjunction;
- int checkedTotal;
- private:
- unsigned alt;
- unsigned t;
- unsigned m_subParenNum;
- BacktrackDestination m_backtrack;
- BacktrackDestination* m_linkedBacktrack;
- JumpList* m_jumpList;
+ JumpList m_laterFailures;
+ bool m_pendingFallthrough;
+ Vector<DataLabelPtr, 4> m_pendingReturns;
+ Vector<ReturnAddressRecord, 4> m_backtrackRecords;
};
- struct ParenthesesTail {
- ParenthesesTail(PatternTerm& term, int nestingLevel, JumpList* jumpListToPriorParen)
- : m_term(term)
- , m_nestingLevel(nestingLevel)
- , m_subParenIndex(0)
- , m_jumpListToPriorParen(jumpListToPriorParen)
- {
- }
-
- void processBacktracks(YarrGenerator* generator, TermGenerationState& state, TermGenerationState& parenthesesState, Label nonGreedyTryParentheses, Label fallThrough)
- {
- m_nonGreedyTryParentheses = nonGreedyTryParentheses;
- m_fallThrough = fallThrough;
-
- m_subParenIndex = state.getSubParenNum();
- parenthesesState.getBacktrackDestination().copyTo(m_parenBacktrack);
- state.chainBacktracks(&m_backtrack);
- BacktrackDestination& stateBacktrack = state.getBacktrackDestination();
- stateBacktrack.copyTo(m_backtrack);
- stateBacktrack.setBacktrackToLabel(&m_backtrackToLabel);
- state.setBacktrackLink(&m_backtrack);
- stateBacktrack.setSubDataLabelPtr(&m_dataAfterLabelPtr);
-
- m_doDirectBacktrack = m_parenBacktrack.hasDestination();
-
- if ((m_term.quantityType == QuantifierGreedy) || (m_term.quantityType == QuantifierNonGreedy))
- m_doDirectBacktrack = false;
-
- if (m_doDirectBacktrack)
- state.propagateBacktrackingFrom(generator, m_parenBacktrack, false);
- else {
- stateBacktrack.setBacktrackJumpList(&m_afterBacktrackJumps);
- stateBacktrack.setBacktrackSourceLabel(&m_backtrackFromAfterParens);
- }
- }
-
- void setNextIteration(Label nextIteration)
- {
- if (!m_nestingLevel && !m_backtrackToLabel.isSet())
- m_backtrackToLabel = nextIteration;
- }
-
- void addAfterParenJump(Jump jump)
- {
- m_afterBacktrackJumps.append(jump);
- }
+ // Generation methods:
+ // ===================
- bool generateCode(YarrGenerator* generator, JumpList& jumpsToNext, bool priorBackTrackFallThrough, bool nextBacktrackFallThrough)
- {
- const RegisterID indexTemporary = regT0;
- unsigned parenthesesFrameLocation = m_term.frameLocation;
- Jump fromPriorBacktrack;
- bool needJumpForPriorParenTail = false;
-
- if (priorBackTrackFallThrough
- && ((m_term.quantityType == QuantifierGreedy)
- || (m_term.quantityType == QuantifierNonGreedy)
- || (!m_doDirectBacktrack && m_parenBacktrack.hasDestination()))) {
- // If the prior paren tail code assumed that it could fall through,
- // but we need to generate after paren backtrack code, then provide
- // a jump around that code for the prior paren tail code.
- // A regular expressing like ((xxx)...)? needs this.
- fromPriorBacktrack = generator->jump();
- needJumpForPriorParenTail = true;
- }
-
- if (!m_backtrack.hasDestination()) {
- if (m_backtrackToLabel.isSet()) {
- m_backtrack.setLabel(m_backtrackToLabel);
- nextBacktrackFallThrough = false;
- } else if (m_jumpListToPriorParen) {
- // If we don't have a destination, go back to either the prior paren or the next outer paren.
- m_backtrack.setBacktrackJumpList(m_jumpListToPriorParen);
- nextBacktrackFallThrough = false;
- } else
- m_backtrack.setBacktrackJumpList(&jumpsToNext);
- } else
- nextBacktrackFallThrough = false;
-
- // A failure AFTER the parens jumps here - Backtrack to this paren
- m_backtrackFromAfterParens = generator->label();
-
- if (m_dataAfterLabelPtr.isSet())
- generator->m_expressionState.m_backtrackRecords.append(AlternativeBacktrackRecord(m_dataAfterLabelPtr, m_backtrackFromAfterParens));
-
- m_afterBacktrackJumps.link(generator);
-
- if (m_term.quantityType == QuantifierGreedy) {
- // If this is -1 we have now tested with both with and without the parens.
- generator->loadFromFrame(parenthesesFrameLocation, indexTemporary);
- m_backtrack.jumpToBacktrack(generator, generator->branch32(Equal, indexTemporary, TrustedImm32(-1)));
- } else if (m_term.quantityType == QuantifierNonGreedy) {
- // If this is -1 we have now tested with both with and without the parens.
- generator->loadFromFrame(parenthesesFrameLocation, indexTemporary);
- generator->branch32(Equal, indexTemporary, TrustedImm32(-1)).linkTo(m_nonGreedyTryParentheses, generator);
- }
-
- if (!m_doDirectBacktrack)
- m_parenBacktrack.plantJumpToBacktrackIfExists(generator);
-
- // A failure WITHIN the parens jumps here
- if (needJumpForPriorParenTail)
- fromPriorBacktrack.link(generator);
- m_parenBacktrack.linkAlternativeBacktracks(generator);
- m_withinBacktrackJumps.link(generator);
-
- if (m_term.capture())
- generator->store32(TrustedImm32(-1), Address(output, (m_term.parentheses.subpatternId << 1) * sizeof(int)));
-
- if (m_term.quantityType == QuantifierGreedy) {
- generator->storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
- generator->jump().linkTo(m_fallThrough, generator);
- nextBacktrackFallThrough = false;
- } else if (!nextBacktrackFallThrough)
- m_backtrack.jumpToBacktrack(generator);
-
- if (!m_doDirectBacktrack)
- m_backtrack.setNextBacktrackLabel(m_backtrackFromAfterParens);
-
- return nextBacktrackFallThrough;
- }
-
- PatternTerm& m_term;
- int m_nestingLevel;
- unsigned m_subParenIndex;
- JumpList* m_jumpListToPriorParen;
- Label m_nonGreedyTryParentheses;
- Label m_fallThrough;
- Label m_backtrackToLabel;
- Label m_backtrackFromAfterParens;
- DataLabelPtr m_dataAfterLabelPtr;
- JumpList m_withinBacktrackJumps;
- JumpList m_afterBacktrackJumps;
- BacktrackDestination m_parenBacktrack;
- BacktrackDestination m_backtrack;
- bool m_doDirectBacktrack;
- };
+ // This method provides a default implementation of backtracking common
+ // to many terms; terms commonly jump out of the forwards matching path
+ // on any failed conditions, and add these jumps to the m_jumps list. If
+ // no special handling is required we can often just backtrack to m_jumps.
+ void backtrackTermDefault(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ m_backtrackingState.append(op.m_jumps);
+ }
- void generateAssertionBOL(TermGenerationState& state)
+ void generateAssertionBOL(size_t opIndex)
{
- PatternTerm& term = state.term();
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
if (m_pattern.m_multiline) {
const RegisterID character = regT0;
JumpList matchDest;
- if (!term.inputPosition)
- matchDest.append(branch32(Equal, index, Imm32(state.checkedTotal)));
+ if (!term->inputPosition)
+ matchDest.append(branch32(Equal, index, Imm32(m_checked)));
- readCharacter(state.inputOffset() - 1, character);
+ readCharacter((term->inputPosition - m_checked) - 1, character);
matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
- state.jumpToBacktrack(this);
+ op.m_jumps.append(jump());
matchDest.link(this);
} else {
// Erk, really should poison out these alternatives early. :-/
- if (term.inputPosition)
- state.jumpToBacktrack(this);
+ if (term->inputPosition)
+ op.m_jumps.append(jump());
else
- state.jumpToBacktrack(this, branch32(NotEqual, index, Imm32(state.checkedTotal)));
+ op.m_jumps.append(branch32(NotEqual, index, Imm32(m_checked)));
}
}
+ void backtrackAssertionBOL(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
- void generateAssertionEOL(TermGenerationState& state)
+ void generateAssertionEOL(size_t opIndex)
{
- PatternTerm& term = state.term();
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
if (m_pattern.m_multiline) {
const RegisterID character = regT0;
JumpList matchDest;
- if (term.inputPosition == state.checkedTotal)
+ if (term->inputPosition == m_checked)
matchDest.append(atEndOfInput());
- readCharacter(state.inputOffset(), character);
+ readCharacter((term->inputPosition - m_checked), character);
matchCharacterClass(character, matchDest, m_pattern.newlineCharacterClass());
- state.jumpToBacktrack(this);
+ op.m_jumps.append(jump());
matchDest.link(this);
} else {
- if (term.inputPosition == state.checkedTotal)
- state.jumpToBacktrack(this, notAtEndOfInput());
+ if (term->inputPosition == m_checked)
+ op.m_jumps.append(notAtEndOfInput());
// Erk, really should poison out these alternatives early. :-/
else
- state.jumpToBacktrack(this);
+ op.m_jumps.append(jump());
}
}
+ void backtrackAssertionEOL(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
// Also falls though on nextIsNotWordChar.
- void matchAssertionWordchar(TermGenerationState& state, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
+ void matchAssertionWordchar(size_t opIndex, JumpList& nextIsWordChar, JumpList& nextIsNotWordChar)
{
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
const RegisterID character = regT0;
- PatternTerm& term = state.term();
- if (term.inputPosition == state.checkedTotal)
+ if (term->inputPosition == m_checked)
nextIsNotWordChar.append(atEndOfInput());
- readCharacter(state.inputOffset(), character);
+ readCharacter((term->inputPosition - m_checked), character);
matchCharacterClass(character, nextIsWordChar, m_pattern.wordcharCharacterClass());
}
- void generateAssertionWordBoundary(TermGenerationState& state)
+ void generateAssertionWordBoundary(size_t opIndex)
{
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
const RegisterID character = regT0;
- PatternTerm& term = state.term();
Jump atBegin;
JumpList matchDest;
- if (!term.inputPosition)
- atBegin = branch32(Equal, index, Imm32(state.checkedTotal));
- readCharacter(state.inputOffset() - 1, character);
+ if (!term->inputPosition)
+ atBegin = branch32(Equal, index, Imm32(m_checked));
+ readCharacter((term->inputPosition - m_checked) - 1, character);
matchCharacterClass(character, matchDest, m_pattern.wordcharCharacterClass());
- if (!term.inputPosition)
+ if (!term->inputPosition)
atBegin.link(this);
// We fall through to here if the last character was not a wordchar.
JumpList nonWordCharThenWordChar;
JumpList nonWordCharThenNonWordChar;
- if (term.invert()) {
- matchAssertionWordchar(state, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
+ if (term->invert()) {
+ matchAssertionWordchar(opIndex, nonWordCharThenNonWordChar, nonWordCharThenWordChar);
nonWordCharThenWordChar.append(jump());
} else {
- matchAssertionWordchar(state, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
+ matchAssertionWordchar(opIndex, nonWordCharThenWordChar, nonWordCharThenNonWordChar);
nonWordCharThenNonWordChar.append(jump());
}
- state.jumpToBacktrack(this, nonWordCharThenNonWordChar);
+ op.m_jumps.append(nonWordCharThenNonWordChar);
// We jump here if the last character was a wordchar.
matchDest.link(this);
JumpList wordCharThenWordChar;
JumpList wordCharThenNonWordChar;
- if (term.invert()) {
- matchAssertionWordchar(state, wordCharThenNonWordChar, wordCharThenWordChar);
+ if (term->invert()) {
+ matchAssertionWordchar(opIndex, wordCharThenNonWordChar, wordCharThenWordChar);
wordCharThenWordChar.append(jump());
} else {
- matchAssertionWordchar(state, wordCharThenWordChar, wordCharThenNonWordChar);
+ matchAssertionWordchar(opIndex, wordCharThenWordChar, wordCharThenNonWordChar);
// This can fall-though!
}
- state.jumpToBacktrack(this, wordCharThenWordChar);
+ op.m_jumps.append(wordCharThenWordChar);
nonWordCharThenWordChar.link(this);
wordCharThenNonWordChar.link(this);
}
+ void backtrackAssertionWordBoundary(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
- void generatePatternCharacterSingle(TermGenerationState& state)
+ void generatePatternCharacterOnce(size_t opIndex)
{
+ YarrOp& op = m_ops[opIndex];
+
+ // m_ops always ends with a OpBodyAlternativeEnd or OpMatchFailed
+ // node, so there must always be at least one more node.
+ ASSERT(opIndex + 1 < m_ops.size());
+ YarrOp& nextOp = m_ops[opIndex + 1];
+
+ if (op.m_isDeadCode)
+ return;
+
+ PatternTerm* term = op.m_term;
+ UChar ch = term->patternCharacter;
+
const RegisterID character = regT0;
- UChar ch = state.term().patternCharacter;
+
+ if (nextOp.m_op == OpTerm) {
+ PatternTerm* nextTerm = nextOp.m_term;
+ if (nextTerm->type == PatternTerm::TypePatternCharacter
+ && nextTerm->quantityType == QuantifierFixedCount
+ && nextTerm->quantityCount == 1
+ && nextTerm->inputPosition == (term->inputPosition + 1)) {
+
+ UChar ch2 = nextTerm->patternCharacter;
+
+ int mask = 0;
+ int chPair = ch | (ch2 << 16);
+
+ if (m_pattern.m_ignoreCase) {
+ if (isASCIIAlpha(ch))
+ mask |= 32;
+ if (isASCIIAlpha(ch2))
+ mask |= 32 << 16;
+ }
+
+ BaseIndex address(input, index, TimesTwo, (term->inputPosition - m_checked) * sizeof(UChar));
+ if (mask) {
+ load32WithUnalignedHalfWords(address, character);
+ or32(Imm32(mask), character);
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(chPair | mask)));
+ } else
+ op.m_jumps.append(branch32WithUnalignedHalfWords(NotEqual, address, Imm32(chPair)));
+
+ nextOp.m_isDeadCode = true;
+ return;
+ }
+ }
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(state.inputOffset(), character);
+ readCharacter(term->inputPosition - m_checked, character);
or32(TrustedImm32(32), character);
- state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
} else {
ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- state.jumpToBacktrack(this, jumpIfCharNotEquals(ch, state.inputOffset()));
+ op.m_jumps.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
}
}
-
- void generatePatternCharacterPair(TermGenerationState& state)
+ void backtrackPatternCharacterOnce(size_t opIndex)
{
- const RegisterID character = regT0;
- UChar ch1 = state.term().patternCharacter;
- UChar ch2 = state.lookaheadTerm().patternCharacter;
-
- int mask = 0;
- int chPair = ch1 | (ch2 << 16);
-
- if (m_pattern.m_ignoreCase) {
- if (isASCIIAlpha(ch1))
- mask |= 32;
- if (isASCIIAlpha(ch2))
- mask |= 32 << 16;
- }
-
- if (mask) {
- load32WithUnalignedHalfWords(BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), character);
- or32(Imm32(mask), character);
- state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(chPair | mask)));
- } else
- state.jumpToBacktrack(this, branch32WithUnalignedHalfWords(NotEqual, BaseIndex(input, index, TimesTwo, state.inputOffset() * sizeof(UChar)), Imm32(chPair)));
+ backtrackTermDefault(opIndex);
}
- void generatePatternCharacterFixed(TermGenerationState& state)
+ void generatePatternCharacterFixed(size_t opIndex)
{
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+ UChar ch = term->patternCharacter;
+
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
- UChar ch = term.patternCharacter;
move(index, countRegister);
- sub32(Imm32(term.quantityCount), countRegister);
+ sub32(Imm32(term->quantityCount), countRegister);
Label loop(this);
+ BaseIndex address(input, countRegister, TimesTwo, (term->inputPosition - m_checked + term->quantityCount) * sizeof(UChar));
+
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
+ load16(address, character);
or32(TrustedImm32(32), character);
- state.jumpToBacktrack(this, branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
+ op.m_jumps.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
} else {
ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- state.jumpToBacktrack(this, branch16(NotEqual, BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), Imm32(ch)));
+ op.m_jumps.append(branch16(NotEqual, address, Imm32(ch)));
}
add32(TrustedImm32(1), countRegister);
branch32(NotEqual, countRegister, index).linkTo(loop, this);
}
+ void backtrackPatternCharacterFixed(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
- void generatePatternCharacterGreedy(TermGenerationState& state)
+ void generatePatternCharacterGreedy(size_t opIndex)
{
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+ UChar ch = term->patternCharacter;
+
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
- UChar ch = term.patternCharacter;
move(TrustedImm32(0), countRegister);
@@ -1352,121 +752,152 @@ class YarrGenerator : private MacroAssembler {
Label loop(this);
failures.append(atEndOfInput());
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(state.inputOffset(), character);
+ readCharacter(term->inputPosition - m_checked, character);
or32(TrustedImm32(32), character);
failures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
} else {
ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- failures.append(jumpIfCharNotEquals(ch, state.inputOffset()));
+ failures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
}
add32(TrustedImm32(1), countRegister);
add32(TrustedImm32(1), index);
- if (term.quantityCount != quantifyInfinite) {
- branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
- failures.append(jump());
- } else
+ if (term->quantityCount == quantifyInfinite)
jump(loop);
-
- Label backtrackBegin(this);
- loadFromFrame(term.frameLocation, countRegister);
- state.jumpToBacktrack(this, branchTest32(Zero, countRegister));
- sub32(TrustedImm32(1), countRegister);
- sub32(TrustedImm32(1), index);
+ else
+ branch32(NotEqual, countRegister, Imm32(term->quantityCount)).linkTo(loop, this);
failures.link(this);
+ op.m_reentry = label();
- storeToFrame(countRegister, term.frameLocation);
+ storeToFrame(countRegister, term->frameLocation);
- state.setBacktrackLabel(backtrackBegin);
}
+ void backtrackPatternCharacterGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
+ const RegisterID countRegister = regT1;
- void generatePatternCharacterNonGreedy(TermGenerationState& state)
+ m_backtrackingState.link(this);
+
+ loadFromFrame(term->frameLocation, countRegister);
+ m_backtrackingState.append(branchTest32(Zero, countRegister));
+ sub32(TrustedImm32(1), countRegister);
+ sub32(TrustedImm32(1), index);
+ jump(op.m_reentry);
+ }
+
+ void generatePatternCharacterNonGreedy(size_t opIndex)
{
- const RegisterID character = regT0;
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
- UChar ch = term.patternCharacter;
move(TrustedImm32(0), countRegister);
+ op.m_reentry = label();
+ storeToFrame(countRegister, term->frameLocation);
+ }
+ void backtrackPatternCharacterNonGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+ UChar ch = term->patternCharacter;
+
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
- Jump firstTimeDoNothing = jump();
+ JumpList nonGreedyFailures;
- Label hardFail(this);
- sub32(countRegister, index);
- state.jumpToBacktrack(this);
+ m_backtrackingState.link(this);
- Label backtrackBegin(this);
- loadFromFrame(term.frameLocation, countRegister);
+ loadFromFrame(term->frameLocation, countRegister);
- atEndOfInput().linkTo(hardFail, this);
- if (term.quantityCount != quantifyInfinite)
- branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+ nonGreedyFailures.append(atEndOfInput());
+ if (term->quantityCount != quantifyInfinite)
+ nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount)));
if (m_pattern.m_ignoreCase && isASCIIAlpha(ch)) {
- readCharacter(state.inputOffset(), character);
+ readCharacter(term->inputPosition - m_checked, character);
or32(TrustedImm32(32), character);
- branch32(NotEqual, character, Imm32(Unicode::toLower(ch))).linkTo(hardFail, this);
+ nonGreedyFailures.append(branch32(NotEqual, character, Imm32(Unicode::toLower(ch))));
} else {
ASSERT(!m_pattern.m_ignoreCase || (Unicode::toLower(ch) == Unicode::toUpper(ch)));
- jumpIfCharNotEquals(ch, state.inputOffset()).linkTo(hardFail, this);
+ nonGreedyFailures.append(jumpIfCharNotEquals(ch, term->inputPosition - m_checked));
}
add32(TrustedImm32(1), countRegister);
add32(TrustedImm32(1), index);
- firstTimeDoNothing.link(this);
- storeToFrame(countRegister, term.frameLocation);
+ jump(op.m_reentry);
- state.setBacktrackLabel(backtrackBegin);
+ nonGreedyFailures.link(this);
+ sub32(countRegister, index);
+ m_backtrackingState.fallthrough();
}
- void generateCharacterClassSingle(TermGenerationState& state)
+ void generateCharacterClassOnce(size_t opIndex)
{
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
const RegisterID character = regT0;
- PatternTerm& term = state.term();
JumpList matchDest;
- readCharacter(state.inputOffset(), character);
- matchCharacterClass(character, matchDest, term.characterClass);
+ readCharacter((term->inputPosition - m_checked), character);
+ matchCharacterClass(character, matchDest, term->characterClass);
- if (term.invert())
- state.jumpToBacktrack(this, matchDest);
+ if (term->invert())
+ op.m_jumps.append(matchDest);
else {
- state.jumpToBacktrack(this);
+ op.m_jumps.append(jump());
matchDest.link(this);
}
}
+ void backtrackCharacterClassOnce(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
- void generateCharacterClassFixed(TermGenerationState& state)
+ void generateCharacterClassFixed(size_t opIndex)
{
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
move(index, countRegister);
- sub32(Imm32(term.quantityCount), countRegister);
+ sub32(Imm32(term->quantityCount), countRegister);
Label loop(this);
JumpList matchDest;
- load16(BaseIndex(input, countRegister, TimesTwo, (state.inputOffset() + term.quantityCount) * sizeof(UChar)), character);
- matchCharacterClass(character, matchDest, term.characterClass);
+ load16(BaseIndex(input, countRegister, TimesTwo, (term->inputPosition - m_checked + term->quantityCount) * sizeof(UChar)), character);
+ matchCharacterClass(character, matchDest, term->characterClass);
- if (term.invert())
- state.jumpToBacktrack(this, matchDest);
+ if (term->invert())
+ op.m_jumps.append(matchDest);
else {
- state.jumpToBacktrack(this);
+ op.m_jumps.append(jump());
matchDest.link(this);
}
add32(TrustedImm32(1), countRegister);
branch32(NotEqual, countRegister, index).linkTo(loop, this);
}
+ void backtrackCharacterClassFixed(size_t opIndex)
+ {
+ backtrackTermDefault(opIndex);
+ }
- void generateCharacterClassGreedy(TermGenerationState& state)
+ void generateCharacterClassGreedy(size_t opIndex)
{
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
const RegisterID character = regT0;
const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
move(TrustedImm32(0), countRegister);
@@ -1474,692 +905,1356 @@ class YarrGenerator : private MacroAssembler {
Label loop(this);
failures.append(atEndOfInput());
- if (term.invert()) {
- readCharacter(state.inputOffset(), character);
- matchCharacterClass(character, failures, term.characterClass);
+ if (term->invert()) {
+ readCharacter(term->inputPosition - m_checked, character);
+ matchCharacterClass(character, failures, term->characterClass);
} else {
JumpList matchDest;
- readCharacter(state.inputOffset(), character);
- matchCharacterClass(character, matchDest, term.characterClass);
+ readCharacter(term->inputPosition - m_checked, character);
+ matchCharacterClass(character, matchDest, term->characterClass);
failures.append(jump());
matchDest.link(this);
}
add32(TrustedImm32(1), countRegister);
add32(TrustedImm32(1), index);
- if (term.quantityCount != quantifyInfinite) {
- branch32(NotEqual, countRegister, Imm32(term.quantityCount)).linkTo(loop, this);
+ if (term->quantityCount != quantifyInfinite) {
+ branch32(NotEqual, countRegister, Imm32(term->quantityCount)).linkTo(loop, this);
failures.append(jump());
} else
jump(loop);
- Label backtrackBegin(this);
- loadFromFrame(term.frameLocation, countRegister);
- state.jumpToBacktrack(this, branchTest32(Zero, countRegister));
- sub32(TrustedImm32(1), countRegister);
- sub32(TrustedImm32(1), index);
-
failures.link(this);
+ op.m_reentry = label();
+
+ storeToFrame(countRegister, term->frameLocation);
+ }
+ void backtrackCharacterClassGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
- storeToFrame(countRegister, term.frameLocation);
+ const RegisterID countRegister = regT1;
+
+ m_backtrackingState.link(this);
- state.setBacktrackLabel(backtrackBegin);
+ loadFromFrame(term->frameLocation, countRegister);
+ m_backtrackingState.append(branchTest32(Zero, countRegister));
+ sub32(TrustedImm32(1), countRegister);
+ sub32(TrustedImm32(1), index);
+ jump(op.m_reentry);
}
- void generateCharacterClassNonGreedy(TermGenerationState& state)
+ void generateCharacterClassNonGreedy(size_t opIndex)
{
- const RegisterID character = regT0;
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
+
const RegisterID countRegister = regT1;
- PatternTerm& term = state.term();
move(TrustedImm32(0), countRegister);
+ op.m_reentry = label();
+ storeToFrame(countRegister, term->frameLocation);
+ }
+ void backtrackCharacterClassNonGreedy(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
- Jump firstTimeDoNothing = jump();
+ const RegisterID character = regT0;
+ const RegisterID countRegister = regT1;
- Label hardFail(this);
- sub32(countRegister, index);
- state.jumpToBacktrack(this);
+ JumpList nonGreedyFailures;
+
+ m_backtrackingState.link(this);
Label backtrackBegin(this);
- loadFromFrame(term.frameLocation, countRegister);
+ loadFromFrame(term->frameLocation, countRegister);
- atEndOfInput().linkTo(hardFail, this);
- branch32(Equal, countRegister, Imm32(term.quantityCount), hardFail);
+ nonGreedyFailures.append(atEndOfInput());
+ nonGreedyFailures.append(branch32(Equal, countRegister, Imm32(term->quantityCount)));
JumpList matchDest;
- readCharacter(state.inputOffset(), character);
- matchCharacterClass(character, matchDest, term.characterClass);
+ readCharacter(term->inputPosition - m_checked, character);
+ matchCharacterClass(character, matchDest, term->characterClass);
- if (term.invert())
- matchDest.linkTo(hardFail, this);
+ if (term->invert())
+ nonGreedyFailures.append(matchDest);
else {
- jump(hardFail);
+ nonGreedyFailures.append(jump());
matchDest.link(this);
}
add32(TrustedImm32(1), countRegister);
add32(TrustedImm32(1), index);
- firstTimeDoNothing.link(this);
- storeToFrame(countRegister, term.frameLocation);
+ jump(op.m_reentry);
- state.setBacktrackLabel(backtrackBegin);
+ nonGreedyFailures.link(this);
+ sub32(countRegister, index);
+ m_backtrackingState.fallthrough();
}
- void generateParenthesesDisjunction(PatternTerm& parenthesesTerm, TermGenerationState& state, unsigned alternativeFrameLocation)
+ // Code generation/backtracking for simple terms
+ // (pattern characters, character classes, and assertions).
+ // These methods farm out work to the set of functions above.
+ void generateTerm(size_t opIndex)
{
- ASSERT((parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern) || (parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion));
- ASSERT(parenthesesTerm.quantityCount == 1);
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
- PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
- unsigned preCheckedCount = ((parenthesesTerm.quantityType == QuantifierFixedCount) && (parenthesesTerm.type != PatternTerm::TypeParentheticalAssertion)) ? disjunction->m_minimumSize : 0;
+ switch (term->type) {
+ case PatternTerm::TypePatternCharacter:
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ if (term->quantityCount == 1)
+ generatePatternCharacterOnce(opIndex);
+ else
+ generatePatternCharacterFixed(opIndex);
+ break;
+ case QuantifierGreedy:
+ generatePatternCharacterGreedy(opIndex);
+ break;
+ case QuantifierNonGreedy:
+ generatePatternCharacterNonGreedy(opIndex);
+ break;
+ }
+ break;
- if (disjunction->m_alternatives.size() == 1) {
- state.resetAlternative();
- ASSERT(state.alternativeValid());
- PatternAlternative* alternative = state.alternative();
- optimizeAlternative(alternative);
+ case PatternTerm::TypeCharacterClass:
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ if (term->quantityCount == 1)
+ generateCharacterClassOnce(opIndex);
+ else
+ generateCharacterClassFixed(opIndex);
+ break;
+ case QuantifierGreedy:
+ generateCharacterClassGreedy(opIndex);
+ break;
+ case QuantifierNonGreedy:
+ generateCharacterClassNonGreedy(opIndex);
+ break;
+ }
+ break;
- int countToCheck = alternative->m_minimumSize - preCheckedCount;
- if (countToCheck) {
- ASSERT((parenthesesTerm.type == PatternTerm::TypeParentheticalAssertion) || (parenthesesTerm.quantityType != QuantifierFixedCount));
+ case PatternTerm::TypeAssertionBOL:
+ generateAssertionBOL(opIndex);
+ break;
- // FIXME: This is quite horrible. The call to 'plantJumpToBacktrackIfExists'
- // will be forced to always trampoline into here, just to decrement the index.
- // Ick.
- Jump skip = jump();
+ case PatternTerm::TypeAssertionEOL:
+ generateAssertionEOL(opIndex);
+ break;
- Label backtrackBegin(this);
- sub32(Imm32(countToCheck), index);
- state.addBacktrackJump(jump());
+ case PatternTerm::TypeAssertionWordBoundary:
+ generateAssertionWordBoundary(opIndex);
+ break;
- skip.link(this);
+ case PatternTerm::TypeForwardReference:
+ break;
- state.setBacktrackLabel(backtrackBegin);
+ case PatternTerm::TypeParenthesesSubpattern:
+ case PatternTerm::TypeParentheticalAssertion:
+ ASSERT_NOT_REACHED();
+ case PatternTerm::TypeBackReference:
+ m_shouldFallBack = true;
+ break;
+ }
+ }
+ void backtrackTerm(size_t opIndex)
+ {
+ YarrOp& op = m_ops[opIndex];
+ PatternTerm* term = op.m_term;
- state.jumpToBacktrack(this, jumpIfNoAvailableInput(countToCheck));
- state.checkedTotal += countToCheck;
+ switch (term->type) {
+ case PatternTerm::TypePatternCharacter:
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ if (term->quantityCount == 1)
+ backtrackPatternCharacterOnce(opIndex);
+ else
+ backtrackPatternCharacterFixed(opIndex);
+ break;
+ case QuantifierGreedy:
+ backtrackPatternCharacterGreedy(opIndex);
+ break;
+ case QuantifierNonGreedy:
+ backtrackPatternCharacterNonGreedy(opIndex);
+ break;
}
+ break;
- for (state.resetTerm(); state.termValid(); state.nextTerm())
- generateTerm(state);
-
- state.checkedTotal -= countToCheck;
- } else {
- JumpList successes;
- bool propogateBacktrack = false;
-
- // Save current state's paren jump list for use with each alternative
- JumpList* outerJumpList = state.getJumpListToPriorParen();
+ case PatternTerm::TypeCharacterClass:
+ switch (term->quantityType) {
+ case QuantifierFixedCount:
+ if (term->quantityCount == 1)
+ backtrackCharacterClassOnce(opIndex);
+ else
+ backtrackCharacterClassFixed(opIndex);
+ break;
+ case QuantifierGreedy:
+ backtrackCharacterClassGreedy(opIndex);
+ break;
+ case QuantifierNonGreedy:
+ backtrackCharacterClassNonGreedy(opIndex);
+ break;
+ }
+ break;
- for (state.resetAlternative(); state.alternativeValid(); state.nextAlternative(), state.setJumpListToPriorParen(outerJumpList)) {
- PatternAlternative* alternative = state.alternative();
- optimizeAlternative(alternative);
+ case PatternTerm::TypeAssertionBOL:
+ backtrackAssertionBOL(opIndex);
+ break;
- ASSERT(alternative->m_minimumSize >= preCheckedCount);
- int countToCheck = alternative->m_minimumSize - preCheckedCount;
- if (countToCheck) {
- state.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
- state.checkedTotal += countToCheck;
- }
+ case PatternTerm::TypeAssertionEOL:
+ backtrackAssertionEOL(opIndex);
+ break;
- for (state.resetTerm(); state.termValid(); state.nextTerm())
- generateTerm(state);
+ case PatternTerm::TypeAssertionWordBoundary:
+ backtrackAssertionWordBoundary(opIndex);
+ break;
- // Matched an alternative.
- DataLabelPtr dataLabel = storeToFrameWithPatch(alternativeFrameLocation);
+ case PatternTerm::TypeForwardReference:
+ break;
- if (!state.isLastAlternative() || countToCheck)
- successes.append(jump());
+ case PatternTerm::TypeParenthesesSubpattern:
+ case PatternTerm::TypeParentheticalAssertion:
+ ASSERT_NOT_REACHED();
+ case PatternTerm::TypeBackReference:
+ m_shouldFallBack = true;
+ break;
+ }
+ }
- // Alternative did not match.
+ void generate()
+ {
+ // Forwards generate the matching code.
+ ASSERT(m_ops.size());
+ size_t opIndex = 0;
- // Do we have a backtrack destination?
- // if so, link the data label to it.
- state.linkDataLabelToBacktrackIfExists(this, dataLabel);
+ do {
+ YarrOp& op = m_ops[opIndex];
+ switch (op.m_op) {
- if (!state.isLastAlternative() || countToCheck)
- state.linkAlternativeBacktracks(this);
+ case OpTerm:
+ generateTerm(opIndex);
+ break;
- if (countToCheck) {
- sub32(Imm32(countToCheck), index);
- state.checkedTotal -= countToCheck;
- } else if (state.isLastAlternative())
- propogateBacktrack = true;
+ // OpBodyAlternativeBegin/Next/End
+ //
+ // These nodes wrap the set of alternatives in the body of the regular expression.
+ // There may be either one or two chains of OpBodyAlternative nodes, one representing
+ // the 'once through' sequence of alternatives (if any exist), and one representing
+ // the repeating alternatives (again, if any exist).
+ //
+ // Upon normal entry to the Begin alternative, we will check that input is available.
+ // Reentry to the Begin alternative will take place after the check has taken place,
+ // and will assume that the input position has already been progressed as appropriate.
+ //
+ // Entry to subsequent Next/End alternatives occurs when the prior alternative has
+ // successfully completed a match - return a success state from JIT code.
+ //
+ // Next alternatives allow for reentry optimized to suit backtracking from its
+ // preceding alternative. It expects the input position to still be set to a position
+ // appropriate to its predecessor, and it will only perform an input check if the
+ // predecessor had a minimum size less than its own.
+ //
+ // In the case 'once through' expressions, the End node will also have a reentry
+ // point to jump to when the last alternative fails. Again, this expects the input
+ // position to still reflect that expected by the prior alternative.
+ case OpBodyAlternativeBegin: {
+ PatternAlternative* alternative = op.m_alternative;
+
+ // Upon entry at the head of the set of alternatives, check if input is available
+ // to run the first alternative. (This progresses the input position).
+ op.m_jumps.append(jumpIfNoAvailableInput(alternative->m_minimumSize));
+ // We will reenter after the check, and assume the input position to have been
+ // set as appropriate to this alternative.
+ op.m_reentry = label();
+
+ m_checked += alternative->m_minimumSize;
+ break;
}
- // We fall through to here when the last alternative fails.
- // Add a backtrack out of here for the parenthese handling code to link up.
- if (!propogateBacktrack)
- state.addBacktrackJump(jump());
-
- // Save address on stack for the parens code to backtrack to, to retry the
- // next alternative.
- state.setBackTrackStackOffset(alternativeFrameLocation * sizeof(void*));
-
- successes.link(this);
- }
- }
+ case OpBodyAlternativeNext:
+ case OpBodyAlternativeEnd: {
+ PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
+ PatternAlternative* alternative = op.m_alternative;
+
+ // If we get here, the prior alternative matched - return success.
+
+ // Adjust the stack pointer to remove the pattern's frame.
+ if (m_pattern.m_body->m_callFrameSize)
+ addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+
+ // Load appropriate values into the return register and the first output
+ // slot, and return. In the case of pattern with a fixed size, we will
+ // not have yet set the value in the first
+ ASSERT(index != returnRegister);
+ if (m_pattern.m_body->m_hasFixedSize) {
+ move(index, returnRegister);
+ if (priorAlternative->m_minimumSize)
+ sub32(Imm32(priorAlternative->m_minimumSize), returnRegister);
+ store32(returnRegister, output);
+ } else
+ load32(Address(output), returnRegister);
+ store32(index, Address(output, 4));
+ generateReturn();
+
+ // This is the divide between the tail of the prior alternative, above, and
+ // the head of the subsequent alternative, below.
+
+ if (op.m_op == OpBodyAlternativeNext) {
+ // This is the reentry point for the Next alternative. We expect any code
+ // that jumps here to do so with the input position matching that of the
+ // PRIOR alteranative, and we will only check input availability if we
+ // need to progress it forwards.
+ op.m_reentry = label();
+ if (int delta = alternative->m_minimumSize - priorAlternative->m_minimumSize) {
+ add32(Imm32(delta), index);
+ if (delta > 0)
+ op.m_jumps.append(jumpIfNoAvailableInput());
+ }
+ } else if (op.m_nextOp == notFound) {
+ // This is the reentry point for the End of 'once through' alternatives,
+ // jumped to when the las alternative fails to match.
+ op.m_reentry = label();
+ sub32(Imm32(priorAlternative->m_minimumSize), index);
+ }
- void generateParenthesesSingle(TermGenerationState& state)
- {
- const RegisterID indexTemporary = regT0;
- PatternTerm& term = state.term();
- PatternDisjunction* disjunction = term.parentheses.disjunction;
- ASSERT(term.quantityCount == 1);
+ if (op.m_op == OpBodyAlternativeNext)
+ m_checked += alternative->m_minimumSize;
+ m_checked -= priorAlternative->m_minimumSize;
+ break;
+ }
- unsigned preCheckedCount = (term.quantityType == QuantifierFixedCount) ? disjunction->m_minimumSize : 0;
+ // OpSimpleNestedAlternativeBegin/Next/End
+ // OpNestedAlternativeBegin/Next/End
+ //
+ // These nodes are used to handle sets of alternatives that are nested within
+ // subpatterns and parenthetical assertions. The 'simple' forms are used where
+ // we do not need to be able to backtrack back into any alternative other than
+ // the last, the normal forms allow backtracking into any alternative.
+ //
+ // Each Begin/Next node is responsible for planting an input check to ensure
+ // sufficient input is available on entry. Next nodes additionally need to
+ // jump to the end - Next nodes use the End node's m_jumps list to hold this
+ // set of jumps.
+ //
+ // In the non-simple forms, successful alternative matches must store a
+ // 'return address' using a DataLabelPtr, used to store the address to jump
+ // to when backtracking, to get to the code for the appropriate alternative.
+ case OpSimpleNestedAlternativeBegin:
+ case OpNestedAlternativeBegin: {
+ PatternTerm* term = op.m_term;
+ PatternAlternative* alternative = op.m_alternative;
+ PatternDisjunction* disjunction = term->parentheses.disjunction;
+
+ // Calculate how much input we need to check for, and if non-zero check.
+ op.m_checkAdjust = alternative->m_minimumSize;
+ if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
+ op.m_checkAdjust -= disjunction->m_minimumSize;
+ if (op.m_checkAdjust)
+ op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
+
+ m_checked += op.m_checkAdjust;
+ break;
+ }
+ case OpSimpleNestedAlternativeNext:
+ case OpNestedAlternativeNext: {
+ PatternTerm* term = op.m_term;
+ PatternAlternative* alternative = op.m_alternative;
+ PatternDisjunction* disjunction = term->parentheses.disjunction;
+
+ // In the non-simple case, store a 'return address' so we can backtrack correctly.
+ if (op.m_op == OpNestedAlternativeNext) {
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation;
+ if (term->quantityType != QuantifierFixedCount)
+ alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
+ }
- unsigned parenthesesFrameLocation = term.frameLocation;
- unsigned alternativeFrameLocation = parenthesesFrameLocation;
- if (term.quantityType != QuantifierFixedCount)
- alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ // If we reach here then the last alternative has matched - jump to the
+ // End node, to skip over any further alternatives.
+ //
+ // FIXME: this is logically O(N^2) (though N can be expected to be very
+ // small). We could avoid this either by adding an extra jump to the JIT
+ // data structures, or by making backtracking code that jumps to Next
+ // alternatives are responsible for checking that input is available (if
+ // we didn't need to plant the input checks, then m_jumps would be free).
+ YarrOp* endOp = &m_ops[op.m_nextOp];
+ while (endOp->m_nextOp != notFound) {
+ ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
+ endOp = &m_ops[endOp->m_nextOp];
+ }
+ ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
+ endOp->m_jumps.append(jump());
+
+ // This is the entry point for the next alternative.
+ op.m_reentry = label();
+
+ // Calculate how much input we need to check for, and if non-zero check.
+ op.m_checkAdjust = alternative->m_minimumSize;
+ if ((term->quantityType == QuantifierFixedCount) && (term->type != PatternTerm::TypeParentheticalAssertion))
+ op.m_checkAdjust -= disjunction->m_minimumSize;
+ if (op.m_checkAdjust)
+ op.m_jumps.append(jumpIfNoAvailableInput(op.m_checkAdjust));
+
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked -= lastOp.m_checkAdjust;
+ m_checked += op.m_checkAdjust;
+ break;
+ }
+ case OpSimpleNestedAlternativeEnd:
+ case OpNestedAlternativeEnd: {
+ PatternTerm* term = op.m_term;
+
+ // In the non-simple case, store a 'return address' so we can backtrack correctly.
+ if (op.m_op == OpNestedAlternativeEnd) {
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation;
+ if (term->quantityType != QuantifierFixedCount)
+ alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ op.m_returnAddress = storeToFrameWithPatch(alternativeFrameLocation);
+ }
- // optimized case - no capture & no quantifier can be handled in a light-weight manner.
- if (!term.capture() && (term.quantityType == QuantifierFixedCount)) {
- m_expressionState.incrementParenNestingLevel();
+ // If this set of alternatives contains more than one alternative,
+ // then the Next nodes will have planted jumps to the End, and added
+ // them to this node's m_jumps list.
+ op.m_jumps.link(this);
+ op.m_jumps.clear();
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked -= lastOp.m_checkAdjust;
+ break;
+ }
- // Use the current state's jump list for the nested parentheses.
- parenthesesState.setJumpListToPriorParen(state.getJumpListToPriorParen());
+ // OpParenthesesSubpatternOnceBegin/End
+ //
+ // These nodes support (optionally) capturing subpatterns, that have a
+ // quantity count of 1 (this covers fixed once, and ?/?? quantifiers).
+ case OpParenthesesSubpatternOnceBegin: {
+ PatternTerm* term = op.m_term;
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ const RegisterID indexTemporary = regT0;
+ ASSERT(term->quantityCount == 1);
+
+ // Upon entry to a Greedy quantified set of parenthese store the index.
+ // We'll use this for two purposes:
+ // - To indicate which iteration we are on of mathing the remainder of
+ // the expression after the parentheses - the first, including the
+ // match within the parentheses, or the second having skipped over them.
+ // - To check for empty matches, which must be rejected.
+ //
+ // At the head of a NonGreedy set of parentheses we'll immediately set the
+ // value on the stack to -1 (indicating a match skipping the subpattern),
+ // and plant a jump to the end. We'll also plant a label to backtrack to
+ // to reenter the subpattern later, with a store to set up index on the
+ // second iteration.
+ //
+ // FIXME: for capturing parens, could use the index in the capture array?
+ if (term->quantityType == QuantifierGreedy)
+ storeToFrame(index, parenthesesFrameLocation);
+ else if (term->quantityType == QuantifierNonGreedy) {
+ storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
+ op.m_jumps.append(jump());
+ op.m_reentry = label();
+ storeToFrame(index, parenthesesFrameLocation);
+ }
- generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
- // this expects that any backtracks back out of the parentheses will be in the
- // parenthesesState's m_backTrackJumps vector, and that if they need backtracking
- // they will have set an entry point on the parenthesesState's m_backtrackLabel.
- BacktrackDestination& parenthesesBacktrack = parenthesesState.getBacktrackDestination();
- BacktrackDestination& stateBacktrack = state.getBacktrackDestination();
+ // If the parenthese are capturing, store the starting index value to the
+ // captures array, offsetting as necessary.
+ //
+ // FIXME: could avoid offsetting this value in JIT code, apply
+ // offsets only afterwards, at the point the results array is
+ // being accessed.
+ if (term->capture()) {
+ int offsetId = term->parentheses.subpatternId << 1;
+ int inputOffset = term->inputPosition - m_checked;
+ if (term->quantityType == QuantifierFixedCount)
+ inputOffset -= term->parentheses.disjunction->m_minimumSize;
+ if (inputOffset) {
+ move(index, indexTemporary);
+ add32(Imm32(inputOffset), indexTemporary);
+ store32(indexTemporary, Address(output, offsetId * sizeof(int)));
+ } else
+ store32(index, Address(output, offsetId * sizeof(int)));
+ }
+ break;
+ }
+ case OpParenthesesSubpatternOnceEnd: {
+ PatternTerm* term = op.m_term;
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ const RegisterID indexTemporary = regT0;
+ ASSERT(term->quantityCount == 1);
+
+ // For Greedy/NonGreedy quantified parentheses, we must reject zero length
+ // matches. If the minimum size is know to be non-zero we need not check.
+ if (term->quantityType != QuantifierFixedCount && !term->parentheses.disjunction->m_minimumSize)
+ op.m_jumps.append(branch32(Equal, index, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*))));
+
+ // If the parenthese are capturing, store the ending index value to the
+ // captures array, offsetting as necessary.
+ //
+ // FIXME: could avoid offsetting this value in JIT code, apply
+ // offsets only afterwards, at the point the results array is
+ // being accessed.
+ if (term->capture()) {
+ int offsetId = (term->parentheses.subpatternId << 1) + 1;
+ int inputOffset = term->inputPosition - m_checked;
+ if (inputOffset) {
+ move(index, indexTemporary);
+ add32(Imm32(inputOffset), indexTemporary);
+ store32(indexTemporary, Address(output, offsetId * sizeof(int)));
+ } else
+ store32(index, Address(output, offsetId * sizeof(int)));
+ }
- state.propagateBacktrackingFrom(this, parenthesesBacktrack);
- stateBacktrack.propagateBacktrackToLabel(parenthesesBacktrack);
+ // If the parentheses are quantified Greedy then add a label to jump back
+ // to if get a failed match from after the parentheses. For NonGreedy
+ // parentheses, link the jump from before the subpattern to here.
+ if (term->quantityType == QuantifierGreedy)
+ op.m_reentry = label();
+ else if (term->quantityType == QuantifierNonGreedy) {
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ beginOp.m_jumps.link(this);
+ }
+ break;
+ }
- state.setJumpListToPriorParen(parenthesesState.getJumpListToPriorParen());
+ // OpParenthesesSubpatternTerminalBegin/End
+ case OpParenthesesSubpatternTerminalBegin: {
+ PatternTerm* term = op.m_term;
+ ASSERT(term->quantityType == QuantifierGreedy);
+ ASSERT(term->quantityCount == quantifyInfinite);
+ ASSERT(!term->capture());
- m_expressionState.decrementParenNestingLevel();
- } else {
- Jump nonGreedySkipParentheses;
- Label nonGreedyTryParentheses;
- if (term.quantityType == QuantifierGreedy)
- storeToFrame(index, parenthesesFrameLocation);
- else if (term.quantityType == QuantifierNonGreedy) {
- storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
- nonGreedySkipParentheses = jump();
- nonGreedyTryParentheses = label();
- storeToFrame(index, parenthesesFrameLocation);
- }
+ // Upon entry set a label to loop back to.
+ op.m_reentry = label();
- // store the match start index
- if (term.capture()) {
- int inputOffset = state.inputOffset() - preCheckedCount;
- if (inputOffset) {
- move(index, indexTemporary);
- add32(Imm32(inputOffset), indexTemporary);
- store32(indexTemporary, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
- } else
- store32(index, Address(output, (term.parentheses.subpatternId << 1) * sizeof(int)));
+ // Store the start index of the current match; we need to reject zero
+ // length matches.
+ storeToFrame(index, term->frameLocation);
+ break;
}
+ case OpParenthesesSubpatternTerminalEnd: {
+ PatternTerm* term = op.m_term;
- ParenthesesTail* parenthesesTail = m_expressionState.addParenthesesTail(term, state.getJumpListToPriorParen());
+ // Check for zero length matches - if the match is non-zero, then we
+ // can accept it & loop back up to the head of the subpattern.
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ branch32(NotEqual, index, Address(stackPointerRegister, term->frameLocation * sizeof(void*)), beginOp.m_reentry);
- m_expressionState.incrementParenNestingLevel();
+ // Reject the match - backtrack back into the subpattern.
+ op.m_jumps.append(jump());
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ // This is the entry point to jump to when we stop matching - we will
+ // do so once the subpattern cannot match any more.
+ op.m_reentry = label();
+ break;
+ }
- // Save the parenthesesTail for backtracking from nested parens to this one.
- parenthesesState.setJumpListToPriorParen(&parenthesesTail->m_withinBacktrackJumps);
+ // OpParentheticalAssertionBegin/End
+ case OpParentheticalAssertionBegin: {
+ PatternTerm* term = op.m_term;
- // generate the body of the parentheses
- generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
+ // Store the current index - assertions should not update index, so
+ // we will need to restore it upon a successful match.
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ storeToFrame(index, parenthesesFrameLocation);
- // For non-fixed counts, backtrack if we didn't match anything.
- if (term.quantityType != QuantifierFixedCount)
- parenthesesTail->addAfterParenJump(branch32(Equal, index, Address(stackPointerRegister, (parenthesesFrameLocation * sizeof(void*)))));
+ // Check
+ op.m_checkAdjust = m_checked - term->inputPosition;
+ if (op.m_checkAdjust)
+ sub32(Imm32(op.m_checkAdjust), index);
- // store the match end index
- if (term.capture()) {
- int inputOffset = state.inputOffset();
- if (inputOffset) {
- move(index, indexTemporary);
- add32(Imm32(state.inputOffset()), indexTemporary);
- store32(indexTemporary, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
- } else
- store32(index, Address(output, ((term.parentheses.subpatternId << 1) + 1) * sizeof(int)));
+ m_checked -= op.m_checkAdjust;
+ break;
}
+ case OpParentheticalAssertionEnd: {
+ PatternTerm* term = op.m_term;
+
+ // Restore the input index value.
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ loadFromFrame(parenthesesFrameLocation, index);
+
+ // If inverted, a successful match of the assertion must be treated
+ // as a failure, so jump to backtracking.
+ if (term->invert()) {
+ op.m_jumps.append(jump());
+ op.m_reentry = label();
+ }
- m_expressionState.decrementParenNestingLevel();
-
- parenthesesTail->processBacktracks(this, state, parenthesesState, nonGreedyTryParentheses, label());
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked += lastOp.m_checkAdjust;
+ break;
+ }
- state.setJumpListToPriorParen(&parenthesesTail->m_afterBacktrackJumps);
-
- parenthesesState.getBacktrackDestination().clear();
+ case OpMatchFailed:
+ if (m_pattern.m_body->m_callFrameSize)
+ addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+ move(TrustedImm32(-1), returnRegister);
+ generateReturn();
+ break;
+ }
- if (term.quantityType == QuantifierNonGreedy)
- nonGreedySkipParentheses.link(this);
- }
+ ++opIndex;
+ } while (opIndex < m_ops.size());
}
- void generateParenthesesGreedyNoBacktrack(TermGenerationState& state)
+ void backtrack()
{
- PatternTerm& parenthesesTerm = state.term();
- PatternDisjunction* disjunction = parenthesesTerm.parentheses.disjunction;
- ASSERT(parenthesesTerm.type == PatternTerm::TypeParenthesesSubpattern);
- ASSERT(parenthesesTerm.quantityCount != 1); // Handled by generateParenthesesSingle.
+ // Backwards generate the backtracking code.
+ size_t opIndex = m_ops.size();
+ ASSERT(opIndex);
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
+ do {
+ --opIndex;
+ YarrOp& op = m_ops[opIndex];
+ switch (op.m_op) {
- Label matchAgain(this);
+ case OpTerm:
+ backtrackTerm(opIndex);
+ break;
- storeToFrame(index, parenthesesTerm.frameLocation); // Save the current index to check for zero len matches later.
+ // OpBodyAlternativeBegin/Next/End
+ //
+ // For each Begin/Next node representing an alternative, we need to decide what to do
+ // in two circumstances:
+ // - If we backtrack back into this node, from within the alternative.
+ // - If the input check at the head of the alternative fails (if this exists).
+ //
+ // We treat these two cases differently since in the former case we have slightly
+ // more information - since we are backtracking out of a prior alternative we know
+ // that at least enough input was available to run it. For example, given the regular
+ // expression /a|b/, if we backtrack out of the first alternative (a failed pattern
+ // character match of 'a'), then we need not perform an additional input availability
+ // check before running the second alternative.
+ //
+ // Backtracking required differs for the last alternative, which in the case of the
+ // repeating set of alternatives must loop. The code generated for the last alternative
+ // will also be used to handle all input check failures from any prior alternatives -
+ // these require similar functionality, in seeking the next available alternative for
+ // which there is sufficient input.
+ //
+ // Since backtracking of all other alternatives simply requires us to link backtracks
+ // to the reentry point for the subsequent alternative, we will only be generating any
+ // code when backtracking the last alternative.
+ case OpBodyAlternativeBegin:
+ case OpBodyAlternativeNext: {
+ PatternAlternative* alternative = op.m_alternative;
+
+ if (op.m_op == OpBodyAlternativeNext) {
+ PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
+ m_checked += priorAlternative->m_minimumSize;
+ }
+ m_checked -= alternative->m_minimumSize;
- for (parenthesesState.resetAlternative(); parenthesesState.alternativeValid(); parenthesesState.nextAlternative()) {
+ // Is this the last alternative? If not, then if we backtrack to this point we just
+ // need to jump to try to match the next alternative.
+ if (m_ops[op.m_nextOp].m_op != OpBodyAlternativeEnd) {
+ m_backtrackingState.linkTo(m_ops[op.m_nextOp].m_reentry, this);
+ break;
+ }
+ YarrOp& endOp = m_ops[op.m_nextOp];
- PatternAlternative* alternative = parenthesesState.alternative();
- optimizeAlternative(alternative);
+ YarrOp* beginOp = &op;
+ while (beginOp->m_op != OpBodyAlternativeBegin) {
+ ASSERT(beginOp->m_op == OpBodyAlternativeNext);
+ beginOp = &m_ops[beginOp->m_previousOp];
+ }
- int countToCheck = alternative->m_minimumSize;
- if (countToCheck) {
- parenthesesState.addBacktrackJump(jumpIfNoAvailableInput(countToCheck));
- parenthesesState.checkedTotal += countToCheck;
- }
+ bool onceThrough = endOp.m_nextOp == notFound;
+
+ // First, generate code to handle cases where we backtrack out of an attempted match
+ // of the last alternative. If this is a 'once through' set of alternatives then we
+ // have nothing to do - link this straight through to the End.
+ if (onceThrough)
+ m_backtrackingState.linkTo(endOp.m_reentry, this);
+ else {
+ // Okay, we're going to need to loop. Calculate the delta between where the input
+ // position was, and where we want it to be allowing for the fact that we need to
+ // increment by 1. E.g. for the regexp /a|x/ we need to increment the position by
+ // 1 between loop iterations, but for /abcd|xyz/ we need to increment by two when
+ // looping from the last alternative to the first, for /a|xyz/ we need to decrement
+ // by 1, and for /a|xy/ we don't need to move the input position at all.
+ int deltaLastAlternativeToFirstAlternativePlusOne = (beginOp->m_alternative->m_minimumSize - alternative->m_minimumSize) + 1;
+
+ // If we don't need to move the input poistion, and the pattern has a fixed size
+ // (in which case we omit the store of the start index until the pattern has matched)
+ // then we can just link the backtrack out of the last alternative straight to the
+ // head of the first alternative.
+ if (!deltaLastAlternativeToFirstAlternativePlusOne && m_pattern.m_body->m_hasFixedSize)
+ m_backtrackingState.linkTo(beginOp->m_reentry, this);
+ else {
+ // We need to generate a trampoline of code to execute before looping back
+ // around to the first alternative.
+ m_backtrackingState.link(this);
+
+ // If the pattern size is not fixed, then store the start index, for use if we match.
+ if (!m_pattern.m_body->m_hasFixedSize) {
+ if (alternative->m_minimumSize == 1)
+ store32(index, Address(output));
+ else {
+ move(index, regT0);
+ if (alternative->m_minimumSize)
+ sub32(Imm32(alternative->m_minimumSize - 1), regT0);
+ else
+ add32(Imm32(1), regT0);
+ store32(regT0, Address(output));
+ }
+ }
+
+ if (deltaLastAlternativeToFirstAlternativePlusOne)
+ add32(Imm32(deltaLastAlternativeToFirstAlternativePlusOne), index);
+
+ // Loop. Since this code is only reached when we backtrack out of the last
+ // alternative (and NOT linked to from the input check upon entry to the
+ // last alternative) we know that there must be at least enough input as
+ // required by the last alternative. As such, we only need to check if the
+ // first will require more to run - if the same or less is required we can
+ // unconditionally jump.
+ if (deltaLastAlternativeToFirstAlternativePlusOne > 0)
+ checkInput().linkTo(beginOp->m_reentry, this);
+ else
+ jump(beginOp->m_reentry);
+ }
+ }
- for (parenthesesState.resetTerm(); parenthesesState.termValid(); parenthesesState.nextTerm())
- generateTerm(parenthesesState);
+ // We can reach this point in the code in two ways:
+ // - Fallthrough from the code above (a repeating alternative backtracked out of its
+ // last alternative, and did not have sufficent input to run the first).
+ // - We will loop back up to the following label when a releating alternative loops,
+ // following a failed input check.
+ //
+ // Either way, we have just failed the input check for the first alternative.
+ Label firstInputCheckFailed(this);
+
+ // Generate code to handle input check failures from alternatives except the last.
+ // prevOp is the alternative we're handling a bail out from (initially Begin), and
+ // nextOp is the alternative we will be attempting to reenter into.
+ //
+ // We will link input check failures from the forwards matching path back to the code
+ // that can handle them.
+ YarrOp* prevOp = beginOp;
+ YarrOp* nextOp = &m_ops[beginOp->m_nextOp];
+ while (nextOp->m_op != OpBodyAlternativeEnd) {
+ prevOp->m_jumps.link(this);
+
+ int delta = nextOp->m_alternative->m_minimumSize - prevOp->m_alternative->m_minimumSize;
+ if (delta)
+ add32(Imm32(delta), index);
+
+ // We only get here if an input check fails, it is only worth checking again
+ // if the next alternative has a minimum size less than the last.
+ if (delta < 0) {
+ // FIXME: if we added an extra label to YarrOp, we could avoid needing to
+ // subtract delta back out, and reduce this code. Should performance test
+ // the benefit of this.
+ Jump fail = jumpIfNoAvailableInput();
+ sub32(Imm32(delta), index);
+ jump(nextOp->m_reentry);
+ fail.link(this);
+ }
+ prevOp = nextOp;
+ nextOp = &m_ops[nextOp->m_nextOp];
+ }
- // If we get here, we matched! If the index advanced then try to match more since limit isn't supported yet.
- branch32(NotEqual, index, Address(stackPointerRegister, (parenthesesTerm.frameLocation * sizeof(void*))), matchAgain);
+ // We fall through to here if there is insufficient input to run the last alternative.
- // If we get here we matched, but we matched "" - cannot accept this alternative as is, so either backtrack,
- // or fall through to try the next alternative if no backtrack is available.
- parenthesesState.plantJumpToBacktrackIfExists(this);
+ // If there is insufficient input to run the last alternative, then for 'once through'
+ // alternatives we are done - just jump back up into the forwards matching path at the End.
+ if (onceThrough) {
+ op.m_jumps.linkTo(endOp.m_reentry, this);
+ jump(endOp.m_reentry);
+ break;
+ }
- parenthesesState.linkAlternativeBacktracks(this);
+ // For repeating alternatives, link any input check failure from the last alternative to
+ // this point.
+ op.m_jumps.link(this);
- // We get here if the alternative fails to match - fall through to the next iteration, or out of the loop.
+ bool needsToUpdateMatchStart = !m_pattern.m_body->m_hasFixedSize;
- if (countToCheck) {
- sub32(Imm32(countToCheck), index);
- parenthesesState.checkedTotal -= countToCheck;
- }
- }
+ // Check for cases where input position is already incremented by 1 for the last
+ // alternative (this is particularly useful where the minimum size of the body
+ // disjunction is 0, e.g. /a*|b/).
+ if (needsToUpdateMatchStart && alternative->m_minimumSize == 1) {
+ // index is already incremented by 1, so just store it now!
+ store32(index, Address(output));
+ needsToUpdateMatchStart = false;
+ }
- // If the last alternative falls through to here, we have a failed match...
- // Which means that we match whatever we have matched up to this point (even if nothing).
- }
+ // Check whether there is sufficient input to loop. Increment the input position by
+ // one, and check. Also add in the minimum disjunction size before checking - there
+ // is no point in looping if we're just going to fail all the input checks around
+ // the next iteration.
+ int deltaLastAlternativeToBodyMinimumPlusOne = (m_pattern.m_body->m_minimumSize + 1) - alternative->m_minimumSize;
+ if (deltaLastAlternativeToBodyMinimumPlusOne)
+ add32(Imm32(deltaLastAlternativeToBodyMinimumPlusOne), index);
+ Jump matchFailed = jumpIfNoAvailableInput();
+
+ if (needsToUpdateMatchStart) {
+ if (!m_pattern.m_body->m_minimumSize)
+ store32(index, Address(output));
+ else {
+ move(index, regT0);
+ sub32(Imm32(m_pattern.m_body->m_minimumSize), regT0);
+ store32(regT0, Address(output));
+ }
+ }
- void generateParentheticalAssertion(TermGenerationState& state)
- {
- PatternTerm& term = state.term();
- PatternDisjunction* disjunction = term.parentheses.disjunction;
- ASSERT(term.quantityCount == 1);
- ASSERT(term.quantityType == QuantifierFixedCount);
+ // Calculate how much more input the first alternative requires than the minimum
+ // for the body as a whole. If no more is needed then we dont need an additional
+ // input check here - jump straight back up to the start of the first alternative.
+ int deltaBodyMinimumToFirstAlternative = beginOp->m_alternative->m_minimumSize - m_pattern.m_body->m_minimumSize;
+ if (!deltaBodyMinimumToFirstAlternative)
+ jump(beginOp->m_reentry);
+ else {
+ add32(Imm32(deltaBodyMinimumToFirstAlternative), index);
+ checkInput().linkTo(beginOp->m_reentry, this);
+ jump(firstInputCheckFailed);
+ }
- unsigned parenthesesFrameLocation = term.frameLocation;
- unsigned alternativeFrameLocation = parenthesesFrameLocation + YarrStackSpaceForBackTrackInfoParentheticalAssertion;
+ // We jump to here if we iterate to the point that there is insufficient input to
+ // run any matches, and need to return a failure state from JIT code.
+ matchFailed.link(this);
- int countCheckedAfterAssertion = state.checkedTotal - term.inputPosition;
+ if (m_pattern.m_body->m_callFrameSize)
+ addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+ move(TrustedImm32(-1), returnRegister);
+ generateReturn();
+ break;
+ }
+ case OpBodyAlternativeEnd: {
+ // We should never backtrack back into a body disjunction.
+ ASSERT(m_backtrackingState.isEmpty());
- if (term.invert()) {
- // Inverted case
- storeToFrame(index, parenthesesFrameLocation);
+ PatternAlternative* priorAlternative = m_ops[op.m_previousOp].m_alternative;
+ m_checked += priorAlternative->m_minimumSize;
+ break;
+ }
- state.checkedTotal -= countCheckedAfterAssertion;
- if (countCheckedAfterAssertion)
- sub32(Imm32(countCheckedAfterAssertion), index);
+ // OpSimpleNestedAlternativeBegin/Next/End
+ // OpNestedAlternativeBegin/Next/End
+ //
+ // Generate code for when we backtrack back out of an alternative into
+ // a Begin or Next node, or when the entry input count check fails. If
+ // there are more alternatives we need to jump to the next alternative,
+ // if not we backtrack back out of the current set of parentheses.
+ //
+ // In the case of non-simple nested assertions we need to also link the
+ // 'return address' appropriately to backtrack back out into the correct
+ // alternative.
+ case OpSimpleNestedAlternativeBegin:
+ case OpSimpleNestedAlternativeNext:
+ case OpNestedAlternativeBegin:
+ case OpNestedAlternativeNext: {
+ YarrOp& nextOp = m_ops[op.m_nextOp];
+ bool isBegin = op.m_previousOp == notFound;
+ bool isLastAlternative = nextOp.m_nextOp == notFound;
+ ASSERT(isBegin == (op.m_op == OpSimpleNestedAlternativeBegin || op.m_op == OpNestedAlternativeBegin));
+ ASSERT(isLastAlternative == (nextOp.m_op == OpSimpleNestedAlternativeEnd || nextOp.m_op == OpNestedAlternativeEnd));
+
+ // Treat an input check failure the same as a failed match.
+ m_backtrackingState.append(op.m_jumps);
+
+ // Set the backtracks to jump to the appropriate place. We may need
+ // to link the backtracks in one of three different way depending on
+ // the type of alternative we are dealing with:
+ // - A single alternative, with no simplings.
+ // - The last alternative of a set of two or more.
+ // - An alternative other than the last of a set of two or more.
+ //
+ // In the case of a single alternative on its own, we don't need to
+ // jump anywhere - if the alternative fails to match we can just
+ // continue to backtrack out of the parentheses without jumping.
+ //
+ // In the case of the last alternative in a set of more than one, we
+ // need to jump to return back out to the beginning. We'll do so by
+ // adding a jump to the End node's m_jumps list, and linking this
+ // when we come to generate the Begin node. For alternatives other
+ // than the last, we need to jump to the next alternative.
+ //
+ // If the alternative had adjusted the input position we must link
+ // backtracking to here, correct, and then jump on. If not we can
+ // link the backtracks directly to their destination.
+ if (op.m_checkAdjust) {
+ // Handle the cases where we need to link the backtracks here.
+ m_backtrackingState.link(this);
+ sub32(Imm32(op.m_checkAdjust), index);
+ if (!isLastAlternative) {
+ // An alternative that is not the last should jump to its successor.
+ jump(nextOp.m_reentry);
+ } else if (!isBegin) {
+ // The last of more than one alternatives must jump back to the begnning.
+ nextOp.m_jumps.append(jump());
+ } else {
+ // A single alternative on its own can fall through.
+ m_backtrackingState.fallthrough();
+ }
+ } else {
+ // Handle the cases where we can link the backtracks directly to their destinations.
+ if (!isLastAlternative) {
+ // An alternative that is not the last should jump to its successor.
+ m_backtrackingState.linkTo(nextOp.m_reentry, this);
+ } else if (!isBegin) {
+ // The last of more than one alternatives must jump back to the begnning.
+ m_backtrackingState.takeBacktracksToJumpList(nextOp.m_jumps, this);
+ }
+ // In the case of a single alternative on its own do nothing - it can fall through.
+ }
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
- generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
- // Success! - which means - Fail!
- loadFromFrame(parenthesesFrameLocation, index);
- state.jumpToBacktrack(this);
+ // At this point we've handled the backtracking back into this node.
+ // Now link any backtracks that need to jump to here.
+
+ // For non-simple alternatives, link the alternative's 'return address'
+ // so that we backtrack back out into the previous alternative.
+ if (op.m_op == OpNestedAlternativeNext)
+ m_backtrackingState.append(op.m_returnAddress);
+
+ // If there is more than one alternative, then the last alternative will
+ // have planted a jump to be linked to the end. This jump was added to the
+ // End node's m_jumps list. If we are back at the beginning, link it here.
+ if (isBegin) {
+ YarrOp* endOp = &m_ops[op.m_nextOp];
+ while (endOp->m_nextOp != notFound) {
+ ASSERT(endOp->m_op == OpSimpleNestedAlternativeNext || endOp->m_op == OpNestedAlternativeNext);
+ endOp = &m_ops[endOp->m_nextOp];
+ }
+ ASSERT(endOp->m_op == OpSimpleNestedAlternativeEnd || endOp->m_op == OpNestedAlternativeEnd);
+ m_backtrackingState.append(endOp->m_jumps);
+ }
- // And fail means success.
- parenthesesState.linkAlternativeBacktracks(this);
+ if (!isBegin) {
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked += lastOp.m_checkAdjust;
+ }
+ m_checked -= op.m_checkAdjust;
+ break;
+ }
+ case OpSimpleNestedAlternativeEnd:
+ case OpNestedAlternativeEnd: {
+ PatternTerm* term = op.m_term;
+
+ // If we backtrack into the end of a simple subpattern do nothing;
+ // just continue through into the last alternative. If we backtrack
+ // into the end of a non-simple set of alterntives we need to jump
+ // to the backtracking return address set up during generation.
+ if (op.m_op == OpNestedAlternativeEnd) {
+ m_backtrackingState.link(this);
+
+ // Plant a jump to the return address.
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ unsigned alternativeFrameLocation = parenthesesFrameLocation;
+ if (term->quantityType != QuantifierFixedCount)
+ alternativeFrameLocation += YarrStackSpaceForBackTrackInfoParenthesesOnce;
+ loadFromFrameAndJump(alternativeFrameLocation);
+
+ // Link the DataLabelPtr associated with the end of the last
+ // alternative to this point.
+ m_backtrackingState.append(op.m_returnAddress);
+ }
- loadFromFrame(parenthesesFrameLocation, index);
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked += lastOp.m_checkAdjust;
+ break;
+ }
- state.checkedTotal += countCheckedAfterAssertion;
- } else {
- // Normal case
- storeToFrame(index, parenthesesFrameLocation);
+ // OpParenthesesSubpatternOnceBegin/End
+ //
+ // When we are backtracking back out of a capturing subpattern we need
+ // to clear the start index in the matches output array, to record that
+ // this subpattern has not been captured.
+ //
+ // When backtracking back out of a Greedy quantified subpattern we need
+ // to catch this, and try running the remainder of the alternative after
+ // the subpattern again, skipping the parentheses.
+ //
+ // Upon backtracking back into a quantified set of parentheses we need to
+ // check whether we were currently skipping the subpattern. If not, we
+ // can backtrack into them, if we were we need to either backtrack back
+ // out of the start of the parentheses, or jump back to the forwards
+ // matching start, depending of whether the match is Greedy or NonGreedy.
+ case OpParenthesesSubpatternOnceBegin: {
+ PatternTerm* term = op.m_term;
+ ASSERT(term->quantityCount == 1);
+
+ // We only need to backtrack to thispoint if capturing or greedy.
+ if (term->capture() || term->quantityType == QuantifierGreedy) {
+ m_backtrackingState.link(this);
+
+ // If capturing, clear the capture (we only need to reset start).
+ if (term->capture())
+ store32(TrustedImm32(-1), Address(output, (term->parentheses.subpatternId << 1) * sizeof(int)));
+
+ // If Greedy, jump to the end.
+ if (term->quantityType == QuantifierGreedy) {
+ // Clear the flag in the stackframe indicating we ran through the subpattern.
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ storeToFrame(TrustedImm32(-1), parenthesesFrameLocation);
+ // Jump to after the parentheses, skipping the subpattern.
+ jump(m_ops[op.m_nextOp].m_reentry);
+ // A backtrack from after the parentheses, when skipping the subpattern,
+ // will jump back to here.
+ op.m_jumps.link(this);
+ }
- state.checkedTotal -= countCheckedAfterAssertion;
- if (countCheckedAfterAssertion)
- sub32(Imm32(countCheckedAfterAssertion), index);
+ m_backtrackingState.fallthrough();
+ }
+ break;
+ }
+ case OpParenthesesSubpatternOnceEnd: {
+ PatternTerm* term = op.m_term;
+
+ if (term->quantityType != QuantifierFixedCount) {
+ m_backtrackingState.link(this);
+
+ // Check whether we should backtrack back into the parentheses, or if we
+ // are currently in a state where we had skipped over the subpattern
+ // (in which case the flag value on the stack will be -1).
+ unsigned parenthesesFrameLocation = term->frameLocation;
+ Jump hadSkipped = branch32(Equal, Address(stackPointerRegister, parenthesesFrameLocation * sizeof(void*)), TrustedImm32(-1));
+
+ if (term->quantityType == QuantifierGreedy) {
+ // For Greedy parentheses, we skip after having already tried going
+ // through the subpattern, so if we get here we're done.
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ beginOp.m_jumps.append(hadSkipped);
+ } else {
+ // For NonGreedy parentheses, we try skipping the subpattern first,
+ // so if we get here we need to try running through the subpattern
+ // next. Jump back to the start of the parentheses in the forwards
+ // matching path.
+ ASSERT(term->quantityType == QuantifierNonGreedy);
+ YarrOp& beginOp = m_ops[op.m_previousOp];
+ hadSkipped.linkTo(beginOp.m_reentry, this);
+ }
- TermGenerationState parenthesesState(disjunction, state.checkedTotal);
- generateParenthesesDisjunction(state.term(), parenthesesState, alternativeFrameLocation);
- // Success! - which means - Success!
- loadFromFrame(parenthesesFrameLocation, index);
- Jump success = jump();
+ m_backtrackingState.fallthrough();
+ }
- parenthesesState.linkAlternativeBacktracks(this);
+ m_backtrackingState.append(op.m_jumps);
+ break;
+ }
- loadFromFrame(parenthesesFrameLocation, index);
- state.jumpToBacktrack(this);
+ // OpParenthesesSubpatternTerminalBegin/End
+ //
+ // Terminal subpatterns will always match - there is nothing after them to
+ // force a backtrack, and they have a minimum count of 0, and as such will
+ // always produce an acceptable result.
+ case OpParenthesesSubpatternTerminalBegin: {
+ // We will backtrack to this point once the subpattern cannot match any
+ // more. Since no match is accepted as a successful match (we are Greedy
+ // quantified with a minimum of zero) jump back to the forwards matching
+ // path at the end.
+ YarrOp& endOp = m_ops[op.m_nextOp];
+ m_backtrackingState.linkTo(endOp.m_reentry, this);
+ break;
+ }
+ case OpParenthesesSubpatternTerminalEnd:
+ // We should never be backtracking to here (hence the 'terminal' in the name).
+ ASSERT(m_backtrackingState.isEmpty());
+ m_backtrackingState.append(op.m_jumps);
+ break;
- success.link(this);
+ // OpParentheticalAssertionBegin/End
+ case OpParentheticalAssertionBegin: {
+ PatternTerm* term = op.m_term;
+ YarrOp& endOp = m_ops[op.m_nextOp];
- state.checkedTotal += countCheckedAfterAssertion;
- }
- }
+ // We need to handle the backtracks upon backtracking back out
+ // of a parenthetical assertion if either we need to correct
+ // the input index, or the assertion was inverted.
+ if (op.m_checkAdjust || term->invert()) {
+ m_backtrackingState.link(this);
- void generateTerm(TermGenerationState& state)
- {
- PatternTerm& term = state.term();
+ if (op.m_checkAdjust)
+ add32(Imm32(op.m_checkAdjust), index);
- switch (term.type) {
- case PatternTerm::TypeAssertionBOL:
- generateAssertionBOL(state);
- break;
+ // In an inverted assertion failure to match the subpattern
+ // is treated as a successful match - jump to the end of the
+ // subpattern. We already have adjusted the input position
+ // back to that before the assertion, which is correct.
+ if (term->invert())
+ jump(endOp.m_reentry);
- case PatternTerm::TypeAssertionEOL:
- generateAssertionEOL(state);
- break;
+ m_backtrackingState.fallthrough();
+ }
- case PatternTerm::TypeAssertionWordBoundary:
- generateAssertionWordBoundary(state);
- break;
+ // The End node's jump list will contain any backtracks into
+ // the end of the assertion. Also, if inverted, we will have
+ // added the failure caused by a successful match to this.
+ m_backtrackingState.append(endOp.m_jumps);
- case PatternTerm::TypePatternCharacter:
- switch (term.quantityType) {
- case QuantifierFixedCount:
- if (term.quantityCount == 1) {
- if (state.isSinglePatternCharacterLookaheadTerm() && (state.lookaheadTerm().inputPosition == (term.inputPosition + 1))) {
- generatePatternCharacterPair(state);
- state.nextTerm();
- } else
- generatePatternCharacterSingle(state);
- } else
- generatePatternCharacterFixed(state);
- break;
- case QuantifierGreedy:
- generatePatternCharacterGreedy(state);
- break;
- case QuantifierNonGreedy:
- generatePatternCharacterNonGreedy(state);
+ m_checked += op.m_checkAdjust;
break;
}
- break;
+ case OpParentheticalAssertionEnd: {
+ // FIXME: We should really be clearing any nested subpattern
+ // matches on bailing out from after the pattern. Firefox has
+ // this bug too (presumably because they use YARR!)
- case PatternTerm::TypeCharacterClass:
- switch (term.quantityType) {
- case QuantifierFixedCount:
- if (term.quantityCount == 1)
- generateCharacterClassSingle(state);
- else
- generateCharacterClassFixed(state);
- break;
- case QuantifierGreedy:
- generateCharacterClassGreedy(state);
- break;
- case QuantifierNonGreedy:
- generateCharacterClassNonGreedy(state);
+ // Never backtrack into an assertion; later failures bail to before the begin.
+ m_backtrackingState.takeBacktracksToJumpList(op.m_jumps, this);
+
+ YarrOp& lastOp = m_ops[op.m_previousOp];
+ m_checked -= lastOp.m_checkAdjust;
break;
}
- break;
- case PatternTerm::TypeBackReference:
- m_shouldFallBack = true;
- break;
-
- case PatternTerm::TypeForwardReference:
- break;
-
- case PatternTerm::TypeParenthesesSubpattern:
- if (term.quantityCount == 1 && !term.parentheses.isCopy)
- generateParenthesesSingle(state);
- else if (term.parentheses.isTerminal)
- generateParenthesesGreedyNoBacktrack(state);
- else
- m_shouldFallBack = true;
- break;
+ case OpMatchFailed:
+ break;
+ }
- case PatternTerm::TypeParentheticalAssertion:
- generateParentheticalAssertion(state);
- break;
- }
+ } while (opIndex);
}
- void generateDisjunction(PatternDisjunction* disjunction)
+ // Compilation methods:
+ // ====================
+
+ // opCompileParenthesesSubpattern
+ // Emits ops for a subpattern (set of parentheses). These consist
+ // of a set of alternatives wrapped in an outer set of nodes for
+ // the parentheses.
+ // Supported types of parentheses are 'Once' (quantityCount == 1)
+ // and 'Terminal' (non-capturing parentheses quantified as greedy
+ // and infinite).
+ // Alternatives will use the 'Simple' set of ops if either the
+ // subpattern is terminal (in which case we will never need to
+ // backtrack), or if the subpattern only contains one alternative.
+ void opCompileParenthesesSubpattern(PatternTerm* term)
{
- TermGenerationState state(disjunction, 0);
- state.resetAlternative();
-
- // check availability for the next alternative
- int countCheckedForCurrentAlternative = 0;
- int countToCheckForFirstAlternative = 0;
- bool hasShorterAlternatives = false;
- bool setRepeatAlternativeLabels = false;
- JumpList notEnoughInputForPreviousAlternative;
- Label firstAlternative;
- Label firstAlternativeInputChecked;
-
- // The label 'firstAlternative' is used to plant a check to see if there is
- // sufficient input available to run the first repeating alternative.
- // The label 'firstAlternativeInputChecked' will jump directly to matching
- // the first repeating alternative having skipped this check.
-
- if (state.alternativeValid()) {
- PatternAlternative* alternative = state.alternative();
- if (!alternative->onceThrough()) {
- firstAlternative = Label(this);
- setRepeatAlternativeLabels = true;
+ YarrOpCode parenthesesBeginOpCode;
+ YarrOpCode parenthesesEndOpCode;
+ YarrOpCode alternativeBeginOpCode = OpSimpleNestedAlternativeBegin;
+ YarrOpCode alternativeNextOpCode = OpSimpleNestedAlternativeNext;
+ YarrOpCode alternativeEndOpCode = OpSimpleNestedAlternativeEnd;
+
+ // We can currently only compile quantity 1 subpatterns that are
+ // not copies. We generate a copy in the case of a range quantifier,
+ // e.g. /(?:x){3,9}/, or /(?:x)+/ (These are effectively expanded to
+ // /(?:x){3,3}(?:x){0,6}/ and /(?:x)(?:x)*/ repectively). The problem
+ // comes where the subpattern is capturing, in which case we would
+ // need to restore the capture from the first subpattern upon a
+ // failure in the second.
+ if (term->quantityCount == 1 && !term->parentheses.isCopy) {
+ // Select the 'Once' nodes.
+ parenthesesBeginOpCode = OpParenthesesSubpatternOnceBegin;
+ parenthesesEndOpCode = OpParenthesesSubpatternOnceEnd;
+
+ // If there is more than one alternative we cannot use the 'simple' nodes.
+ if (term->parentheses.disjunction->m_alternatives.size() != 1) {
+ alternativeBeginOpCode = OpNestedAlternativeBegin;
+ alternativeNextOpCode = OpNestedAlternativeNext;
+ alternativeEndOpCode = OpNestedAlternativeEnd;
}
- countToCheckForFirstAlternative = alternative->m_minimumSize;
- state.checkedTotal += countToCheckForFirstAlternative;
- if (countToCheckForFirstAlternative)
- notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
- countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
+ } else if (term->parentheses.isTerminal) {
+ // Select the 'Terminal' nodes.
+ parenthesesBeginOpCode = OpParenthesesSubpatternTerminalBegin;
+ parenthesesEndOpCode = OpParenthesesSubpatternTerminalEnd;
+ } else {
+ // This subpattern is not supported by the JIT.
+ m_shouldFallBack = true;
+ return;
}
- if (setRepeatAlternativeLabels)
- firstAlternativeInputChecked = Label(this);
-
- while (state.alternativeValid()) {
- PatternAlternative* alternative = state.alternative();
- optimizeAlternative(alternative);
+ size_t parenBegin = m_ops.size();
+ m_ops.append(parenthesesBeginOpCode);
- // Track whether any alternatives are shorter than the first one.
- if (!alternative->onceThrough())
- hasShorterAlternatives = hasShorterAlternatives || (countCheckedForCurrentAlternative < countToCheckForFirstAlternative);
+ m_ops.append(alternativeBeginOpCode);
+ m_ops.last().m_previousOp = notFound;
+ m_ops.last().m_term = term;
+ Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
+ for (unsigned i = 0; i < alternatives.size(); ++i) {
+ size_t lastOpIndex = m_ops.size() - 1;
- for (state.resetTerm(); state.termValid(); state.nextTerm())
- generateTerm(state);
+ PatternAlternative* nestedAlternative = alternatives[i];
+ opCompileAlternative(nestedAlternative);
- // If we get here, the alternative matched.
- if (m_pattern.m_body->m_callFrameSize)
- addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+ size_t thisOpIndex = m_ops.size();
+ m_ops.append(YarrOp(alternativeNextOpCode));
- ASSERT(index != returnRegister);
- if (m_pattern.m_body->m_hasFixedSize) {
- move(index, returnRegister);
- if (alternative->m_minimumSize)
- sub32(Imm32(alternative->m_minimumSize), returnRegister);
-
- store32(returnRegister, output);
- } else
- load32(Address(output), returnRegister);
+ YarrOp& lastOp = m_ops[lastOpIndex];
+ YarrOp& thisOp = m_ops[thisOpIndex];
- store32(index, Address(output, 4));
-
- generateReturn();
-
- state.nextAlternative();
- if (alternative->onceThrough() && state.alternativeValid())
- state.clearBacktrack();
-
- // if there are any more alternatives, plant the check for input before looping.
- if (state.alternativeValid()) {
- state.setJumpListToPriorParen(0);
- PatternAlternative* nextAlternative = state.alternative();
- if (!setRepeatAlternativeLabels && !nextAlternative->onceThrough()) {
- // We have handled non-repeating alternatives, jump to next iteration
- // and loop over repeating alternatives.
- state.jumpToBacktrack(this);
-
- countToCheckForFirstAlternative = nextAlternative->m_minimumSize;
+ lastOp.m_alternative = nestedAlternative;
+ lastOp.m_nextOp = thisOpIndex;
+ thisOp.m_previousOp = lastOpIndex;
+ thisOp.m_term = term;
+ }
+ YarrOp& lastOp = m_ops.last();
+ ASSERT(lastOp.m_op == alternativeNextOpCode);
+ lastOp.m_op = alternativeEndOpCode;
+ lastOp.m_alternative = 0;
+ lastOp.m_nextOp = notFound;
- // If we get here, there the last input checked failed.
- notEnoughInputForPreviousAlternative.link(this);
+ size_t parenEnd = m_ops.size();
+ m_ops.append(parenthesesEndOpCode);
- state.linkAlternativeBacktracks(this);
+ m_ops[parenBegin].m_term = term;
+ m_ops[parenBegin].m_previousOp = notFound;
+ m_ops[parenBegin].m_nextOp = parenEnd;
+ m_ops[parenEnd].m_term = term;
+ m_ops[parenEnd].m_previousOp = parenBegin;
+ m_ops[parenEnd].m_nextOp = notFound;
+ }
- // Back up to start the looping alternatives.
- if (countCheckedForCurrentAlternative)
- sub32(Imm32(countCheckedForCurrentAlternative), index);
+ // opCompileParentheticalAssertion
+ // Emits ops for a parenthetical assertion. These consist of an
+ // OpSimpleNestedAlternativeBegin/Next/End set of nodes wrapping
+ // the alternatives, with these wrapped by an outer pair of
+ // OpParentheticalAssertionBegin/End nodes.
+ // We can always use the OpSimpleNestedAlternative nodes in the
+ // case of parenthetical assertions since these only ever match
+ // once, and will never backtrack back into the assertion.
+ void opCompileParentheticalAssertion(PatternTerm* term)
+ {
+ size_t parenBegin = m_ops.size();
+ m_ops.append(OpParentheticalAssertionBegin);
+
+ m_ops.append(OpSimpleNestedAlternativeBegin);
+ m_ops.last().m_previousOp = notFound;
+ m_ops.last().m_term = term;
+ Vector<PatternAlternative*>& alternatives = term->parentheses.disjunction->m_alternatives;
+ for (unsigned i = 0; i < alternatives.size(); ++i) {
+ size_t lastOpIndex = m_ops.size() - 1;
+
+ PatternAlternative* nestedAlternative = alternatives[i];
+ opCompileAlternative(nestedAlternative);
+
+ size_t thisOpIndex = m_ops.size();
+ m_ops.append(YarrOp(OpSimpleNestedAlternativeNext));
+
+ YarrOp& lastOp = m_ops[lastOpIndex];
+ YarrOp& thisOp = m_ops[thisOpIndex];
+
+ lastOp.m_alternative = nestedAlternative;
+ lastOp.m_nextOp = thisOpIndex;
+ thisOp.m_previousOp = lastOpIndex;
+ thisOp.m_term = term;
+ }
+ YarrOp& lastOp = m_ops.last();
+ ASSERT(lastOp.m_op == OpSimpleNestedAlternativeNext);
+ lastOp.m_op = OpSimpleNestedAlternativeEnd;
+ lastOp.m_alternative = 0;
+ lastOp.m_nextOp = notFound;
+
+ size_t parenEnd = m_ops.size();
+ m_ops.append(OpParentheticalAssertionEnd);
+
+ m_ops[parenBegin].m_term = term;
+ m_ops[parenBegin].m_previousOp = notFound;
+ m_ops[parenBegin].m_nextOp = parenEnd;
+ m_ops[parenEnd].m_term = term;
+ m_ops[parenEnd].m_previousOp = parenBegin;
+ m_ops[parenEnd].m_nextOp = notFound;
+ }
- firstAlternative = Label(this);
+ // opCompileAlternative
+ // Called to emit nodes for all terms in an alternative.
+ void opCompileAlternative(PatternAlternative* alternative)
+ {
+ optimizeAlternative(alternative);
- state.checkedTotal = countToCheckForFirstAlternative;
- if (countToCheckForFirstAlternative)
- notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForFirstAlternative));
+ for (unsigned i = 0; i < alternative->m_terms.size(); ++i) {
+ PatternTerm* term = &alternative->m_terms[i];
- countCheckedForCurrentAlternative = countToCheckForFirstAlternative;
+ switch (term->type) {
+ case PatternTerm::TypeParenthesesSubpattern:
+ opCompileParenthesesSubpattern(term);
+ break;
- firstAlternativeInputChecked = Label(this);
+ case PatternTerm::TypeParentheticalAssertion:
+ opCompileParentheticalAssertion(term);
+ break;
- setRepeatAlternativeLabels = true;
- } else {
- int countToCheckForNextAlternative = nextAlternative->m_minimumSize;
-
- if (countCheckedForCurrentAlternative > countToCheckForNextAlternative) { // CASE 1: current alternative was longer than the next one.
- // If we get here, then the last input checked failed.
- notEnoughInputForPreviousAlternative.link(this);
-
- // Check if sufficent input available to run the next alternative
- notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
- // We are now in the correct state to enter the next alternative; this add is only required
- // to mirror and revert operation of the sub32, just below.
- add32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
-
- // If we get here, then the last input checked passed.
- state.linkAlternativeBacktracks(this);
-
- // No need to check if we can run the next alternative, since it is shorter -
- // just update index.
- sub32(Imm32(countCheckedForCurrentAlternative - countToCheckForNextAlternative), index);
- } else if (countCheckedForCurrentAlternative < countToCheckForNextAlternative) { // CASE 2: next alternative is longer than the current one.
- // If we get here, then the last input checked failed.
- // If there is insufficient input to run the current alternative, and the next alternative is longer,
- // then there is definitely not enough input to run it - don't even check. Just adjust index, as if
- // we had checked.
- notEnoughInputForPreviousAlternative.link(this);
- add32(Imm32(countToCheckForNextAlternative - countCheckedForCurrentAlternative), index);
- notEnoughInputForPreviousAlternative.append(jump());
-
- // The next alternative is longer than the current one; check the difference.
- state.linkAlternativeBacktracks(this);
-
- notEnoughInputForPreviousAlternative.append(jumpIfNoAvailableInput(countToCheckForNextAlternative - countCheckedForCurrentAlternative));
- } else { // CASE 3: Both alternatives are the same length.
- ASSERT(countCheckedForCurrentAlternative == countToCheckForNextAlternative);
-
- // If the next alterative is the same length as this one, then no need to check the input -
- // if there was sufficent input to run the current alternative then there is sufficient
- // input to run the next one; if not, there isn't.
- state.linkAlternativeBacktracks(this);
- }
- state.checkedTotal -= countCheckedForCurrentAlternative;
- countCheckedForCurrentAlternative = countToCheckForNextAlternative;
- state.checkedTotal += countCheckedForCurrentAlternative;
- }
+ default:
+ m_ops.append(term);
}
}
+ }
- // If we get here, all Alternatives failed...
+ // opCompileBody
+ // This method compiles the body disjunction of the regular expression.
+ // The body consists of two sets of alternatives - zero or more 'once
+ // through' (BOL anchored) alternatives, followed by zero or more
+ // repeated alternatives.
+ // For each of these two sets of alteratives, if not empty they will be
+ // wrapped in a set of OpBodyAlternativeBegin/Next/End nodes (with the
+ // 'begin' node referencing the first alternative, and 'next' nodes
+ // referencing any further alternatives. The begin/next/end nodes are
+ // linked together in a doubly linked list. In the case of repeating
+ // alternatives, the end node is also linked back to the beginning.
+ // If no repeating alternatives exist, then a OpMatchFailed node exists
+ // to return the failing result.
+ void opCompileBody(PatternDisjunction* disjunction)
+ {
+ Vector<PatternAlternative*>& alternatives = disjunction->m_alternatives;
+ size_t currentAlternativeIndex = 0;
- state.checkedTotal -= countCheckedForCurrentAlternative;
+ // Emit the 'once through' alternatives.
+ if (alternatives.size() && alternatives[0]->onceThrough()) {
+ m_ops.append(YarrOp(OpBodyAlternativeBegin));
+ m_ops.last().m_previousOp = notFound;
- if (!setRepeatAlternativeLabels) {
- // If there are no alternatives that need repeating (all are marked 'onceThrough') then just link
- // the match failures to this point, and fall through to the return below.
- state.linkAlternativeBacktracks(this, true);
+ do {
+ size_t lastOpIndex = m_ops.size() - 1;
+ PatternAlternative* alternative = alternatives[currentAlternativeIndex];
+ opCompileAlternative(alternative);
- notEnoughInputForPreviousAlternative.link(this);
- } else {
- // How much more input need there be to be able to retry from the first alternative?
- // examples:
- // /yarr_jit/ or /wrec|pcre/
- // In these examples we need check for one more input before looping.
- // /yarr_jit|pcre/
- // In this case we need check for 5 more input to loop (+4 to allow for the first alterative
- // being four longer than the last alternative checked, and another +1 to effectively move
- // the start position along by one).
- // /yarr|rules/ or /wrec|notsomuch/
- // In these examples, provided that there was sufficient input to have just been matching for
- // the second alternative we can loop without checking for available input (since the second
- // alternative is longer than the first). In the latter example we need to decrement index
- // (by 4) so the start position is only progressed by 1 from the last iteration.
- int incrementForNextIter = (countToCheckForFirstAlternative - countCheckedForCurrentAlternative) + 1;
-
- // First, deal with the cases where there was sufficient input to try the last alternative.
- if (incrementForNextIter > 0) // We need to check for more input anyway, fall through to the checking below.
- state.linkAlternativeBacktracks(this, true);
- else if (m_pattern.m_body->m_hasFixedSize && !incrementForNextIter) // No need to update anything, link these backtracks straight to the to pof the loop!
- state.linkAlternativeBacktracksTo(this, firstAlternativeInputChecked, true);
- else { // no need to check the input, but we do have some bookkeeping to do first.
- state.linkAlternativeBacktracks(this, true);
-
- // Where necessary update our preserved start position.
- if (!m_pattern.m_body->m_hasFixedSize) {
- move(index, regT0);
- sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
- store32(regT0, Address(output));
- }
+ size_t thisOpIndex = m_ops.size();
+ m_ops.append(YarrOp(OpBodyAlternativeNext));
- // Update index if necessary, and loop (without checking).
- if (incrementForNextIter)
- add32(Imm32(incrementForNextIter), index);
- jump().linkTo(firstAlternativeInputChecked, this);
- }
+ YarrOp& lastOp = m_ops[lastOpIndex];
+ YarrOp& thisOp = m_ops[thisOpIndex];
- notEnoughInputForPreviousAlternative.link(this);
- // Update our idea of the start position, if we're tracking this.
- if (!m_pattern.m_body->m_hasFixedSize) {
- if (countCheckedForCurrentAlternative - 1) {
- move(index, regT0);
- sub32(Imm32(countCheckedForCurrentAlternative - 1), regT0);
- store32(regT0, Address(output));
- } else
- store32(index, Address(output));
- }
+ lastOp.m_alternative = alternative;
+ lastOp.m_nextOp = thisOpIndex;
+ thisOp.m_previousOp = lastOpIndex;
+
+ ++currentAlternativeIndex;
+ } while (currentAlternativeIndex < alternatives.size() && alternatives[currentAlternativeIndex]->onceThrough());
- // Check if there is sufficent input to run the first alternative again.
- jumpIfAvailableInput(incrementForNextIter).linkTo(firstAlternativeInputChecked, this);
- // No - insufficent input to run the first alteranative, are there any other alternatives we
- // might need to check? If so, the last check will have left the index incremented by
- // (countToCheckForFirstAlternative + 1), so we need test whether countToCheckForFirstAlternative
- // LESS input is available, to have the effect of just progressing the start position by 1
- // from the last iteration. If this check passes we can just jump up to the check associated
- // with the first alternative in the loop. This is a bit sad, since we'll end up trying the
- // first alternative again, and this check will fail (otherwise the check planted just above
- // here would have passed). This is a bit sad, however it saves trying to do something more
- // complex here in compilation, and in the common case we should end up coallescing the checks.
- //
- // FIXME: a nice improvement here may be to stop trying to match sooner, based on the least
- // of the minimum-alternative-lengths. E.g. if I have two alternatives of length 200 and 150,
- // and a string of length 100, we'll end up looping index from 0 to 100, checking whether there
- // is sufficient input to run either alternative (constantly failing). If there had been only
- // one alternative, or if the shorter alternative had come first, we would have terminated
- // immediately. :-/
- if (hasShorterAlternatives)
- jumpIfAvailableInput(-countToCheckForFirstAlternative).linkTo(firstAlternative, this);
- // index will now be a bit garbled (depending on whether 'hasShorterAlternatives' is true,
- // it has either been incremented by 1 or by (countToCheckForFirstAlternative + 1) ...
- // but since we're about to return a failure this doesn't really matter!)
+ YarrOp& lastOp = m_ops.last();
+
+ ASSERT(lastOp.m_op == OpBodyAlternativeNext);
+ lastOp.m_op = OpBodyAlternativeEnd;
+ lastOp.m_alternative = 0;
+ lastOp.m_nextOp = notFound;
}
- if (m_pattern.m_body->m_callFrameSize)
- addPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
+ if (currentAlternativeIndex == alternatives.size()) {
+ m_ops.append(YarrOp(OpMatchFailed));
+ return;
+ }
- move(TrustedImm32(-1), returnRegister);
+ // Emit the repeated alternatives.
+ size_t repeatLoop = m_ops.size();
+ m_ops.append(YarrOp(OpBodyAlternativeBegin));
+ m_ops.last().m_previousOp = notFound;
+ do {
+ size_t lastOpIndex = m_ops.size() - 1;
+ PatternAlternative* alternative = alternatives[currentAlternativeIndex];
+ ASSERT(!alternative->onceThrough());
+ opCompileAlternative(alternative);
+
+ size_t thisOpIndex = m_ops.size();
+ m_ops.append(YarrOp(OpBodyAlternativeNext));
- generateReturn();
+ YarrOp& lastOp = m_ops[lastOpIndex];
+ YarrOp& thisOp = m_ops[thisOpIndex];
- m_expressionState.emitParenthesesTail(this);
- m_expressionState.emitIndirectJumpTable(this);
- m_expressionState.linkToNextIteration(this);
+ lastOp.m_alternative = alternative;
+ lastOp.m_nextOp = thisOpIndex;
+ thisOp.m_previousOp = lastOpIndex;
+
+ ++currentAlternativeIndex;
+ } while (currentAlternativeIndex < alternatives.size());
+ YarrOp& lastOp = m_ops.last();
+ ASSERT(lastOp.m_op == OpBodyAlternativeNext);
+ lastOp.m_op = OpBodyAlternativeEnd;
+ lastOp.m_alternative = 0;
+ lastOp.m_nextOp = repeatLoop;
}
void generateEnter()
@@ -2230,10 +2325,11 @@ public:
YarrGenerator(YarrPattern& pattern)
: m_pattern(pattern)
, m_shouldFallBack(false)
+ , m_checked(0)
{
}
- void generate()
+ void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
{
generateEnter();
@@ -2243,26 +2339,50 @@ public:
if (m_pattern.m_body->m_callFrameSize)
subPtr(Imm32(m_pattern.m_body->m_callFrameSize * sizeof(void*)), stackPointerRegister);
- generateDisjunction(m_pattern.m_body);
- }
-
- void compile(JSGlobalData* globalData, YarrCodeBlock& jitObject)
- {
- generate();
+ // Compile the pattern to the internal 'YarrOp' representation.
+ opCompileBody(m_pattern.m_body);
- LinkBuffer patchBuffer(this, globalData->regexAllocator);
+ // If we encountered anything we can't handle in the JIT code
+ // (e.g. backreferences) then return early.
+ if (m_shouldFallBack) {
+ jitObject.setFallBack(true);
+ return;
+ }
- for (unsigned i = 0; i < m_expressionState.m_backtrackRecords.size(); ++i)
- patchBuffer.patch(m_expressionState.m_backtrackRecords[i].dataLabel, patchBuffer.locationOf(m_expressionState.m_backtrackRecords[i].backtrackLocation));
+ generate();
+ backtrack();
- jitObject.set(patchBuffer.finalizeCode());
+ // Link & finalize the code.
+ LinkBuffer linkBuffer(this, globalData->regexAllocator);
+ m_backtrackingState.linkDataLabels(linkBuffer);
+ jitObject.set(linkBuffer.finalizeCode());
jitObject.setFallBack(m_shouldFallBack);
}
private:
YarrPattern& m_pattern;
+
+ // Used to detect regular expression constructs that are not currently
+ // supported in the JIT; fall back to the interpreter when this is detected.
bool m_shouldFallBack;
- GenerationState m_expressionState;
+
+ // The regular expression expressed as a linear sequence of operations.
+ Vector<YarrOp, 128> m_ops;
+
+ // This records the current input offset being applied due to the current
+ // set of alternatives we are nested within. E.g. when matching the
+ // character 'b' within the regular expression /abc/, we will know that
+ // the minimum size for the alternative is 3, checked upon entry to the
+ // alternative, and that 'b' is at offset 1 from the start, and as such
+ // when matching 'b' we need to apply an offset of -2 to the load.
+ //
+ // FIXME: This should go away. Rather than tracking this value throughout
+ // code generation, we should gather this information up front & store it
+ // on the YarrOp structure.
+ int m_checked;
+
+ // This class records state whilst generating the backtracking path of code.
+ BacktrackingState m_backtrackingState;
};
void jitCompile(YarrPattern& pattern, JSGlobalData* globalData, YarrCodeBlock& jitObject)