summary refs log tree commit diff stats
path: root/src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp')
-rw-r--r-- src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp 1499
1 files changed, 795 insertions, 704 deletions
diff --git a/src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp b/src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp
index 0bacb22..8e89c18 100644
--- a/src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp
+++ b/src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp
@@ -31,14 +31,12 @@
#include <ctype.h>
#include <limits.h>
#include <string.h>
-#include <wtf/ASCIICType.h>
#include <wtf/Assertions.h>
-#include <wtf/unicode/Unicode.h>
using namespace WTF;
using namespace Unicode;
-// we can't specify the namespace in yacc's C output, so do it here
+// We can't specify the namespace in yacc's C output, so do it here instead.
using namespace JSC;
#ifndef KDE_USE_FINAL
@@ -48,7 +46,7 @@ using namespace JSC;
#include "Lookup.h"
#include "Lexer.lut.h"
-// a bridge for yacc from the C world to C++
+// A bridge for yacc from the C world to the C++ world.
int jscyylex(void* lvalp, void* llocp, void* globalData)
{
return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
@@ -56,825 +54,895 @@ int jscyylex(void* lvalp, void* llocp, void* globalData)
namespace JSC {
-static bool isDecimalDigit(int);
+static const UChar byteOrderMark = 0xFEFF;
Lexer::Lexer(JSGlobalData* globalData) // Builds a lexer tied to globalData; the new version initializes only three members here.
- : yylineno(1)
- , m_restrKeyword(false)
- , m_eatNextIdentifier(false)
- , m_stackToken(-1)
- , m_lastToken(-1)
- , m_position(0)
- , m_code(0)
- , m_length(0)
- , m_isReparsing(false)
- , m_atLineStart(true)
- , m_current(0)
- , m_next1(0)
- , m_next2(0)
- , m_next3(0)
- , m_currentOffset(0)
- , m_nextOffset1(0)
- , m_nextOffset2(0)
- , m_nextOffset3(0)
+ : m_isReparsing(false) // Per-source state (line number, code pointers, flags) is assigned later in setCode().
, m_globalData(globalData)
- , m_mainTable(JSC::mainTable)
+ , m_keywordTable(JSC::mainTable) // Renamed from m_mainTable; still initialized from JSC::mainTable.
{
- m_buffer8.reserveCapacity(initialReadBufferCapacity);
- m_buffer16.reserveCapacity(initialReadBufferCapacity);
+ m_buffer8.reserveInitialCapacity(initialReadBufferCapacity); // Pre-size the 8-bit buffer that record8() appends to.
+ m_buffer16.reserveInitialCapacity(initialReadBufferCapacity); // Pre-size the 16-bit buffer that record16() appends to.
}
Lexer::~Lexer() // Destructor: its only visible action is tearing down the keyword table.
{
- m_mainTable.deleteTable();
+ m_keywordTable.deleteTable(); // Same call as before under the member's new name; what deleteTable() frees is defined elsewhere.
+}
+
+inline const UChar* Lexer::currentCharacter() const // Returns the address in the source buffer of the character held in m_current.
+{
+ return m_code - 4; // Every shiftN() advances m_code by N and shift4() primes the window, so m_code stays 4 ahead of m_current.
+}
+
+inline int Lexer::currentOffset() const // Returns the offset of the current character, counted in UChars from m_codeStart.
+{
+ return currentCharacter() - m_codeStart; // Pointer difference narrowed to int; setCode() asserts it equals source.startOffset() after priming.
+}
+
+ALWAYS_INLINE void Lexer::shift1() // Advances the four-slot lookahead window (m_current, m_next1, m_next2, m_next3) by one character.
+{
+ m_current = m_next1;
+ m_next1 = m_next2;
+ m_next2 = m_next3;
+ if (LIKELY(m_code < m_codeEnd)) // m_code points at the first character that has not entered the window yet.
+ m_next3 = m_code[0];
+ else
+ m_next3 = -1; // -1 is the past-the-end marker; lex() compares m_current against -1 to detect end of input.
+
+ ++m_code; // Advanced even when already at or past m_codeEnd, so currentCharacter() (m_code - 4) keeps tracking m_current.
+}
+
+ALWAYS_INLINE void Lexer::shift2() // Advances the lookahead window by two characters in one step.
+{
+ m_current = m_next2; // The two oldest slots are dropped; the two newest slide to the front.
+ m_next1 = m_next3;
+ if (LIKELY(m_code + 1 < m_codeEnd)) { // Common case: both refill characters lie inside the source range.
+ m_next2 = m_code[0];
+ m_next3 = m_code[1];
+ } else {
+ m_next2 = m_code < m_codeEnd ? m_code[0] : -1; // At most one real character is left; missing ones become -1.
+ m_next3 = -1;
+ }
+
+ m_code += 2; // Unconditional, mirroring shift1(), to keep m_code four ahead of m_current.
+}
+
+ALWAYS_INLINE void Lexer::shift3() // Advances the lookahead window by three characters in one step.
+{
+ m_current = m_next3; // Only the newest slot survives; the other three are refilled from the buffer.
+ if (LIKELY(m_code + 2 < m_codeEnd)) { // Common case: all three refill characters lie inside the source range.
+ m_next1 = m_code[0];
+ m_next2 = m_code[1];
+ m_next3 = m_code[2];
+ } else {
+ m_next1 = m_code < m_codeEnd ? m_code[0] : -1; // Near the end: take what exists and fill the rest with -1.
+ m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
+ m_next3 = -1; // The fast-path test already failed, so m_code[2] is known to be out of range.
+ }
+
+ m_code += 3; // Unconditional, mirroring shift1(), to keep m_code four ahead of m_current.
+}
+
+ALWAYS_INLINE void Lexer::shift4() // Replaces the whole lookahead window with the next four characters; setCode() also uses it to prime the window.
+{
+ if (LIKELY(m_code + 3 < m_codeEnd)) { // Common case: four more characters are available.
+ m_current = m_code[0];
+ m_next1 = m_code[1];
+ m_next2 = m_code[2];
+ m_next3 = m_code[3];
+ } else {
+ m_current = m_code < m_codeEnd ? m_code[0] : -1; // Fewer than four characters left: the missing slots become -1.
+ m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
+ m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
+ m_next3 = -1; // The fast-path test already failed, so m_code[3] is known to be out of range.
+ }
+
+ m_code += 4; // After this, currentCharacter() (m_code - 4) addresses the character now in m_current.
}
void Lexer::setCode(const SourceCode& source) // Points the lexer at source, resets per-source state and primes the lookahead window.
{
- yylineno = source.firstLine();
- m_restrKeyword = false;
+ m_lineNumber = source.firstLine(); // Line counting starts at the source's declared first line.
m_delimited = false;
- m_eatNextIdentifier = false;
- m_stackToken = -1;
m_lastToken = -1;
- m_position = source.startOffset();
+ const UChar* data = source.provider()->data();
+
m_source = &source;
- m_code = source.provider()->data();
- m_length = source.endOffset();
- m_skipLF = false;
- m_skipCR = false;
+ m_codeStart = data; // Base address used by currentOffset(); this is the start of the provider's data, not of this source's range.
+ m_code = data + source.startOffset(); // First character to lex.
+ m_codeEnd = data + source.endOffset(); // One past the last character to lex.
m_error = false;
m_atLineStart = true;
- // read first characters
- shift(4);
+ // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
+ // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
+ if (source.provider()->hasBOMs()) {
+ for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) { // NOTE(review): scans from m_codeStart, while the copy below starts at m_code; a BOM before startOffset also triggers the copy.
+ if (UNLIKELY(*p == byteOrderMark)) {
+ copyCodeWithoutBOMs(); // Repoints m_code, m_codeStart and m_codeEnd at a BOM-free copy.
+ break;
+ }
+ }
+ }
+
+ // Read the first characters into the 4-character buffer.
+ shift4();
+ ASSERT(currentOffset() == source.startOffset()); // Checks the "m_code is four ahead of m_current" relation right after priming.
}
-void Lexer::shift(unsigned p)
+void Lexer::copyCodeWithoutBOMs() // Copies [m_code, m_codeEnd) without byteOrderMark characters into m_codeWithoutBOMs and repoints the lexer at the copy.
{
- // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
- // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
-
- while (p--) {
- m_current = m_next1;
- m_next1 = m_next2;
- m_next2 = m_next3;
- m_currentOffset = m_nextOffset1;
- m_nextOffset1 = m_nextOffset2;
- m_nextOffset2 = m_nextOffset3;
- do {
- if (m_position >= m_length) {
- m_nextOffset3 = m_position;
- m_position++;
- m_next3 = -1;
- break;
- }
- m_nextOffset3 = m_position;
- m_next3 = m_code[m_position++];
- } while (m_next3 == 0xFEFF);
+ // Note: In this case, the character offset data for debugging will be incorrect.
+ // If it's important to correctly debug code with extraneous BOMs, then the caller
+ // should strip the BOMs when creating the SourceProvider object and do its own
+ // mapping of offsets within the stripped text to original text offset.
+
+ m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code); // Upper bound: the copy can only be shorter than the original range.
+ for (const UChar* p = m_code; p < m_codeEnd; ++p) { // NOTE(review): appends without clearing first; presumably m_codeWithoutBOMs is emptied elsewhere between sources - confirm.
+ UChar c = *p;
+ if (c != byteOrderMark)
+ m_codeWithoutBOMs.append(c);
+ }
+ ptrdiff_t startDelta = m_codeStart - m_code; // Distance from the first lexed character back to the offset base, as set up in setCode().
+ m_code = m_codeWithoutBOMs.data();
+ m_codeStart = m_code + startDelta; // Keeps currentOffset() at the original start offset for the first character; NOTE(review): may point before the new buffer when startDelta is non-zero.
+ m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
+}
+
+void Lexer::shiftLineTerminator() // Consumes one line break (one or two characters) and bumps m_lineNumber once.
+{
+ ASSERT(isLineTerminator(m_current)); // Precondition: the caller has already seen a line terminator in m_current.
+
+ // Allow both CRLF and LFCR.
+ if (m_current + m_next1 == '\n' + '\r') // One sum test covers "\r\n" and "\n\r"; it leans on the assertion above to exclude unrelated pairs with the same sum.
+ shift2();
+ else
+ shift1();
+
+ ++m_lineNumber; // A two-character pair counts as a single line break.
+}
+
+ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length) // Builds an Identifier from length UChars at characters and stores it in m_identifiers.
+{
+ m_identifiers.append(Identifier(m_globalData, characters, length));
+ return &m_identifiers.last(); // Points into m_identifiers; NOTE(review): how long it stays valid depends on that container's type, not shown here.
+}
+
+inline bool Lexer::lastTokenWasRestrKeyword() const // True when the previous token was continue, break, return or throw; replaces the removed m_restrKeyword flag.
+{
+ return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW; // lex() uses this after a line terminator to decide whether to emit ';'.
+}
+
+static NEVER_INLINE bool isNonASCIIIdentStart(int c) // Slow path of isIdentStart(): checks c against the letter categories listed below.
+{
+ return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other); // Non-zero mask result converts to true.
+}
+
+static inline bool isIdentStart(int c) // True when c may begin an identifier.
+{
+ return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c); // ASCII is decided inline (letter, '$', '_'); anything else goes to the out-of-line category check.
+}
+
+static NEVER_INLINE bool isNonASCIIIdentPart(int c) // Slow path of isIdentPart(): the identifier-start letter categories plus the four extra categories on the next line.
+{
+ return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
+ | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector); // Categories accepted only after the first character.
+}
+
+static inline bool isIdentPart(int c) // True when c may continue an identifier. NOTE(review): lex() passes m_current, which is -1 at end of input; how isASCII()/category() treat -1 is not visible here - confirm.
+{
+ return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c); // ASCII is decided inline (letter, digit, '$', '_'); anything else goes to the out-of-line category check.
+}
+
+static inline int singleEscape(int c) // Maps the character after a backslash in a string literal to the character it denotes.
+{
+ switch (c) {
+ case 'b':
+ return 0x08;
+ case 't':
+ return 0x09;
+ case 'n':
+ return 0x0A;
+ case 'v':
+ return 0x0B;
+ case 'f':
+ return 0x0C;
+ case 'r':
+ return 0x0D;
+ default:
+ return c; // Any other character stands for itself; the removed version's explicit cases for the two quote characters and backslash returned the same values.
}
}
-// called on each new line
-void Lexer::nextLine()
+inline void Lexer::record8(int c) // Appends c to m_buffer8 as a single char; lex() uses this buffer for numeric literal text.
{
- yylineno++;
- m_atLineStart = true;
+ ASSERT(c >= 0); // Guards against the -1 end-of-input marker being recorded.
+ ASSERT(c <= 0xFF); // The value must fit in one byte before the cast below.
+ m_buffer8.append(static_cast<char>(c));
}
-void Lexer::setDone(State s)
+inline void Lexer::record16(UChar c) // Appends one UTF-16 code unit to m_buffer16.
{
- m_state = s;
- m_done = true;
+ m_buffer16.append(c);
+}
+
+inline void Lexer::record16(int c) // int overload: checks the range in debug builds, then forwards to record16(UChar).
+{
+ ASSERT(c >= 0); // Guards against the -1 end-of-input marker being recorded.
+ ASSERT(c <= USHRT_MAX); // The value must fit in 16 bits before the narrowing below.
+ record16(UChar(static_cast<unsigned short>(c)));
}
int Lexer::lex(void* p1, void* p2)
{
+ ASSERT(!m_error);
+ ASSERT(m_buffer8.isEmpty());
+ ASSERT(m_buffer16.isEmpty());
+
YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
int token = 0;
- m_state = Start;
- unsigned short stringType = 0; // either single or double quotes
- m_buffer8.clear();
- m_buffer16.clear();
- m_done = false;
m_terminator = false;
- m_skipLF = false;
- m_skipCR = false;
-
- // did we push a token on the stack previously ?
- // (after an automatic semicolon insertion)
- if (m_stackToken >= 0) {
- setDone(Other);
- token = m_stackToken;
- m_stackToken = 0;
- }
- int startOffset = m_currentOffset;
- while (!m_done) {
- if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
- m_skipLF = false;
- if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
- m_skipCR = false;
- if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one
- m_skipLF = false;
- m_skipCR = false;
- shift(1);
+
+start:
+ while (isWhiteSpace(m_current))
+ shift1();
+
+ int startOffset = currentOffset();
+
+ if (m_current == -1) {
+ if (!m_terminator && !m_delimited && !m_isReparsing) {
+ // automatic semicolon insertion if program incomplete
+ token = ';';
+ goto doneSemicolon;
}
- switch (m_state) {
- case Start:
- startOffset = m_currentOffset;
- if (isWhiteSpace()) {
- // do nothing
- } else if (m_current == '/' && m_next1 == '/') {
- shift(1);
- m_state = InSingleLineComment;
- } else if (m_current == '/' && m_next1 == '*') {
- shift(1);
- m_state = InMultiLineComment;
- } else if (m_current == -1) {
- if (!m_terminator && !m_delimited && !m_isReparsing) {
- // automatic semicolon insertion if program incomplete
- token = ';';
- m_stackToken = 0;
- setDone(Other);
- } else
- setDone(Eof);
- } else if (isLineTerminator()) {
- nextLine();
- m_terminator = true;
- if (m_restrKeyword) {
- token = ';';
- setDone(Other);
- }
- } else if (m_current == '"' || m_current == '\'') {
- m_state = InString;
- stringType = static_cast<unsigned short>(m_current);
- } else if (isIdentStart(m_current)) {
- record16(m_current);
- m_state = InIdentifierOrKeyword;
- } else if (m_current == '\\')
- m_state = InIdentifierStartUnicodeEscapeStart;
- else if (m_current == '0') {
- record8(m_current);
- m_state = InNum0;
- } else if (isDecimalDigit(m_current)) {
- record8(m_current);
- m_state = InNum;
- } else if (m_current == '.' && isDecimalDigit(m_next1)) {
- record8(m_current);
- m_state = InDecimal;
- // <!-- marks the beginning of a line comment (for www usage)
- } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
- shift(3);
- m_state = InSingleLineComment;
- // same for -->
- } else if (m_atLineStart && m_current == '-' && m_next1 == '-' && m_next2 == '>') {
- shift(2);
- m_state = InSingleLineComment;
- } else {
- token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
- if (token != -1)
- setDone(Other);
- else
- setDone(Bad);
+ return 0;
+ }
+
+ m_delimited = false;
+ switch (m_current) {
+ case '>':
+ if (m_next1 == '>' && m_next2 == '>') {
+ if (m_next3 == '=') {
+ shift4();
+ token = URSHIFTEQUAL;
+ break;
}
+ shift3();
+ token = URSHIFT;
break;
- case InString:
- if (m_current == stringType) {
- shift(1);
- setDone(String);
- } else if (isLineTerminator() || m_current == -1)
- setDone(Bad);
- else if (m_current == '\\')
- m_state = InEscapeSequence;
- else
- record16(m_current);
+ }
+ if (m_next1 == '>') {
+ if (m_next2 == '=') {
+ shift3();
+ token = RSHIFTEQUAL;
+ break;
+ }
+ shift2();
+ token = RSHIFT;
break;
- // Escape Sequences inside of strings
- case InEscapeSequence:
- if (isOctalDigit(m_current)) {
- if (m_current >= '0' && m_current <= '3' &&
- isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
- record16(convertOctal(m_current, m_next1, m_next2));
- shift(2);
- m_state = InString;
- } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
- record16(convertOctal('0', m_current, m_next1));
- shift(1);
- m_state = InString;
- } else if (isOctalDigit(m_current)) {
- record16(convertOctal('0', '0', m_current));
- m_state = InString;
- } else
- setDone(Bad);
- } else if (m_current == 'x')
- m_state = InHexEscape;
- else if (m_current == 'u')
- m_state = InUnicodeEscape;
- else if (isLineTerminator()) {
- nextLine();
- m_state = InString;
- } else {
- record16(singleEscape(static_cast<unsigned short>(m_current)));
- m_state = InString;
+ }
+ if (m_next1 == '=') {
+ shift2();
+ token = GE;
+ break;
+ }
+ shift1();
+ token = '>';
+ break;
+ case '=':
+ if (m_next1 == '=') {
+ if (m_next2 == '=') {
+ shift3();
+ token = STREQ;
+ break;
}
+ shift2();
+ token = EQEQ;
break;
- case InHexEscape:
- if (isHexDigit(m_current) && isHexDigit(m_next1)) {
- m_state = InString;
- record16(convertHex(m_current, m_next1));
- shift(1);
- } else if (m_current == stringType) {
- record16('x');
- shift(1);
- setDone(String);
- } else {
- record16('x');
- record16(m_current);
- m_state = InString;
+ }
+ shift1();
+ token = '=';
+ break;
+ case '!':
+ if (m_next1 == '=') {
+ if (m_next2 == '=') {
+ shift3();
+ token = STRNEQ;
+ break;
}
+ shift2();
+ token = NE;
break;
- case InUnicodeEscape:
- if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
- record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
- shift(3);
- m_state = InString;
- } else if (m_current == stringType) {
- record16('u');
- shift(1);
- setDone(String);
- } else
- setDone(Bad);
+ }
+ shift1();
+ token = '!';
+ break;
+ case '<':
+ if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
+ // <!-- marks the beginning of a line comment (for www usage)
+ shift4();
+ goto inSingleLineComment;
+ }
+ if (m_next1 == '<') {
+ if (m_next2 == '=') {
+ shift3();
+ token = LSHIFTEQUAL;
+ break;
+ }
+ shift2();
+ token = LSHIFT;
break;
- case InSingleLineComment:
- if (isLineTerminator()) {
- nextLine();
- m_terminator = true;
- if (m_restrKeyword) {
- token = ';';
- setDone(Other);
- } else
- m_state = Start;
- } else if (m_current == -1)
- setDone(Eof);
+ }
+ if (m_next1 == '=') {
+ shift2();
+ token = LE;
break;
- case InMultiLineComment:
- if (m_current == -1)
- setDone(Bad);
- else if (isLineTerminator())
- nextLine();
- else if (m_current == '*' && m_next1 == '/') {
- m_state = Start;
- shift(1);
+ }
+ shift1();
+ token = '<';
+ break;
+ case '+':
+ if (m_next1 == '+') {
+ shift2();
+ if (m_terminator) {
+ token = AUTOPLUSPLUS;
+ break;
}
+ token = PLUSPLUS;
break;
- case InIdentifierOrKeyword:
- case InIdentifier:
- if (isIdentPart(m_current))
- record16(m_current);
- else if (m_current == '\\')
- m_state = InIdentifierPartUnicodeEscapeStart;
- else
- setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
+ }
+ if (m_next1 == '=') {
+ shift2();
+ token = PLUSEQUAL;
break;
- case InNum0:
- if (m_current == 'x' || m_current == 'X') {
- record8(m_current);
- m_state = InHex;
- } else if (m_current == '.') {
- record8(m_current);
- m_state = InDecimal;
- } else if (m_current == 'e' || m_current == 'E') {
- record8(m_current);
- m_state = InExponentIndicator;
- } else if (isOctalDigit(m_current)) {
- record8(m_current);
- m_state = InOctal;
- } else if (isDecimalDigit(m_current)) {
- record8(m_current);
- m_state = InDecimal;
- } else
- setDone(Number);
+ }
+ shift1();
+ token = '+';
+ break;
+ case '-':
+ if (m_next1 == '-') {
+ if (m_atLineStart && m_next2 == '>') {
+ shift3();
+ goto inSingleLineComment;
+ }
+ shift2();
+ if (m_terminator) {
+ token = AUTOMINUSMINUS;
+ break;
+ }
+ token = MINUSMINUS;
break;
- case InHex:
- if (isHexDigit(m_current))
- record8(m_current);
- else
- setDone(Hex);
+ }
+ if (m_next1 == '=') {
+ shift2();
+ token = MINUSEQUAL;
break;
- case InOctal:
- if (isOctalDigit(m_current))
- record8(m_current);
- else if (isDecimalDigit(m_current)) {
- record8(m_current);
- m_state = InDecimal;
- } else
- setDone(Octal);
+ }
+ shift1();
+ token = '-';
+ break;
+ case '*':
+ if (m_next1 == '=') {
+ shift2();
+ token = MULTEQUAL;
break;
- case InNum:
- if (isDecimalDigit(m_current))
- record8(m_current);
- else if (m_current == '.') {
- record8(m_current);
- m_state = InDecimal;
- } else if (m_current == 'e' || m_current == 'E') {
- record8(m_current);
- m_state = InExponentIndicator;
- } else
- setDone(Number);
+ }
+ shift1();
+ token = '*';
+ break;
+ case '/':
+ if (m_next1 == '/') {
+ shift2();
+ goto inSingleLineComment;
+ }
+ if (m_next1 == '*')
+ goto inMultiLineComment;
+ if (m_next1 == '=') {
+ shift2();
+ token = DIVEQUAL;
break;
- case InDecimal:
- if (isDecimalDigit(m_current))
- record8(m_current);
- else if (m_current == 'e' || m_current == 'E') {
- record8(m_current);
- m_state = InExponentIndicator;
- } else
- setDone(Number);
+ }
+ shift1();
+ token = '/';
+ break;
+ case '&':
+ if (m_next1 == '&') {
+ shift2();
+ token = AND;
break;
- case InExponentIndicator:
- if (m_current == '+' || m_current == '-')
- record8(m_current);
- else if (isDecimalDigit(m_current)) {
- record8(m_current);
- m_state = InExponent;
- } else
- setDone(Bad);
+ }
+ if (m_next1 == '=') {
+ shift2();
+ token = ANDEQUAL;
break;
- case InExponent:
- if (isDecimalDigit(m_current))
- record8(m_current);
- else
- setDone(Number);
+ }
+ shift1();
+ token = '&';
+ break;
+ case '^':
+ if (m_next1 == '=') {
+ shift2();
+ token = XOREQUAL;
break;
- case InIdentifierStartUnicodeEscapeStart:
- if (m_current == 'u')
- m_state = InIdentifierStartUnicodeEscape;
- else
- setDone(Bad);
+ }
+ shift1();
+ token = '^';
+ break;
+ case '%':
+ if (m_next1 == '=') {
+ shift2();
+ token = MODEQUAL;
break;
- case InIdentifierPartUnicodeEscapeStart:
- if (m_current == 'u')
- m_state = InIdentifierPartUnicodeEscape;
- else
- setDone(Bad);
+ }
+ shift1();
+ token = '%';
+ break;
+ case '|':
+ if (m_next1 == '=') {
+ shift2();
+ token = OREQUAL;
break;
- case InIdentifierStartUnicodeEscape:
- if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
- setDone(Bad);
- break;
- }
- token = convertUnicode(m_current, m_next1, m_next2, m_next3);
- shift(3);
- if (!isIdentStart(token)) {
- setDone(Bad);
- break;
- }
- record16(token);
- m_state = InIdentifier;
+ }
+ if (m_next1 == '|') {
+ shift2();
+ token = OR;
break;
- case InIdentifierPartUnicodeEscape:
- if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
- setDone(Bad);
- break;
- }
- token = convertUnicode(m_current, m_next1, m_next2, m_next3);
- shift(3);
- if (!isIdentPart(token)) {
- setDone(Bad);
- break;
+ }
+ shift1();
+ token = '|';
+ break;
+ case '.':
+ if (isASCIIDigit(m_next1)) {
+ record8('.');
+ shift1();
+ goto inNumberAfterDecimalPoint;
+ }
+ token = '.';
+ shift1();
+ break;
+ case ',':
+ case '~':
+ case '?':
+ case ':':
+ case '(':
+ case ')':
+ case '[':
+ case ']':
+ token = m_current;
+ shift1();
+ break;
+ case ';':
+ shift1();
+ m_delimited = true;
+ token = ';';
+ break;
+ case '{':
+ lvalp->intValue = currentOffset();
+ shift1();
+ token = OPENBRACE;
+ break;
+ case '}':
+ lvalp->intValue = currentOffset();
+ shift1();
+ m_delimited = true;
+ token = CLOSEBRACE;
+ break;
+ case '\\':
+ goto startIdentifierWithBackslash;
+ case '0':
+ goto startNumberWithZeroDigit;
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ goto startNumber;
+ case '"':
+ case '\'':
+ goto startString;
+ default:
+ if (isIdentStart(m_current))
+ goto startIdentifierOrKeyword;
+ if (isLineTerminator(m_current)) {
+ shiftLineTerminator();
+ m_atLineStart = true;
+ m_terminator = true;
+ if (lastTokenWasRestrKeyword()) {
+ token = ';';
+ goto doneSemicolon;
}
- record16(token);
- m_state = InIdentifier;
- break;
- default:
- ASSERT(!"Unhandled state in switch statement");
- }
-
- // move on to the next character
- if (!m_done)
- shift(1);
- if (m_state != Start && m_state != InSingleLineComment)
- m_atLineStart = false;
+ goto start;
+ }
+ goto returnError;
}
- // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
- if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current))
- m_state = Bad;
+ m_atLineStart = false;
+ goto returnToken;
- // terminate string
- m_buffer8.append('\0');
-
-#ifdef JSC_DEBUG_LEX
- fprintf(stderr, "line: %d ", lineNo());
- fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
- fprintf(stderr, "%s ", m_buffer8.data());
-#endif
+startString: {
+ int stringQuoteCharacter = m_current;
+ shift1();
- double dval = 0;
- if (m_state == Number)
- dval = WTF::strtod(m_buffer8.data(), 0L);
- else if (m_state == Hex) { // scan hex numbers
- const char* p = m_buffer8.data() + 2;
- while (char c = *p++) {
- dval *= 16;
- dval += convertHex(c);
+ const UChar* stringStart = currentCharacter();
+ while (m_current != stringQuoteCharacter) {
+ // Fast check for characters that require special handling.
+ // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
+ // as possible, and lets through all common ASCII characters.
+ if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
+ m_buffer16.append(stringStart, currentCharacter() - stringStart);
+ goto inString;
+ }
+ shift1();
+ }
+ lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
+ shift1();
+ m_atLineStart = false;
+ m_delimited = false;
+ token = STRING;
+ goto returnToken;
+
+inString:
+ while (m_current != stringQuoteCharacter) {
+ if (m_current == '\\')
+ goto inStringEscapeSequence;
+ if (UNLIKELY(isLineTerminator(m_current)))
+ goto returnError;
+ if (UNLIKELY(m_current == -1))
+ goto returnError;
+ record16(m_current);
+ shift1();
+ }
+ goto doneString;
+
+inStringEscapeSequence:
+ shift1();
+ if (m_current == 'x') {
+ shift1();
+ if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
+ record16(convertHex(m_current, m_next1));
+ shift2();
+ goto inString;
}
+ record16('x');
+ if (m_current == stringQuoteCharacter)
+ goto doneString;
+ goto inString;
+ }
+ if (m_current == 'u') {
+ shift1();
+ if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
+ record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
+ shift4();
+ goto inString;
+ }
+ if (m_current == stringQuoteCharacter) {
+ record16('u');
+ goto doneString;
+ }
+ goto returnError;
+ }
+ if (isASCIIOctalDigit(m_current)) {
+ if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
+ record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
+ shift3();
+ goto inString;
+ }
+ if (isASCIIOctalDigit(m_next1)) {
+ record16((m_current - '0') * 8 + m_next1 - '0');
+ shift2();
+ goto inString;
+ }
+ record16(m_current - '0');
+ shift1();
+ goto inString;
+ }
+ if (isLineTerminator(m_current)) {
+ shiftLineTerminator();
+ goto inString;
+ }
+ record16(singleEscape(m_current));
+ shift1();
+ goto inString;
+}
- if (dval >= mantissaOverflowLowerBound)
- dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
+startIdentifierWithBackslash:
+ shift1();
+ if (UNLIKELY(m_current != 'u'))
+ goto returnError;
+ shift1();
+ if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
+ goto returnError;
+ token = convertUnicode(m_current, m_next1, m_next2, m_next3);
+ if (UNLIKELY(!isIdentStart(token)))
+ goto returnError;
+ goto inIdentifierAfterCharacterCheck;
+
+startIdentifierOrKeyword: {
+ const UChar* identifierStart = currentCharacter();
+ shift1();
+ while (isIdentPart(m_current))
+ shift1();
+ if (LIKELY(m_current != '\\')) {
+ lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
+ goto doneIdentifierOrKeyword;
+ }
+ m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
+}
- m_state = Number;
- } else if (m_state == Octal) { // scan octal number
- const char* p = m_buffer8.data() + 1;
- while (char c = *p++) {
- dval *= 8;
- dval += c - '0';
+ do {
+ shift1();
+ if (UNLIKELY(m_current != 'u'))
+ goto returnError;
+ shift1();
+ if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
+ goto returnError;
+ token = convertUnicode(m_current, m_next1, m_next2, m_next3);
+ if (UNLIKELY(!isIdentPart(token)))
+ goto returnError;
+inIdentifierAfterCharacterCheck:
+ record16(token);
+ shift4();
+
+ while (isIdentPart(m_current)) {
+ record16(m_current);
+ shift1();
}
+ } while (UNLIKELY(m_current == '\\'));
+ goto doneIdentifier;
- if (dval >= mantissaOverflowLowerBound)
- dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
-
- m_state = Number;
+inSingleLineComment:
+ while (!isLineTerminator(m_current)) {
+ if (UNLIKELY(m_current == -1))
+ return 0;
+ shift1();
}
-
-#ifdef JSC_DEBUG_LEX
- switch (m_state) {
- case Eof:
- printf("(EOF)\n");
- break;
- case Other:
- printf("(Other)\n");
- break;
- case Identifier:
- printf("(Identifier)/(Keyword)\n");
- break;
- case String:
- printf("(String)\n");
- break;
- case Number:
- printf("(Number)\n");
- break;
- default:
- printf("(unknown)");
+ shiftLineTerminator();
+ m_atLineStart = true;
+ m_terminator = true;
+ if (lastTokenWasRestrKeyword())
+ goto doneSemicolon;
+ goto start;
+
+inMultiLineComment:
+ shift2();
+ while (m_current != '*' || m_next1 != '/') {
+ if (isLineTerminator(m_current))
+ shiftLineTerminator();
+ else {
+ shift1();
+ if (UNLIKELY(m_current == -1))
+ goto returnError;
+ }
}
-#endif
+ shift2();
+ m_atLineStart = false;
+ goto start;
+
+startNumberWithZeroDigit:
+ shift1();
+ if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
+ shift1();
+ goto inHex;
+ }
+ if (m_current == '.') {
+ record8('0');
+ record8('.');
+ shift1();
+ goto inNumberAfterDecimalPoint;
+ }
+ if ((m_current | 0x20) == 'e') {
+ record8('0');
+ record8('e');
+ shift1();
+ goto inExponentIndicator;
+ }
+ if (isASCIIOctalDigit(m_current))
+ goto inOctal;
+ if (isASCIIDigit(m_current))
+ goto startNumber;
+ lvalp->doubleValue = 0;
+ goto doneNumeric;
+
+inNumberAfterDecimalPoint:
+ while (isASCIIDigit(m_current)) {
+ record8(m_current);
+ shift1();
+ }
+ if ((m_current | 0x20) == 'e') {
+ record8('e');
+ shift1();
+ goto inExponentIndicator;
+ }
+ goto doneNumber;
+
+inExponentIndicator:
+ if (m_current == '+' || m_current == '-') {
+ record8(m_current);
+ shift1();
+ }
+ if (!isASCIIDigit(m_current))
+ goto returnError;
+ do {
+ record8(m_current);
+ shift1();
+ } while (isASCIIDigit(m_current));
+ goto doneNumber;
+
+inOctal: {
+ do {
+ record8(m_current);
+ shift1();
+ } while (isASCIIOctalDigit(m_current));
+ if (isASCIIDigit(m_current))
+ goto startNumber;
- if (m_state != Identifier)
- m_eatNextIdentifier = false;
+ double dval = 0;
- m_restrKeyword = false;
- m_delimited = false;
- llocp->first_line = yylineno;
- llocp->last_line = yylineno;
- llocp->first_column = startOffset;
- llocp->last_column = m_currentOffset;
- switch (m_state) {
- case Eof:
- token = 0;
- break;
- case Other:
- if (token == '}' || token == ';')
- m_delimited = true;
- break;
- case Identifier:
- // Apply anonymous-function hack below (eat the identifier).
- if (m_eatNextIdentifier) {
- m_eatNextIdentifier = false;
- token = lex(lvalp, llocp);
- break;
- }
- lvalp->ident = makeIdentifier(m_buffer16);
- token = IDENT;
- break;
- case IdentifierOrKeyword: {
- lvalp->ident = makeIdentifier(m_buffer16);
- const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
- if (!entry) {
- // Lookup for keyword failed, means this is an identifier.
- token = IDENT;
- break;
- }
- token = entry->lexerValue();
- // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
- m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
- if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
- m_restrKeyword = true;
- break;
- }
- case String:
- // Atomize constant strings in case they're later used in property lookup.
- lvalp->ident = makeIdentifier(m_buffer16);
- token = STRING;
- break;
- case Number:
- lvalp->doubleValue = dval;
- token = NUMBER;
- break;
- case Bad:
-#ifdef JSC_DEBUG_LEX
- fprintf(stderr, "yylex: ERROR.\n");
-#endif
- m_error = true;
- return -1;
- default:
- ASSERT(!"unhandled numeration value in switch");
- m_error = true;
- return -1;
+ const char* end = m_buffer8.end();
+ for (const char* p = m_buffer8.data(); p < end; ++p) {
+ dval *= 8;
+ dval += *p - '0';
}
- m_lastToken = token;
- return token;
-}
+ if (dval >= mantissaOverflowLowerBound)
+ dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
-bool Lexer::isWhiteSpace() const
-{
- return m_current == '\t' || m_current == 0x0b || m_current == 0x0c || isSeparatorSpace(m_current);
-}
+ m_buffer8.resize(0);
-bool Lexer::isLineTerminator()
-{
- bool cr = (m_current == '\r');
- bool lf = (m_current == '\n');
- if (cr)
- m_skipLF = true;
- else if (lf)
- m_skipCR = true;
- return cr || lf || m_current == 0x2028 || m_current == 0x2029;
+ lvalp->doubleValue = dval;
+ goto doneNumeric;
}
-bool Lexer::isIdentStart(int c)
-{
- return isASCIIAlpha(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)));
-}
+inHex: {
+ do {
+ record8(m_current);
+ shift1();
+ } while (isASCIIHexDigit(m_current));
-bool Lexer::isIdentPart(int c)
-{
- return isASCIIAlphanumeric(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
- | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)));
-}
+ double dval = 0;
-static bool isDecimalDigit(int c)
-{
- return isASCIIDigit(c);
-}
+ const char* end = m_buffer8.end();
+ for (const char* p = m_buffer8.data(); p < end; ++p) {
+ dval *= 16;
+ dval += toASCIIHexValue(*p);
+ }
+ if (dval >= mantissaOverflowLowerBound)
+ dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
-bool Lexer::isHexDigit(int c)
-{
- return isASCIIHexDigit(c);
-}
+ m_buffer8.resize(0);
-bool Lexer::isOctalDigit(int c)
-{
- return isASCIIOctalDigit(c);
+ lvalp->doubleValue = dval;
+ goto doneNumeric;
}
-int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
-{
- if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
- shift(4);
- return URSHIFTEQUAL;
- }
- if (c1 == '=' && c2 == '=' && c3 == '=') {
- shift(3);
- return STREQ;
- }
- if (c1 == '!' && c2 == '=' && c3 == '=') {
- shift(3);
- return STRNEQ;
- }
- if (c1 == '>' && c2 == '>' && c3 == '>') {
- shift(3);
- return URSHIFT;
- }
- if (c1 == '<' && c2 == '<' && c3 == '=') {
- shift(3);
- return LSHIFTEQUAL;
- }
- if (c1 == '>' && c2 == '>' && c3 == '=') {
- shift(3);
- return RSHIFTEQUAL;
- }
- if (c1 == '<' && c2 == '=') {
- shift(2);
- return LE;
- }
- if (c1 == '>' && c2 == '=') {
- shift(2);
- return GE;
- }
- if (c1 == '!' && c2 == '=') {
- shift(2);
- return NE;
- }
- if (c1 == '+' && c2 == '+') {
- shift(2);
- if (m_terminator)
- return AUTOPLUSPLUS;
- return PLUSPLUS;
- }
- if (c1 == '-' && c2 == '-') {
- shift(2);
- if (m_terminator)
- return AUTOMINUSMINUS;
- return MINUSMINUS;
- }
- if (c1 == '=' && c2 == '=') {
- shift(2);
- return EQEQ;
- }
- if (c1 == '+' && c2 == '=') {
- shift(2);
- return PLUSEQUAL;
- }
- if (c1 == '-' && c2 == '=') {
- shift(2);
- return MINUSEQUAL;
- }
- if (c1 == '*' && c2 == '=') {
- shift(2);
- return MULTEQUAL;
- }
- if (c1 == '/' && c2 == '=') {
- shift(2);
- return DIVEQUAL;
- }
- if (c1 == '&' && c2 == '=') {
- shift(2);
- return ANDEQUAL;
- }
- if (c1 == '^' && c2 == '=') {
- shift(2);
- return XOREQUAL;
- }
- if (c1 == '%' && c2 == '=') {
- shift(2);
- return MODEQUAL;
- }
- if (c1 == '|' && c2 == '=') {
- shift(2);
- return OREQUAL;
- }
- if (c1 == '<' && c2 == '<') {
- shift(2);
- return LSHIFT;
- }
- if (c1 == '>' && c2 == '>') {
- shift(2);
- return RSHIFT;
+startNumber:
+ record8(m_current);
+ shift1();
+ while (isASCIIDigit(m_current)) {
+ record8(m_current);
+ shift1();
}
- if (c1 == '&' && c2 == '&') {
- shift(2);
- return AND;
+ if (m_current == '.') {
+ record8('.');
+ shift1();
+ goto inNumberAfterDecimalPoint;
}
- if (c1 == '|' && c2 == '|') {
- shift(2);
- return OR;
+ if ((m_current | 0x20) == 'e') {
+ record8('e');
+ shift1();
+ goto inExponentIndicator;
}
- switch (c1) {
- case '=':
- case '>':
- case '<':
- case ',':
- case '!':
- case '~':
- case '?':
- case ':':
- case '.':
- case '+':
- case '-':
- case '*':
- case '/':
- case '&':
- case '|':
- case '^':
- case '%':
- case '(':
- case ')':
- case '[':
- case ']':
- case ';':
- shift(1);
- return static_cast<int>(c1);
- case '{':
- charPos = m_position - 4;
- shift(1);
- return OPENBRACE;
- case '}':
- charPos = m_position - 4;
- shift(1);
- return CLOSEBRACE;
- default:
- return -1;
- }
-}
+ // Fall through into doneNumber.
-unsigned short Lexer::singleEscape(unsigned short c)
-{
- switch (c) {
- case 'b':
- return 0x08;
- case 't':
- return 0x09;
- case 'n':
- return 0x0A;
- case 'v':
- return 0x0B;
- case 'f':
- return 0x0C;
- case 'r':
- return 0x0D;
- case '"':
- return 0x22;
- case '\'':
- return 0x27;
- case '\\':
- return 0x5C;
- default:
- return c;
- }
-}
+doneNumber:
+ // Null-terminate string for strtod.
+ m_buffer8.append('\0');
+ lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
+ m_buffer8.resize(0);
-unsigned short Lexer::convertOctal(int c1, int c2, int c3)
-{
- return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
-}
+ // Fall through into doneNumeric.
-unsigned char Lexer::convertHex(int c)
-{
- if (c >= '0' && c <= '9')
- return static_cast<unsigned char>(c - '0');
- if (c >= 'a' && c <= 'f')
- return static_cast<unsigned char>(c - 'a' + 10);
- return static_cast<unsigned char>(c - 'A' + 10);
-}
+doneNumeric:
+ // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
+ if (UNLIKELY(isIdentStart(m_current)))
+ goto returnError;
-unsigned char Lexer::convertHex(int c1, int c2)
-{
- return ((convertHex(c1) << 4) + convertHex(c2));
-}
+ m_atLineStart = false;
+ m_delimited = false;
+ token = NUMBER;
+ goto returnToken;
-UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
-{
- unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
- unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
- return (highByte << 8 | lowByte);
-}
+doneSemicolon:
+ token = ';';
+ m_delimited = true;
+ goto returnToken;
-void Lexer::record8(int c)
-{
- ASSERT(c >= 0);
- ASSERT(c <= 0xff);
- m_buffer8.append(static_cast<char>(c));
+doneIdentifier:
+ m_atLineStart = false;
+ m_delimited = false;
+ lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+ m_buffer16.resize(0);
+ token = IDENT;
+ goto returnToken;
+
+doneIdentifierOrKeyword: {
+ m_atLineStart = false;
+ m_delimited = false;
+ m_buffer16.resize(0);
+ const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
+ token = entry ? entry->lexerValue() : IDENT;
+ goto returnToken;
}
-void Lexer::record16(int c)
-{
- ASSERT(c >= 0);
- ASSERT(c <= USHRT_MAX);
- record16(UChar(static_cast<unsigned short>(c)));
+doneString:
+ // Atomize constant strings in case they're later used in property lookup.
+ shift1();
+ m_atLineStart = false;
+ m_delimited = false;
+ lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+ m_buffer16.resize(0);
+ token = STRING;
+
+ // Fall through into returnToken.
+
+returnToken: {
+ int lineNumber = m_lineNumber;
+ llocp->first_line = lineNumber;
+ llocp->last_line = lineNumber;
+ llocp->first_column = startOffset;
+ llocp->last_column = currentOffset();
+
+ m_lastToken = token;
+ return token;
}
-void Lexer::record16(UChar c)
-{
- m_buffer16.append(c);
+returnError:
+ m_error = true;
+ return -1;
}
bool Lexer::scanRegExp()
{
- m_buffer16.clear();
+ ASSERT(m_buffer16.isEmpty()); // m_buffer16 is the scratch buffer for the pattern and then the flags, so it must start empty.
+ // Returns false for an unterminated literal; otherwise stores the pattern in m_pattern and the flags in m_flags and returns true.
bool lastWasEscape = false;
bool inBrackets = false;
- while (1) {
- if (isLineTerminator() || m_current == -1)
+ while (true) {
+ if (isLineTerminator(m_current) || m_current == -1) // Reaching a line terminator or -1 (presumably end of input) before the closing '/' fails the scan.
return false;
- else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {
+ if (m_current != '/' || lastWasEscape || inBrackets) { // A '/' closes the pattern only when it is unescaped and outside [...].
// keep track of '[' and ']'
if (!lastWasEscape) {
- if ( m_current == '[' && !inBrackets )
+ if (m_current == '[' && !inBrackets)
inBrackets = true;
- if ( m_current == ']' && inBrackets )
+ if (m_current == ']' && inBrackets)
inBrackets = false;
}
record16(m_current);
- lastWasEscape =
- !lastWasEscape && (m_current == '\\');
+ lastWasEscape = !lastWasEscape && m_current == '\\'; // A backslash escapes the next character unless it was itself escaped.
} else { // end of regexp
m_pattern = UString(m_buffer16);
- m_buffer16.clear();
- shift(1);
+ m_buffer16.resize(0); // Empty the buffer so it can collect the flags next.
+ shift1(); // Step past the closing '/'.
break;
}
- shift(1);
+ shift1();
}
while (isIdentPart(m_current)) {
record16(m_current);
- shift(1);
+ shift1();
}
m_flags = UString(m_buffer16);
+ m_buffer16.resize(0); // Leave the buffer empty again, matching the ASSERT on entry.
return true;
}
@@ -882,19 +950,42 @@ bool Lexer::scanRegExp()
void Lexer::clear()
{
m_identifiers.clear();
+ m_codeWithoutBOMs.clear(); // clear() empties m_identifiers and this buffer, swaps in fresh scratch buffers, and resets m_isReparsing, m_pattern and m_flags.
Vector<char> newBuffer8;
- newBuffer8.reserveCapacity(initialReadBufferCapacity);
+ newBuffer8.reserveInitialCapacity(initialReadBufferCapacity); // The swap on the next line hands m_buffer8 this freshly reserved storage; its old storage moves into the local.
m_buffer8.swap(newBuffer8);
Vector<UChar> newBuffer16;
- newBuffer16.reserveCapacity(initialReadBufferCapacity);
+ newBuffer16.reserveInitialCapacity(initialReadBufferCapacity); // Same replacement for the 16-bit buffer.
m_buffer16.swap(newBuffer16);
m_isReparsing = false;
- m_pattern = 0;
- m_flags = 0;
+ m_pattern = UString(); // Reset to a default-constructed UString.
+ m_flags = UString();
+}
+
+SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine) // Builds a SourceCode covering openBrace..closeBrace, shifting both offsets by the BOMs counted in the provider's data.
+{
+ if (m_codeWithoutBOMs.isEmpty()) // Nothing is stored in m_codeWithoutBOMs: use the offsets unchanged.
+ return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine); // closeBrace + 1: presumably an exclusive end offset just past the closing brace.
+ // Otherwise count byteOrderMark characters in the provider's data and widen the range accordingly.
+ const UChar* data = m_source->provider()->data();
+ // NOTE(review): data[] is indexed directly with offsets up to closeBrace; confirm they are valid indices into the provider's buffer when BOMs are present.
+ ASSERT(openBrace < closeBrace);
+
+ int numBOMsBeforeOpenBrace = 0;
+ int numBOMsBetweenBraces = 0;
+ // i is shared by both loops, so the second loop continues from where the first one stopped.
+ int i;
+ for (i = m_source->startOffset(); i < openBrace; ++i)
+ numBOMsBeforeOpenBrace += data[i] == byteOrderMark; // The comparison yields 0 or 1, so this counts matches.
+ for (; i < closeBrace; ++i)
+ numBOMsBetweenBraces += data[i] == byteOrderMark;
+ // Shift the start by the BOMs counted before it, and the end by every BOM counted up to closeBrace.
+ return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace,
+ closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
}
} // namespace JSC