1 files changed, 795 insertions, 704 deletions
diff --git a/src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp b/src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp
index 0bacb22..8e89c18 100644
--- a/src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp
+++ b/src/3rdparty/webkit/JavaScriptCore/parser/Lexer.cpp
@@ -31,14 +31,12 @@
 #include <ctype.h>
 #include <limits.h>
 #include <string.h>
-#include <wtf/ASCIICType.h>
 #include <wtf/Assertions.h>
-#include <wtf/unicode/Unicode.h>
 
 using namespace WTF;
 using namespace Unicode;
 
-// we can't specify the namespace in yacc's C output, so do it here
+// We can't specify the namespace in yacc's C output, so do it here instead.
 using namespace JSC;
 
 #ifndef KDE_USE_FINAL
@@ -48,7 +46,7 @@ using namespace JSC;
 #include "Lookup.h"
 #include "Lexer.lut.h"
 
-// a bridge for yacc from the C world to C++
+// A bridge for yacc from the C world to the C++ world.
 int jscyylex(void* lvalp, void* llocp, void* globalData)
 {
     return static_cast<JSGlobalData*>(globalData)->lexer->lex(lvalp, llocp);
@@ -56,825 +54,895 @@ int jscyylex(void* lvalp, void* llocp, void* globalData)
 
 namespace JSC {
 
-static bool isDecimalDigit(int);
+static const UChar byteOrderMark = 0xFEFF;
 
 Lexer::Lexer(JSGlobalData* globalData)
-    : yylineno(1)
-    , m_restrKeyword(false)
-    , m_eatNextIdentifier(false)
-    , m_stackToken(-1)
-    , m_lastToken(-1)
-    , m_position(0)
-    , m_code(0)
-    , m_length(0)
-    , m_isReparsing(false)
-    , m_atLineStart(true)
-    , m_current(0)
-    , m_next1(0)
-    , m_next2(0)
-    , m_next3(0)
-    , m_currentOffset(0)
-    , m_nextOffset1(0)
-    , m_nextOffset2(0)
-    , m_nextOffset3(0)
+    : m_isReparsing(false) // Consulted by lex() when deciding whether to insert a ';' at end of input.
     , m_globalData(globalData)
-    , m_mainTable(JSC::mainTable)
+    , m_keywordTable(JSC::mainTable) // ~Lexer() calls deleteTable() on this member.
 {
-    m_buffer8.reserveCapacity(initialReadBufferCapacity);
-    m_buffer16.reserveCapacity(initialReadBufferCapacity);
+    m_buffer8.reserveInitialCapacity(initialReadBufferCapacity); // Scratch buffer filled through record8().
+    m_buffer16.reserveInitialCapacity(initialReadBufferCapacity); // Scratch buffer filled through record16().
 }
 
 Lexer::~Lexer()
 {
-    m_mainTable.deleteTable();
+    m_keywordTable.deleteTable(); // Counterpart of the m_keywordTable(JSC::mainTable) initialisation in the constructor.
+}
+
+inline const UChar* Lexer::currentCharacter() const
+{
+    return m_code - 4; // m_code sits just past the four buffered characters (m_current, m_next1..m_next3); step back to m_current.
+}
+
+inline int Lexer::currentOffset() const
+{
+    return currentCharacter() - m_codeStart; // Index of m_current measured from m_codeStart; the pointer difference is narrowed to int.
+}
+
+// Advances the lookahead window by one character; -1 marks end of input.
+ALWAYS_INLINE void Lexer::shift1()
+{
+    // Slide the three buffered lookahead characters down one slot.
+    m_current = m_next1;
+    m_next1 = m_next2;
+    m_next2 = m_next3;
+
+    // Refill the last slot from the source, or with -1 once it is exhausted.
+    m_next3 = LIKELY(m_code < m_codeEnd) ? *m_code : -1;
+    ++m_code;
+}
+
+// Advances the lookahead window by two characters; -1 marks end of input.
+ALWAYS_INLINE void Lexer::shift2()
+{
+    m_current = m_next2;
+    m_next1 = m_next3;
+    if (UNLIKELY(m_code + 1 >= m_codeEnd)) {
+        m_next2 = m_code < m_codeEnd ? *m_code : -1;
+        m_next3 = -1;
+    } else {
+        m_next2 = *m_code;
+        m_next3 = m_code[1];
+    }
+    m_code += 2;
+}
+
+// Advances the lookahead window by three characters; -1 marks end of input.
+ALWAYS_INLINE void Lexer::shift3()
+{
+    m_current = m_next3;
+    if (UNLIKELY(m_code + 2 >= m_codeEnd)) {
+        m_next1 = m_code < m_codeEnd ? *m_code : -1;
+        m_next2 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
+        m_next3 = -1;
+    } else {
+        m_next1 = *m_code;
+        m_next2 = m_code[1];
+        m_next3 = m_code[2];
+    }
+    m_code += 3;
+}
+
+// Reloads the whole four-character lookahead window; -1 marks end of input.
+ALWAYS_INLINE void Lexer::shift4()
+{
+    if (UNLIKELY(m_code + 3 >= m_codeEnd)) {
+        m_current = m_code < m_codeEnd ? *m_code : -1;
+        m_next1 = m_code + 1 < m_codeEnd ? m_code[1] : -1;
+        m_next2 = m_code + 2 < m_codeEnd ? m_code[2] : -1;
+        m_next3 = -1;
+    } else {
+        m_current = *m_code;
+        m_next1 = m_code[1];
+        m_next2 = m_code[2];
+        m_next3 = m_code[3];
+    }
+    m_code += 4;
 }
 
 void Lexer::setCode(const SourceCode& source)
 {
-    yylineno = source.firstLine();
-    m_restrKeyword = false;
+    m_lineNumber = source.firstLine(); // Incremented afterwards by shiftLineTerminator().
     m_delimited = false;
-    m_eatNextIdentifier = false;
-    m_stackToken = -1;
     m_lastToken = -1;
 
-    m_position = source.startOffset();
+    const UChar* data = source.provider()->data();
+
     m_source = &source;
-    m_code = source.provider()->data();
-    m_length = source.endOffset();
-    m_skipLF = false;
-    m_skipCR = false;
+    m_codeStart = data; // Origin used by currentOffset(), so offsets are relative to the provider's whole buffer.
+    m_code = data + source.startOffset(); // First character to lex.
+    m_codeEnd = data + source.endOffset(); // One past the last character to lex.
     m_error = false;
     m_atLineStart = true;
 
-    // read first characters
-    shift(4);
+    // ECMA-262 calls for stripping all Cf characters, but we only strip BOM characters.
+    // See <https://bugs.webkit.org/show_bug.cgi?id=4931> for details.
+    if (source.provider()->hasBOMs()) {
+        for (const UChar* p = m_codeStart; p < m_codeEnd; ++p) { // Scan starts at m_codeStart, so a BOM located before startOffset() also triggers the copy.
+            if (UNLIKELY(*p == byteOrderMark)) {
+                copyCodeWithoutBOMs(); // Re-points m_code, m_codeStart and m_codeEnd at a BOM-free copy.
+                break;
+            }
+        }
+    }
+
+    // Read the first characters into the 4-character buffer.
+    shift4();
+    ASSERT(currentOffset() == source.startOffset()); // Must hold whether or not the BOM-free copy was substituted.
 }
 
-void Lexer::shift(unsigned p)
+void Lexer::copyCodeWithoutBOMs() // Copies [m_code, m_codeEnd) minus BOM characters into m_codeWithoutBOMs and lexes from that copy.
 {
-    // ECMA-262 calls for stripping Cf characters here, but we only do this for BOM,
-    // see <https://bugs.webkit.org/show_bug.cgi?id=4931>.
-
-    while (p--) {
-        m_current = m_next1;
-        m_next1 = m_next2;
-        m_next2 = m_next3;
-        m_currentOffset = m_nextOffset1;
-        m_nextOffset1 = m_nextOffset2;
-        m_nextOffset2 = m_nextOffset3;
-        do {
-            if (m_position >= m_length) {
-                m_nextOffset3 = m_position;
-                m_position++;
-                m_next3 = -1;
-                break;
-            }
-            m_nextOffset3 = m_position;
-            m_next3 = m_code[m_position++];
-        } while (m_next3 == 0xFEFF);
+    // Note: In this case, the character offset data for debugging will be incorrect.
+    // If it's important to correctly debug code with extraneous BOMs, then the caller
+    // should strip the BOMs when creating the SourceProvider object and do its own
+    // mapping of offsets within the stripped text to original text offset.
+
+    m_codeWithoutBOMs.reserveCapacity(m_codeEnd - m_code); // Upper bound: the copy can only be shorter than the input range.
+    for (const UChar* p = m_code; p < m_codeEnd; ++p) {
+        UChar c = *p;
+        if (c != byteOrderMark)
+            m_codeWithoutBOMs.append(c); // NOTE(review): appends without clearing first - assumes m_codeWithoutBOMs is empty on entry; confirm.
+    }
+    ptrdiff_t startDelta = m_codeStart - m_code; // Distance from the lexing start back to the offset origin, captured before m_code is re-pointed.
+    m_code = m_codeWithoutBOMs.data();
+    m_codeStart = m_code + startDelta; // Keeps currentOffset() on the same origin; this pointer may lie outside the copied buffer.
+    m_codeEnd = m_codeWithoutBOMs.data() + m_codeWithoutBOMs.size();
+}
+
+// Consumes one line terminator and bumps the line counter.
+void Lexer::shiftLineTerminator()
+{
+    ASSERT(isLineTerminator(m_current));
+    // CRLF and LFCR both count as a single line terminator.
+    const bool isPair = m_current + m_next1 == '\n' + '\r';
+    if (!isPair)
+        shift1();
+    else
+        shift2();
+    ++m_lineNumber;
+}
+
+ALWAYS_INLINE Identifier* Lexer::makeIdentifier(const UChar* characters, size_t length)
+{
+    m_identifiers.append(Identifier(m_globalData, characters, length)); // The Lexer keeps the Identifier; callers only receive a pointer to it.
+    return &m_identifiers.last(); // NOTE(review): assumes m_identifiers never relocates elements on a later append - confirm its container type.
+}
+
+inline bool Lexer::lastTokenWasRestrKeyword() const // After these keywords lex() turns a following line terminator into a ';' token.
+{
+    return m_lastToken == CONTINUE || m_lastToken == BREAK || m_lastToken == RETURN || m_lastToken == THROW;
+}
+
+static NEVER_INLINE bool isNonASCIIIdentStart(int c) // Non-ASCII branch of isIdentStart(): true when category(c) is one of the listed letter categories.
+{
+    return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other);
+}
+
+static inline bool isIdentStart(int c) // ASCII: a letter, '$' or '_'; anything else is decided by isNonASCIIIdentStart().
+{
+    return isASCII(c) ? isASCIIAlpha(c) || c == '$' || c == '_' : isNonASCIIIdentStart(c);
+}
+
+static NEVER_INLINE bool isNonASCIIIdentPart(int c) // Non-ASCII branch of isIdentPart().
+{
+    return category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other // Same letter categories as isNonASCIIIdentStart() ...
+        | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector); // ... plus marks, decimal digits and connector punctuation.
+}
+
+static inline bool isIdentPart(int c) // ASCII: a letter, a digit, '$' or '_'; anything else is decided by isNonASCIIIdentPart().
+{
+    return isASCII(c) ? isASCIIAlphanumeric(c) || c == '$' || c == '_' : isNonASCIIIdentPart(c);
+}
+
+static inline int singleEscape(int c) // Maps the character following a backslash in a string to the character it denotes.
+{
+    switch (c) {
+        case 'b':
+            return 0x08; // backspace
+        case 't':
+            return 0x09; // horizontal tab
+        case 'n':
+            return 0x0A; // line feed
+        case 'v':
+            return 0x0B; // vertical tab
+        case 'f':
+            return 0x0C; // form feed
+        case 'r':
+            return 0x0D; // carriage return
+        default:
+            return c; // Any other character stands for itself.
     }
 }
 
-// called on each new line
-void Lexer::nextLine()
+inline void Lexer::record8(int c) // Appends one character to the 8-bit token buffer m_buffer8.
 {
-    yylineno++;
-    m_atLineStart = true;
+    ASSERT(c >= 0);
+    ASSERT(c <= 0xFF);
+    m_buffer8.append(static_cast<char>(c)); // The range is only checked by the ASSERTs above; the cast narrows c to char.
 }
 
-void Lexer::setDone(State s)
+inline void Lexer::record16(UChar c) // Appends one character to the 16-bit token buffer m_buffer16.
 {
-    m_state = s;
-    m_done = true;
+    m_buffer16.append(c); // lex() also appends whole source runs to this buffer when a string or identifier contains escapes.
+}
+
+inline void Lexer::record16(int c) // Overload for the int-typed lookahead values such as m_current.
+{
+    ASSERT(c >= 0); // In particular the -1 end-of-input marker must never be recorded.
+    ASSERT(c <= USHRT_MAX);
+    record16(UChar(static_cast<unsigned short>(c))); // Narrow to 16 bits, then forward to the UChar overload.
 }
 
 int Lexer::lex(void* p1, void* p2)
 {
+    ASSERT(!m_error);
+    ASSERT(m_buffer8.isEmpty());
+    ASSERT(m_buffer16.isEmpty());
+
     YYSTYPE* lvalp = static_cast<YYSTYPE*>(p1);
     YYLTYPE* llocp = static_cast<YYLTYPE*>(p2);
     int token = 0;
-    m_state = Start;
-    unsigned short stringType = 0; // either single or double quotes
-    m_buffer8.clear();
-    m_buffer16.clear();
-    m_done = false;
     m_terminator = false;
-    m_skipLF = false;
-    m_skipCR = false;
-
-    // did we push a token on the stack previously ?
-    // (after an automatic semicolon insertion)
-    if (m_stackToken >= 0) {
-        setDone(Other);
-        token = m_stackToken;
-        m_stackToken = 0;
-    }
-    int startOffset = m_currentOffset;
-    while (!m_done) {
-        if (m_skipLF && m_current != '\n') // found \r but not \n afterwards
-            m_skipLF = false;
-        if (m_skipCR && m_current != '\r') // found \n but not \r afterwards
-            m_skipCR = false;
-        if (m_skipLF || m_skipCR) { // found \r\n or \n\r -> eat the second one
-            m_skipLF = false;
-            m_skipCR = false;
-            shift(1);
+
+start:
+    while (isWhiteSpace(m_current))
+        shift1();
+
+    int startOffset = currentOffset();
+
+    if (m_current == -1) {
+        if (!m_terminator && !m_delimited && !m_isReparsing) {
+            // automatic semicolon insertion if program incomplete
+            token = ';';
+            goto doneSemicolon;
         }
-        switch (m_state) {
-            case Start:
-                startOffset = m_currentOffset;
-                if (isWhiteSpace()) {
-                    // do nothing
-                } else if (m_current == '/' && m_next1 == '/') {
-                    shift(1);
-                    m_state = InSingleLineComment;
-                } else if (m_current == '/' && m_next1 == '*') {
-                    shift(1);
-                    m_state = InMultiLineComment;
-                } else if (m_current == -1) {
-                    if (!m_terminator && !m_delimited && !m_isReparsing) {
-                        // automatic semicolon insertion if program incomplete
-                        token = ';';
-                        m_stackToken = 0;
-                        setDone(Other);
-                    } else
-                        setDone(Eof);
-                } else if (isLineTerminator()) {
-                    nextLine();
-                    m_terminator = true;
-                    if (m_restrKeyword) {
-                        token = ';';
-                        setDone(Other);
-                    }
-                } else if (m_current == '"' || m_current == '\'') {
-                    m_state = InString;
-                    stringType = static_cast<unsigned short>(m_current);
-                } else if (isIdentStart(m_current)) {
-                    record16(m_current);
-                    m_state = InIdentifierOrKeyword;
-                } else if (m_current == '\\')
-                    m_state = InIdentifierStartUnicodeEscapeStart;
-                else if (m_current == '0') {
-                    record8(m_current);
-                    m_state = InNum0;
-                } else if (isDecimalDigit(m_current)) {
-                    record8(m_current);
-                    m_state = InNum;
-                } else if (m_current == '.' && isDecimalDigit(m_next1)) {
-                    record8(m_current);
-                    m_state = InDecimal;
-                    // <!-- marks the beginning of a line comment (for www usage)
-                } else if (m_current == '<' && m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
-                    shift(3);
-                    m_state = InSingleLineComment;
-                    // same for -->
-                } else if (m_atLineStart && m_current == '-' && m_next1 == '-' &&  m_next2 == '>') {
-                    shift(2);
-                    m_state = InSingleLineComment;
-                } else {
-                    token = matchPunctuator(lvalp->intValue, m_current, m_next1, m_next2, m_next3);
-                    if (token != -1)
-                        setDone(Other);
-                    else
-                        setDone(Bad);
+        return 0;
+    }
+
+    m_delimited = false;
+    switch (m_current) {
+        case '>':
+            if (m_next1 == '>' && m_next2 == '>') {
+                if (m_next3 == '=') {
+                    shift4();
+                    token = URSHIFTEQUAL;
+                    break;
                 }
+                shift3();
+                token = URSHIFT;
                 break;
-            case InString:
-                if (m_current == stringType) {
-                    shift(1);
-                    setDone(String);
-                } else if (isLineTerminator() || m_current == -1)
-                    setDone(Bad);
-                else if (m_current == '\\')
-                    m_state = InEscapeSequence;
-                else
-                    record16(m_current);
+            }
+            if (m_next1 == '>') {
+                if (m_next2 == '=') {
+                    shift3();
+                    token = RSHIFTEQUAL;
+                    break;
+                }
+                shift2();
+                token = RSHIFT;
                 break;
-            // Escape Sequences inside of strings
-            case InEscapeSequence:
-                if (isOctalDigit(m_current)) {
-                    if (m_current >= '0' && m_current <= '3' &&
-                        isOctalDigit(m_next1) && isOctalDigit(m_next2)) {
-                        record16(convertOctal(m_current, m_next1, m_next2));
-                        shift(2);
-                        m_state = InString;
-                    } else if (isOctalDigit(m_current) && isOctalDigit(m_next1)) {
-                        record16(convertOctal('0', m_current, m_next1));
-                        shift(1);
-                        m_state = InString;
-                    } else if (isOctalDigit(m_current)) {
-                        record16(convertOctal('0', '0', m_current));
-                        m_state = InString;
-                    } else
-                        setDone(Bad);
-                } else if (m_current == 'x')
-                    m_state = InHexEscape;
-                else if (m_current == 'u')
-                    m_state = InUnicodeEscape;
-                else if (isLineTerminator()) {
-                    nextLine();
-                    m_state = InString;
-                } else {
-                    record16(singleEscape(static_cast<unsigned short>(m_current)));
-                    m_state = InString;
+            }
+            if (m_next1 == '=') {
+                shift2();
+                token = GE;
+                break;
+            }
+            shift1();
+            token = '>';
+            break;
+        case '=':
+            if (m_next1 == '=') {
+                if (m_next2 == '=') {
+                    shift3();
+                    token = STREQ;
+                    break;
                 }
+                shift2();
+                token = EQEQ;
                 break;
-            case InHexEscape:
-                if (isHexDigit(m_current) && isHexDigit(m_next1)) {
-                    m_state = InString;
-                    record16(convertHex(m_current, m_next1));
-                    shift(1);
-                } else if (m_current == stringType) {
-                    record16('x');
-                    shift(1);
-                    setDone(String);
-                } else {
-                    record16('x');
-                    record16(m_current);
-                    m_state = InString;
+            }
+            shift1();
+            token = '=';
+            break;
+        case '!':
+            if (m_next1 == '=') {
+                if (m_next2 == '=') {
+                    shift3();
+                    token = STRNEQ;
+                    break;
                 }
+                shift2();
+                token = NE;
                 break;
-            case InUnicodeEscape:
-                if (isHexDigit(m_current) && isHexDigit(m_next1) && isHexDigit(m_next2) && isHexDigit(m_next3)) {
-                    record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
-                    shift(3);
-                    m_state = InString;
-                } else if (m_current == stringType) {
-                    record16('u');
-                    shift(1);
-                    setDone(String);
-                } else
-                    setDone(Bad);
+            }
+            shift1();
+            token = '!';
+            break;
+        case '<':
+            if (m_next1 == '!' && m_next2 == '-' && m_next3 == '-') {
+                // <!-- marks the beginning of a line comment (for www usage)
+                shift4();
+                goto inSingleLineComment;
+            }
+            if (m_next1 == '<') {
+                if (m_next2 == '=') {
+                    shift3();
+                    token = LSHIFTEQUAL;
+                    break;
+                }
+                shift2();
+                token = LSHIFT;
                 break;
-            case InSingleLineComment:
-                if (isLineTerminator()) {
-                    nextLine();
-                    m_terminator = true;
-                    if (m_restrKeyword) {
-                        token = ';';
-                        setDone(Other);
-                    } else
-                        m_state = Start;
-                } else if (m_current == -1)
-                    setDone(Eof);
+            }
+            if (m_next1 == '=') {
+                shift2();
+                token = LE;
                 break;
-            case InMultiLineComment:
-                if (m_current == -1)
-                    setDone(Bad);
-                else if (isLineTerminator())
-                    nextLine();
-                else if (m_current == '*' && m_next1 == '/') {
-                    m_state = Start;
-                    shift(1);
+            }
+            shift1();
+            token = '<';
+            break;
+        case '+':
+            if (m_next1 == '+') {
+                shift2();
+                if (m_terminator) {
+                    token = AUTOPLUSPLUS;
+                    break;
                 }
+                token = PLUSPLUS;
                 break;
-            case InIdentifierOrKeyword:
-            case InIdentifier:
-                if (isIdentPart(m_current))
-                    record16(m_current);
-                else if (m_current == '\\')
-                    m_state = InIdentifierPartUnicodeEscapeStart;
-                else
-                    setDone(m_state == InIdentifierOrKeyword ? IdentifierOrKeyword : Identifier);
+            }
+            if (m_next1 == '=') {
+                shift2();
+                token = PLUSEQUAL;
                 break;
-            case InNum0:
-                if (m_current == 'x' || m_current == 'X') {
-                    record8(m_current);
-                    m_state = InHex;
-                } else if (m_current == '.') {
-                    record8(m_current);
-                    m_state = InDecimal;
-                } else if (m_current == 'e' || m_current == 'E') {
-                    record8(m_current);
-                    m_state = InExponentIndicator;
-                } else if (isOctalDigit(m_current)) {
-                    record8(m_current);
-                    m_state = InOctal;
-                } else if (isDecimalDigit(m_current)) {
-                    record8(m_current);
-                    m_state = InDecimal;
-                } else
-                    setDone(Number);
+            }
+            shift1();
+            token = '+';
+            break;
+        case '-':
+            if (m_next1 == '-') {
+                if (m_atLineStart && m_next2 == '>') {
+                    shift3();
+                    goto inSingleLineComment;
+                }
+                shift2();
+                if (m_terminator) {
+                    token = AUTOMINUSMINUS;
+                    break;
+                }
+                token = MINUSMINUS;
                 break;
-            case InHex:
-                if (isHexDigit(m_current))
-                    record8(m_current);
-                else
-                    setDone(Hex);
+            }
+            if (m_next1 == '=') {
+                shift2();
+                token = MINUSEQUAL;
                 break;
-            case InOctal:
-                if (isOctalDigit(m_current))
-                    record8(m_current);
-                else if (isDecimalDigit(m_current)) {
-                    record8(m_current);
-                    m_state = InDecimal;
-                } else
-                    setDone(Octal);
+            }
+            shift1();
+            token = '-';
+            break;
+        case '*':
+            if (m_next1 == '=') {
+                shift2();
+                token = MULTEQUAL;
                 break;
-            case InNum:
-                if (isDecimalDigit(m_current))
-                    record8(m_current);
-                else if (m_current == '.') {
-                    record8(m_current);
-                    m_state = InDecimal;
-                } else if (m_current == 'e' || m_current == 'E') {
-                    record8(m_current);
-                    m_state = InExponentIndicator;
-                } else
-                    setDone(Number);
+            }
+            shift1();
+            token = '*';
+            break;
+        case '/':
+            if (m_next1 == '/') {
+                shift2();
+                goto inSingleLineComment;
+            }
+            if (m_next1 == '*')
+                goto inMultiLineComment;
+            if (m_next1 == '=') {
+                shift2();
+                token = DIVEQUAL;
                 break;
-            case InDecimal:
-                if (isDecimalDigit(m_current))
-                    record8(m_current);
-                else if (m_current == 'e' || m_current == 'E') {
-                    record8(m_current);
-                    m_state = InExponentIndicator;
-                } else
-                    setDone(Number);
+            }
+            shift1();
+            token = '/';
+            break;
+        case '&':
+            if (m_next1 == '&') {
+                shift2();
+                token = AND;
                 break;
-            case InExponentIndicator:
-                if (m_current == '+' || m_current == '-')
-                    record8(m_current);
-                else if (isDecimalDigit(m_current)) {
-                    record8(m_current);
-                    m_state = InExponent;
-                } else
-                    setDone(Bad);
+            }
+            if (m_next1 == '=') {
+                shift2();
+                token = ANDEQUAL;
                 break;
-            case InExponent:
-                if (isDecimalDigit(m_current))
-                    record8(m_current);
-                else
-                    setDone(Number);
+            }
+            shift1();
+            token = '&';
+            break;
+        case '^':
+            if (m_next1 == '=') {
+                shift2();
+                token = XOREQUAL;
                 break;
-            case InIdentifierStartUnicodeEscapeStart:
-                if (m_current == 'u')
-                    m_state = InIdentifierStartUnicodeEscape;
-                else
-                    setDone(Bad);
+            }
+            shift1();
+            token = '^';
+            break;
+        case '%':
+            if (m_next1 == '=') {
+                shift2();
+                token = MODEQUAL;
                 break;
-            case InIdentifierPartUnicodeEscapeStart:
-                if (m_current == 'u')
-                    m_state = InIdentifierPartUnicodeEscape;
-                else
-                    setDone(Bad);
+            }
+            shift1();
+            token = '%';
+            break;
+        case '|':
+            if (m_next1 == '=') {
+                shift2();
+                token = OREQUAL;
                 break;
-            case InIdentifierStartUnicodeEscape:
-                if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
-                    setDone(Bad);
-                    break;
-                }
-                token = convertUnicode(m_current, m_next1, m_next2, m_next3);
-                shift(3);
-                if (!isIdentStart(token)) {
-                    setDone(Bad);
-                    break;
-                }
-                record16(token);
-                m_state = InIdentifier;
+            }
+            if (m_next1 == '|') {
+                shift2();
+                token = OR;
                 break;
-            case InIdentifierPartUnicodeEscape:
-                if (!isHexDigit(m_current) || !isHexDigit(m_next1) || !isHexDigit(m_next2) || !isHexDigit(m_next3)) {
-                    setDone(Bad);
-                    break;
-                }
-                token = convertUnicode(m_current, m_next1, m_next2, m_next3);
-                shift(3);
-                if (!isIdentPart(token)) {
-                    setDone(Bad);
-                    break;
+            }
+            shift1();
+            token = '|';
+            break;
+        case '.':
+            if (isASCIIDigit(m_next1)) {
+                record8('.');
+                shift1();
+                goto inNumberAfterDecimalPoint;
+            }
+            token = '.';
+            shift1();
+            break;
+        case ',':
+        case '~':
+        case '?':
+        case ':':
+        case '(':
+        case ')':
+        case '[':
+        case ']':
+            token = m_current;
+            shift1();
+            break;
+        case ';':
+            shift1();
+            m_delimited = true;
+            token = ';';
+            break;
+        case '{':
+            lvalp->intValue = currentOffset();
+            shift1();
+            token = OPENBRACE;
+            break;
+        case '}':
+            lvalp->intValue = currentOffset();
+            shift1();
+            m_delimited = true;
+            token = CLOSEBRACE;
+            break;
+        case '\\':
+            goto startIdentifierWithBackslash;
+        case '0':
+            goto startNumberWithZeroDigit;
+        case '1':
+        case '2':
+        case '3':
+        case '4':
+        case '5':
+        case '6':
+        case '7':
+        case '8':
+        case '9':
+            goto startNumber;
+        case '"':
+        case '\'':
+            goto startString;
+        default:
+            if (isIdentStart(m_current))
+                goto startIdentifierOrKeyword;
+            if (isLineTerminator(m_current)) {
+                shiftLineTerminator();
+                m_atLineStart = true;
+                m_terminator = true;
+                if (lastTokenWasRestrKeyword()) {
+                    token = ';';
+                    goto doneSemicolon;
                 }
-                record16(token);
-                m_state = InIdentifier;
-                break;
-            default:
-                ASSERT(!"Unhandled state in switch statement");
-        }
-
-        // move on to the next character
-        if (!m_done)
-            shift(1);
-        if (m_state != Start && m_state != InSingleLineComment)
-            m_atLineStart = false;
+                goto start;
+            }
+            goto returnError;
     }
 
-    // no identifiers allowed directly after numeric literal, e.g. "3in" is bad
-    if ((m_state == Number || m_state == Octal || m_state == Hex) && isIdentStart(m_current))
-        m_state = Bad;
+    m_atLineStart = false;
+    goto returnToken;
 
-    // terminate string
-    m_buffer8.append('\0');
-
-#ifdef JSC_DEBUG_LEX
-    fprintf(stderr, "line: %d ", lineNo());
-    fprintf(stderr, "yytext (%x): ", m_buffer8[0]);
-    fprintf(stderr, "%s ", m_buffer8.data());
-#endif
+startString: {
+    int stringQuoteCharacter = m_current;
+    shift1();
 
-    double dval = 0;
-    if (m_state == Number)
-        dval = WTF::strtod(m_buffer8.data(), 0L);
-    else if (m_state == Hex) { // scan hex numbers
-        const char* p = m_buffer8.data() + 2;
-        while (char c = *p++) {
-            dval *= 16;
-            dval += convertHex(c);
+    const UChar* stringStart = currentCharacter();
+    while (m_current != stringQuoteCharacter) {
+        // Fast check for characters that require special handling.
+        // Catches -1, \n, \r, \, 0x2028, and 0x2029 as efficiently
+        // as possible, and lets through all common ASCII characters.
+        if (UNLIKELY(m_current == '\\') || UNLIKELY(((static_cast<unsigned>(m_current) - 0xE) & 0x2000))) {
+            m_buffer16.append(stringStart, currentCharacter() - stringStart);
+            goto inString;
+        }
+        shift1();
+    }
+    lvalp->ident = makeIdentifier(stringStart, currentCharacter() - stringStart);
+    shift1();
+    m_atLineStart = false;
+    m_delimited = false;
+    token = STRING;
+    goto returnToken;
+
+inString:
+    while (m_current != stringQuoteCharacter) {
+        if (m_current == '\\')
+            goto inStringEscapeSequence;
+        if (UNLIKELY(isLineTerminator(m_current)))
+            goto returnError;
+        if (UNLIKELY(m_current == -1))
+            goto returnError;
+        record16(m_current);
+        shift1();
+    }
+    goto doneString;
+
+inStringEscapeSequence:
+    shift1();
+    if (m_current == 'x') {
+        shift1();
+        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1)) {
+            record16(convertHex(m_current, m_next1));
+            shift2();
+            goto inString;
         }
+        record16('x');
+        if (m_current == stringQuoteCharacter)
+            goto doneString;
+        goto inString;
+    }
+    if (m_current == 'u') {
+        shift1();
+        if (isASCIIHexDigit(m_current) && isASCIIHexDigit(m_next1) && isASCIIHexDigit(m_next2) && isASCIIHexDigit(m_next3)) {
+            record16(convertUnicode(m_current, m_next1, m_next2, m_next3));
+            shift4();
+            goto inString;
+        }
+        if (m_current == stringQuoteCharacter) {
+            record16('u');
+            goto doneString;
+        }
+        goto returnError;
+    }
+    if (isASCIIOctalDigit(m_current)) {
+        if (m_current >= '0' && m_current <= '3' && isASCIIOctalDigit(m_next1) && isASCIIOctalDigit(m_next2)) {
+            record16((m_current - '0') * 64 + (m_next1 - '0') * 8 + m_next2 - '0');
+            shift3();
+            goto inString;
+        }
+        if (isASCIIOctalDigit(m_next1)) {
+            record16((m_current - '0') * 8 + m_next1 - '0');
+            shift2();
+            goto inString;
+        }
+        record16(m_current - '0');
+        shift1();
+        goto inString;
+    }
+    if (isLineTerminator(m_current)) {
+        shiftLineTerminator();
+        goto inString;
+    }
+    record16(singleEscape(m_current));
+    shift1();
+    goto inString;
+}
 
-        if (dval >= mantissaOverflowLowerBound)
-            dval = parseIntOverflow(m_buffer8.data() + 2, p - (m_buffer8.data() + 3), 16);
+startIdentifierWithBackslash:
+    shift1();
+    if (UNLIKELY(m_current != 'u'))
+        goto returnError;
+    shift1();
+    if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
+        goto returnError;
+    token = convertUnicode(m_current, m_next1, m_next2, m_next3);
+    if (UNLIKELY(!isIdentStart(token)))
+        goto returnError;
+    goto inIdentifierAfterCharacterCheck;
+
+startIdentifierOrKeyword: {
+    const UChar* identifierStart = currentCharacter();
+    shift1();
+    while (isIdentPart(m_current))
+        shift1();
+    if (LIKELY(m_current != '\\')) {
+        lvalp->ident = makeIdentifier(identifierStart, currentCharacter() - identifierStart);
+        goto doneIdentifierOrKeyword;
+    }
+    m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
+}
 
-        m_state = Number;
-    } else if (m_state == Octal) {   // scan octal number
-        const char* p = m_buffer8.data() + 1;
-        while (char c = *p++) {
-            dval *= 8;
-            dval += c - '0';
+    do {
+        shift1();
+        if (UNLIKELY(m_current != 'u'))
+            goto returnError;
+        shift1();
+        if (UNLIKELY(!isASCIIHexDigit(m_current) || !isASCIIHexDigit(m_next1) || !isASCIIHexDigit(m_next2) || !isASCIIHexDigit(m_next3)))
+            goto returnError;
+        token = convertUnicode(m_current, m_next1, m_next2, m_next3);
+        if (UNLIKELY(!isIdentPart(token)))
+            goto returnError;
+inIdentifierAfterCharacterCheck:
+        record16(token);
+        shift4();
+
+        while (isIdentPart(m_current)) {
+            record16(m_current);
+            shift1();
         }
+    } while (UNLIKELY(m_current == '\\'));
+    goto doneIdentifier;
 
-        if (dval >= mantissaOverflowLowerBound)
-            dval = parseIntOverflow(m_buffer8.data() + 1, p - (m_buffer8.data() + 2), 8);
-
-        m_state = Number;
+inSingleLineComment:
+    while (!isLineTerminator(m_current)) {
+        if (UNLIKELY(m_current == -1))
+            return 0;
+        shift1();
     }
-
-#ifdef JSC_DEBUG_LEX
-    switch (m_state) {
-        case Eof:
-            printf("(EOF)\n");
-            break;
-        case Other:
-            printf("(Other)\n");
-            break;
-        case Identifier:
-            printf("(Identifier)/(Keyword)\n");
-            break;
-        case String:
-            printf("(String)\n");
-            break;
-        case Number:
-            printf("(Number)\n");
-            break;
-        default:
-            printf("(unknown)");
+    shiftLineTerminator();
+    m_atLineStart = true;
+    m_terminator = true;
+    if (lastTokenWasRestrKeyword())
+        goto doneSemicolon;
+    goto start;
+
+inMultiLineComment:
+    shift2();
+    while (m_current != '*' || m_next1 != '/') {
+        if (isLineTerminator(m_current))
+            shiftLineTerminator();
+        else {
+            shift1();
+            if (UNLIKELY(m_current == -1))
+                goto returnError;
+        }
     }
-#endif
+    shift2();
+    m_atLineStart = false;
+    goto start;
+
+startNumberWithZeroDigit:
+    shift1();
+    if ((m_current | 0x20) == 'x' && isASCIIHexDigit(m_next1)) {
+        shift1();
+        goto inHex;
+    }
+    if (m_current == '.') {
+        record8('0');
+        record8('.');
+        shift1();
+        goto inNumberAfterDecimalPoint;
+    }
+    if ((m_current | 0x20) == 'e') {
+        record8('0');
+        record8('e');
+        shift1();
+        goto inExponentIndicator;
+    }
+    if (isASCIIOctalDigit(m_current))
+        goto inOctal;
+    if (isASCIIDigit(m_current))
+        goto startNumber;
+    lvalp->doubleValue = 0;
+    goto doneNumeric;
+
+inNumberAfterDecimalPoint:
+    while (isASCIIDigit(m_current)) {
+        record8(m_current);
+        shift1();
+    }
+    if ((m_current | 0x20) == 'e') {
+        record8('e');
+        shift1();
+        goto inExponentIndicator;
+    }
+    goto doneNumber;
+
+inExponentIndicator:
+    if (m_current == '+' || m_current == '-') {
+        record8(m_current);
+        shift1();
+    }
+    if (!isASCIIDigit(m_current))
+        goto returnError;
+    do {
+        record8(m_current);
+        shift1();
+    } while (isASCIIDigit(m_current));
+    goto doneNumber;
+
+inOctal: {
+    do {
+        record8(m_current);
+        shift1();
+    } while (isASCIIOctalDigit(m_current));
+    if (isASCIIDigit(m_current))
+        goto startNumber;
 
-    if (m_state != Identifier)
-        m_eatNextIdentifier = false;
+    double dval = 0;
 
-    m_restrKeyword = false;
-    m_delimited = false;
-    llocp->first_line = yylineno;
-    llocp->last_line = yylineno;
-    llocp->first_column = startOffset;
-    llocp->last_column = m_currentOffset;
-    switch (m_state) {
-        case Eof:
-            token = 0;
-            break;
-        case Other:
-            if (token == '}' || token == ';')
-                m_delimited = true;
-            break;
-        case Identifier:
-            // Apply anonymous-function hack below (eat the identifier).
-            if (m_eatNextIdentifier) {
-                m_eatNextIdentifier = false;
-                token = lex(lvalp, llocp);
-                break;
-            }
-            lvalp->ident = makeIdentifier(m_buffer16);
-            token = IDENT;
-            break;
-        case IdentifierOrKeyword: {
-            lvalp->ident = makeIdentifier(m_buffer16);
-            const HashEntry* entry = m_mainTable.entry(m_globalData, *lvalp->ident);
-            if (!entry) {
-                // Lookup for keyword failed, means this is an identifier.
-                token = IDENT;
-                break;
-            }
-            token = entry->lexerValue();
-            // Hack for "f = function somename() { ... }"; too hard to get into the grammar.
-            m_eatNextIdentifier = token == FUNCTION && m_lastToken == '=';
-            if (token == CONTINUE || token == BREAK || token == RETURN || token == THROW)
-                m_restrKeyword = true;
-            break;
-        }
-        case String:
-            // Atomize constant strings in case they're later used in property lookup.
-            lvalp->ident = makeIdentifier(m_buffer16);
-            token = STRING;
-            break;
-        case Number:
-            lvalp->doubleValue = dval;
-            token = NUMBER;
-            break;
-        case Bad:
-#ifdef JSC_DEBUG_LEX
-            fprintf(stderr, "yylex: ERROR.\n");
-#endif
-            m_error = true;
-            return -1;
-        default:
-            ASSERT(!"unhandled numeration value in switch");
-            m_error = true;
-            return -1;
+    const char* end = m_buffer8.end();
+    for (const char* p = m_buffer8.data(); p < end; ++p) {
+        dval *= 8;
+        dval += *p - '0';
     }
-    m_lastToken = token;
-    return token;
-}
+    if (dval >= mantissaOverflowLowerBound)
+        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 8);
 
-bool Lexer::isWhiteSpace() const
-{
-    return m_current == '\t' || m_current == 0x0b || m_current == 0x0c || isSeparatorSpace(m_current);
-}
+    m_buffer8.resize(0);
 
-bool Lexer::isLineTerminator()
-{
-    bool cr = (m_current == '\r');
-    bool lf = (m_current == '\n');
-    if (cr)
-        m_skipLF = true;
-    else if (lf)
-        m_skipCR = true;
-    return cr || lf || m_current == 0x2028 || m_current == 0x2029;
+    lvalp->doubleValue = dval;
+    goto doneNumeric;
 }
 
-bool Lexer::isIdentStart(int c)
-{
-    return isASCIIAlpha(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other)));
-}
+inHex: {
+    do {
+        record8(m_current);
+        shift1();
+    } while (isASCIIHexDigit(m_current));
 
-bool Lexer::isIdentPart(int c)
-{
-    return isASCIIAlphanumeric(c) || c == '$' || c == '_' || (!isASCII(c) && (category(c) & (Letter_Uppercase | Letter_Lowercase | Letter_Titlecase | Letter_Modifier | Letter_Other
-                            | Mark_NonSpacing | Mark_SpacingCombining | Number_DecimalDigit | Punctuation_Connector)));
-}
+    double dval = 0;
 
-static bool isDecimalDigit(int c)
-{
-    return isASCIIDigit(c);
-}
+    const char* end = m_buffer8.end();
+    for (const char* p = m_buffer8.data(); p < end; ++p) {
+        dval *= 16;
+        dval += toASCIIHexValue(*p);
+    }
+    if (dval >= mantissaOverflowLowerBound)
+        dval = parseIntOverflow(m_buffer8.data(), end - m_buffer8.data(), 16);
 
-bool Lexer::isHexDigit(int c)
-{
-    return isASCIIHexDigit(c); 
-}
+    m_buffer8.resize(0);
 
-bool Lexer::isOctalDigit(int c)
-{
-    return isASCIIOctalDigit(c);
+    lvalp->doubleValue = dval;
+    goto doneNumeric;
 }
 
-int Lexer::matchPunctuator(int& charPos, int c1, int c2, int c3, int c4)
-{
-    if (c1 == '>' && c2 == '>' && c3 == '>' && c4 == '=') {
-        shift(4);
-        return URSHIFTEQUAL;
-    }
-    if (c1 == '=' && c2 == '=' && c3 == '=') {
-        shift(3);
-        return STREQ;
-    }
-    if (c1 == '!' && c2 == '=' && c3 == '=') {
-        shift(3);
-        return STRNEQ;
-    }
-    if (c1 == '>' && c2 == '>' && c3 == '>') {
-        shift(3);
-        return URSHIFT;
-    }
-    if (c1 == '<' && c2 == '<' && c3 == '=') {
-        shift(3);
-        return LSHIFTEQUAL;
-    }
-    if (c1 == '>' && c2 == '>' && c3 == '=') {
-        shift(3);
-        return RSHIFTEQUAL;
-    }
-    if (c1 == '<' && c2 == '=') {
-        shift(2);
-        return LE;
-    }
-    if (c1 == '>' && c2 == '=') {
-        shift(2);
-        return GE;
-    }
-    if (c1 == '!' && c2 == '=') {
-        shift(2);
-        return NE;
-    }
-    if (c1 == '+' && c2 == '+') {
-        shift(2);
-        if (m_terminator)
-            return AUTOPLUSPLUS;
-        return PLUSPLUS;
-    }
-    if (c1 == '-' && c2 == '-') {
-        shift(2);
-        if (m_terminator)
-            return AUTOMINUSMINUS;
-        return MINUSMINUS;
-    }
-    if (c1 == '=' && c2 == '=') {
-        shift(2);
-        return EQEQ;
-    }
-    if (c1 == '+' && c2 == '=') {
-        shift(2);
-        return PLUSEQUAL;
-    }
-    if (c1 == '-' && c2 == '=') {
-        shift(2);
-        return MINUSEQUAL;
-    }
-    if (c1 == '*' && c2 == '=') {
-        shift(2);
-        return MULTEQUAL;
-    }
-    if (c1 == '/' && c2 == '=') {
-        shift(2);
-        return DIVEQUAL;
-    }
-    if (c1 == '&' && c2 == '=') {
-        shift(2);
-        return ANDEQUAL;
-    }
-    if (c1 == '^' && c2 == '=') {
-        shift(2);
-        return XOREQUAL;
-    }
-    if (c1 == '%' && c2 == '=') {
-        shift(2);
-        return MODEQUAL;
-    }
-    if (c1 == '|' && c2 == '=') {
-        shift(2);
-        return OREQUAL;
-    }
-    if (c1 == '<' && c2 == '<') {
-        shift(2);
-        return LSHIFT;
-    }
-    if (c1 == '>' && c2 == '>') {
-        shift(2);
-        return RSHIFT;
+startNumber:
+    record8(m_current);
+    shift1();
+    while (isASCIIDigit(m_current)) {
+        record8(m_current);
+        shift1();
     }
-    if (c1 == '&' && c2 == '&') {
-        shift(2);
-        return AND;
+    if (m_current == '.') {
+        record8('.');
+        shift1();
+        goto inNumberAfterDecimalPoint;
     }
-    if (c1 == '|' && c2 == '|') {
-        shift(2);
-        return OR;
+    if ((m_current | 0x20) == 'e') {
+        record8('e');
+        shift1();
+        goto inExponentIndicator;
     }
 
-    switch (c1) {
-        case '=':
-        case '>':
-        case '<':
-        case ',':
-        case '!':
-        case '~':
-        case '?':
-        case ':':
-        case '.':
-        case '+':
-        case '-':
-        case '*':
-        case '/':
-        case '&':
-        case '|':
-        case '^':
-        case '%':
-        case '(':
-        case ')':
-        case '[':
-        case ']':
-        case ';':
-            shift(1);
-            return static_cast<int>(c1);
-        case '{':
-            charPos = m_position - 4;
-            shift(1);
-            return OPENBRACE;
-        case '}':
-            charPos = m_position - 4;
-            shift(1);
-            return CLOSEBRACE;
-        default:
-            return -1;
-    }
-}
+    // Fall through into doneNumber.
 
-unsigned short Lexer::singleEscape(unsigned short c)
-{
-    switch (c) {
-        case 'b':
-            return 0x08;
-        case 't':
-            return 0x09;
-        case 'n':
-            return 0x0A;
-        case 'v':
-            return 0x0B;
-        case 'f':
-            return 0x0C;
-        case 'r':
-            return 0x0D;
-        case '"':
-            return 0x22;
-        case '\'':
-            return 0x27;
-        case '\\':
-            return 0x5C;
-        default:
-            return c;
-    }
-}
+doneNumber:
+    // Null-terminate string for strtod.
+    m_buffer8.append('\0');
+    lvalp->doubleValue = WTF::strtod(m_buffer8.data(), 0);
+    m_buffer8.resize(0);
 
-unsigned short Lexer::convertOctal(int c1, int c2, int c3)
-{
-    return static_cast<unsigned short>((c1 - '0') * 64 + (c2 - '0') * 8 + c3 - '0');
-}
+    // Fall through into doneNumeric.
 
-unsigned char Lexer::convertHex(int c)
-{
-    if (c >= '0' && c <= '9')
-        return static_cast<unsigned char>(c - '0');
-    if (c >= 'a' && c <= 'f')
-        return static_cast<unsigned char>(c - 'a' + 10);
-    return static_cast<unsigned char>(c - 'A' + 10);
-}
+doneNumeric:
+    // No identifiers allowed directly after numeric literal, e.g. "3in" is bad.
+    if (UNLIKELY(isIdentStart(m_current)))
+        goto returnError;
 
-unsigned char Lexer::convertHex(int c1, int c2)
-{
-    return ((convertHex(c1) << 4) + convertHex(c2));
-}
+    m_atLineStart = false;
+    m_delimited = false;
+    token = NUMBER;
+    goto returnToken;
 
-UChar Lexer::convertUnicode(int c1, int c2, int c3, int c4)
-{
-    unsigned char highByte = (convertHex(c1) << 4) + convertHex(c2);
-    unsigned char lowByte = (convertHex(c3) << 4) + convertHex(c4);
-    return (highByte << 8 | lowByte);
-}
+doneSemicolon:
+    token = ';';
+    m_delimited = true;
+    goto returnToken;
 
-void Lexer::record8(int c)
-{
-    ASSERT(c >= 0);
-    ASSERT(c <= 0xff);
-    m_buffer8.append(static_cast<char>(c));
+doneIdentifier:
+    m_atLineStart = false;
+    m_delimited = false;
+    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+    m_buffer16.resize(0);
+    token = IDENT;
+    goto returnToken;
+
+doneIdentifierOrKeyword: {
+    m_atLineStart = false;
+    m_delimited = false;
+    m_buffer16.resize(0);
+    const HashEntry* entry = m_keywordTable.entry(m_globalData, *lvalp->ident);
+    token = entry ? entry->lexerValue() : IDENT;
+    goto returnToken;
 }
 
-void Lexer::record16(int c)
-{
-    ASSERT(c >= 0);
-    ASSERT(c <= USHRT_MAX);
-    record16(UChar(static_cast<unsigned short>(c)));
+doneString:
+    // Atomize constant strings in case they're later used in property lookup.
+    shift1();
+    m_atLineStart = false;
+    m_delimited = false;
+    lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+    m_buffer16.resize(0);
+    token = STRING;
+
+    // Fall through into returnToken.
+
+returnToken: {
+    int lineNumber = m_lineNumber;
+    llocp->first_line = lineNumber;
+    llocp->last_line = lineNumber;
+    llocp->first_column = startOffset;
+    llocp->last_column = currentOffset();
+
+    m_lastToken = token;
+    return token;
 }
 
-void Lexer::record16(UChar c)
-{
-    m_buffer16.append(c);
+returnError:
+    m_error = true;
+    return -1;
 }
 
 bool Lexer::scanRegExp()
 {
-    m_buffer16.clear();
+    ASSERT(m_buffer16.isEmpty()); // The pattern is accumulated in m_buffer16, so it must start out empty.
+
     bool lastWasEscape = false;
     bool inBrackets = false;
 
-    while (1) {
-        if (isLineTerminator() || m_current == -1)
+    while (true) { // Collect the pattern into m_buffer16; the closing '/' stores it in m_pattern.
+        if (isLineTerminator(m_current) || m_current == -1) // Either one before the closing '/' fails the scan.
             return false;
-        else if (m_current != '/' || lastWasEscape == true || inBrackets == true) {
+        if (m_current != '/' || lastWasEscape || inBrackets) { // A '/' ends the pattern only when unescaped and outside [...].
             // keep track of '[' and ']'
             if (!lastWasEscape) {
-                if ( m_current == '[' && !inBrackets )
+                if (m_current == '[' && !inBrackets)
                     inBrackets = true;
-                if ( m_current == ']' && inBrackets )
+                if (m_current == ']' && inBrackets)
                     inBrackets = false;
             }
             record16(m_current);
-            lastWasEscape =
-            !lastWasEscape && (m_current == '\\');
+            lastWasEscape = !lastWasEscape && m_current == '\\'; // A backslash escapes the next character unless it is itself escaped.
         } else { // end of regexp
             m_pattern = UString(m_buffer16);
-            m_buffer16.clear();
-            shift(1);
+            m_buffer16.resize(0); // Empty the buffer so the flags can be collected into it next.
+            shift1(); // Step past the closing '/'.
             break;
         }
-        shift(1);
+        shift1();
     }
 
     while (isIdentPart(m_current)) {
         record16(m_current);
-        shift(1);
+        shift1();
     }
     m_flags = UString(m_buffer16);
+    m_buffer16.resize(0); // Leave the buffer empty again, matching the ASSERT on entry.
 
     return true;
 }
@@ -882,19 +950,42 @@ bool Lexer::scanRegExp()
 void Lexer::clear()
 {
     m_identifiers.clear();
+    m_codeWithoutBOMs.clear(); // Presumably the copy of the source with byte order marks removed - confirm against its declaration.
 
     Vector<char> newBuffer8;
-    newBuffer8.reserveCapacity(initialReadBufferCapacity);
+    newBuffer8.reserveInitialCapacity(initialReadBufferCapacity); // The swap below hands this fresh storage to m_buffer8; the old contents leave with the local.
     m_buffer8.swap(newBuffer8);
 
     Vector<UChar> newBuffer16;
-    newBuffer16.reserveCapacity(initialReadBufferCapacity);
+    newBuffer16.reserveInitialCapacity(initialReadBufferCapacity); // Same replacement scheme for the 16-bit buffer.
     m_buffer16.swap(newBuffer16);
 
     m_isReparsing = false;
 
-    m_pattern = 0;
-    m_flags = 0;
+    m_pattern = UString(); // Replace the regexp pattern left by scanRegExp() with a default-constructed string.
+    m_flags = UString(); // Likewise for the regexp flags.
+}
+
+SourceCode Lexer::sourceCode(int openBrace, int closeBrace, int firstLine)
+{
+    if (m_codeWithoutBOMs.isEmpty()) // Fast path: nothing in m_codeWithoutBOMs, so the brace offsets are passed through unadjusted.
+        return SourceCode(m_source->provider(), openBrace, closeBrace + 1, firstLine);
+
+    const UChar* data = m_source->provider()->data(); // The provider's buffer, scanned below for byteOrderMark.
+
+    ASSERT(openBrace < closeBrace);
+
+    int numBOMsBeforeOpenBrace = 0;
+    int numBOMsBetweenBraces = 0;
+
+    int i; // NOTE(review): both loops are bounded by the unadjusted offsets although the result adds the counts to them; confirm no BOM between the two positions is missed.
+    for (i = m_source->startOffset(); i < openBrace; ++i) // Count BOMs in data[startOffset, openBrace).
+        numBOMsBeforeOpenBrace += data[i] == byteOrderMark;
+    for (; i < closeBrace; ++i) // Resume where the first loop stopped and count BOMs up to closeBrace.
+        numBOMsBetweenBraces += data[i] == byteOrderMark;
+
+    return SourceCode(m_source->provider(), openBrace + numBOMsBeforeOpenBrace, // Shift each offset by the BOMs counted ahead of it.
+        closeBrace + numBOMsBeforeOpenBrace + numBOMsBetweenBraces + 1, firstLine);
 }
 
 } // namespace JSC