diff options
Diffstat (limited to 'src/lexer.cc')
-rw-r--r-- | src/lexer.cc | 729 |
1 files changed, 729 insertions, 0 deletions
diff --git a/src/lexer.cc b/src/lexer.cc new file mode 100644 index 0000000..0371371 --- /dev/null +++ b/src/lexer.cc @@ -0,0 +1,729 @@ +/* Generated by re2c 0.13.5 */ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lexer.h" + +#include <stdio.h> + +#include "eval_env.h" + +bool Lexer::Error(const string& message, string* err) { + // Compute line/column. + int line = 1; + const char* context = input_.str_; + for (const char* p = input_.str_; p < last_token_; ++p) { + if (*p == '\n') { + ++line; + context = p + 1; + } + } + int col = last_token_ ? last_token_ - context : 0; + + char buf[1024]; + snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line); + *err = buf; + *err += message + "\n"; + + // Add some context to the message. + const int kTruncateColumn = 72; + if (col > 0 && col < kTruncateColumn) { + int len; + bool truncated = true; + for (len = 0; len < kTruncateColumn; ++len) { + if (context[len] == 0 || context[len] == '\n') { + truncated = false; + break; + } + } + *err += string(context, len); + if (truncated) + *err += "..."; + *err += "\n"; + *err += string(col, ' '); + *err += "^ near here\n"; + } + + return false; +} + +Lexer::Lexer(const char* input) { + Start("input", input); +} + +void Lexer::Start(StringPiece filename, StringPiece input) { + filename_ = filename; + input_ = input; + ofs_ = input_.str_; + last_token_ = NULL; +} + +const char* Lexer::TokenName(Token t) { + switch (t) { + case ERROR: return "lexing error"; + case BUILD: return "'build'"; + case COLON: return "':'"; + case DEFAULT: return "'default'"; + case EQUALS: return "'='"; + case IDENT: return "identifier"; + case INCLUDE: return "'include'"; + case INDENT: return "indent"; + case NEWLINE: return "newline"; + case PIPE2: return "'||'"; + case PIPE: return "'|'"; + case RULE: return "'rule'"; + case SUBNINJA: return "'subninja'"; + case TEOF: return "eof"; + } + return NULL; // not reached +} + +void Lexer::UnreadToken() { + ofs_ = last_token_; +} + +Lexer::Token Lexer::ReadToken() { + const char* p = ofs_; + const char* q; + const char* start; + Lexer::Token token; + for (;;) { + start = p; + +{ + char yych; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 192, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 160, 128, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 128, 128, 128, 128, 128, 128, + 128, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 128, 128, 128, 128, 160, + 128, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + + yych = *p; + if (yych <= 'Z') { + if (yych <= '-') { + if (yych <= 0x1F) { + if (yych <= 0x00) goto yy21; + if (yych == '\n') goto yy4; + goto yy23; + } else { + if (yych <= ' ') goto yy6; + if (yych != '#') goto yy23; + } + } else { + if (yych <= ':') { + if (yych == '/') goto yy23; + if (yych <= '9') goto yy20; + goto yy14; + } else { + if (yych == '=') goto yy12; + if (yych <= '@') goto yy23; + goto yy20; + } + } + } else { + if (yych <= 'h') { + if (yych <= 'a') { + if (yych == '_') goto yy20; + if (yych <= '`') goto yy23; + goto yy20; + } else { + if (yych <= 'b') goto yy8; + if (yych == 'd') goto yy11; + goto yy20; + } + } else { + if (yych <= 's') { + if (yych <= 'i') goto yy18; + if (yych <= 'q') goto yy20; + if (yych <= 'r') goto yy10; + goto yy19; + } else { + if (yych <= 'z') goto yy20; + if (yych == '|') goto yy16; + goto yy23; + } + } + } + yych = *(q = ++p); + if (yych >= 0x01) goto yy62; +yy3: + { token = ERROR; break; } +yy4: + ++p; + { token = NEWLINE; break; } +yy6: + ++p; + yych = *p; + goto yy60; +yy7: + { token = INDENT; break; } +yy8: + ++p; + if ((yych = *p) == 'u') goto yy54; + goto yy25; +yy9: + { token = IDENT; break; } +yy10: + yych = *++p; + if (yych == 'u') goto yy50; + goto yy25; +yy11: + yych = *++p; + if (yych == 'e') goto yy43; + goto yy25; +yy12: + ++p; + { token = EQUALS; break; } +yy14: + ++p; + { token = COLON; break; } +yy16: + ++p; + if ((yych = *p) == '|') goto yy41; + { token = PIPE; break; } +yy18: + yych = *++p; + if (yych == 'n') goto yy34; + goto yy25; +yy19: + yych = *++p; + if (yych == 'u') goto yy26; + goto yy25; +yy20: + yych = *++p; + goto yy25; +yy21: + ++p; + { token = TEOF; break; } +yy23: + yych = *++p; + goto yy3; +yy24: + ++p; + yych = *p; +yy25: + if (yybm[0+yych] & 32) { + goto yy24; + } + goto yy9; +yy26: + yych = *++p; + if (yych != 'b') goto yy25; + yych = *++p; + if (yych != 'n') goto yy25; + yych = *++p; + if (yych != 'i') goto yy25; + yych = *++p; + if (yych != 'n') goto yy25; + yych = *++p; + if (yych != 'j') goto yy25; + yych = *++p; + if (yych != 'a') goto yy25; + ++p; + if (yybm[0+(yych = *p)] & 32) { + goto yy24; + } + { token = SUBNINJA; break; } +yy34: + yych = *++p; + if (yych != 'c') goto yy25; + yych = *++p; + if (yych != 'l') goto yy25; + yych = *++p; + if (yych != 'u') goto yy25; + yych = *++p; + if (yych != 'd') goto yy25; + yych = *++p; + if (yych != 'e') goto yy25; + ++p; + if (yybm[0+(yych = *p)] & 32) { + goto yy24; + } + { token = INCLUDE; break; } +yy41: + ++p; + { token = PIPE2; break; } +yy43: + yych = *++p; + if (yych != 'f') goto yy25; + yych = *++p; + if (yych != 'a') goto yy25; + yych = *++p; + if (yych != 'u') goto yy25; + yych = *++p; + if (yych != 'l') goto yy25; + yych = *++p; + if (yych != 't') goto yy25; + ++p; + if (yybm[0+(yych = *p)] & 32) { + goto yy24; + } + { token = DEFAULT; break; } +yy50: + yych = *++p; + if (yych != 'l') goto yy25; + yych = *++p; + if (yych != 'e') goto yy25; + ++p; + if (yybm[0+(yych = *p)] & 32) { + goto yy24; + } + { token = RULE; break; } +yy54: + yych = *++p; + if (yych != 'i') goto yy25; + yych = *++p; + if (yych != 'l') goto yy25; + yych = *++p; + if (yych != 'd') goto yy25; + ++p; + if (yybm[0+(yych = *p)] & 32) { + goto yy24; + } + { token = BUILD; break; } +yy59: + ++p; + yych = *p; +yy60: + if (yybm[0+yych] & 64) { + goto yy59; + } + goto yy7; +yy61: + ++p; + yych = *p; +yy62: + if (yybm[0+yych] & 128) { + goto yy61; + } + if (yych >= 0x01) goto yy64; + p = q; + goto yy3; +yy64: + ++p; + { continue; } +} + + } + + last_token_ = start; + ofs_ = p; + if (token != NEWLINE && token != TEOF) + EatWhitespace(); + return token; +} + +bool Lexer::PeekToken(Token token) { + Token t = ReadToken(); + if (t == token) + return true; + UnreadToken(); + return false; +} + +void Lexer::EatWhitespace() { + const char* p = ofs_; + for (;;) { + ofs_ = p; + +{ + char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= ' ') { + if (yych <= 0x00) goto yy72; + if (yych <= 0x1F) goto yy74; + } else { + if (yych == '$') goto yy70; + goto yy74; + } + ++p; + yych = *p; + goto yy78; +yy69: + { continue; } +yy70: + ++p; + if ((yych = *p) == '\n') goto yy75; +yy71: + { break; } +yy72: + ++p; + { break; } +yy74: + yych = *++p; + goto yy71; +yy75: + ++p; + { continue; } +yy77: + ++p; + yych = *p; +yy78: + if (yybm[0+yych] & 128) { + goto yy77; + } + goto yy69; +} + + } +} + +bool Lexer::ReadIdent(string* out) { + const char* p = ofs_; + for (;;) { + const char* start = p; + +{ + char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 128, 0, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 128, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= '@') { + if (yych <= '.') { + if (yych <= '-') goto yy83; + } else { + if (yych <= '/') goto yy83; + if (yych >= ':') goto yy83; + } + } else { + if (yych <= '_') { + if (yych <= 'Z') goto yy81; + if (yych <= '^') goto yy83; + } else { + if (yych <= '`') goto yy83; + if (yych >= '{') goto yy83; + } + } +yy81: + ++p; + yych = *p; + goto yy86; +yy82: + { + out->assign(start, p - start); + break; + } +yy83: + ++p; + { return false; } +yy85: + ++p; + yych = *p; +yy86: + if (yybm[0+yych] & 128) { + goto yy85; + } + goto yy82; +} + + } + ofs_ = p; + EatWhitespace(); + return true; +} + +bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) { + const char* p = ofs_; + const char* q; + const char* start; + for (;;) { + start = p; + +{ + char yych; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 16, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 160, 128, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 0, 128, 128, 128, 128, 128, + 128, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 128, 128, 128, 128, 224, + 128, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + yych = *p; + if (yych <= '#') { + if (yych <= '\n') { + if (yych <= 0x00) goto yy95; + if (yych >= '\n') goto yy91; + } else { + if (yych == ' ') goto yy91; + } + } else { + if (yych <= ':') { + if (yych <= '$') goto yy93; + if (yych >= ':') goto yy91; + } else { + if (yych == '|') goto yy91; + } + } + ++p; + yych = *p; + goto yy117; +yy90: + { + eval->Add(EvalString::RAW, StringPiece(start, p - start)); + continue; + } +yy91: + ++p; + { + if (path) { + p = start; + break; + } else { + if (*start == '\n') + break; + eval->Add(EvalString::RAW, StringPiece(start, 1)); + continue; + } + } +yy93: + ++p; + if ((yych = *p) <= '9') { + if (yych <= ' ') { + if (yych == '\n') goto yy106; + if (yych <= 0x1F) goto yy97; + goto yy99; + } else { + if (yych == '$') goto yy101; + if (yych <= '/') goto yy97; + goto yy103; + } + } else { + if (yych <= '_') { + if (yych <= '@') goto yy97; + if (yych <= 'Z') goto yy103; + if (yych <= '^') goto yy97; + goto yy103; + } else { + if (yych <= '`') goto yy97; + if (yych <= 'z') goto yy103; + if (yych <= '{') goto yy105; + goto yy97; + } + } + { + last_token_ = start; + return Error("lexing error", err); + } +yy95: + ++p; + { + last_token_ = start; + return Error("unexpected EOF", err); + } +yy97: + ++p; +yy98: + { + last_token_ = start; + return Error("bad $-escape (literal $ must be written as $$)", err); + } +yy99: + ++p; + { + eval->Add(EvalString::RAW, StringPiece(" ", 1)); + continue; + } +yy101: + ++p; + { + eval->Add(EvalString::RAW, StringPiece("$", 1)); + continue; + } +yy103: + ++p; + yych = *p; + goto yy115; +yy104: + { + eval->Add(EvalString::SPECIAL, StringPiece(start + 1, p - start - 1)); + continue; + } +yy105: + yych = *(q = ++p); + if (yybm[0+yych] & 32) { + goto yy109; + } + goto yy98; +yy106: + ++p; + yych = *p; + if (yybm[0+yych] & 16) { + goto yy106; + } + { + continue; + } +yy109: + ++p; + yych = *p; + if (yybm[0+yych] & 32) { + goto yy109; + } + if (yych == '}') goto yy112; + p = q; + goto yy98; +yy112: + ++p; + { + eval->Add(EvalString::SPECIAL, StringPiece(start + 2, p - start - 3)); + continue; + } +yy114: + ++p; + yych = *p; +yy115: + if (yybm[0+yych] & 64) { + goto yy114; + } + goto yy104; +yy116: + ++p; + yych = *p; +yy117: + if (yybm[0+yych] & 128) { + goto yy116; + } + goto yy90; +} + + } + last_token_ = start; + ofs_ = p; + if (path) + EatWhitespace(); + // Non-path strings end in newlines, so there's no whitespace to eat. + return true; +} |