diff options
author | Evan Martin <martine@danga.com> | 2011-12-29 21:00:27 (GMT) |
---|---|---|
committer | Evan Martin <martine@danga.com> | 2011-12-29 21:14:39 (GMT) |
commit | 8a0c96075786c1983bdfa2f37f32b75200ea0334 (patch) | |
tree | 95e2b0c24aedcda9ec5ed09329e69fd7a1925212 /src/lexer.in.cc | |
parent | ad7d9f43f1bd8e04321d8fdb07ebf7b96ab525a1 (diff) | |
download | Ninja-8a0c96075786c1983bdfa2f37f32b75200ea0334.zip Ninja-8a0c96075786c1983bdfa2f37f32b75200ea0334.tar.gz Ninja-8a0c96075786c1983bdfa2f37f32b75200ea0334.tar.bz2 |
switch the core ninja parser to use re2c for the lexer
- Delete the old "Tokenizer" code.
- Write separate tests for the lexer distinct from the parser.
- Switch the parser to use the new code.
- New lexer error output has file:line numbers so e.g. Emacs can
jump your editor to the syntax error.
- The EvalEnv ($-interpolation) code is now part of the lexer as well.
Diffstat (limited to 'src/lexer.in.cc')
-rw-r--r-- | src/lexer.in.cc | 234 |
1 files changed, 234 insertions, 0 deletions
// Copyright 2011 Google Inc. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

// NOTE: this is the re2c input file (lexer.in.cc); the /*!re2c ... */
// blocks below are translated by re2c into the generated scanner code.
// The scanners assume the input buffer is NUL-terminated (see the "nul"
// rules), so yyfill is disabled.

#include "lexer.h"

#include "eval_env.h"

// Report an error at the position of the most recently read token.
// Fills *err with "filename:line: message", followed by the offending
// source line (truncated to fit) and a caret marking the column.
// Always returns false, so callers can write "return Error(...)".
bool Lexer::Error(const string& message, string* err) {
  // Compute line/column.
  // Walk from the start of the input up to last_token_, counting
  // newlines; afterwards |context| points at the start of the line
  // containing the error.
  int line = 1;
  const char* context = input_.str_;
  for (const char* p = input_.str_; p < last_token_; ++p) {
    if (*p == '\n') {
      ++line;
      context = p + 1;
    }
  }
  // last_token_ is NULL until the first token has been read (see
  // Start()), in which case there is no meaningful column.
  int col = last_token_ ? last_token_ - context : 0;

  char buf[1024];
  snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
  *err = buf;
  *err += message + "\n";

  // Add some context to the message.
  // Only show the line when the error column falls within the
  // truncation limit; otherwise the caret would point past the
  // displayed text.
  const int kTruncateColumn = 72;
  if (col > 0 && col < kTruncateColumn) {
    int len;
    bool truncated = true;
    for (len = 0; len < kTruncateColumn; ++len) {
      if (context[len] == 0 || context[len] == '\n') {
        truncated = false;
        break;
      }
    }
    *err += string(context, len);
    if (truncated)
      *err += "...";
    *err += "\n";
    *err += string(col, ' ');
    *err += "^ near here\n";
  }

  return false;
}

// Convenience constructor: lex an in-memory string under the
// placeholder filename "input" (presumably for the lexer unit tests --
// other callers are expected to use Start() directly; confirm).
Lexer::Lexer(const char* input) {
  Start("input", input);
}

// Begin lexing |input|.  |filename| is retained only for use in error
// messages produced by Error().
void Lexer::Start(StringPiece filename, StringPiece input) {
  filename_ = filename;
  input_ = input;
  ofs_ = input_.str_;
  last_token_ = NULL;
}

// Return a human-readable name for token type |t|, for diagnostics.
const char* Lexer::TokenName(Token t) {
  switch (t) {
  case ERROR: return "lexing error";
  case BUILD: return "'build'";
  case COLON: return "':'";
  case DEFAULT: return "'default'";
  case EQUALS: return "'='";
  case IDENT: return "identifier";
  case INCLUDE: return "'include'";
  case INDENT: return "indent";
  case NEWLINE: return "newline";
  case PIPE2: return "'||'";
  case PIPE: return "'|'";
  case RULE: return "'rule'";
  case SUBNINJA: return "'subninja'";
  case TEOF: return "eof";
  }
  return NULL; // not reached
}

// Rewind so the most recently read token is returned again by the next
// ReadToken().  Provides exactly one token of lookahead: last_token_
// is the start of the last token scanned by ReadToken().
void Lexer::UnreadToken() {
  ofs_ = last_token_;
}

// Scan and return the next token.  |p| is re2c's cursor, |q| its
// backtracking marker, and |start| marks the beginning of the current
// token.  The enclosing for-loop only repeats when a comment
// ("#...\n") is matched and skipped via "continue".
Lexer::Token Lexer::ReadToken() {
  const char* p = ofs_;
  const char* q;
  const char* start;
  Lexer::Token token;
  for (;;) {
    start = p;
    /*!re2c
    re2c:define:YYCTYPE = "char";
    re2c:define:YYCURSOR = p;
    re2c:define:YYMARKER = q;
    re2c:yyfill:enable = 0;

    nul = "\000";
    simple_varname = [a-zA-Z0-9_]+;
    varname = [a-zA-Z0-9_.]+;

    "#"[^\000\n]*"\n" { continue; }
    [\n] { token = NEWLINE; break; }
    [ ]+ { token = INDENT; break; }
    "build" { token = BUILD; break; }
    "rule" { token = RULE; break; }
    "default" { token = DEFAULT; break; }
    "=" { token = EQUALS; break; }
    ":" { token = COLON; break; }
    "||" { token = PIPE2; break; }
    "|" { token = PIPE; break; }
    "include" { token = INCLUDE; break; }
    "subninja" { token = SUBNINJA; break; }
    varname { token = IDENT; break; }
    nul { token = TEOF; break; }
    [^] { token = ERROR; break; }
    */
  }

  last_token_ = start;
  ofs_ = p;
  // Skip trailing whitespace so the next read starts at content.
  // NEWLINE and TEOF are excluded: spaces at the beginning of the next
  // line are significant (they lex as INDENT above).
  if (token != NEWLINE && token != TEOF)
    EatWhitespace();
  return token;
}

// Consume the next token if it equals |token| and return true;
// otherwise push it back with UnreadToken() and return false.
bool Lexer::PeekToken(Token token) {
  Token t = ReadToken();
  if (t == token)
    return true;
  UnreadToken();
  return false;
}

// Skip runs of spaces and "$\n" line continuations.  Stops at NUL or
// any other character without consuming it (ofs_ is only advanced at
// the top of the loop, before the rule that breaks out matches).
void Lexer::EatWhitespace() {
  const char* p = ofs_;
  for (;;) {
    ofs_ = p;
    /*!re2c
    [ ]+ { continue; }
    "$\n" { continue; }
    nul { break; }
    [^] { break; }
    */
  }
}

// Read one identifier (varname: [a-zA-Z0-9_.]+) into *out and skip the
// whitespace after it.  Returns false without consuming anything if
// the next character cannot start an identifier.
bool Lexer::ReadIdent(string* out) {
  const char* p = ofs_;
  for (;;) {
    const char* start = p;
    /*!re2c
    varname {
      out->assign(start, p - start);
      break;
    }
    [^] { return false; }
    */
  }
  ofs_ = p;
  EatWhitespace();
  return true;
}

// Read a $-interpolated string into |eval|.  If |path| is true the
// string is a path: it ends at (and does not consume) ' ', ':', '|' or
// newline.  Otherwise it runs to (and consumes) the end of the line,
// with ' ', ':' and '|' taken literally.  Handles the escapes "$$",
// "$ ", "$\n" (line continuation, eating following indent), "${var}"
// and "$simple_var" (both added as SPECIAL, i.e. variable references).
// On a bad $-escape, a lexing error, or EOF, records the position in
// last_token_ and reports failure through Error().
bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
  const char* p = ofs_;
  const char* q;
  const char* start;
  for (;;) {
    start = p;
    /*!re2c
    [^$ :\n|\000]+ {
      eval->Add(EvalString::RAW, StringPiece(start, p - start));
      continue;
    }
    [ :|\n] {
      if (path) {
        p = start;
        break;
      } else {
        if (*start == '\n')
          break;
        eval->Add(EvalString::RAW, StringPiece(start, 1));
        continue;
      }
    }
    "$$" {
      eval->Add(EvalString::RAW, StringPiece("$", 1));
      continue;
    }
    "$ " {
      eval->Add(EvalString::RAW, StringPiece(" ", 1));
      continue;
    }
    "$\n"[ ]* {
      continue;
    }
    "${"varname"}" {
      eval->Add(EvalString::SPECIAL, StringPiece(start + 2, p - start - 3));
      continue;
    }
    "$"simple_varname {
      eval->Add(EvalString::SPECIAL, StringPiece(start + 1, p - start - 1));
      continue;
    }
    "$". {
      last_token_ = start;
      return Error("bad $-escape (literal $ must be written as $$)", err);
    }
    nul {
      last_token_ = start;
      return Error("unexpected EOF", err);
    }
    [^] {
      last_token_ = start;
      return Error("lexing error", err);
    }
    */
  }
  last_token_ = start;
  ofs_ = p;
  if (path)
    EatWhitespace();
  // Non-path strings end in newlines, so there's no whitespace to eat.
  return true;
}