| field     | value                                                 | date                      |
|-----------|-------------------------------------------------------|---------------------------|
| author    | Evan Martin <martine@danga.com>                       | 2011-12-29 21:00:27 (GMT) |
| committer | Evan Martin <martine@danga.com>                       | 2011-12-29 21:14:39 (GMT) |
| commit    | 8a0c96075786c1983bdfa2f37f32b75200ea0334 (patch)      |                           |
| tree      | 95e2b0c24aedcda9ec5ed09329e69fd7a1925212 /src/lexer.h |                           |
| parent    | ad7d9f43f1bd8e04321d8fdb07ebf7b96ab525a1 (diff)       |                           |
switch the core ninja parser to use re2c for the lexer
- Delete the old "Tokenizer" code.
- Write separate tests for the lexer, distinct from the parser tests (a sketch of such a test follows after this list).
- Switch the parser to use the new code.
- New lexer error output includes file:line numbers, so an editor such as Emacs can
  jump straight to the syntax error.
- The EvalEnv ($-interpolation) code is now part of the lexer as well.
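
Because the new Lexer can be constructed directly from a string (see the `explicit Lexer(const char* input)` helper in the header below), lexer tests no longer need to go through the parser. The following is only a rough sketch of what such a test could look like, assuming a googletest-style harness and the token behavior described by the comments in src/lexer.h; the actual test file added by this commit is not shown in this diff.

```cpp
#include <gtest/gtest.h>
#include <string>

#include "lexer.h"

// Hypothetical lexer-only test: feed a small manifest fragment to the Lexer
// via its test-helper constructor and check the tokens it produces.
TEST(LexerSketch, ReadsRuleDeclaration) {
  Lexer lexer("rule cat\n");

  // "rule" should come back as the RULE keyword token.
  EXPECT_EQ(Lexer::RULE, lexer.ReadToken());

  // The rule name is a plain identifier, read with ReadIdent().
  std::string ident;
  EXPECT_TRUE(lexer.ReadIdent(&ident));
  EXPECT_EQ("cat", ident);

  // The line ends with a NEWLINE token, then end of input.
  EXPECT_EQ(Lexer::NEWLINE, lexer.ReadToken());
  EXPECT_EQ(Lexer::TEOF, lexer.ReadToken());
}
```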
Diffstat (limited to 'src/lexer.h')

| mode       | path        | lines |
|------------|-------------|-------|
| -rw-r--r-- | src/lexer.h | 88    |

1 file changed, 88 insertions, 0 deletions
```diff
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..40e602a
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,88 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "string_piece.h"
+
+struct EvalString;
+
+struct Lexer {
+  Lexer() {}
+  /// Helper ctor useful for tests.
+  explicit Lexer(const char* input);
+
+  enum Token {
+    ERROR,
+    BUILD,
+    COLON,
+    DEFAULT,
+    EQUALS,
+    IDENT,
+    INCLUDE,
+    INDENT,
+    NEWLINE,
+    PIPE,
+    PIPE2,
+    RULE,
+    SUBNINJA,
+    TEOF,
+  };
+
+  /// Return a human-readable form of a token, used in error messages.
+  static const char* TokenName(Token t);
+
+  /// Start parsing some input.
+  void Start(StringPiece filename, StringPiece input);
+
+  /// Read a Token from the Token enum.
+  Token ReadToken();
+
+  /// Rewind to the last read Token.
+  void UnreadToken();
+
+  /// If the next token is \a token, read it and return true.
+  bool PeekToken(Token token);
+
+  /// Read a simple identifier (a rule or variable name).
+  /// Returns false if a name can't be read.
+  bool ReadIdent(string* out);
+
+  /// Read a path (complete with $escapes).
+  /// Returns false only on error, returned path may be empty if a delimiter
+  /// (space, newline) is hit.
+  bool ReadPath(EvalString* path, string* err) {
+    return ReadEvalString(path, true, err);
+  }
+
+  /// Read the value side of a var = value line (complete with $escapes).
+  /// Returns false only on error.
+  bool ReadVarValue(EvalString* value, string* err) {
+    return ReadEvalString(value, false, err);
+  }
+
+  /// Construct an error message with context.
+  bool Error(const string& message, string* err);
+
+private:
+  /// Skip past whitespace (called after each read token/ident/etc.).
+  void EatWhitespace();
+
+  /// Read a $-escaped string.
+  bool ReadEvalString(EvalString* eval, bool path, string* err);
+
+  StringPiece filename_;
+  StringPiece input_;
+  const char* ofs_;
+  const char* last_token_;
+};
+
```
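
To show how this interface fits together, here is a rough sketch of a caller in the style of the manifest parser: it reads one `name = value` binding using ReadIdent, PeekToken and ReadVarValue, and funnels failures through Error() so diagnostics carry the file:line context mentioned in the commit message. The function name ParseVarBinding and the eval_env.h include are assumptions for illustration only; the real parser changes live elsewhere in this commit.

```cpp
#include <string>

#include "eval_env.h"  // assumed location of the EvalString definition in the Ninja tree
#include "lexer.h"

// Illustrative only (not part of this diff): parse a "name = value" line
// using the Lexer interface declared in src/lexer.h.
bool ParseVarBinding(Lexer* lexer, std::string* name, EvalString* value,
                     std::string* err) {
  // ReadIdent() returns false if the next characters do not form an identifier.
  if (!lexer->ReadIdent(name))
    return lexer->Error("expected variable name", err);

  // PeekToken() consumes the '=' only when it is actually the next token.
  if (!lexer->PeekToken(Lexer::EQUALS))
    return lexer->Error("expected '='", err);

  // ReadVarValue() collects the $-escaped right-hand side into an EvalString,
  // filling *err on failure.
  return lexer->ReadVarValue(value, err);
}
```

Note the `return lexer->Error(...)` pattern: the bool return of Error() presumably exists so a caller can report a contextual message and propagate failure in one statement, though that behavior is defined by the lexer implementation rather than by this header.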