summaryrefslogtreecommitdiffstats
path: root/src/lexer.h
diff options
context:
space:
mode:
authorEvan Martin <martine@danga.com>2011-12-29 21:00:27 (GMT)
committerEvan Martin <martine@danga.com>2011-12-29 21:14:39 (GMT)
commit8a0c96075786c1983bdfa2f37f32b75200ea0334 (patch)
tree95e2b0c24aedcda9ec5ed09329e69fd7a1925212 /src/lexer.h
parentad7d9f43f1bd8e04321d8fdb07ebf7b96ab525a1 (diff)
downloadNinja-8a0c96075786c1983bdfa2f37f32b75200ea0334.zip
Ninja-8a0c96075786c1983bdfa2f37f32b75200ea0334.tar.gz
Ninja-8a0c96075786c1983bdfa2f37f32b75200ea0334.tar.bz2
switch the core ninja parser to use re2c for the lexer
- Delete the old "Tokenizer" code. - Write separate tests for the lexer distinct from the parser. - Switch the parser to use the new code. - New lexer error output has file:line numbers so e.g. Emacs can jump your editor to the syntax error. - The EvalEnv ($-interpolation) code is now part of the lexer as well.
Diffstat (limited to 'src/lexer.h')
-rw-r--r--src/lexer.h88
1 files changed, 88 insertions, 0 deletions
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..40e602a
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,88 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "string_piece.h"
+
+struct EvalString;
+
+struct Lexer {  // Lexer state plus the token/identifier/path reading interface.
+ Lexer() : ofs_(0), last_token_(0) {}  // Cursors start null, not indeterminate, until Start().
+ /// Helper ctor useful for tests.
+ explicit Lexer(const char* input);
+
+ enum Token {
+ ERROR,
+ BUILD,
+ COLON,
+ DEFAULT,
+ EQUALS,
+ IDENT,
+ INCLUDE,
+ INDENT,
+ NEWLINE,
+ PIPE,
+ PIPE2,
+ RULE,
+ SUBNINJA,
+ TEOF,
+ };
+
+ /// Return a human-readable form of a token, used in error messages.
+ static const char* TokenName(Token t);
+
+ /// Start parsing some input.
+ void Start(StringPiece filename, StringPiece input);
+
+ /// Read a Token from the Token enum.
+ Token ReadToken();
+
+ /// Rewind to the last read Token.
+ void UnreadToken();
+
+ /// If the next token is \a token, read it and return true.
+ bool PeekToken(Token token);
+
+ /// Read a simple identifier (a rule or variable name).
+ /// Returns false if a name can't be read.
+ bool ReadIdent(string* out);
+
+ /// Read a path (complete with $escapes).
+ /// Returns false only on error, returned path may be empty if a delimiter
+ /// (space, newline) is hit.
+ bool ReadPath(EvalString* path, string* err) {
+ return ReadEvalString(path, true, err);  // true selects path mode.
+ }
+
+ /// Read the value side of a var = value line (complete with $escapes).
+ /// Returns false only on error.
+ bool ReadVarValue(EvalString* value, string* err) {
+ return ReadEvalString(value, false, err);  // false selects value mode.
+ }
+
+ /// Construct an error message with context.
+ bool Error(const string& message, string* err);
+
+private:
+ /// Skip past whitespace (called after each read token/ident/etc.).
+ void EatWhitespace();
+
+ /// Read a $-escaped string; \a path is true from ReadPath(), false from ReadVarValue().
+ bool ReadEvalString(EvalString* eval, bool path, string* err);
+
+ StringPiece filename_;
+ StringPiece input_;
+ const char* ofs_;  // NOTE(review): presumably the current read position in input_ -- confirm.
+ const char* last_token_;  // NOTE(review): presumably where UnreadToken() rewinds to -- confirm.
+};
+