| field     | value                                                 | date                      |
|-----------|-------------------------------------------------------|---------------------------|
| author    | Evan Martin <martine@danga.com>                       | 2011-12-29 21:00:27 (GMT) |
| committer | Evan Martin <martine@danga.com>                       | 2011-12-29 21:14:39 (GMT) |
| commit    | 8a0c96075786c1983bdfa2f37f32b75200ea0334 (patch)      |                           |
| tree      | 95e2b0c24aedcda9ec5ed09329e69fd7a1925212 /src/lexer.h |                           |
| parent    | ad7d9f43f1bd8e04321d8fdb07ebf7b96ab525a1 (diff)       |                           |
switch the core ninja parser to use re2c for the lexer
- Delete the old "Tokenizer" code.
- Write separate tests for the lexer, distinct from the parser tests (a sketch of such a test follows after this list).
- Switch the parser to use the new code.
- New lexer error output includes file:line numbers, so an editor such as Emacs can
  jump straight to the syntax error.
- The EvalEnv ($-interpolation) code is now part of the lexer as well.
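
Because the new Lexer can be constructed directly from a string (see the `explicit Lexer(const char* input)` helper in the header below), lexer tests no longer need to go through the parser. The following is only a rough sketch of what such a test could look like, assuming a googletest-style harness and the token behavior described by the comments in src/lexer.h; the actual test file added by this commit is not shown in this diff.

```cpp
#include <gtest/gtest.h>
#include <string>

#include "lexer.h"

// Hypothetical lexer-only test: feed a small manifest fragment to the Lexer
// via its test-helper constructor and check the tokens it produces.
TEST(LexerSketch, ReadsRuleDeclaration) {
  Lexer lexer("rule cat\n");

  // "rule" should come back as the RULE keyword token.
  EXPECT_EQ(Lexer::RULE, lexer.ReadToken());

  // The rule name is a plain identifier, read with ReadIdent().
  std::string ident;
  EXPECT_TRUE(lexer.ReadIdent(&ident));
  EXPECT_EQ("cat", ident);

  // The line ends with a NEWLINE token, then end of input.
  EXPECT_EQ(Lexer::NEWLINE, lexer.ReadToken());
  EXPECT_EQ(Lexer::TEOF, lexer.ReadToken());
}
```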
Diffstat (limited to 'src/lexer.h')

| mode       | path        | lines |
|------------|-------------|-------|
| -rw-r--r-- | src/lexer.h | 88    |

1 file changed, 88 insertions, 0 deletions
```diff
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..40e602a
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,88 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "string_piece.h"
+
+struct EvalString;
+
+struct Lexer {
+  Lexer() {}
+  /// Helper ctor useful for tests.
+  explicit Lexer(const char* input);
+
+  enum Token {
+    ERROR,
+    BUILD,
+    COLON,
+    DEFAULT,
+    EQUALS,
+    IDENT,
+    INCLUDE,
+    INDENT,
+    NEWLINE,
+    PIPE,
+    PIPE2,
+    RULE,
+    SUBNINJA,
+    TEOF,
+  };
+
+  /// Return a human-readable form of a token, used in error messages.
+  static const char* TokenName(Token t);
+
+  /// Start parsing some input.
+  void Start(StringPiece filename, StringPiece input);
+
+  /// Read a Token from the Token enum.
+  Token ReadToken();
+
+  /// Rewind to the last read Token.
+  void UnreadToken();
+
+  /// If the next token is \a token, read it and return true.
+  bool PeekToken(Token token);
+
+  /// Read a simple identifier (a rule or variable name).
+  /// Returns false if a name can't be read.
+  bool ReadIdent(string* out);
+
+  /// Read a path (complete with $escapes).
+  /// Returns false only on error, returned path may be empty if a delimiter
+  /// (space, newline) is hit.
+  bool ReadPath(EvalString* path, string* err) {
+    return ReadEvalString(path, true, err);
+  }
+
+  /// Read the value side of a var = value line (complete with $escapes).
+  /// Returns false only on error.
+  bool ReadVarValue(EvalString* value, string* err) {
+    return ReadEvalString(value, false, err);
+  }
+
+  /// Construct an error message with context.
+  bool Error(const string& message, string* err);
+
+private:
+  /// Skip past whitespace (called after each read token/ident/etc.).
+  void EatWhitespace();
+
+  /// Read a $-escaped string.
+  bool ReadEvalString(EvalString* eval, bool path, string* err);
+
+  StringPiece filename_;
+  StringPiece input_;
+  const char* ofs_;
+  const char* last_token_;
+};
+
```
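
To show how this interface fits together, here is a rough sketch of a caller in the style of the manifest parser: it reads one `name = value` binding using ReadIdent, PeekToken and ReadVarValue, and funnels failures through Error() so diagnostics carry the file:line context mentioned in the commit message. The function name ParseVarBinding and the eval_env.h include are assumptions for illustration only; the real parser changes live elsewhere in this commit.

```cpp
#include <string>

#include "eval_env.h"  // assumed location of the EvalString definition in the Ninja tree
#include "lexer.h"

// Illustrative only (not part of this diff): parse a "name = value" line
// using the Lexer interface declared in src/lexer.h.
bool ParseVarBinding(Lexer* lexer, std::string* name, EvalString* value,
                     std::string* err) {
  // ReadIdent() returns false if the next characters do not form an identifier.
  if (!lexer->ReadIdent(name))
    return lexer->Error("expected variable name", err);

  // PeekToken() consumes the '=' only when it is actually the next token.
  if (!lexer->PeekToken(Lexer::EQUALS))
    return lexer->Error("expected '='", err);

  // ReadVarValue() collects the $-escaped right-hand side into an EvalString,
  // filling *err on failure.
  return lexer->ReadVarValue(value, err);
}
```

Note the `return lexer->Error(...)` pattern: the bool return of Error() presumably exists so a caller can report a contextual message and propagate failure in one statement, though that behavior is defined by the lexer implementation rather than by this header.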