author    Evan Martin <martine@danga.com>  2011-12-29 21:00:27 (GMT)
committer Evan Martin <martine@danga.com>  2011-12-29 21:14:39 (GMT)
commit    8a0c96075786c1983bdfa2f37f32b75200ea0334 (patch)
tree      95e2b0c24aedcda9ec5ed09329e69fd7a1925212 /src
parent    ad7d9f43f1bd8e04321d8fdb07ebf7b96ab525a1 (diff)
switch the core ninja parser to use re2c for the lexer
- Delete the old "Tokenizer" code.
- Write separate tests for the lexer distinct from the parser.
- Switch the parser to use the new code.
- New lexer error output has file:line numbers so e.g. Emacs can jump your editor to the syntax error.
- The EvalEnv ($-interpolation) code is now part of the lexer as well.
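
For readers new to re2c: it is a scanner generator that expands regular-expression rules, written inside /*!re2c ... */ comments in an ordinary C++ file, into a goto-based state machine with no runtime library dependency; the generated lexer.cc is checked in, so re2c is only needed when the grammar changes. A minimal sketch of the idiom (a toy spec, not code from this commit): the first rule matches an identifier, NUL ends the input, and anything else is skipped.

    // count.in.cc -- toy re2c spec; process with `re2c -o count.cc count.in.cc`
    #include <stdio.h>

    // Count identifier-like tokens in a NUL-terminated string.
    static int CountIdents(const char* p) {
      int count = 0;
      for (;;) {
        /*!re2c
        re2c:define:YYCTYPE = "char";
        re2c:define:YYCURSOR = p;
        re2c:yyfill:enable = 0;

        [a-zA-Z_][a-zA-Z0-9_]* { ++count; continue; }
        "\000"                 { return count; }
        [^]                    { continue; }
        */
      }
    }

    int main() {
      printf("%d\n", CountIdents("foo bar1 +baz"));  // prints 3
    }
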
Diffstat (limited to 'src')
-rw-r--r--  src/eval_env.cc        |  80
-rw-r--r--  src/eval_env.h         |  14
-rw-r--r--  src/eval_env_test.cc   | 101
-rw-r--r--  src/graph.h            |   4
-rw-r--r--  src/lexer.cc           | 729
-rw-r--r--  src/lexer.h            |  88
-rw-r--r--  src/lexer.in.cc        | 234
-rw-r--r--  src/lexer_test.cc      |  85
-rw-r--r--  src/ninja.cc           |  12
-rw-r--r--  src/parsers.cc         | 617
-rw-r--r--  src/parsers.h          |  97
-rw-r--r--  src/parsers_test.cc    | 201
-rw-r--r--  src/state_test.cc      |   9
-rw-r--r--  src/test.cc            |   2
14 files changed, 1505 insertions(+), 768 deletions(-)
diff --git a/src/eval_env.cc b/src/eval_env.cc
index fa5e35b..57c20c6 100644
--- a/src/eval_env.cc
+++ b/src/eval_env.cc
@@ -27,64 +27,6 @@ void BindingEnv::AddBinding(const string& key, const string& val) {
bindings_[key] = val;
}
-bool EvalString::Parse(const string& input, string* err, size_t* err_index) {
- unparsed_ = input;
-
- string::size_type start, end;
- start = 0;
- do {
- end = input.find('$', start);
- if (end == string::npos) {
- end = input.size();
- break;
- }
- if (end > start)
- parsed_.push_back(make_pair(input.substr(start, end - start), RAW));
- start = end + 1;
- if (start < input.size() && input[start] == '{') {
- ++start;
- for (end = start + 1; end < input.size(); ++end) {
- if (input[end] == '}')
- break;
- }
- if (end >= input.size()) {
- *err = "expected closing curly after ${";
- if (err_index)
- *err_index = end;
- return false;
- }
- parsed_.push_back(make_pair(input.substr(start, end - start), SPECIAL));
- ++end;
- } else if (start < input.size() && input[start] == '$') {
- parsed_.push_back(make_pair("$", RAW));
- end = start + 1;
- } else if (start < input.size() && input[start] == ' ') {
- parsed_.push_back(make_pair(" ", RAW));
- end = start + 1;
- } else {
- for (end = start; end < input.size(); ++end) {
- char c = input[end];
- if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') ||
- ('0' <= c && c <= '9') || c == '_')) {
- break;
- }
- }
- if (end == start) {
- *err = "expected variable after $";
- if (err_index)
- *err_index = start;
- return false;
- }
- parsed_.push_back(make_pair(input.substr(start, end - start), SPECIAL));
- }
- start = end;
- } while (end < input.size());
- if (end > start)
- parsed_.push_back(make_pair(input.substr(start, end - start), RAW));
-
- return true;
-}
-
string EvalString::Evaluate(Env* env) const {
string result;
for (TokenList::const_iterator i = parsed_.begin(); i != parsed_.end(); ++i) {
@@ -95,3 +37,25 @@ string EvalString::Evaluate(Env* env) const {
}
return result;
}
+
+void EvalString::Add(TokenType type, StringPiece text) {
+ // Add it to the end of an existing RAW token if possible.
+ if (type == RAW && !parsed_.empty() && parsed_.back().second == RAW) {
+ parsed_.back().first.append(text.str_, text.len_);
+ } else {
+ parsed_.push_back(make_pair(text.AsString(), type));
+ }
+}
+
+string EvalString::Serialize() const {
+ string result;
+ for (TokenList::const_iterator i = parsed_.begin();
+ i != parsed_.end(); ++i) {
+ result.append("[");
+ if (i->second == SPECIAL)
+ result.append("$");
+ result.append(i->first);
+ result.append("]");
+ }
+ return result;
+}
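
The two additions above replace the deleted EvalString::Parse(): the lexer now feeds tokens in via Add(), and Serialize() renders the token list for tests. A sketch of the flow, using only names from this diff (assumes #include "eval_env.h"):

    EvalString eval;
    eval.Add(EvalString::RAW, StringPiece("hi ", 3));      // literal text
    eval.Add(EvalString::SPECIAL, StringPiece("var", 3));  // a $var reference
    // Consecutive RAW adds coalesce into one token, so Serialize() yields:
    //   eval.Serialize()  -> "[hi ][$var]"
    // Evaluate() expands SPECIAL tokens through an Env:
    //   BindingEnv env;
    //   env.AddBinding("var", "there");
    //   eval.Evaluate(&env)  -> "hi there"
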
diff --git a/src/eval_env.h b/src/eval_env.h
index ed7c2f4..8c144f0 100644
--- a/src/eval_env.h
+++ b/src/eval_env.h
@@ -20,6 +20,8 @@
#include <vector>
using namespace std;
+#include "string_piece.h"
+
/// An interface for a scope for variable (e.g. "$foo") lookups.
struct Env {
virtual ~Env() {}
@@ -41,14 +43,18 @@ struct BindingEnv : public Env {
/// A tokenized string that contains variable references.
/// Can be evaluated relative to an Env.
struct EvalString {
- bool Parse(const string& input, string* err, size_t* err_index=NULL);
string Evaluate(Env* env) const;
- const string& unparsed() const { return unparsed_; }
- bool empty() const { return unparsed_.empty(); }
+ void Clear() { parsed_.clear(); }
+ bool empty() const { return parsed_.empty(); }
- string unparsed_;
enum TokenType { RAW, SPECIAL };
+ void Add(TokenType type, StringPiece text);
+
+ /// Construct a human-readable representation of the parsed state,
+ /// for use in tests.
+ string Serialize() const;
+
typedef vector<pair<string, TokenType> > TokenList;
TokenList parsed_;
};
diff --git a/src/eval_env_test.cc b/src/eval_env_test.cc
deleted file mode 100644
index 4836e24..0000000
--- a/src/eval_env_test.cc
+++ /dev/null
@@ -1,101 +0,0 @@
-// Copyright 2011 Google Inc. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-#include <gtest/gtest.h>
-
-#include <map>
-#include <string>
-
-#include "eval_env.h"
-
-namespace {
-
-struct TestEnv : public Env {
- virtual string LookupVariable(const string& var) {
- return vars[var];
- }
- map<string, string> vars;
-};
-
-TEST(EvalString, PlainText) {
- EvalString str;
- string err;
- EXPECT_TRUE(str.Parse("plain text", &err));
- EXPECT_EQ("", err);
- EXPECT_EQ("plain text", str.Evaluate(NULL));
-}
-
-TEST(EvalString, OneVariable) {
- EvalString str;
- string err;
- EXPECT_TRUE(str.Parse("hi $var", &err));
- EXPECT_EQ("", err);
- EXPECT_EQ("hi $var", str.unparsed());
- TestEnv env;
- EXPECT_EQ("hi ", str.Evaluate(&env));
- env.vars["var"] = "there";
- EXPECT_EQ("hi there", str.Evaluate(&env));
-}
-
-TEST(EvalString, OneVariableUpperCase) {
- EvalString str;
- string err;
- EXPECT_TRUE(str.Parse("hi $VaR", &err));
- EXPECT_EQ("", err);
- EXPECT_EQ("hi $VaR", str.unparsed());
- TestEnv env;
- EXPECT_EQ("hi ", str.Evaluate(&env));
- env.vars["VaR"] = "there";
- EXPECT_EQ("hi there", str.Evaluate(&env));
-}
-
-TEST(EvalString, Error) {
- EvalString str;
- string err;
- size_t err_index;
- EXPECT_FALSE(str.Parse("bad $", &err, &err_index));
- EXPECT_EQ("expected variable after $", err);
- EXPECT_EQ(5u, err_index);
-}
-TEST(EvalString, CurlyError) {
- EvalString str;
- string err;
- size_t err_index;
- EXPECT_FALSE(str.Parse("bad ${bar", &err, &err_index));
- EXPECT_EQ("expected closing curly after ${", err);
- EXPECT_EQ(9u, err_index);
-}
-
-TEST(EvalString, Curlies) {
- EvalString str;
- string err;
- EXPECT_TRUE(str.Parse("foo ${var}baz", &err));
- EXPECT_EQ("", err);
- TestEnv env;
- EXPECT_EQ("foo baz", str.Evaluate(&env));
- env.vars["var"] = "barbar";
- EXPECT_EQ("foo barbarbaz", str.Evaluate(&env));
-}
-
-TEST(EvalString, Dollars) {
- EvalString str;
- string err;
- EXPECT_TRUE(str.Parse("foo$$bar$bar", &err));
- ASSERT_EQ("", err);
- TestEnv env;
- env.vars["bar"] = "baz";
- EXPECT_EQ("foo$barbaz", str.Evaluate(&env));
-}
-
-} // namespace
diff --git a/src/graph.h b/src/graph.h
index b483c6d..20765a3 100644
--- a/src/graph.h
+++ b/src/graph.h
@@ -109,10 +109,10 @@ struct Rule {
const EvalString& description() const { return description_; }
const EvalString& depfile() const { return depfile_; }
- private:
+ // TODO: private:
+
// Allow the parsers to reach into this object and fill out its fields.
friend class ManifestParser;
- friend class ParserTest;
string name_;
diff --git a/src/lexer.cc b/src/lexer.cc
new file mode 100644
index 0000000..0371371
--- /dev/null
+++ b/src/lexer.cc
@@ -0,0 +1,729 @@
+/* Generated by re2c 0.13.5 */
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lexer.h"
+
+#include <stdio.h>
+
+#include "eval_env.h"
+
+bool Lexer::Error(const string& message, string* err) {
+ // Compute line/column.
+ int line = 1;
+ const char* context = input_.str_;
+ for (const char* p = input_.str_; p < last_token_; ++p) {
+ if (*p == '\n') {
+ ++line;
+ context = p + 1;
+ }
+ }
+ int col = last_token_ ? last_token_ - context : 0;
+
+ char buf[1024];
+ snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
+ *err = buf;
+ *err += message + "\n";
+
+ // Add some context to the message.
+ const int kTruncateColumn = 72;
+ if (col > 0 && col < kTruncateColumn) {
+ int len;
+ bool truncated = true;
+ for (len = 0; len < kTruncateColumn; ++len) {
+ if (context[len] == 0 || context[len] == '\n') {
+ truncated = false;
+ break;
+ }
+ }
+ *err += string(context, len);
+ if (truncated)
+ *err += "...";
+ *err += "\n";
+ *err += string(col, ' ');
+ *err += "^ near here\n";
+ }
+
+ return false;
+}
+
+Lexer::Lexer(const char* input) {
+ Start("input", input);
+}
+
+void Lexer::Start(StringPiece filename, StringPiece input) {
+ filename_ = filename;
+ input_ = input;
+ ofs_ = input_.str_;
+ last_token_ = NULL;
+}
+
+const char* Lexer::TokenName(Token t) {
+ switch (t) {
+ case ERROR: return "lexing error";
+ case BUILD: return "'build'";
+ case COLON: return "':'";
+ case DEFAULT: return "'default'";
+ case EQUALS: return "'='";
+ case IDENT: return "identifier";
+ case INCLUDE: return "'include'";
+ case INDENT: return "indent";
+ case NEWLINE: return "newline";
+ case PIPE2: return "'||'";
+ case PIPE: return "'|'";
+ case RULE: return "'rule'";
+ case SUBNINJA: return "'subninja'";
+ case TEOF: return "eof";
+ }
+ return NULL; // not reached
+}
+
+void Lexer::UnreadToken() {
+ ofs_ = last_token_;
+}
+
+Lexer::Token Lexer::ReadToken() {
+ const char* p = ofs_;
+ const char* q;
+ const char* start;
+ Lexer::Token token;
+ for (;;) {
+ start = p;
+
+{
+ char yych;
+ static const unsigned char yybm[] = {
+ 0, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 0, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 192, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 160, 128,
+ 160, 160, 160, 160, 160, 160, 160, 160,
+ 160, 160, 128, 128, 128, 128, 128, 128,
+ 128, 160, 160, 160, 160, 160, 160, 160,
+ 160, 160, 160, 160, 160, 160, 160, 160,
+ 160, 160, 160, 160, 160, 160, 160, 160,
+ 160, 160, 160, 128, 128, 128, 128, 160,
+ 128, 160, 160, 160, 160, 160, 160, 160,
+ 160, 160, 160, 160, 160, 160, 160, 160,
+ 160, 160, 160, 160, 160, 160, 160, 160,
+ 160, 160, 160, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ };
+
+ yych = *p;
+ if (yych <= 'Z') {
+ if (yych <= '-') {
+ if (yych <= 0x1F) {
+ if (yych <= 0x00) goto yy21;
+ if (yych == '\n') goto yy4;
+ goto yy23;
+ } else {
+ if (yych <= ' ') goto yy6;
+ if (yych != '#') goto yy23;
+ }
+ } else {
+ if (yych <= ':') {
+ if (yych == '/') goto yy23;
+ if (yych <= '9') goto yy20;
+ goto yy14;
+ } else {
+ if (yych == '=') goto yy12;
+ if (yych <= '@') goto yy23;
+ goto yy20;
+ }
+ }
+ } else {
+ if (yych <= 'h') {
+ if (yych <= 'a') {
+ if (yych == '_') goto yy20;
+ if (yych <= '`') goto yy23;
+ goto yy20;
+ } else {
+ if (yych <= 'b') goto yy8;
+ if (yych == 'd') goto yy11;
+ goto yy20;
+ }
+ } else {
+ if (yych <= 's') {
+ if (yych <= 'i') goto yy18;
+ if (yych <= 'q') goto yy20;
+ if (yych <= 'r') goto yy10;
+ goto yy19;
+ } else {
+ if (yych <= 'z') goto yy20;
+ if (yych == '|') goto yy16;
+ goto yy23;
+ }
+ }
+ }
+ yych = *(q = ++p);
+ if (yych >= 0x01) goto yy62;
+yy3:
+ { token = ERROR; break; }
+yy4:
+ ++p;
+ { token = NEWLINE; break; }
+yy6:
+ ++p;
+ yych = *p;
+ goto yy60;
+yy7:
+ { token = INDENT; break; }
+yy8:
+ ++p;
+ if ((yych = *p) == 'u') goto yy54;
+ goto yy25;
+yy9:
+ { token = IDENT; break; }
+yy10:
+ yych = *++p;
+ if (yych == 'u') goto yy50;
+ goto yy25;
+yy11:
+ yych = *++p;
+ if (yych == 'e') goto yy43;
+ goto yy25;
+yy12:
+ ++p;
+ { token = EQUALS; break; }
+yy14:
+ ++p;
+ { token = COLON; break; }
+yy16:
+ ++p;
+ if ((yych = *p) == '|') goto yy41;
+ { token = PIPE; break; }
+yy18:
+ yych = *++p;
+ if (yych == 'n') goto yy34;
+ goto yy25;
+yy19:
+ yych = *++p;
+ if (yych == 'u') goto yy26;
+ goto yy25;
+yy20:
+ yych = *++p;
+ goto yy25;
+yy21:
+ ++p;
+ { token = TEOF; break; }
+yy23:
+ yych = *++p;
+ goto yy3;
+yy24:
+ ++p;
+ yych = *p;
+yy25:
+ if (yybm[0+yych] & 32) {
+ goto yy24;
+ }
+ goto yy9;
+yy26:
+ yych = *++p;
+ if (yych != 'b') goto yy25;
+ yych = *++p;
+ if (yych != 'n') goto yy25;
+ yych = *++p;
+ if (yych != 'i') goto yy25;
+ yych = *++p;
+ if (yych != 'n') goto yy25;
+ yych = *++p;
+ if (yych != 'j') goto yy25;
+ yych = *++p;
+ if (yych != 'a') goto yy25;
+ ++p;
+ if (yybm[0+(yych = *p)] & 32) {
+ goto yy24;
+ }
+ { token = SUBNINJA; break; }
+yy34:
+ yych = *++p;
+ if (yych != 'c') goto yy25;
+ yych = *++p;
+ if (yych != 'l') goto yy25;
+ yych = *++p;
+ if (yych != 'u') goto yy25;
+ yych = *++p;
+ if (yych != 'd') goto yy25;
+ yych = *++p;
+ if (yych != 'e') goto yy25;
+ ++p;
+ if (yybm[0+(yych = *p)] & 32) {
+ goto yy24;
+ }
+ { token = INCLUDE; break; }
+yy41:
+ ++p;
+ { token = PIPE2; break; }
+yy43:
+ yych = *++p;
+ if (yych != 'f') goto yy25;
+ yych = *++p;
+ if (yych != 'a') goto yy25;
+ yych = *++p;
+ if (yych != 'u') goto yy25;
+ yych = *++p;
+ if (yych != 'l') goto yy25;
+ yych = *++p;
+ if (yych != 't') goto yy25;
+ ++p;
+ if (yybm[0+(yych = *p)] & 32) {
+ goto yy24;
+ }
+ { token = DEFAULT; break; }
+yy50:
+ yych = *++p;
+ if (yych != 'l') goto yy25;
+ yych = *++p;
+ if (yych != 'e') goto yy25;
+ ++p;
+ if (yybm[0+(yych = *p)] & 32) {
+ goto yy24;
+ }
+ { token = RULE; break; }
+yy54:
+ yych = *++p;
+ if (yych != 'i') goto yy25;
+ yych = *++p;
+ if (yych != 'l') goto yy25;
+ yych = *++p;
+ if (yych != 'd') goto yy25;
+ ++p;
+ if (yybm[0+(yych = *p)] & 32) {
+ goto yy24;
+ }
+ { token = BUILD; break; }
+yy59:
+ ++p;
+ yych = *p;
+yy60:
+ if (yybm[0+yych] & 64) {
+ goto yy59;
+ }
+ goto yy7;
+yy61:
+ ++p;
+ yych = *p;
+yy62:
+ if (yybm[0+yych] & 128) {
+ goto yy61;
+ }
+ if (yych >= 0x01) goto yy64;
+ p = q;
+ goto yy3;
+yy64:
+ ++p;
+ { continue; }
+}
+
+ }
+
+ last_token_ = start;
+ ofs_ = p;
+ if (token != NEWLINE && token != TEOF)
+ EatWhitespace();
+ return token;
+}
+
+bool Lexer::PeekToken(Token token) {
+ Token t = ReadToken();
+ if (t == token)
+ return true;
+ UnreadToken();
+ return false;
+}
+
+void Lexer::EatWhitespace() {
+ const char* p = ofs_;
+ for (;;) {
+ ofs_ = p;
+
+{
+ char yych;
+ static const unsigned char yybm[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 128, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ yych = *p;
+ if (yych <= ' ') {
+ if (yych <= 0x00) goto yy72;
+ if (yych <= 0x1F) goto yy74;
+ } else {
+ if (yych == '$') goto yy70;
+ goto yy74;
+ }
+ ++p;
+ yych = *p;
+ goto yy78;
+yy69:
+ { continue; }
+yy70:
+ ++p;
+ if ((yych = *p) == '\n') goto yy75;
+yy71:
+ { break; }
+yy72:
+ ++p;
+ { break; }
+yy74:
+ yych = *++p;
+ goto yy71;
+yy75:
+ ++p;
+ { continue; }
+yy77:
+ ++p;
+ yych = *p;
+yy78:
+ if (yybm[0+yych] & 128) {
+ goto yy77;
+ }
+ goto yy69;
+}
+
+ }
+}
+
+bool Lexer::ReadIdent(string* out) {
+ const char* p = ofs_;
+ for (;;) {
+ const char* start = p;
+
+{
+ char yych;
+ static const unsigned char yybm[] = {
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 128, 0,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 0, 0, 0, 0, 0, 0,
+ 0, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 0, 0, 0, 0, 128,
+ 0, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ };
+ yych = *p;
+ if (yych <= '@') {
+ if (yych <= '.') {
+ if (yych <= '-') goto yy83;
+ } else {
+ if (yych <= '/') goto yy83;
+ if (yych >= ':') goto yy83;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= 'Z') goto yy81;
+ if (yych <= '^') goto yy83;
+ } else {
+ if (yych <= '`') goto yy83;
+ if (yych >= '{') goto yy83;
+ }
+ }
+yy81:
+ ++p;
+ yych = *p;
+ goto yy86;
+yy82:
+ {
+ out->assign(start, p - start);
+ break;
+ }
+yy83:
+ ++p;
+ { return false; }
+yy85:
+ ++p;
+ yych = *p;
+yy86:
+ if (yybm[0+yych] & 128) {
+ goto yy85;
+ }
+ goto yy82;
+}
+
+ }
+ ofs_ = p;
+ EatWhitespace();
+ return true;
+}
+
+bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
+ const char* p = ofs_;
+ const char* q;
+ const char* start;
+ for (;;) {
+ start = p;
+
+{
+ char yych;
+ static const unsigned char yybm[] = {
+ 0, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 0, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 16, 128, 128, 128, 0, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 160, 128,
+ 224, 224, 224, 224, 224, 224, 224, 224,
+ 224, 224, 0, 128, 128, 128, 128, 128,
+ 128, 224, 224, 224, 224, 224, 224, 224,
+ 224, 224, 224, 224, 224, 224, 224, 224,
+ 224, 224, 224, 224, 224, 224, 224, 224,
+ 224, 224, 224, 128, 128, 128, 128, 224,
+ 128, 224, 224, 224, 224, 224, 224, 224,
+ 224, 224, 224, 224, 224, 224, 224, 224,
+ 224, 224, 224, 224, 224, 224, 224, 224,
+ 224, 224, 224, 128, 0, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ 128, 128, 128, 128, 128, 128, 128, 128,
+ };
+ yych = *p;
+ if (yych <= '#') {
+ if (yych <= '\n') {
+ if (yych <= 0x00) goto yy95;
+ if (yych >= '\n') goto yy91;
+ } else {
+ if (yych == ' ') goto yy91;
+ }
+ } else {
+ if (yych <= ':') {
+ if (yych <= '$') goto yy93;
+ if (yych >= ':') goto yy91;
+ } else {
+ if (yych == '|') goto yy91;
+ }
+ }
+ ++p;
+ yych = *p;
+ goto yy117;
+yy90:
+ {
+ eval->Add(EvalString::RAW, StringPiece(start, p - start));
+ continue;
+ }
+yy91:
+ ++p;
+ {
+ if (path) {
+ p = start;
+ break;
+ } else {
+ if (*start == '\n')
+ break;
+ eval->Add(EvalString::RAW, StringPiece(start, 1));
+ continue;
+ }
+ }
+yy93:
+ ++p;
+ if ((yych = *p) <= '9') {
+ if (yych <= ' ') {
+ if (yych == '\n') goto yy106;
+ if (yych <= 0x1F) goto yy97;
+ goto yy99;
+ } else {
+ if (yych == '$') goto yy101;
+ if (yych <= '/') goto yy97;
+ goto yy103;
+ }
+ } else {
+ if (yych <= '_') {
+ if (yych <= '@') goto yy97;
+ if (yych <= 'Z') goto yy103;
+ if (yych <= '^') goto yy97;
+ goto yy103;
+ } else {
+ if (yych <= '`') goto yy97;
+ if (yych <= 'z') goto yy103;
+ if (yych <= '{') goto yy105;
+ goto yy97;
+ }
+ }
+ {
+ last_token_ = start;
+ return Error("lexing error", err);
+ }
+yy95:
+ ++p;
+ {
+ last_token_ = start;
+ return Error("unexpected EOF", err);
+ }
+yy97:
+ ++p;
+yy98:
+ {
+ last_token_ = start;
+ return Error("bad $-escape (literal $ must be written as $$)", err);
+ }
+yy99:
+ ++p;
+ {
+ eval->Add(EvalString::RAW, StringPiece(" ", 1));
+ continue;
+ }
+yy101:
+ ++p;
+ {
+ eval->Add(EvalString::RAW, StringPiece("$", 1));
+ continue;
+ }
+yy103:
+ ++p;
+ yych = *p;
+ goto yy115;
+yy104:
+ {
+ eval->Add(EvalString::SPECIAL, StringPiece(start + 1, p - start - 1));
+ continue;
+ }
+yy105:
+ yych = *(q = ++p);
+ if (yybm[0+yych] & 32) {
+ goto yy109;
+ }
+ goto yy98;
+yy106:
+ ++p;
+ yych = *p;
+ if (yybm[0+yych] & 16) {
+ goto yy106;
+ }
+ {
+ continue;
+ }
+yy109:
+ ++p;
+ yych = *p;
+ if (yybm[0+yych] & 32) {
+ goto yy109;
+ }
+ if (yych == '}') goto yy112;
+ p = q;
+ goto yy98;
+yy112:
+ ++p;
+ {
+ eval->Add(EvalString::SPECIAL, StringPiece(start + 2, p - start - 3));
+ continue;
+ }
+yy114:
+ ++p;
+ yych = *p;
+yy115:
+ if (yybm[0+yych] & 64) {
+ goto yy114;
+ }
+ goto yy104;
+yy116:
+ ++p;
+ yych = *p;
+yy117:
+ if (yybm[0+yych] & 128) {
+ goto yy116;
+ }
+ goto yy90;
+}
+
+ }
+ last_token_ = start;
+ ofs_ = p;
+ if (path)
+ EatWhitespace();
+ // Non-path strings end in newlines, so there's no whitespace to eat.
+ return true;
+}
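
Everything in src/lexer.cc above is re2c's generated output, per its header comment; the hand-maintained grammar is src/lexer.in.cc, shown further below. As a rough mapping, a rule like "||" { token = PIPE2; break; } compiles into the yy16/yy41 label pair above: the yybm[] bitmaps classify characters, and the yyNN labels are the states of the resulting DFA.
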
diff --git a/src/lexer.h b/src/lexer.h
new file mode 100644
index 0000000..40e602a
--- /dev/null
+++ b/src/lexer.h
@@ -0,0 +1,88 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "string_piece.h"
+
+struct EvalString;
+
+struct Lexer {
+ Lexer() {}
+ /// Helper ctor useful for tests.
+ explicit Lexer(const char* input);
+
+ enum Token {
+ ERROR,
+ BUILD,
+ COLON,
+ DEFAULT,
+ EQUALS,
+ IDENT,
+ INCLUDE,
+ INDENT,
+ NEWLINE,
+ PIPE,
+ PIPE2,
+ RULE,
+ SUBNINJA,
+ TEOF,
+ };
+
+ /// Return a human-readable form of a token, used in error messages.
+ static const char* TokenName(Token t);
+
+ /// Start parsing some input.
+ void Start(StringPiece filename, StringPiece input);
+
+ /// Read and return the next Token from the input.
+ Token ReadToken();
+
+ /// Rewind to the last read Token.
+ void UnreadToken();
+
+ /// If the next token is \a token, read it and return true.
+ bool PeekToken(Token token);
+
+ /// Read a simple identifier (a rule or variable name).
+ /// Returns false if a name can't be read.
+ bool ReadIdent(string* out);
+
+ /// Read a path (complete with $escapes).
+ /// Returns false only on error; the returned path may be empty if a
+ /// delimiter (space, newline) is hit.
+ bool ReadPath(EvalString* path, string* err) {
+ return ReadEvalString(path, true, err);
+ }
+
+ /// Read the value side of a var = value line (complete with $escapes).
+ /// Returns false only on error.
+ bool ReadVarValue(EvalString* value, string* err) {
+ return ReadEvalString(value, false, err);
+ }
+
+ /// Construct an error message with context.
+ bool Error(const string& message, string* err);
+
+private:
+ /// Skip past whitespace (called after each read token/ident/etc.).
+ void EatWhitespace();
+
+ /// Read a $-escaped string.
+ bool ReadEvalString(EvalString* eval, bool path, string* err);
+
+ StringPiece filename_;
+ StringPiece input_;
+ const char* ofs_;
+ const char* last_token_;
+};
+
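
A minimal driver for the new Lexer API (a sketch; assumes #include "lexer.h" and <stdio.h>, and elides the EvalString reads a real parser would make):

    Lexer lexer("rule cc\n");  // test-only ctor; names the buffer "input"
    string err;
    for (;;) {
      Lexer::Token t = lexer.ReadToken();
      if (t == Lexer::TEOF)
        break;
      if (t == Lexer::ERROR) {
        lexer.Error("lexing error", &err);  // fills err with file:line context
        break;
      }
      printf("%s\n", Lexer::TokenName(t));  // prints 'rule', identifier, newline
    }
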
diff --git a/src/lexer.in.cc b/src/lexer.in.cc
new file mode 100644
index 0000000..a3b29c1
--- /dev/null
+++ b/src/lexer.in.cc
@@ -0,0 +1,234 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lexer.h"
+
+#include <stdio.h>
+
+#include "eval_env.h"
+
+bool Lexer::Error(const string& message, string* err) {
+ // Compute line/column.
+ int line = 1;
+ const char* context = input_.str_;
+ for (const char* p = input_.str_; p < last_token_; ++p) {
+ if (*p == '\n') {
+ ++line;
+ context = p + 1;
+ }
+ }
+ int col = last_token_ ? last_token_ - context : 0;
+
+ char buf[1024];
+ snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line);
+ *err = buf;
+ *err += message + "\n";
+
+ // Add some context to the message.
+ const int kTruncateColumn = 72;
+ if (col > 0 && col < kTruncateColumn) {
+ int len;
+ bool truncated = true;
+ for (len = 0; len < kTruncateColumn; ++len) {
+ if (context[len] == 0 || context[len] == '\n') {
+ truncated = false;
+ break;
+ }
+ }
+ *err += string(context, len);
+ if (truncated)
+ *err += "...";
+ *err += "\n";
+ *err += string(col, ' ');
+ *err += "^ near here\n";
+ }
+
+ return false;
+}
+
+Lexer::Lexer(const char* input) {
+ Start("input", input);
+}
+
+void Lexer::Start(StringPiece filename, StringPiece input) {
+ filename_ = filename;
+ input_ = input;
+ ofs_ = input_.str_;
+ last_token_ = NULL;
+}
+
+const char* Lexer::TokenName(Token t) {
+ switch (t) {
+ case ERROR: return "lexing error";
+ case BUILD: return "'build'";
+ case COLON: return "':'";
+ case DEFAULT: return "'default'";
+ case EQUALS: return "'='";
+ case IDENT: return "identifier";
+ case INCLUDE: return "'include'";
+ case INDENT: return "indent";
+ case NEWLINE: return "newline";
+ case PIPE2: return "'||'";
+ case PIPE: return "'|'";
+ case RULE: return "'rule'";
+ case SUBNINJA: return "'subninja'";
+ case TEOF: return "eof";
+ }
+ return NULL; // not reached
+}
+
+void Lexer::UnreadToken() {
+ ofs_ = last_token_;
+}
+
+Lexer::Token Lexer::ReadToken() {
+ const char* p = ofs_;
+ const char* q;
+ const char* start;
+ Lexer::Token token;
+ for (;;) {
+ start = p;
+ /*!re2c
+ re2c:define:YYCTYPE = "char";
+ re2c:define:YYCURSOR = p;
+ re2c:define:YYMARKER = q;
+ re2c:yyfill:enable = 0;
+
+ nul = "\000";
+ simple_varname = [a-zA-Z0-9_]+;
+ varname = [a-zA-Z0-9_.]+;
+
+ "#"[^\000\n]*"\n" { continue; }
+ [\n] { token = NEWLINE; break; }
+ [ ]+ { token = INDENT; break; }
+ "build" { token = BUILD; break; }
+ "rule" { token = RULE; break; }
+ "default" { token = DEFAULT; break; }
+ "=" { token = EQUALS; break; }
+ ":" { token = COLON; break; }
+ "||" { token = PIPE2; break; }
+ "|" { token = PIPE; break; }
+ "include" { token = INCLUDE; break; }
+ "subninja" { token = SUBNINJA; break; }
+ varname { token = IDENT; break; }
+ nul { token = TEOF; break; }
+ [^] { token = ERROR; break; }
+ */
+ }
+
+ last_token_ = start;
+ ofs_ = p;
+ if (token != NEWLINE && token != TEOF)
+ EatWhitespace();
+ return token;
+}
+
+bool Lexer::PeekToken(Token token) {
+ Token t = ReadToken();
+ if (t == token)
+ return true;
+ UnreadToken();
+ return false;
+}
+
+void Lexer::EatWhitespace() {
+ const char* p = ofs_;
+ for (;;) {
+ ofs_ = p;
+ /*!re2c
+ [ ]+ { continue; }
+ "$\n" { continue; }
+ nul { break; }
+ [^] { break; }
+ */
+ }
+}
+
+bool Lexer::ReadIdent(string* out) {
+ const char* p = ofs_;
+ for (;;) {
+ const char* start = p;
+ /*!re2c
+ varname {
+ out->assign(start, p - start);
+ break;
+ }
+ [^] { return false; }
+ */
+ }
+ ofs_ = p;
+ EatWhitespace();
+ return true;
+}
+
+bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) {
+ const char* p = ofs_;
+ const char* q;
+ const char* start;
+ for (;;) {
+ start = p;
+ /*!re2c
+ [^$ :\n|\000]+ {
+ eval->Add(EvalString::RAW, StringPiece(start, p - start));
+ continue;
+ }
+ [ :|\n] {
+ if (path) {
+ p = start;
+ break;
+ } else {
+ if (*start == '\n')
+ break;
+ eval->Add(EvalString::RAW, StringPiece(start, 1));
+ continue;
+ }
+ }
+ "$$" {
+ eval->Add(EvalString::RAW, StringPiece("$", 1));
+ continue;
+ }
+ "$ " {
+ eval->Add(EvalString::RAW, StringPiece(" ", 1));
+ continue;
+ }
+ "$\n"[ ]* {
+ continue;
+ }
+ "${"varname"}" {
+ eval->Add(EvalString::SPECIAL, StringPiece(start + 2, p - start - 3));
+ continue;
+ }
+ "$"simple_varname {
+ eval->Add(EvalString::SPECIAL, StringPiece(start + 1, p - start - 1));
+ continue;
+ }
+ "$". {
+ last_token_ = start;
+ return Error("bad $-escape (literal $ must be written as $$)", err);
+ }
+ nul {
+ last_token_ = start;
+ return Error("unexpected EOF", err);
+ }
+ [^] {
+ last_token_ = start;
+ return Error("lexing error", err);
+ }
+ */
+ }
+ last_token_ = start;
+ ofs_ = p;
+ if (path)
+ EatWhitespace();
+ // Non-path strings end in newlines, so there's no whitespace to eat.
+ return true;
+}
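
After editing the grammar above, src/lexer.cc must be regenerated by running re2c over this file, e.g. `re2c -b -i -o src/lexer.cc src/lexer.in.cc` (the exact flags are an assumption, not taken from this commit: -b emits the yybm[] bitmap tables seen in the generated file, and -i suppresses #line directives).
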
diff --git a/src/lexer_test.cc b/src/lexer_test.cc
new file mode 100644
index 0000000..ce8082a
--- /dev/null
+++ b/src/lexer_test.cc
@@ -0,0 +1,85 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "lexer.h"
+
+#include <gtest/gtest.h>
+
+#include "eval_env.h"
+
+TEST(Lexer, ReadVarValue) {
+ Lexer lexer("plain text $var $VaR ${x}\n");
+ EvalString eval;
+ string err;
+ EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
+ EXPECT_EQ("", err);
+ EXPECT_EQ("[plain text ][$var][ ][$VaR][ ][$x]",
+ eval.Serialize());
+}
+
+TEST(Lexer, ReadEvalStringEscapes) {
+ Lexer lexer("$ $$ab $\ncde\n");
+ EvalString eval;
+ string err;
+ EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
+ EXPECT_EQ("", err);
+ EXPECT_EQ("[ $ab cde]",
+ eval.Serialize());
+}
+
+TEST(Lexer, ReadIdent) {
+ Lexer lexer("foo baR baz_123 blah.dots");
+ string ident;
+ EXPECT_TRUE(lexer.ReadIdent(&ident));
+ EXPECT_EQ("foo", ident);
+ EXPECT_TRUE(lexer.ReadIdent(&ident));
+ EXPECT_EQ("baR", ident);
+ EXPECT_TRUE(lexer.ReadIdent(&ident));
+ EXPECT_EQ("baz_123", ident);
+}
+
+TEST(Lexer, ReadIdentCurlies) {
+ // Verify that ReadIdent includes dots in the name,
+ // but in an expansion $bar.dots stops at the dot.
+ Lexer lexer("foo.dots $bar.dots ${bar.dots}\n");
+ string ident;
+ EXPECT_TRUE(lexer.ReadIdent(&ident));
+ EXPECT_EQ("foo.dots", ident);
+
+ EvalString eval;
+ string err;
+ EXPECT_TRUE(lexer.ReadVarValue(&eval, &err));
+ EXPECT_EQ("", err);
+ EXPECT_EQ("[$bar][.dots ][$bar.dots]",
+ eval.Serialize());
+}
+
+TEST(Lexer, Error) {
+ Lexer lexer("foo$\nbad $");
+ EvalString eval;
+ string err;
+ ASSERT_FALSE(lexer.ReadVarValue(&eval, &err));
+ EXPECT_EQ("input:2: bad $-escape (literal $ must be written as $$)\n"
+ "bad $\n"
+ " ^ near here\n"
+ , err);
+}
+
+TEST(Lexer, CommentEOF) {
+ // Verify we don't run off the end of the string when the EOF is
+ // mid-comment.
+ Lexer lexer("# foo");
+ Lexer::Token token = lexer.ReadToken();
+ EXPECT_EQ(Lexer::ERROR, token);
+}
diff --git a/src/ninja.cc b/src/ninja.cc
index 452f075..e08f2fc 100644
--- a/src/ninja.cc
+++ b/src/ninja.cc
@@ -364,12 +364,15 @@ int CmdTargets(State* state, int argc, char* argv[]) {
int CmdRules(State* state, int argc, char* /* argv */[]) {
for (map<string, const Rule*>::iterator i = state->rules_.begin();
i != state->rules_.end(); ++i) {
- if (i->second->description().unparsed_.empty()) {
+ if (i->second->description().empty()) {
printf("%s\n", i->first.c_str());
} else {
printf("%s: %s\n",
i->first.c_str(),
- i->second->description().unparsed_.c_str());
+ // XXX I changed it such that we don't have an easy way
+ // to get the source text anymore, so this output is
+ // unsatisfactory. How useful is this command, anyway?
+ i->second->description().Serialize().c_str());
}
}
return 0;
@@ -547,7 +550,10 @@ reload:
ManifestParser parser(&state, &file_reader);
string err;
if (!parser.Load(input_file, &err)) {
- Error("loading '%s': %s", input_file, err.c_str());
+ // The pattern in Ninja for errors is to return a one-line string,
+ // but parse errors are special in that they are multiline with
+ // context. Just report it verbatim.
+ fprintf(stderr, "%s", err.c_str());
return 1;
}
diff --git a/src/parsers.cc b/src/parsers.cc
index 44c3711..095e93f 100644
--- a/src/parsers.cc
+++ b/src/parsers.cc
@@ -23,242 +23,6 @@
#include "state.h"
#include "util.h"
-string Token::AsString() const {
- switch (type_) {
- case IDENT: return "'" + string(pos_, end_ - pos_) + "'";
- case UNKNOWN: return "unknown '" + string(pos_, end_ - pos_) + "'";
- case NEWLINE: return "newline";
- case EQUALS: return "'='";
- case COLON: return "':'";
- case PIPE: return "'|'";
- case PIPE2: return "'||'";
- case TEOF: return "eof";
- case INDENT: return "indenting in";
- case OUTDENT: return "indenting out";
- case NONE: break;
- }
- assert(false);
- return "";
-}
-
-bool Tokenizer::ErrorAt(const char* pos, const string& message, string* err) {
- // Re-scan the input, counting newlines so that we can compute the
- // correct position.
- int line = 1;
- const char* line_start = start_;
- for (const char* p = start_; p < pos; ++p) {
- if (*p == '\n') {
- ++line;
- line_start = p + 1;
- }
- }
- int col = pos - line_start + 1;
-
- char buf[1024];
- snprintf(buf, sizeof(buf),
- "line %d, col %d: %s", line, col, message.c_str());
- err->assign(buf);
- return false;
-}
-
-void Tokenizer::Start(const char* start, const char* end) {
- cur_line_ = cur_ = start_ = start;
- end_ = end;
-}
-
-bool Tokenizer::ErrorExpected(const string& expected, string* err) {
- return Error("expected " + expected + ", got " + token_.AsString(), err);
-}
-
-void Tokenizer::SkipWhitespace(bool newline) {
- if (token_.type_ == Token::NEWLINE && newline)
- Newline(NULL);
-
- while (cur_ < end_) {
- if (*cur_ == ' ') {
- ++cur_;
- } else if (newline && *cur_ == '\n') {
- Newline(NULL);
- } else if (*cur_ == '$' && cur_ + 1 < end_ && cur_[1] == '\n') {
- ++cur_; ++cur_;
- } else if (*cur_ == '#' && cur_ == cur_line_) {
- while (cur_ < end_ && *cur_ != '\n')
- ++cur_;
- if (cur_ < end_ && *cur_ == '\n')
- ++cur_;
- cur_line_ = cur_;
- } else {
- break;
- }
- }
-}
-
-bool Tokenizer::Newline(string* err) {
- if (!ExpectToken(Token::NEWLINE, err))
- return false;
-
- return true;
-}
-
-/// Return true if |c| is part of an identifier.
-static bool IsIdentChar(char c) {
- // This function shows up hot on profiles. Instead of the natural
- // 'if' statement, use a table as generated by this Python script:
- // import string
- // cs = set()
- // for c in string.ascii_letters + string.digits + r'+,-./\_$':
- // cs.add(ord(c))
- // for i in range(128):
- // if i in cs:
- // print '1,',
- // else:
- // print '0,',
- // if i % 16 == 15:
- // print
- static const bool kIdents[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- };
- return kIdents[(int)c];
-}
-
-bool Tokenizer::ExpectToken(Token::Type expected, string* err) {
- PeekToken();
- if (token_.type_ != expected)
- return ErrorExpected(Token(expected).AsString(), err);
- ConsumeToken();
- return true;
-}
-
-bool Tokenizer::ExpectIdent(const char* expected, string* err) {
- PeekToken();
- if (token_.type_ != Token::IDENT ||
- strncmp(token_.pos_, expected, token_.end_ - token_.pos_) != 0) {
- return ErrorExpected(string("'") + expected + "'", err);
- }
- ConsumeToken();
- return true;
-}
-
-bool Tokenizer::ReadIdent(StringPiece* out) {
- PeekToken();
- if (token_.type_ != Token::IDENT)
- return false;
- out->str_ = token_.pos_;
- out->len_ = token_.end_ - token_.pos_;
- ConsumeToken();
- return true;
-}
-
-bool Tokenizer::ReadIdent(string* out) {
- StringPiece token;
- if (!ReadIdent(&token))
- return false;
- out->assign(token.str_, token.len_);
- return true;
-}
-
-bool Tokenizer::ReadToNewline(string *text, string* err, size_t max_length) {
- // XXX token_.clear();
- while (cur_ < end_ && *cur_ != '\n') {
- if (*cur_ == '$') {
- // Might be a line continuation; peek ahead to check.
- if (cur_ + 1 >= end_)
- return Error("unexpected eof", err);
- if (*(cur_ + 1) == '\n') {
- // Let SkipWhitespace handle the continuation logic.
- SkipWhitespace();
- continue;
- }
-
- // Otherwise, just treat it like a normal character.
- text->push_back(*cur_);
- ++cur_;
- } else {
- text->push_back(*cur_);
- ++cur_;
- }
- if (text->size() >= max_length) {
- token_.pos_ = cur_;
- return false;
- }
- }
- return Newline(err);
-}
-
-Token::Type Tokenizer::PeekToken() {
- if (token_.type_ != Token::NONE)
- return token_.type_;
-
- token_.pos_ = cur_;
- if (cur_indent_ == -1) {
- cur_indent_ = cur_ - cur_line_;
- if (cur_indent_ != last_indent_) {
- if (cur_indent_ > last_indent_) {
- token_.type_ = Token::INDENT;
- } else if (cur_indent_ < last_indent_) {
- token_.type_ = Token::OUTDENT;
- }
- last_indent_ = cur_indent_;
- return token_.type_;
- }
- }
-
- if (cur_ >= end_) {
- token_.type_ = Token::TEOF;
- return token_.type_;
- }
-
- if (IsIdentChar(*cur_)) {
- while (cur_ < end_ && IsIdentChar(*cur_)) {
- if (*cur_ == '$' && cur_ + 1 < end_ && cur_[1] == ' ') {
- ++cur_;
- }
- ++cur_;
- }
- token_.end_ = cur_;
- token_.type_ = Token::IDENT;
- } else if (*cur_ == ':') {
- token_.type_ = Token::COLON;
- ++cur_;
- } else if (*cur_ == '=') {
- token_.type_ = Token::EQUALS;
- ++cur_;
- } else if (*cur_ == '|') {
- if (cur_ + 1 < end_ && cur_[1] == '|') {
- token_.type_ = Token::PIPE2;
- cur_ += 2;
- } else {
- token_.type_ = Token::PIPE;
- ++cur_;
- }
- } else if (*cur_ == '\n') {
- token_.type_ = Token::NEWLINE;
- ++cur_;
- cur_line_ = cur_;
- cur_indent_ = -1;
- }
-
- SkipWhitespace();
-
- if (token_.type_ == Token::NONE) {
- token_.type_ = Token::UNKNOWN;
- token_.end_ = cur_ + 1;
- }
-
- return token_.type_;
-}
-
-void Tokenizer::ConsumeToken() {
- token_.Clear();
-}
-
ManifestParser::ManifestParser(State* state, FileReader* file_reader)
: state_(state), file_reader_(file_reader) {
env_ = &state->bindings_;
@@ -267,58 +31,66 @@ bool ManifestParser::Load(const string& filename, string* err) {
string contents;
if (!file_reader_->ReadFile(filename, &contents, err))
return false;
- return Parse(contents, err);
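+ // resize() value-initializes the added bytes to NUL; the re2c lexer
+ // relies on NUL to detect end-of-input (see the "nul" rule in lexer.in.cc).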
+ contents.resize(contents.size() + 10);
+ return Parse(filename, contents, err);
}
-bool ManifestParser::Parse(const string& input, string* err) {
- tokenizer_.Start(input.data(), input.data() + input.size());
-
- tokenizer_.SkipWhitespace(true);
-
- while (tokenizer_.token().type_ != Token::TEOF) {
- switch (tokenizer_.PeekToken()) {
- case Token::IDENT: {
- const Token& token = tokenizer_.token();
- int len = token.end_ - token.pos_;
- if (len == 4 && memcmp(token.pos_, "rule", 4) == 0) {
- if (!ParseRule(err))
- return false;
- } else if (len == 5 && memcmp(token.pos_, "build", 5) == 0) {
- if (!ParseEdge(err))
- return false;
- } else if (len == 7 && memcmp(token.pos_, "default", 7) == 0) {
- if (!ParseDefaults(err))
- return false;
- } else if ((len == 7 && memcmp(token.pos_, "include", 7) == 0) ||
- (len == 8 && memcmp(token.pos_, "subninja", 8) == 0)) {
- if (!ParseFileInclude(err))
- return false;
- } else {
- string name, value;
- if (!ParseLet(&name, &value, err))
- return false;
- env_->AddBinding(name, value);
- }
- break;
- }
- case Token::TEOF:
- continue;
- default:
- return tokenizer_.Error("unhandled " + tokenizer_.token().AsString(), err);
+bool ManifestParser::Parse(const string& filename, const string& input,
+ string* err) {
+ lexer_.Start(filename, input);
+
+ for (;;) {
+ Lexer::Token token = lexer_.ReadToken();
+ switch (token) {
+ case Lexer::BUILD:
+ if (!ParseEdge(err))
+ return false;
+ break;
+ case Lexer::RULE:
+ if (!ParseRule(err))
+ return false;
+ break;
+ case Lexer::DEFAULT:
+ if (!ParseDefault(err))
+ return false;
+ break;
+ case Lexer::IDENT: {
+ lexer_.UnreadToken();
+ string name;
+ EvalString value;
+ if (!ParseLet(&name, &value, err))
+ return false;
+ env_->AddBinding(name, value.Evaluate(env_));
+ break;
+ }
+ case Lexer::INCLUDE:
+ if (!ParseFileInclude(false, err))
+ return false;
+ break;
+ case Lexer::SUBNINJA:
+ if (!ParseFileInclude(true, err))
+ return false;
+ break;
+ case Lexer::ERROR:
+ return lexer_.Error("lexing error", err);
+ case Lexer::TEOF:
+ return true;
+ case Lexer::NEWLINE:
+ break;
+ default:
+ return lexer_.Error(string("unexpected ") + Lexer::TokenName(token),
+ err);
}
- tokenizer_.SkipWhitespace(true);
}
-
- return true;
+ return false; // not reached
}
bool ManifestParser::ParseRule(string* err) {
- if (!tokenizer_.ExpectIdent("rule", err))
- return false;
string name;
- if (!tokenizer_.ReadIdent(&name))
- return tokenizer_.ErrorExpected("rule name", err);
- if (!tokenizer_.Newline(err))
+ if (!lexer_.ReadIdent(&name))
+ return lexer_.Error("expected rule name", err);
+
+ if (!ExpectToken(Lexer::NEWLINE, err))
return false;
if (state_->LookupRule(name) != NULL) {
@@ -328,167 +100,120 @@ bool ManifestParser::ParseRule(string* err) {
Rule* rule = new Rule(name); // XXX scoped_ptr
- if (tokenizer_.PeekToken() == Token::INDENT) {
- tokenizer_.ConsumeToken();
-
- while (tokenizer_.PeekToken() != Token::OUTDENT) {
- const char* let_loc = tokenizer_.token_.pos_;
-
- string key;
- if (!ParseLetKey(&key, err))
- return false;
+ while (lexer_.PeekToken(Lexer::INDENT)) {
+ string key;
+ EvalString value;
+ if (!ParseLet(&key, &value, err))
+ return false;
- EvalString* eval_target = NULL;
- if (key == "command") {
- eval_target = &rule->command_;
- } else if (key == "depfile") {
- eval_target = &rule->depfile_;
- } else if (key == "description") {
- eval_target = &rule->description_;
- } else if (key == "generator") {
- rule->generator_ = true;
- string dummy;
- if (!tokenizer_.ReadToNewline(&dummy, err))
- return false;
- continue;
- } else if (key == "restat") {
- rule->restat_ = true;
- string dummy;
- if (!tokenizer_.ReadToNewline(&dummy, err))
- return false;
- continue;
- } else {
- // Die on other keyvals for now; revisit if we want to add a
- // scope here.
- return tokenizer_.ErrorAt(let_loc, "unexpected variable '" + key + "'",
- err);
- }
-
- if (!ParseLetValue(eval_target, err))
- return false;
+ if (key == "command") {
+ rule->command_ = value;
+ } else if (key == "depfile") {
+ rule->depfile_ = value;
+ } else if (key == "description") {
+ rule->description_ = value;
+ } else if (key == "generator") {
+ rule->generator_ = true;
+ } else if (key == "restat") {
+ rule->restat_ = true;
+ } else {
+ // Die on other keyvals for now; revisit if we want to add a
+ // scope here.
+ return lexer_.Error("unexpected variable '" + key + "'", err);
}
- tokenizer_.ConsumeToken();
}
- if (rule->command_.unparsed().empty())
- return tokenizer_.Error("expected 'command =' line", err);
+ if (rule->command_.empty())
+ return lexer_.Error("expected 'command =' line", err);
state_->AddRule(rule);
return true;
}
-bool ManifestParser::ParseLet(string* key, string* value, string* err) {
- if (!ParseLetKey(key, err))
+bool ManifestParser::ParseLet(string* key, EvalString* value, string* err) {
+ if (!lexer_.ReadIdent(key))
return false;
-
- EvalString eval;
- if (!ParseLetValue(&eval, err))
+ if (!ExpectToken(Lexer::EQUALS, err))
return false;
-
- *value = eval.Evaluate(env_);
-
- return true;
-}
-
-bool ManifestParser::ParseLetKey(string* key, string* err) {
- if (!tokenizer_.ReadIdent(key))
- return tokenizer_.ErrorExpected("variable name", err);
- if (!tokenizer_.ExpectToken(Token::EQUALS, err))
- return false;
- return true;
-}
-
-bool ManifestParser::ParseLetValue(EvalString* eval, string* err) {
- // Backup the tokenizer state prior to consuming the line, for reporting
- // the source location in case of a parse error later.
- Tokenizer tokenizer_backup = tokenizer_;
-
- // XXX should we tokenize here? it means we'll need to understand
- // command syntax, though...
- string value;
- if (!tokenizer_.ReadToNewline(&value, err))
+ if (!lexer_.ReadVarValue(value, err))
return false;
-
- string eval_err;
- size_t err_index;
- if (!eval->Parse(value, &eval_err, &err_index)) {
- value.clear();
- // Advance the saved tokenizer state up to the error index to report the
- // error at the correct source location.
- tokenizer_backup.ReadToNewline(&value, err, err_index);
- return tokenizer_backup.Error(eval_err, err);
- }
-
return true;
}
-bool ManifestParser::ParseDefaults(string* err) {
- if (!tokenizer_.ExpectIdent("default", err))
+bool ManifestParser::ParseDefault(string* err) {
+ EvalString eval;
+ if (!lexer_.ReadPath(&eval, err))
return false;
-
- string target;
- if (!tokenizer_.ReadIdent(&target))
- return tokenizer_.ErrorExpected("target name", err);
+ if (eval.empty())
+ return lexer_.Error("expected target name", err);
do {
- EvalString eval;
- string eval_err;
- if (!eval.Parse(target, &eval_err))
- return tokenizer_.Error(eval_err, err);
string path = eval.Evaluate(env_);
- if (!CanonicalizePath(&path, &eval_err))
- return tokenizer_.Error(eval_err, err);
- if (!state_->AddDefault(path, &eval_err))
- return tokenizer_.Error(eval_err, err);
- } while (tokenizer_.ReadIdent(&target));
+ string path_err;
+ if (!CanonicalizePath(&path, &path_err))
+ return lexer_.Error(path_err, err);
+ if (!state_->AddDefault(path, &path_err))
+ return lexer_.Error(path_err, err);
+
+ eval.Clear();
+ if (!lexer_.ReadPath(&eval, err))
+ return false;
+ } while (!eval.empty());
- if (!tokenizer_.Newline(err))
+ if (!ExpectToken(Lexer::NEWLINE, err))
return false;
return true;
}
bool ManifestParser::ParseEdge(string* err) {
- vector<string> ins, outs;
+ vector<EvalString> ins, outs;
- if (!tokenizer_.ExpectIdent("build", err))
- return false;
+ {
+ EvalString out;
+ if (!lexer_.ReadPath(&out, err))
+ return false;
+ if (out.empty())
+ return lexer_.Error("expected path", err);
- for (;;) {
- if (tokenizer_.PeekToken() == Token::COLON) {
- tokenizer_.ConsumeToken();
- break;
- }
+ do {
+ outs.push_back(out);
- string out;
- if (!tokenizer_.ReadIdent(&out))
- return tokenizer_.ErrorExpected("output file list", err);
- outs.push_back(out);
+ out.Clear();
+ if (!lexer_.ReadPath(&out, err))
+ return false;
+ } while (!out.empty());
}
- // XXX check outs not empty
+
+ if (!ExpectToken(Lexer::COLON, err))
+ return false;
string rule_name;
- if (!tokenizer_.ReadIdent(&rule_name))
- return tokenizer_.ErrorExpected("build command name", err);
+ if (!lexer_.ReadIdent(&rule_name))
+ return lexer_.Error("expected build command name", err);
const Rule* rule = state_->LookupRule(rule_name);
if (!rule)
- return tokenizer_.Error("unknown build rule '" + rule_name + "'", err);
+ return lexer_.Error("unknown build rule '" + rule_name + "'", err);
for (;;) {
- string in;
- if (!tokenizer_.ReadIdent(&in))
+ // XXX should we require one path here?
+ EvalString in;
+ if (!lexer_.ReadPath(&in, err))
+ return false;
+ if (in.empty())
break;
ins.push_back(in);
}
// Add all implicit deps, counting how many as we go.
int implicit = 0;
- if (tokenizer_.PeekToken() == Token::PIPE) {
- tokenizer_.ConsumeToken();
+ if (lexer_.PeekToken(Lexer::PIPE)) {
for (;;) {
- string in;
- if (!tokenizer_.ReadIdent(&in))
+ EvalString in;
+ if (!lexer_.ReadPath(&in, err))
+ return false;
+ if (in.empty())
break;
ins.push_back(in);
++implicit;
@@ -497,97 +222,95 @@ bool ManifestParser::ParseEdge(string* err) {
// Add all order-only deps, counting how many as we go.
int order_only = 0;
- if (tokenizer_.PeekToken() == Token::PIPE2) {
- tokenizer_.ConsumeToken();
+ if (lexer_.PeekToken(Lexer::PIPE2)) {
for (;;) {
- string in;
- if (!tokenizer_.ReadIdent(&in))
+ EvalString in;
+ if (!lexer_.ReadPath(&in, err))
+ return false;
+ if (in.empty())
break;
ins.push_back(in);
++order_only;
}
}
- if (!tokenizer_.Newline(err))
+ if (!ExpectToken(Lexer::NEWLINE, err))
return false;
// Default to using outer env.
BindingEnv* env = env_;
- // But use a nested env if there are variables in scope.
- if (tokenizer_.PeekToken() == Token::INDENT) {
- tokenizer_.ConsumeToken();
-
+ // But create and fill a nested env if there are variables in scope.
+ if (lexer_.PeekToken(Lexer::INDENT)) {
// XXX scoped_ptr to handle error case.
env = new BindingEnv;
env->parent_ = env_;
- while (tokenizer_.PeekToken() != Token::OUTDENT) {
- string key, val;
+ do {
+ string key;
+ EvalString val;
if (!ParseLet(&key, &val, err))
return false;
- env->AddBinding(key, val);
- }
- tokenizer_.ConsumeToken();
- }
-
- // Evaluate all variables in paths.
- // XXX: fast path skip the eval parse if there's no $ in the path?
- vector<string>* paths[2] = { &ins, &outs };
- for (int p = 0; p < 2; ++p) {
- for (vector<string>::iterator i = paths[p]->begin();
- i != paths[p]->end(); ++i) {
- EvalString eval;
- string eval_err;
- if (!eval.Parse(*i, &eval_err))
- return tokenizer_.Error(eval_err, err);
- string path = eval.Evaluate(env);
- if (!CanonicalizePath(&path, &eval_err))
- return tokenizer_.Error(eval_err, err);
- *i = path;
- }
+ env->AddBinding(key, val.Evaluate(env_));
+ } while (lexer_.PeekToken(Lexer::INDENT));
}
Edge* edge = state_->AddEdge(rule);
edge->env_ = env;
- for (vector<string>::iterator i = ins.begin(); i != ins.end(); ++i)
- state_->AddIn(edge, *i);
- for (vector<string>::iterator i = outs.begin(); i != outs.end(); ++i)
- state_->AddOut(edge, *i);
+ for (vector<EvalString>::iterator i = ins.begin(); i != ins.end(); ++i) {
+ string path = i->Evaluate(env);
+ string path_err;
+ if (!CanonicalizePath(&path, &path_err))
+ return lexer_.Error(path_err, err);
+ state_->AddIn(edge, path);
+ }
+ for (vector<EvalString>::iterator i = outs.begin(); i != outs.end(); ++i) {
+ string path = i->Evaluate(env);
+ string path_err;
+ if (!CanonicalizePath(&path, &path_err))
+ return lexer_.Error(path_err, err);
+ state_->AddOut(edge, path);
+ }
edge->implicit_deps_ = implicit;
edge->order_only_deps_ = order_only;
return true;
}
-bool ManifestParser::ParseFileInclude(string* err) {
- string type;
- tokenizer_.ReadIdent(&type);
-
- string path;
- if (!tokenizer_.ReadIdent(&path))
- return tokenizer_.ErrorExpected("path to ninja file", err);
+bool ManifestParser::ParseFileInclude(bool new_scope, string* err) {
+ // XXX this should use ReadPath!
+ EvalString eval;
+ if (!lexer_.ReadPath(&eval, err))
+ return false;
+ string path = eval.Evaluate(env_);
string contents;
string read_err;
if (!file_reader_->ReadFile(path, &contents, &read_err))
- return tokenizer_.Error("loading " + path + ": " + read_err, err);
+ return lexer_.Error("loading '" + path + "': " + read_err, err);
ManifestParser subparser(state_, file_reader_);
- if (type == "subninja") {
- // subninja: Construct a new scope for the new parser.
+ if (new_scope) {
subparser.env_ = new BindingEnv;
subparser.env_->parent_ = env_;
} else {
- // include: Reuse the current scope.
subparser.env_ = env_;
}
- string sub_err;
- if (!subparser.Parse(contents, &sub_err))
- return tokenizer_.Error("in '" + path + "': " + sub_err, err);
+ if (!subparser.Parse(path, contents, err))
+ return false;
- if (!tokenizer_.Newline(err))
+ if (!ExpectToken(Lexer::NEWLINE, err))
return false;
return true;
}
+
+bool ManifestParser::ExpectToken(Lexer::Token expected, string* err) {
+ Lexer::Token token = lexer_.ReadToken();
+ if (token != expected) {
+ string message = string("expected ") + Lexer::TokenName(expected);
+ message += string(", got ") + Lexer::TokenName(token);
+ return lexer_.Error(message, err);
+ }
+ return true;
+}
diff --git a/src/parsers.h b/src/parsers.h
index 101b278..f889156 100644
--- a/src/parsers.h
+++ b/src/parsers.h
@@ -21,74 +21,10 @@
using namespace std;
+#include "lexer.h"
#include "string_piece.h"
struct BindingEnv;
-
-/// A single parsed token in an input stream.
-struct Token {
- enum Type {
- NONE,
- UNKNOWN,
- IDENT,
- NEWLINE,
- EQUALS,
- COLON,
- PIPE,
- PIPE2,
- INDENT,
- OUTDENT,
- TEOF
- };
- explicit Token(Type type) : type_(type) {}
-
- void Clear() { type_ = NONE; }
- string AsString() const;
-
- Type type_;
- const char* pos_;
- const char* end_;
-};
-
-/// Processes an input stream into Tokens.
-struct Tokenizer {
- Tokenizer() :
- token_(Token::NONE),
- last_indent_(0), cur_indent_(-1) {}
-
- void Start(const char* start, const char* end);
- /// Report an error at a particular location.
- bool ErrorAt(const char* pos, const string& message, string* err);
- /// Report an error with a location pointing at the current token.
- bool Error(const string& message, string* err) {
- return ErrorAt(token_.pos_, message, err);
- }
- /// Call Error() with "expected foo, got bar".
- bool ErrorExpected(const string& expected, string* err);
-
- const Token& token() const { return token_; }
-
- void SkipWhitespace(bool newline=false);
- bool Newline(string* err);
- bool ExpectToken(Token::Type expected, string* err);
- bool ExpectIdent(const char* expected, string* err);
- bool ReadIdent(StringPiece* out);
- bool ReadIdent(string* out);
- bool ReadToNewline(string* text, string* err,
- size_t max_length=std::numeric_limits<size_t>::max());
-
- Token::Type PeekToken();
- void ConsumeToken();
-
- const char* start_; /// Start of the input.
- const char* cur_; /// Current position within the input.
- const char* end_; /// End of the input.
-
- const char* cur_line_; /// Start of current line.
- Token token_;
- int last_indent_, cur_indent_;
-};
-
struct EvalString;
struct State;
@@ -101,30 +37,35 @@ struct ManifestParser {
ManifestParser(State* state, FileReader* file_reader);
+ /// Load and parse a file.
bool Load(const string& filename, string* err);
- bool Parse(const string& input, string* err);
+ /// Parse a text string of input. Used by tests.
+ bool ParseTest(const string& input, string* err) {
+ return Parse("input", input, err);
+ }
+
+private:
+ /// Parse a file, given its contents as a string.
+ bool Parse(const string& filename, const string& input, string* err);
+
+ /// Parse various statement types.
bool ParseRule(string* err);
- /// Parse a key=val statement, expanding $vars in the value with the
- /// current env.
- bool ParseLet(string* key, string* val, string* err);
+ bool ParseLet(string* key, EvalString* val, string* err);
bool ParseEdge(string* err);
- bool ParseDefaults(string* err);
+ bool ParseDefault(string* err);
/// Parse either a 'subninja' or 'include' line.
- bool ParseFileInclude(string* err);
-
+ bool ParseFileInclude(bool new_scope, string* err);
- /// Parse the "key=" half of a key=val statement.
- bool ParseLetKey(string* key, string* err);
- /// Parse the val half of a key=val statement, writing and parsing
- /// output into an EvalString (ready for expansion).
- bool ParseLetValue(EvalString* eval, string* err);
+  /// If the next token is not \a expected, produce an error string
+  /// saying "expected foo, got bar".
+ bool ExpectToken(Lexer::Token expected, string* err);
State* state_;
BindingEnv* env_;
FileReader* file_reader_;
- Tokenizer tokenizer_;
+ Lexer lexer_;
};
#endif // NINJA_PARSERS_H_
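
Aside (not part of the diff): after this change the only public entry
points are Load() for files and ParseTest() for in-memory strings; the
filename threaded through the now-private Parse() is what shows up as
"input:LINE:" in the test expectations below. A minimal sketch of the
new surface, under the same no-FileReader setup the tests use:

    #include <stdio.h>
    #include "parsers.h"
    #include "state.h"

    int main() {
      State state;
      ManifestParser parser(&state, NULL);  // no FileReader, as in tests
      string err;
      if (!parser.ParseTest("rule cc\n  command = cc -c $in -o $out\n",
                            &err))
        fprintf(stderr, "%s", err.c_str());
      return 0;
    }
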
diff --git a/src/parsers_test.cc b/src/parsers_test.cc
index 53b4e92..9d46beb 100644
--- a/src/parsers_test.cc
+++ b/src/parsers_test.cc
@@ -24,7 +24,7 @@ struct ParserTest : public testing::Test,
void AssertParse(const char* input) {
ManifestParser parser(&state, this);
string err;
- ASSERT_TRUE(parser.Parse(input, &err)) << err;
+ ASSERT_TRUE(parser.ParseTest(input, &err)) << err;
ASSERT_EQ("", err);
}
@@ -61,7 +61,7 @@ TEST_F(ParserTest, Rules) {
ASSERT_EQ(3u, state.rules_.size());
const Rule* rule = state.rules_.begin()->second;
EXPECT_EQ("cat", rule->name());
- EXPECT_EQ("cat $in > $out", rule->command().unparsed());
+ EXPECT_EQ("[cat ][$in][ > ][$out]", rule->command().Serialize());
}
TEST_F(ParserTest, Variables) {
@@ -118,7 +118,7 @@ TEST_F(ParserTest, Continuation) {
ASSERT_EQ(2u, state.rules_.size());
const Rule* rule = state.rules_.begin()->second;
EXPECT_EQ("link", rule->name());
- EXPECT_EQ("foo bar baz", rule->command().unparsed());
+ EXPECT_EQ("[foo bar baz]", rule->command().Serialize());
}
TEST_F(ParserTest, Backslash) {
@@ -151,9 +151,9 @@ TEST_F(ParserTest, Dollars) {
TEST_F(ParserTest, EscapeSpaces) {
ASSERT_NO_FATAL_FAILURE(AssertParse(
-"rule has$ spaces\n"
+"rule spaces\n"
" command = something\n"
-"build foo$ bar: has$ spaces $$one two$$$ three\n"
+"build foo$ bar: spaces $$one two$$$ three\n"
));
EXPECT_TRUE(state.LookupNode("foo bar"));
EXPECT_EQ(state.edges_[0]->outputs_[0]->path(), "foo bar");
@@ -211,98 +211,131 @@ TEST_F(ParserTest, Errors) {
{
ManifestParser parser(NULL, NULL);
string err;
- EXPECT_FALSE(parser.Parse("foobar", &err));
- EXPECT_EQ("line 1, col 7: expected '=', got eof", err);
+ EXPECT_FALSE(parser.ParseTest("foobar", &err));
+ EXPECT_EQ("input:1: expected '=', got eof\n"
+ "foobar\n"
+ " ^ near here\n"
+ , err);
}
{
ManifestParser parser(NULL, NULL);
string err;
- EXPECT_FALSE(parser.Parse("x 3", &err));
- EXPECT_EQ("line 1, col 3: expected '=', got '3'", err);
+ EXPECT_FALSE(parser.ParseTest("x 3", &err));
+ EXPECT_EQ("input:1: expected '=', got identifier\n"
+ "x 3\n"
+ " ^ near here\n"
+ , err);
}
{
ManifestParser parser(NULL, NULL);
string err;
- EXPECT_FALSE(parser.Parse("x = 3", &err));
- EXPECT_EQ("line 1, col 6: expected newline, got eof", err);
+ EXPECT_FALSE(parser.ParseTest("x = 3", &err));
+ EXPECT_EQ("input:1: unexpected EOF\n"
+ "x = 3\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("x = 3\ny 2", &err));
- EXPECT_EQ("line 2, col 3: expected '=', got '2'", err);
+ EXPECT_FALSE(parser.ParseTest("x = 3\ny 2", &err));
+ EXPECT_EQ("input:2: expected '=', got identifier\n"
+ "y 2\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("x = $", &err));
- EXPECT_EQ("line 1, col 3: unexpected eof", err);
+ EXPECT_FALSE(parser.ParseTest("x = $", &err));
+ EXPECT_EQ("input:1: bad $-escape (literal $ must be written as $$)\n"
+ "x = $\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("x = $\n $[\n", &err));
- EXPECT_EQ("line 2, col 3: expected variable after $", err);
+ EXPECT_FALSE(parser.ParseTest("x = $\n $[\n", &err));
+ EXPECT_EQ("input:2: bad $-escape (literal $ must be written as $$)\n"
+ " $[\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("x = a$\n b$\n $\n", &err));
- EXPECT_EQ("line 4, col 1: expected newline, got eof", err);
+ EXPECT_FALSE(parser.ParseTest("x = a$\n b$\n $\n", &err));
+ EXPECT_EQ("input:4: unexpected EOF\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("build x: y z\n", &err));
- EXPECT_EQ("line 1, col 10: unknown build rule 'y'", err);
+ EXPECT_FALSE(parser.ParseTest("build x: y z\n", &err));
+ EXPECT_EQ("input:1: unknown build rule 'y'\n"
+ "build x: y z\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("build x:: y z\n", &err));
- EXPECT_EQ("line 1, col 9: expected build command name, got ':'", err);
+ EXPECT_FALSE(parser.ParseTest("build x:: y z\n", &err));
+ EXPECT_EQ("input:1: expected build command name\n"
+ "build x:: y z\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("rule cat\n command = cat ok\n"
- "build x: cat $\n :\n",
- &err));
- EXPECT_EQ("line 4, col 2: expected newline, got ':'", err);
+ EXPECT_FALSE(parser.ParseTest("rule cat\n command = cat ok\n"
+ "build x: cat $\n :\n",
+ &err));
+ EXPECT_EQ("input:4: expected newline, got ':'\n"
+ " :\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("rule cat\n",
- &err));
- EXPECT_EQ("line 2, col 1: expected 'command =' line", err);
+ EXPECT_FALSE(parser.ParseTest("rule cat\n",
+ &err));
+ EXPECT_EQ("input:2: expected 'command =' line\n", err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("rule cat\n command = ${fafsd\n foo = bar\n",
- &err));
- EXPECT_EQ("line 2, col 20: expected closing curly after ${", err);
+ EXPECT_FALSE(parser.ParseTest("rule cat\n"
+ " command = ${fafsd\n"
+ "foo = bar\n",
+ &err));
+ EXPECT_EQ("input:2: bad $-escape (literal $ must be written as $$)\n"
+ " command = ${fafsd\n"
+ " ^ near here\n"
+ , err);
}
@@ -310,87 +343,110 @@ TEST_F(ParserTest, Errors) {
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("rule cat\n command = cat\nbuild $: cat foo\n",
- &err));
- // XXX EXPECT_EQ("line 3, col 7: expected variable after $", err);
- EXPECT_EQ("line 4, col 1: expected variable after $", err);
+ EXPECT_FALSE(parser.ParseTest("rule cat\n"
+ " command = cat\nbuild $: cat foo\n",
+ &err));
+ EXPECT_EQ("input:3: bad $-escape (literal $ must be written as $$)\n"
+ "build $: cat foo\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("rule %foo\n",
- &err));
- EXPECT_EQ("line 1, col 6: expected rule name, got unknown '%'", err);
+ EXPECT_FALSE(parser.ParseTest("rule %foo\n",
+ &err));
+ EXPECT_EQ("input:1: expected rule name\n", err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("rule cc\n command = foo\n othervar = bar\n",
- &err));
- EXPECT_EQ("line 3, col 3: unexpected variable 'othervar'", err);
+ EXPECT_FALSE(parser.ParseTest("rule cc\n"
+ " command = foo\n"
+ " othervar = bar\n",
+ &err));
+ EXPECT_EQ("input:3: unexpected variable 'othervar'\n"
+ " othervar = bar\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("rule cc\n command = foo\n"
- "build $: cc bar.cc\n",
- &err));
- EXPECT_EQ("line 4, col 1: expected variable after $", err);
+ EXPECT_FALSE(parser.ParseTest("rule cc\n command = foo\n"
+ "build $: cc bar.cc\n",
+ &err));
+ EXPECT_EQ("input:3: bad $-escape (literal $ must be written as $$)\n"
+ "build $: cc bar.cc\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("default\n",
- &err));
- EXPECT_EQ("line 1, col 8: expected target name, got newline", err);
+ EXPECT_FALSE(parser.ParseTest("default\n",
+ &err));
+ EXPECT_EQ("input:1: expected target name\n"
+ "default\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("default nonexistent\n",
- &err));
- EXPECT_EQ("line 1, col 9: unknown target 'nonexistent'", err);
+ EXPECT_FALSE(parser.ParseTest("default nonexistent\n",
+ &err));
+ EXPECT_EQ("input:1: unknown target 'nonexistent'\n"
+ "default nonexistent\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("rule r\n command = r\n"
- "build b: r\n"
- "default b:\n",
- &err));
- EXPECT_EQ("line 4, col 10: expected newline, got ':'", err);
+ EXPECT_FALSE(parser.ParseTest("rule r\n command = r\n"
+ "build b: r\n"
+ "default b:\n",
+ &err));
+ EXPECT_EQ("input:4: expected newline, got ':'\n"
+ "default b:\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("default $a\n", &err));
- EXPECT_EQ("line 1, col 9: empty path", err);
+ EXPECT_FALSE(parser.ParseTest("default $a\n", &err));
+ EXPECT_EQ("input:1: empty path\n"
+ "default $a\n"
+ " ^ near here\n"
+ , err);
}
{
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_FALSE(parser.Parse("rule r\n"
- " command = r\n"
- "build $a: r $c\n", &err));
+ EXPECT_FALSE(parser.ParseTest("rule r\n"
+ " command = r\n"
+ "build $a: r $c\n", &err));
// XXX the line number is wrong; we should evaluate paths in ParseEdge
// as we see them, not after we've read them all!
- EXPECT_EQ("line 4, col 1: empty path", err);
+ EXPECT_EQ("input:4: empty path\n", err);
}
}
@@ -399,9 +455,9 @@ TEST_F(ParserTest, MultipleOutputs)
State state;
ManifestParser parser(&state, NULL);
string err;
- EXPECT_TRUE(parser.Parse("rule cc\n command = foo\n depfile = bar\n"
- "build a.o b.o: cc c.cc\n",
- &err));
+ EXPECT_TRUE(parser.ParseTest("rule cc\n command = foo\n depfile = bar\n"
+ "build a.o b.o: cc c.cc\n",
+ &err));
EXPECT_EQ("", err);
}
@@ -433,9 +489,11 @@ TEST_F(ParserTest, SubNinja) {
TEST_F(ParserTest, MissingSubNinja) {
ManifestParser parser(&state, this);
string err;
- EXPECT_FALSE(parser.Parse("subninja foo.ninja\n", &err));
- EXPECT_EQ("line 1, col 10: loading foo.ninja: No such file or directory",
- err);
+ EXPECT_FALSE(parser.ParseTest("subninja foo.ninja\n", &err));
+ EXPECT_EQ("input:1: loading 'foo.ninja': No such file or directory\n"
+ "subninja foo.ninja\n"
+ " ^ near here\n"
+ , err);
}
TEST_F(ParserTest, Include) {
@@ -451,7 +509,8 @@ TEST_F(ParserTest, Include) {
TEST_F(ParserTest, Implicit) {
ASSERT_NO_FATAL_FAILURE(AssertParse(
-"rule cat\n command = cat $in > $out\n"
+"rule cat\n"
+" command = cat $in > $out\n"
"build foo: cat bar | baz\n"));
Edge* edge = state.LookupNode("foo")->in_edge();
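
Aside (not part of the diff): the expectations above pin down the new
error shape: "FILE:LINE: message", then the offending source line, then
a caret aligned under the error column. A sketch of how such a message
can be assembled; FormatError is hypothetical, and the real formatting
lives in Lexer::Error in src/lexer.in.cc and may differ in detail:

    #include <stdio.h>
    #include <string>
    using namespace std;

    // Hypothetical helper, for illustration only.
    static string FormatError(const string& filename, int line,
                              const string& context, int col,
                              const string& message) {
      char buf[32];
      snprintf(buf, sizeof(buf), ":%d: ", line);
      string err = filename + buf + message + "\n";
      if (!context.empty())  // EOF errors can lack a context line
        err += context + "\n" + string(col, ' ') + "^ near here\n";
      return err;
    }
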
diff --git a/src/state_test.cc b/src/state_test.cc
index b9e55cb..ca4e60c 100644
--- a/src/state_test.cc
+++ b/src/state_test.cc
@@ -21,11 +21,14 @@ namespace {
TEST(State, Basic) {
State state;
+
Rule* rule = new Rule("cat");
- string err;
- EXPECT_TRUE(rule->command().Parse("cat $in > $out", &err));
- ASSERT_EQ("", err);
+ rule->command_.Add(EvalString::RAW, "cat ");
+ rule->command_.Add(EvalString::SPECIAL, "in");
+ rule->command_.Add(EvalString::RAW, " > ");
+ rule->command_.Add(EvalString::SPECIAL, "out");
state.AddRule(rule);
+
Edge* edge = state.AddEdge(rule);
state.AddIn(edge, "in1");
state.AddIn(edge, "in2");
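
Aside (not part of the diff): with EvalString::Parse gone, tests build
the command token by token, and Serialize() renders each RAW piece as
[text] and each SPECIAL piece as [$name], which is the bracketed format
asserted in parsers_test.cc above. Evaluate() walks the same token list
and resolves SPECIAL pieces through an Env; a hypothetical companion
test (TestEnv and its bindings are assumptions; in ninja the Edge
supplies the real Env):

    struct TestEnv : public Env {
      virtual string LookupVariable(const string& var) {
        if (var == "in") return "in1 in2";
        if (var == "out") return "out";
        return "";
      }
    };

    TEST(State, EvaluateCommand) {
      Rule rule("cat");
      rule.command_.Add(EvalString::RAW, "cat ");
      rule.command_.Add(EvalString::SPECIAL, "in");
      rule.command_.Add(EvalString::RAW, " > ");
      rule.command_.Add(EvalString::SPECIAL, "out");
      TestEnv env;
      EXPECT_EQ("cat in1 in2 > out", rule.command_.Evaluate(&env));
    }
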
diff --git a/src/test.cc b/src/test.cc
index 719cec3..20b55b3 100644
--- a/src/test.cc
+++ b/src/test.cc
@@ -29,7 +29,7 @@ Node* StateTestWithBuiltinRules::GetNode(const string& path) {
void AssertParse(State* state, const char* input) {
ManifestParser parser(state, NULL);
string err;
- ASSERT_TRUE(parser.Parse(input, &err)) << err;
+ ASSERT_TRUE(parser.ParseTest(input, &err)) << err;
ASSERT_EQ("", err);
}