From 8a0c96075786c1983bdfa2f37f32b75200ea0334 Mon Sep 17 00:00:00 2001 From: Evan Martin Date: Thu, 29 Dec 2011 13:00:27 -0800 Subject: switch the core ninja parser to use re2c for the lexer - Delete the old "Tokenizer" code. - Write separate tests for the lexer distinct from the parser. - Switch the parser to use the new code. - New lexer error output has file:line numbers so e.g. Emacs can jump your editor to the syntax error. - The EvalEnv ($-interpolation) code is now part of the lexer as well. --- configure.py | 8 +- src/eval_env.cc | 80 ++---- src/eval_env.h | 14 +- src/eval_env_test.cc | 101 ------- src/graph.h | 4 +- src/lexer.cc | 729 +++++++++++++++++++++++++++++++++++++++++++++++++++ src/lexer.h | 88 +++++++ src/lexer.in.cc | 234 +++++++++++++++++ src/lexer_test.cc | 85 ++++++ src/ninja.cc | 12 +- src/parsers.cc | 617 ++++++++++++------------------------------- src/parsers.h | 97 ++----- src/parsers_test.cc | 201 +++++++++----- src/state_test.cc | 9 +- src/test.cc | 2 +- 15 files changed, 1510 insertions(+), 771 deletions(-) delete mode 100644 src/eval_env_test.cc create mode 100644 src/lexer.cc create mode 100644 src/lexer.h create mode 100644 src/lexer.in.cc create mode 100644 src/lexer_test.cc diff --git a/configure.py b/configure.py index b2eed2a..7397b20 100755 --- a/configure.py +++ b/configure.py @@ -151,12 +151,13 @@ if platform != 'mingw': objs += cxx('browse', order_only=built('browse_py.h')) n.newline() -n.comment('the depfile parser is generated using re2c.') +n.comment('the depfile parser and ninja lexers are generated using re2c.') n.rule('re2c', command='re2c -b -i --no-generation-date -o $out $in', description='RE2C $out') -# Generate the .cc file in the source directory so we can check it in. +# Generate the .cc files in the source directory so we can check them in. 
n.build(src('depfile_parser.cc'), 're2c', src('depfile_parser.in.cc')) +n.build(src('lexer.cc'), 're2c', src('lexer.in.cc')) n.newline() n.comment('Core source files all build into ninja library.') @@ -169,6 +170,7 @@ for name in ['build', 'eval_env', 'graph', 'graphviz', + 'lexer', 'parsers', 'state', 'util']: @@ -219,8 +221,8 @@ for name in ['build_log_test', 'depfile_parser_test', 'disk_interface_test', 'edit_distance_test', - 'eval_env_test', 'graph_test', + 'lexer_test', 'parsers_test', 'state_test', 'subprocess_test', diff --git a/src/eval_env.cc b/src/eval_env.cc index fa5e35b..57c20c6 100644 --- a/src/eval_env.cc +++ b/src/eval_env.cc @@ -27,64 +27,6 @@ void BindingEnv::AddBinding(const string& key, const string& val) { bindings_[key] = val; } -bool EvalString::Parse(const string& input, string* err, size_t* err_index) { - unparsed_ = input; - - string::size_type start, end; - start = 0; - do { - end = input.find('$', start); - if (end == string::npos) { - end = input.size(); - break; - } - if (end > start) - parsed_.push_back(make_pair(input.substr(start, end - start), RAW)); - start = end + 1; - if (start < input.size() && input[start] == '{') { - ++start; - for (end = start + 1; end < input.size(); ++end) { - if (input[end] == '}') - break; - } - if (end >= input.size()) { - *err = "expected closing curly after ${"; - if (err_index) - *err_index = end; - return false; - } - parsed_.push_back(make_pair(input.substr(start, end - start), SPECIAL)); - ++end; - } else if (start < input.size() && input[start] == '$') { - parsed_.push_back(make_pair("$", RAW)); - end = start + 1; - } else if (start < input.size() && input[start] == ' ') { - parsed_.push_back(make_pair(" ", RAW)); - end = start + 1; - } else { - for (end = start; end < input.size(); ++end) { - char c = input[end]; - if (!(('a' <= c && c <= 'z') || ('A' <= c && c <= 'Z') || - ('0' <= c && c <= '9') || c == '_')) { - break; - } - } - if (end == start) { - *err = "expected variable after $"; - if 
(err_index) - *err_index = start; - return false; - } - parsed_.push_back(make_pair(input.substr(start, end - start), SPECIAL)); - } - start = end; - } while (end < input.size()); - if (end > start) - parsed_.push_back(make_pair(input.substr(start, end - start), RAW)); - - return true; -} - string EvalString::Evaluate(Env* env) const { string result; for (TokenList::const_iterator i = parsed_.begin(); i != parsed_.end(); ++i) { @@ -95,3 +37,25 @@ string EvalString::Evaluate(Env* env) const { } return result; } + +void EvalString::Add(TokenType type, StringPiece text) { + // Add it to the end of an existing RAW token if possible. + if (type == RAW && !parsed_.empty() && parsed_.back().second == RAW) { + parsed_.back().first.append(text.str_, text.len_); + } else { + parsed_.push_back(make_pair(text.AsString(), type)); + } +} + +string EvalString::Serialize() const { + string result; + for (TokenList::const_iterator i = parsed_.begin(); + i != parsed_.end(); ++i) { + result.append("["); + if (i->second == SPECIAL) + result.append("$"); + result.append(i->first); + result.append("]"); + } + return result; +} diff --git a/src/eval_env.h b/src/eval_env.h index ed7c2f4..8c144f0 100644 --- a/src/eval_env.h +++ b/src/eval_env.h @@ -20,6 +20,8 @@ #include using namespace std; +#include "string_piece.h" + /// An interface for a scope for variable (e.g. "$foo") lookups. struct Env { virtual ~Env() {} @@ -41,14 +43,18 @@ struct BindingEnv : public Env { /// A tokenized string that contains variable references. /// Can be evaluated relative to an Env. 
struct EvalString { - bool Parse(const string& input, string* err, size_t* err_index=NULL); string Evaluate(Env* env) const; - const string& unparsed() const { return unparsed_; } - bool empty() const { return unparsed_.empty(); } + void Clear() { parsed_.clear(); } + bool empty() const { return parsed_.empty(); } - string unparsed_; enum TokenType { RAW, SPECIAL }; + void Add(TokenType type, StringPiece text); + + /// Construct a human-readable representation of the parsed state, + /// for use in tests. + string Serialize() const; + typedef vector<pair<string, TokenType> > TokenList; TokenList parsed_; }; diff --git a/src/eval_env_test.cc b/src/eval_env_test.cc deleted file mode 100644 index 4836e24..0000000 --- a/src/eval_env_test.cc +++ /dev/null @@ -1,101 +0,0 @@ -// Copyright 2011 Google Inc. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License.
- -#include - -#include -#include - -#include "eval_env.h" - -namespace { - -struct TestEnv : public Env { - virtual string LookupVariable(const string& var) { - return vars[var]; - } - map vars; -}; - -TEST(EvalString, PlainText) { - EvalString str; - string err; - EXPECT_TRUE(str.Parse("plain text", &err)); - EXPECT_EQ("", err); - EXPECT_EQ("plain text", str.Evaluate(NULL)); -} - -TEST(EvalString, OneVariable) { - EvalString str; - string err; - EXPECT_TRUE(str.Parse("hi $var", &err)); - EXPECT_EQ("", err); - EXPECT_EQ("hi $var", str.unparsed()); - TestEnv env; - EXPECT_EQ("hi ", str.Evaluate(&env)); - env.vars["var"] = "there"; - EXPECT_EQ("hi there", str.Evaluate(&env)); -} - -TEST(EvalString, OneVariableUpperCase) { - EvalString str; - string err; - EXPECT_TRUE(str.Parse("hi $VaR", &err)); - EXPECT_EQ("", err); - EXPECT_EQ("hi $VaR", str.unparsed()); - TestEnv env; - EXPECT_EQ("hi ", str.Evaluate(&env)); - env.vars["VaR"] = "there"; - EXPECT_EQ("hi there", str.Evaluate(&env)); -} - -TEST(EvalString, Error) { - EvalString str; - string err; - size_t err_index; - EXPECT_FALSE(str.Parse("bad $", &err, &err_index)); - EXPECT_EQ("expected variable after $", err); - EXPECT_EQ(5u, err_index); -} -TEST(EvalString, CurlyError) { - EvalString str; - string err; - size_t err_index; - EXPECT_FALSE(str.Parse("bad ${bar", &err, &err_index)); - EXPECT_EQ("expected closing curly after ${", err); - EXPECT_EQ(9u, err_index); -} - -TEST(EvalString, Curlies) { - EvalString str; - string err; - EXPECT_TRUE(str.Parse("foo ${var}baz", &err)); - EXPECT_EQ("", err); - TestEnv env; - EXPECT_EQ("foo baz", str.Evaluate(&env)); - env.vars["var"] = "barbar"; - EXPECT_EQ("foo barbarbaz", str.Evaluate(&env)); -} - -TEST(EvalString, Dollars) { - EvalString str; - string err; - EXPECT_TRUE(str.Parse("foo$$bar$bar", &err)); - ASSERT_EQ("", err); - TestEnv env; - env.vars["bar"] = "baz"; - EXPECT_EQ("foo$barbaz", str.Evaluate(&env)); -} - -} // namespace diff --git a/src/graph.h b/src/graph.h 
index b483c6d..20765a3 100644 --- a/src/graph.h +++ b/src/graph.h @@ -109,10 +109,10 @@ struct Rule { const EvalString& description() const { return description_; } const EvalString& depfile() const { return depfile_; } - private: + // TODO: private: + // Allow the parsers to reach into this object and fill out its fields. friend class ManifestParser; - friend class ParserTest; string name_; diff --git a/src/lexer.cc b/src/lexer.cc new file mode 100644 index 0000000..0371371 --- /dev/null +++ b/src/lexer.cc @@ -0,0 +1,729 @@ +/* Generated by re2c 0.13.5 */ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lexer.h" + +#include <stdio.h> + +#include "eval_env.h" + +bool Lexer::Error(const string& message, string* err) { + // Compute line/column. + int line = 1; + const char* context = input_.str_; + for (const char* p = input_.str_; p < last_token_; ++p) { + if (*p == '\n') { + ++line; + context = p + 1; + } + } + int col = last_token_ ? last_token_ - context : 0; + + char buf[1024]; + snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line); + *err = buf; + *err += message + "\n"; + + // Add some context to the message.
+ const int kTruncateColumn = 72; + if (col > 0 && col < kTruncateColumn) { + int len; + bool truncated = true; + for (len = 0; len < kTruncateColumn; ++len) { + if (context[len] == 0 || context[len] == '\n') { + truncated = false; + break; + } + } + *err += string(context, len); + if (truncated) + *err += "..."; + *err += "\n"; + *err += string(col, ' '); + *err += "^ near here\n"; + } + + return false; +} + +Lexer::Lexer(const char* input) { + Start("input", input); +} + +void Lexer::Start(StringPiece filename, StringPiece input) { + filename_ = filename; + input_ = input; + ofs_ = input_.str_; + last_token_ = NULL; +} + +const char* Lexer::TokenName(Token t) { + switch (t) { + case ERROR: return "lexing error"; + case BUILD: return "'build'"; + case COLON: return "':'"; + case DEFAULT: return "'default'"; + case EQUALS: return "'='"; + case IDENT: return "identifier"; + case INCLUDE: return "'include'"; + case INDENT: return "indent"; + case NEWLINE: return "newline"; + case PIPE2: return "'||'"; + case PIPE: return "'|'"; + case RULE: return "'rule'"; + case SUBNINJA: return "'subninja'"; + case TEOF: return "eof"; + } + return NULL; // not reached +} + +void Lexer::UnreadToken() { + ofs_ = last_token_; +} + +Lexer::Token Lexer::ReadToken() { + const char* p = ofs_; + const char* q; + const char* start; + Lexer::Token token; + for (;;) { + start = p; + +{ + char yych; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 192, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 160, 128, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 128, 128, 128, 128, 128, 128, + 128, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 128, 128, 128, 128, 160, + 128, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 160, 
160, 160, 160, 160, + 160, 160, 160, 160, 160, 160, 160, 160, + 160, 160, 160, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + + yych = *p; + if (yych <= 'Z') { + if (yych <= '-') { + if (yych <= 0x1F) { + if (yych <= 0x00) goto yy21; + if (yych == '\n') goto yy4; + goto yy23; + } else { + if (yych <= ' ') goto yy6; + if (yych != '#') goto yy23; + } + } else { + if (yych <= ':') { + if (yych == '/') goto yy23; + if (yych <= '9') goto yy20; + goto yy14; + } else { + if (yych == '=') goto yy12; + if (yych <= '@') goto yy23; + goto yy20; + } + } + } else { + if (yych <= 'h') { + if (yych <= 'a') { + if (yych == '_') goto yy20; + if (yych <= '`') goto yy23; + goto yy20; + } else { + if (yych <= 'b') goto yy8; + if (yych == 'd') goto yy11; + goto yy20; + } + } else { + if (yych <= 's') { + if (yych <= 'i') goto yy18; + if (yych <= 'q') goto yy20; + if (yych <= 'r') goto yy10; + goto yy19; + } else { + if (yych <= 'z') goto yy20; + if (yych == '|') goto yy16; + goto yy23; + } + } + } + yych = *(q = ++p); + if (yych >= 0x01) goto yy62; +yy3: + { token = ERROR; break; } +yy4: + ++p; + { token = NEWLINE; break; } +yy6: + ++p; + yych = *p; + goto yy60; +yy7: + { token = INDENT; break; } +yy8: + ++p; + if ((yych = *p) == 'u') goto yy54; + goto yy25; +yy9: + { token = IDENT; break; } +yy10: + yych = *++p; + if (yych == 'u') goto 
yy50; + goto yy25; +yy11: + yych = *++p; + if (yych == 'e') goto yy43; + goto yy25; +yy12: + ++p; + { token = EQUALS; break; } +yy14: + ++p; + { token = COLON; break; } +yy16: + ++p; + if ((yych = *p) == '|') goto yy41; + { token = PIPE; break; } +yy18: + yych = *++p; + if (yych == 'n') goto yy34; + goto yy25; +yy19: + yych = *++p; + if (yych == 'u') goto yy26; + goto yy25; +yy20: + yych = *++p; + goto yy25; +yy21: + ++p; + { token = TEOF; break; } +yy23: + yych = *++p; + goto yy3; +yy24: + ++p; + yych = *p; +yy25: + if (yybm[0+yych] & 32) { + goto yy24; + } + goto yy9; +yy26: + yych = *++p; + if (yych != 'b') goto yy25; + yych = *++p; + if (yych != 'n') goto yy25; + yych = *++p; + if (yych != 'i') goto yy25; + yych = *++p; + if (yych != 'n') goto yy25; + yych = *++p; + if (yych != 'j') goto yy25; + yych = *++p; + if (yych != 'a') goto yy25; + ++p; + if (yybm[0+(yych = *p)] & 32) { + goto yy24; + } + { token = SUBNINJA; break; } +yy34: + yych = *++p; + if (yych != 'c') goto yy25; + yych = *++p; + if (yych != 'l') goto yy25; + yych = *++p; + if (yych != 'u') goto yy25; + yych = *++p; + if (yych != 'd') goto yy25; + yych = *++p; + if (yych != 'e') goto yy25; + ++p; + if (yybm[0+(yych = *p)] & 32) { + goto yy24; + } + { token = INCLUDE; break; } +yy41: + ++p; + { token = PIPE2; break; } +yy43: + yych = *++p; + if (yych != 'f') goto yy25; + yych = *++p; + if (yych != 'a') goto yy25; + yych = *++p; + if (yych != 'u') goto yy25; + yych = *++p; + if (yych != 'l') goto yy25; + yych = *++p; + if (yych != 't') goto yy25; + ++p; + if (yybm[0+(yych = *p)] & 32) { + goto yy24; + } + { token = DEFAULT; break; } +yy50: + yych = *++p; + if (yych != 'l') goto yy25; + yych = *++p; + if (yych != 'e') goto yy25; + ++p; + if (yybm[0+(yych = *p)] & 32) { + goto yy24; + } + { token = RULE; break; } +yy54: + yych = *++p; + if (yych != 'i') goto yy25; + yych = *++p; + if (yych != 'l') goto yy25; + yych = *++p; + if (yych != 'd') goto yy25; + ++p; + if (yybm[0+(yych = *p)] & 32) { + goto 
yy24; + } + { token = BUILD; break; } +yy59: + ++p; + yych = *p; +yy60: + if (yybm[0+yych] & 64) { + goto yy59; + } + goto yy7; +yy61: + ++p; + yych = *p; +yy62: + if (yybm[0+yych] & 128) { + goto yy61; + } + if (yych >= 0x01) goto yy64; + p = q; + goto yy3; +yy64: + ++p; + { continue; } +} + + } + + last_token_ = start; + ofs_ = p; + if (token != NEWLINE && token != TEOF) + EatWhitespace(); + return token; +} + +bool Lexer::PeekToken(Token token) { + Token t = ReadToken(); + if (t == token) + return true; + UnreadToken(); + return false; +} + +void Lexer::EatWhitespace() { + const char* p = ofs_; + for (;;) { + ofs_ = p; + +{ + char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 128, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= ' ') { + if (yych <= 0x00) goto yy72; + if (yych <= 0x1F) goto yy74; + } else { + if (yych == '$') goto yy70; + goto yy74; + } + ++p; + yych = *p; + goto yy78; +yy69: + { continue; } +yy70: + ++p; + if ((yych = *p) == '\n') goto yy75; +yy71: + { break; } +yy72: + ++p; + { break; } +yy74: + yych = *++p; + goto yy71; +yy75: + ++p; + { continue; } +yy77: + ++p; + yych = *p; +yy78: + if (yybm[0+yych] & 128) { + goto yy77; + } + goto yy69; +} + + 
} +} + +bool Lexer::ReadIdent(string* out) { + const char* p = ofs_; + for (;;) { + const char* start = p; + +{ + char yych; + static const unsigned char yybm[] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 128, 0, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 0, 0, 0, 0, 0, 0, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 128, + 0, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + }; + yych = *p; + if (yych <= '@') { + if (yych <= '.') { + if (yych <= '-') goto yy83; + } else { + if (yych <= '/') goto yy83; + if (yych >= ':') goto yy83; + } + } else { + if (yych <= '_') { + if (yych <= 'Z') goto yy81; + if (yych <= '^') goto yy83; + } else { + if (yych <= '`') goto yy83; + if (yych >= '{') goto yy83; + } + } +yy81: + ++p; + yych = *p; + goto yy86; +yy82: + { + out->assign(start, p - start); + break; + } +yy83: + ++p; + { return false; } +yy85: + ++p; + yych = *p; +yy86: + if (yybm[0+yych] & 128) { + goto yy85; + } + goto yy82; +} + + } + ofs_ = p; + EatWhitespace(); + return true; +} + +bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) { + const char* p = ofs_; + const char* q; + const char* start; + for (;;) { + start = p; + +{ + char yych; + static const unsigned char yybm[] = { + 0, 128, 128, 128, 128, 128, 128, 
128, + 128, 128, 0, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 16, 128, 128, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 160, 128, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 0, 128, 128, 128, 128, 128, + 128, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 128, 128, 128, 128, 224, + 128, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 224, 224, 224, 224, 224, + 224, 224, 224, 128, 0, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + 128, 128, 128, 128, 128, 128, 128, 128, + }; + yych = *p; + if (yych <= '#') { + if (yych <= '\n') { + if (yych <= 0x00) goto yy95; + if (yych >= '\n') goto yy91; + } else { + if (yych == ' ') goto yy91; + } + } else { + if (yych <= ':') { + if (yych <= '$') goto yy93; + if (yych >= ':') goto yy91; + } else { + if (yych == '|') goto yy91; + } + } + ++p; + yych = *p; + goto yy117; +yy90: + { + eval->Add(EvalString::RAW, StringPiece(start, p - start)); + continue; + } +yy91: + ++p; + { + if (path) { + p = start; + break; + } else { + if (*start == '\n') + break; + eval->Add(EvalString::RAW, StringPiece(start, 1)); + continue; + } + } +yy93: + ++p; + if ((yych = *p) <= '9') { + if (yych <= ' ') { + if (yych == '\n') goto yy106; + if 
(yych <= 0x1F) goto yy97; + goto yy99; + } else { + if (yych == '$') goto yy101; + if (yych <= '/') goto yy97; + goto yy103; + } + } else { + if (yych <= '_') { + if (yych <= '@') goto yy97; + if (yych <= 'Z') goto yy103; + if (yych <= '^') goto yy97; + goto yy103; + } else { + if (yych <= '`') goto yy97; + if (yych <= 'z') goto yy103; + if (yych <= '{') goto yy105; + goto yy97; + } + } + { + last_token_ = start; + return Error("lexing error", err); + } +yy95: + ++p; + { + last_token_ = start; + return Error("unexpected EOF", err); + } +yy97: + ++p; +yy98: + { + last_token_ = start; + return Error("bad $-escape (literal $ must be written as $$)", err); + } +yy99: + ++p; + { + eval->Add(EvalString::RAW, StringPiece(" ", 1)); + continue; + } +yy101: + ++p; + { + eval->Add(EvalString::RAW, StringPiece("$", 1)); + continue; + } +yy103: + ++p; + yych = *p; + goto yy115; +yy104: + { + eval->Add(EvalString::SPECIAL, StringPiece(start + 1, p - start - 1)); + continue; + } +yy105: + yych = *(q = ++p); + if (yybm[0+yych] & 32) { + goto yy109; + } + goto yy98; +yy106: + ++p; + yych = *p; + if (yybm[0+yych] & 16) { + goto yy106; + } + { + continue; + } +yy109: + ++p; + yych = *p; + if (yybm[0+yych] & 32) { + goto yy109; + } + if (yych == '}') goto yy112; + p = q; + goto yy98; +yy112: + ++p; + { + eval->Add(EvalString::SPECIAL, StringPiece(start + 2, p - start - 3)); + continue; + } +yy114: + ++p; + yych = *p; +yy115: + if (yybm[0+yych] & 64) { + goto yy114; + } + goto yy104; +yy116: + ++p; + yych = *p; +yy117: + if (yybm[0+yych] & 128) { + goto yy116; + } + goto yy90; +} + + } + last_token_ = start; + ofs_ = p; + if (path) + EatWhitespace(); + // Non-path strings end in newlines, so there's no whitespace to eat. + return true; +} diff --git a/src/lexer.h b/src/lexer.h new file mode 100644 index 0000000..40e602a --- /dev/null +++ b/src/lexer.h @@ -0,0 +1,88 @@ +// Copyright 2011 Google Inc. All Rights Reserved. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "string_piece.h" + +struct EvalString; + +struct Lexer { + Lexer() {} + /// Helper ctor useful for tests. + explicit Lexer(const char* input); + + enum Token { + ERROR, + BUILD, + COLON, + DEFAULT, + EQUALS, + IDENT, + INCLUDE, + INDENT, + NEWLINE, + PIPE, + PIPE2, + RULE, + SUBNINJA, + TEOF, + }; + + /// Return a human-readable form of a token, used in error messages. + static const char* TokenName(Token t); + + /// Start parsing some input. + void Start(StringPiece filename, StringPiece input); + + /// Read a Token from the Token enum. + Token ReadToken(); + + /// Rewind to the last read Token. + void UnreadToken(); + + /// If the next token is \a token, read it and return true. + bool PeekToken(Token token); + + /// Read a simple identifier (a rule or variable name). + /// Returns false if a name can't be read. + bool ReadIdent(string* out); + + /// Read a path (complete with $escapes). + /// Returns false only on error, returned path may be empty if a delimiter + /// (space, newline) is hit. + bool ReadPath(EvalString* path, string* err) { + return ReadEvalString(path, true, err); + } + + /// Read the value side of a var = value line (complete with $escapes). + /// Returns false only on error. + bool ReadVarValue(EvalString* value, string* err) { + return ReadEvalString(value, false, err); + } + + /// Construct an error message with context. 
+ bool Error(const string& message, string* err); + +private: + /// Skip past whitespace (called after each read token/ident/etc.). + void EatWhitespace(); + + /// Read a $-escaped string. + bool ReadEvalString(EvalString* eval, bool path, string* err); + + StringPiece filename_; + StringPiece input_; + const char* ofs_; + const char* last_token_; +}; + diff --git a/src/lexer.in.cc b/src/lexer.in.cc new file mode 100644 index 0000000..a3b29c1 --- /dev/null +++ b/src/lexer.in.cc @@ -0,0 +1,234 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "lexer.h" + +#include "eval_env.h" + +bool Lexer::Error(const string& message, string* err) { + // Compute line/column. + int line = 1; + const char* context = input_.str_; + for (const char* p = input_.str_; p < last_token_; ++p) { + if (*p == '\n') { + ++line; + context = p + 1; + } + } + int col = last_token_ ? last_token_ - context : 0; + + char buf[1024]; + snprintf(buf, sizeof(buf), "%s:%d: ", filename_.AsString().c_str(), line); + *err = buf; + *err += message + "\n"; + + // Add some context to the message. 
+ const int kTruncateColumn = 72; + if (col > 0 && col < kTruncateColumn) { + int len; + bool truncated = true; + for (len = 0; len < kTruncateColumn; ++len) { + if (context[len] == 0 || context[len] == '\n') { + truncated = false; + break; + } + } + *err += string(context, len); + if (truncated) + *err += "..."; + *err += "\n"; + *err += string(col, ' '); + *err += "^ near here\n"; + } + + return false; +} + +Lexer::Lexer(const char* input) { + Start("input", input); +} + +void Lexer::Start(StringPiece filename, StringPiece input) { + filename_ = filename; + input_ = input; + ofs_ = input_.str_; + last_token_ = NULL; +} + +const char* Lexer::TokenName(Token t) { + switch (t) { + case ERROR: return "lexing error"; + case BUILD: return "'build'"; + case COLON: return "':'"; + case DEFAULT: return "'default'"; + case EQUALS: return "'='"; + case IDENT: return "identifier"; + case INCLUDE: return "'include'"; + case INDENT: return "indent"; + case NEWLINE: return "newline"; + case PIPE2: return "'||'"; + case PIPE: return "'|'"; + case RULE: return "'rule'"; + case SUBNINJA: return "'subninja'"; + case TEOF: return "eof"; + } + return NULL; // not reached +} + +void Lexer::UnreadToken() { + ofs_ = last_token_; +} + +Lexer::Token Lexer::ReadToken() { + const char* p = ofs_; + const char* q; + const char* start; + Lexer::Token token; + for (;;) { + start = p; + /*!re2c + re2c:define:YYCTYPE = "char"; + re2c:define:YYCURSOR = p; + re2c:define:YYMARKER = q; + re2c:yyfill:enable = 0; + + nul = "\000"; + simple_varname = [a-zA-Z0-9_]+; + varname = [a-zA-Z0-9_.]+; + + "#"[^\000\n]*"\n" { continue; } + [\n] { token = NEWLINE; break; } + [ ]+ { token = INDENT; break; } + "build" { token = BUILD; break; } + "rule" { token = RULE; break; } + "default" { token = DEFAULT; break; } + "=" { token = EQUALS; break; } + ":" { token = COLON; break; } + "||" { token = PIPE2; break; } + "|" { token = PIPE; break; } + "include" { token = INCLUDE; break; } + "subninja" { token = SUBNINJA; 
break; } + varname { token = IDENT; break; } + nul { token = TEOF; break; } + [^] { token = ERROR; break; } + */ + } + + last_token_ = start; + ofs_ = p; + if (token != NEWLINE && token != TEOF) + EatWhitespace(); + return token; +} + +bool Lexer::PeekToken(Token token) { + Token t = ReadToken(); + if (t == token) + return true; + UnreadToken(); + return false; +} + +void Lexer::EatWhitespace() { + const char* p = ofs_; + for (;;) { + ofs_ = p; + /*!re2c + [ ]+ { continue; } + "$\n" { continue; } + nul { break; } + [^] { break; } + */ + } +} + +bool Lexer::ReadIdent(string* out) { + const char* p = ofs_; + for (;;) { + const char* start = p; + /*!re2c + varname { + out->assign(start, p - start); + break; + } + [^] { return false; } + */ + } + ofs_ = p; + EatWhitespace(); + return true; +} + +bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) { + const char* p = ofs_; + const char* q; + const char* start; + for (;;) { + start = p; + /*!re2c + [^$ :\n|\000]+ { + eval->Add(EvalString::RAW, StringPiece(start, p - start)); + continue; + } + [ :|\n] { + if (path) { + p = start; + break; + } else { + if (*start == '\n') + break; + eval->Add(EvalString::RAW, StringPiece(start, 1)); + continue; + } + } + "$$" { + eval->Add(EvalString::RAW, StringPiece("$", 1)); + continue; + } + "$ " { + eval->Add(EvalString::RAW, StringPiece(" ", 1)); + continue; + } + "$\n"[ ]* { + continue; + } + "${"varname"}" { + eval->Add(EvalString::SPECIAL, StringPiece(start + 2, p - start - 3)); + continue; + } + "$"simple_varname { + eval->Add(EvalString::SPECIAL, StringPiece(start + 1, p - start - 1)); + continue; + } + "$". 
{ + last_token_ = start; + return Error("bad $-escape (literal $ must be written as $$)", err); + } + nul { + last_token_ = start; + return Error("unexpected EOF", err); + } + [^] { + last_token_ = start; + return Error("lexing error", err); + } + */ + } + last_token_ = start; + ofs_ = p; + if (path) + EatWhitespace(); + // Non-path strings end in newlines, so there's no whitespace to eat. + return true; +} diff --git a/src/lexer_test.cc b/src/lexer_test.cc new file mode 100644 index 0000000..ce8082a --- /dev/null +++ b/src/lexer_test.cc @@ -0,0 +1,85 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "lexer.h" + +#include + +#include "eval_env.h" + +TEST(Lexer, ReadVarValue) { + Lexer lexer("plain text $var $VaR ${x}\n"); + EvalString eval; + string err; + EXPECT_TRUE(lexer.ReadVarValue(&eval, &err)); + EXPECT_EQ("", err); + EXPECT_EQ("[plain text ][$var][ ][$VaR][ ][$x]", + eval.Serialize()); +} + +TEST(Lexer, ReadEvalStringEscapes) { + Lexer lexer("$ $$ab $\ncde\n"); + EvalString eval; + string err; + EXPECT_TRUE(lexer.ReadVarValue(&eval, &err)); + EXPECT_EQ("", err); + EXPECT_EQ("[ $ab cde]", + eval.Serialize()); +} + +TEST(Lexer, ReadIdent) { + Lexer lexer("foo baR baz_123 blah.dots"); + string ident; + EXPECT_TRUE(lexer.ReadIdent(&ident)); + EXPECT_EQ("foo", ident); + EXPECT_TRUE(lexer.ReadIdent(&ident)); + EXPECT_EQ("baR", ident); + EXPECT_TRUE(lexer.ReadIdent(&ident)); + EXPECT_EQ("baz_123", ident); +} + +TEST(Lexer, ReadIdentCurlies) { + // Verify that ReadIdent includes dots in the name, + // but in an expansion $bar.dots stops at the dot. + Lexer lexer("foo.dots $bar.dots ${bar.dots}\n"); + string ident; + EXPECT_TRUE(lexer.ReadIdent(&ident)); + EXPECT_EQ("foo.dots", ident); + + EvalString eval; + string err; + EXPECT_TRUE(lexer.ReadVarValue(&eval, &err)); + EXPECT_EQ("", err); + EXPECT_EQ("[$bar][.dots ][$bar.dots]", + eval.Serialize()); +} + +TEST(Lexer, Error) { + Lexer lexer("foo$\nbad $"); + EvalString eval; + string err; + ASSERT_FALSE(lexer.ReadVarValue(&eval, &err)); + EXPECT_EQ("input:2: bad $-escape (literal $ must be written as $$)\n" + "bad $\n" + " ^ near here\n" + , err); +} + +TEST(Lexer, CommentEOF) { + // Verify we don't run off the end of the string when the EOF is + // mid-comment. 
+ Lexer lexer("# foo"); + Lexer::Token token = lexer.ReadToken(); + EXPECT_EQ(Lexer::ERROR, token); +} diff --git a/src/ninja.cc b/src/ninja.cc index 452f075..e08f2fc 100644 --- a/src/ninja.cc +++ b/src/ninja.cc @@ -364,12 +364,15 @@ int CmdTargets(State* state, int argc, char* argv[]) { int CmdRules(State* state, int argc, char* /* argv */[]) { for (map::iterator i = state->rules_.begin(); i != state->rules_.end(); ++i) { - if (i->second->description().unparsed_.empty()) { + if (i->second->description().empty()) { printf("%s\n", i->first.c_str()); } else { printf("%s: %s\n", i->first.c_str(), - i->second->description().unparsed_.c_str()); + // XXX I changed it such that we don't have an easy way + // to get the source text anymore, so this output is + // unsatisfactory. How useful is this command, anyway? + i->second->description().Serialize().c_str()); } } return 0; @@ -547,7 +550,10 @@ reload: ManifestParser parser(&state, &file_reader); string err; if (!parser.Load(input_file, &err)) { - Error("loading '%s': %s", input_file, err.c_str()); + // The pattern in Ninja for errors is to return a one-line string, + // but parse errors are special in that they are multiline with + // context. Just report it verbatim. 
+ fprintf(stderr, "%s", err.c_str()); return 1; } diff --git a/src/parsers.cc b/src/parsers.cc index 44c3711..095e93f 100644 --- a/src/parsers.cc +++ b/src/parsers.cc @@ -23,242 +23,6 @@ #include "state.h" #include "util.h" -string Token::AsString() const { - switch (type_) { - case IDENT: return "'" + string(pos_, end_ - pos_) + "'"; - case UNKNOWN: return "unknown '" + string(pos_, end_ - pos_) + "'"; - case NEWLINE: return "newline"; - case EQUALS: return "'='"; - case COLON: return "':'"; - case PIPE: return "'|'"; - case PIPE2: return "'||'"; - case TEOF: return "eof"; - case INDENT: return "indenting in"; - case OUTDENT: return "indenting out"; - case NONE: break; - } - assert(false); - return ""; -} - -bool Tokenizer::ErrorAt(const char* pos, const string& message, string* err) { - // Re-scan the input, counting newlines so that we can compute the - // correct position. - int line = 1; - const char* line_start = start_; - for (const char* p = start_; p < pos; ++p) { - if (*p == '\n') { - ++line; - line_start = p + 1; - } - } - int col = pos - line_start + 1; - - char buf[1024]; - snprintf(buf, sizeof(buf), - "line %d, col %d: %s", line, col, message.c_str()); - err->assign(buf); - return false; -} - -void Tokenizer::Start(const char* start, const char* end) { - cur_line_ = cur_ = start_ = start; - end_ = end; -} - -bool Tokenizer::ErrorExpected(const string& expected, string* err) { - return Error("expected " + expected + ", got " + token_.AsString(), err); -} - -void Tokenizer::SkipWhitespace(bool newline) { - if (token_.type_ == Token::NEWLINE && newline) - Newline(NULL); - - while (cur_ < end_) { - if (*cur_ == ' ') { - ++cur_; - } else if (newline && *cur_ == '\n') { - Newline(NULL); - } else if (*cur_ == '$' && cur_ + 1 < end_ && cur_[1] == '\n') { - ++cur_; ++cur_; - } else if (*cur_ == '#' && cur_ == cur_line_) { - while (cur_ < end_ && *cur_ != '\n') - ++cur_; - if (cur_ < end_ && *cur_ == '\n') - ++cur_; - cur_line_ = cur_; - } else { - break; - } - 
} -} - -bool Tokenizer::Newline(string* err) { - if (!ExpectToken(Token::NEWLINE, err)) - return false; - - return true; -} - -/// Return true if |c| is part of an identifier. -static bool IsIdentChar(char c) { - // This function shows up hot on profiles. Instead of the natural - // 'if' statement, use a table as generated by this Python script: - // import string - // cs = set() - // for c in string.ascii_letters + string.digits + r'+,-./\_$': - // cs.add(ord(c)) - // for i in range(128): - // if i in cs: - // print '1,', - // else: - // print '0,', - // if i % 16 == 15: - // print - static const bool kIdents[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - }; - return kIdents[(int)c]; -} - -bool Tokenizer::ExpectToken(Token::Type expected, string* err) { - PeekToken(); - if (token_.type_ != expected) - return ErrorExpected(Token(expected).AsString(), err); - ConsumeToken(); - return true; -} - -bool Tokenizer::ExpectIdent(const char* expected, string* err) { - PeekToken(); - if (token_.type_ != Token::IDENT || - strncmp(token_.pos_, expected, token_.end_ - token_.pos_) != 0) { - return ErrorExpected(string("'") + expected + "'", err); - } - ConsumeToken(); - return true; -} - -bool Tokenizer::ReadIdent(StringPiece* out) { - PeekToken(); - if (token_.type_ != Token::IDENT) - return false; - out->str_ = token_.pos_; - out->len_ = token_.end_ - token_.pos_; - ConsumeToken(); - return true; -} - -bool Tokenizer::ReadIdent(string* out) { - StringPiece token; - if (!ReadIdent(&token)) - return false; - out->assign(token.str_, token.len_); - return true; -} - -bool Tokenizer::ReadToNewline(string *text, string* err, 
size_t max_length) { - // XXX token_.clear(); - while (cur_ < end_ && *cur_ != '\n') { - if (*cur_ == '$') { - // Might be a line continuation; peek ahead to check. - if (cur_ + 1 >= end_) - return Error("unexpected eof", err); - if (*(cur_ + 1) == '\n') { - // Let SkipWhitespace handle the continuation logic. - SkipWhitespace(); - continue; - } - - // Otherwise, just treat it like a normal character. - text->push_back(*cur_); - ++cur_; - } else { - text->push_back(*cur_); - ++cur_; - } - if (text->size() >= max_length) { - token_.pos_ = cur_; - return false; - } - } - return Newline(err); -} - -Token::Type Tokenizer::PeekToken() { - if (token_.type_ != Token::NONE) - return token_.type_; - - token_.pos_ = cur_; - if (cur_indent_ == -1) { - cur_indent_ = cur_ - cur_line_; - if (cur_indent_ != last_indent_) { - if (cur_indent_ > last_indent_) { - token_.type_ = Token::INDENT; - } else if (cur_indent_ < last_indent_) { - token_.type_ = Token::OUTDENT; - } - last_indent_ = cur_indent_; - return token_.type_; - } - } - - if (cur_ >= end_) { - token_.type_ = Token::TEOF; - return token_.type_; - } - - if (IsIdentChar(*cur_)) { - while (cur_ < end_ && IsIdentChar(*cur_)) { - if (*cur_ == '$' && cur_ + 1 < end_ && cur_[1] == ' ') { - ++cur_; - } - ++cur_; - } - token_.end_ = cur_; - token_.type_ = Token::IDENT; - } else if (*cur_ == ':') { - token_.type_ = Token::COLON; - ++cur_; - } else if (*cur_ == '=') { - token_.type_ = Token::EQUALS; - ++cur_; - } else if (*cur_ == '|') { - if (cur_ + 1 < end_ && cur_[1] == '|') { - token_.type_ = Token::PIPE2; - cur_ += 2; - } else { - token_.type_ = Token::PIPE; - ++cur_; - } - } else if (*cur_ == '\n') { - token_.type_ = Token::NEWLINE; - ++cur_; - cur_line_ = cur_; - cur_indent_ = -1; - } - - SkipWhitespace(); - - if (token_.type_ == Token::NONE) { - token_.type_ = Token::UNKNOWN; - token_.end_ = cur_ + 1; - } - - return token_.type_; -} - -void Tokenizer::ConsumeToken() { - token_.Clear(); -} - 
ManifestParser::ManifestParser(State* state, FileReader* file_reader) : state_(state), file_reader_(file_reader) { env_ = &state->bindings_; @@ -267,58 +31,66 @@ bool ManifestParser::Load(const string& filename, string* err) { string contents; if (!file_reader_->ReadFile(filename, &contents, err)) return false; - return Parse(contents, err); + contents.resize(contents.size() + 10); + return Parse(filename, contents, err); } -bool ManifestParser::Parse(const string& input, string* err) { - tokenizer_.Start(input.data(), input.data() + input.size()); - - tokenizer_.SkipWhitespace(true); - - while (tokenizer_.token().type_ != Token::TEOF) { - switch (tokenizer_.PeekToken()) { - case Token::IDENT: { - const Token& token = tokenizer_.token(); - int len = token.end_ - token.pos_; - if (len == 4 && memcmp(token.pos_, "rule", 4) == 0) { - if (!ParseRule(err)) - return false; - } else if (len == 5 && memcmp(token.pos_, "build", 5) == 0) { - if (!ParseEdge(err)) - return false; - } else if (len == 7 && memcmp(token.pos_, "default", 7) == 0) { - if (!ParseDefaults(err)) - return false; - } else if ((len == 7 && memcmp(token.pos_, "include", 7) == 0) || - (len == 8 && memcmp(token.pos_, "subninja", 8) == 0)) { - if (!ParseFileInclude(err)) - return false; - } else { - string name, value; - if (!ParseLet(&name, &value, err)) - return false; - env_->AddBinding(name, value); - } - break; - } - case Token::TEOF: - continue; - default: - return tokenizer_.Error("unhandled " + tokenizer_.token().AsString(), err); +bool ManifestParser::Parse(const string& filename, const string& input, + string* err) { + lexer_.Start(filename, input); + + for (;;) { + Lexer::Token token = lexer_.ReadToken(); + switch (token) { + case Lexer::BUILD: + if (!ParseEdge(err)) + return false; + break; + case Lexer::RULE: + if (!ParseRule(err)) + return false; + break; + case Lexer::DEFAULT: + if (!ParseDefault(err)) + return false; + break; + case Lexer::IDENT: { + lexer_.UnreadToken(); + string name; + 
EvalString value; + if (!ParseLet(&name, &value, err)) + return false; + env_->AddBinding(name, value.Evaluate(env_)); + break; + } + case Lexer::INCLUDE: + if (!ParseFileInclude(false, err)) + return false; + break; + case Lexer::SUBNINJA: + if (!ParseFileInclude(true, err)) + return false; + break; + case Lexer::ERROR: + return lexer_.Error("lexing error", err); + case Lexer::TEOF: + return true; + case Lexer::NEWLINE: + break; + default: + return lexer_.Error(string("unexpected") + Lexer::TokenName(token), + err); } - tokenizer_.SkipWhitespace(true); } - - return true; + return false; // not reached } bool ManifestParser::ParseRule(string* err) { - if (!tokenizer_.ExpectIdent("rule", err)) - return false; string name; - if (!tokenizer_.ReadIdent(&name)) - return tokenizer_.ErrorExpected("rule name", err); - if (!tokenizer_.Newline(err)) + if (!lexer_.ReadIdent(&name)) + return lexer_.Error("expected rule name", err); + + if (!ExpectToken(Lexer::NEWLINE, err)) return false; if (state_->LookupRule(name) != NULL) { @@ -328,167 +100,120 @@ bool ManifestParser::ParseRule(string* err) { Rule* rule = new Rule(name); // XXX scoped_ptr - if (tokenizer_.PeekToken() == Token::INDENT) { - tokenizer_.ConsumeToken(); - - while (tokenizer_.PeekToken() != Token::OUTDENT) { - const char* let_loc = tokenizer_.token_.pos_; - - string key; - if (!ParseLetKey(&key, err)) - return false; + while (lexer_.PeekToken(Lexer::INDENT)) { + string key; + EvalString value; + if (!ParseLet(&key, &value, err)) + return false; - EvalString* eval_target = NULL; - if (key == "command") { - eval_target = &rule->command_; - } else if (key == "depfile") { - eval_target = &rule->depfile_; - } else if (key == "description") { - eval_target = &rule->description_; - } else if (key == "generator") { - rule->generator_ = true; - string dummy; - if (!tokenizer_.ReadToNewline(&dummy, err)) - return false; - continue; - } else if (key == "restat") { - rule->restat_ = true; - string dummy; - if 
(!tokenizer_.ReadToNewline(&dummy, err)) - return false; - continue; - } else { - // Die on other keyvals for now; revisit if we want to add a - // scope here. - return tokenizer_.ErrorAt(let_loc, "unexpected variable '" + key + "'", - err); - } - - if (!ParseLetValue(eval_target, err)) - return false; + if (key == "command") { + rule->command_ = value; + } else if (key == "depfile") { + rule->depfile_ = value; + } else if (key == "description") { + rule->description_ = value; + } else if (key == "generator") { + rule->generator_ = true; + } else if (key == "restat") { + rule->restat_ = true; + } else { + // Die on other keyvals for now; revisit if we want to add a + // scope here. + return lexer_.Error("unexpected variable '" + key + "'", err); } - tokenizer_.ConsumeToken(); } - if (rule->command_.unparsed().empty()) - return tokenizer_.Error("expected 'command =' line", err); + if (rule->command_.empty()) + return lexer_.Error("expected 'command =' line", err); state_->AddRule(rule); return true; } -bool ManifestParser::ParseLet(string* key, string* value, string* err) { - if (!ParseLetKey(key, err)) +bool ManifestParser::ParseLet(string* key, EvalString* value, string* err) { + if (!lexer_.ReadIdent(key)) return false; - - EvalString eval; - if (!ParseLetValue(&eval, err)) + if (!ExpectToken(Lexer::EQUALS, err)) return false; - - *value = eval.Evaluate(env_); - - return true; -} - -bool ManifestParser::ParseLetKey(string* key, string* err) { - if (!tokenizer_.ReadIdent(key)) - return tokenizer_.ErrorExpected("variable name", err); - if (!tokenizer_.ExpectToken(Token::EQUALS, err)) - return false; - return true; -} - -bool ManifestParser::ParseLetValue(EvalString* eval, string* err) { - // Backup the tokenizer state prior to consuming the line, for reporting - // the source location in case of a parse error later. - Tokenizer tokenizer_backup = tokenizer_; - - // XXX should we tokenize here? it means we'll need to understand - // command syntax, though... 
- string value; - if (!tokenizer_.ReadToNewline(&value, err)) + if (!lexer_.ReadVarValue(value, err)) return false; - - string eval_err; - size_t err_index; - if (!eval->Parse(value, &eval_err, &err_index)) { - value.clear(); - // Advance the saved tokenizer state up to the error index to report the - // error at the correct source location. - tokenizer_backup.ReadToNewline(&value, err, err_index); - return tokenizer_backup.Error(eval_err, err); - } - return true; } -bool ManifestParser::ParseDefaults(string* err) { - if (!tokenizer_.ExpectIdent("default", err)) +bool ManifestParser::ParseDefault(string* err) { + EvalString eval; + if (!lexer_.ReadPath(&eval, err)) return false; - - string target; - if (!tokenizer_.ReadIdent(&target)) - return tokenizer_.ErrorExpected("target name", err); + if (eval.empty()) + return lexer_.Error("expected target name", err); do { - EvalString eval; - string eval_err; - if (!eval.Parse(target, &eval_err)) - return tokenizer_.Error(eval_err, err); string path = eval.Evaluate(env_); - if (!CanonicalizePath(&path, &eval_err)) - return tokenizer_.Error(eval_err, err); - if (!state_->AddDefault(path, &eval_err)) - return tokenizer_.Error(eval_err, err); - } while (tokenizer_.ReadIdent(&target)); + string path_err; + if (!CanonicalizePath(&path, &path_err)) + return lexer_.Error(path_err, err); + if (!state_->AddDefault(path, &path_err)) + return lexer_.Error(path_err, err); + + eval.Clear(); + if (!lexer_.ReadPath(&eval, err)) + return false; + } while (!eval.empty()); - if (!tokenizer_.Newline(err)) + if (!ExpectToken(Lexer::NEWLINE, err)) return false; return true; } bool ManifestParser::ParseEdge(string* err) { - vector ins, outs; + vector ins, outs; - if (!tokenizer_.ExpectIdent("build", err)) - return false; + { + EvalString out; + if (!lexer_.ReadPath(&out, err)) + return false; + if (out.empty()) + return lexer_.Error("expected path", err); - for (;;) { - if (tokenizer_.PeekToken() == Token::COLON) { - tokenizer_.ConsumeToken(); 
- break; - } + do { + outs.push_back(out); - string out; - if (!tokenizer_.ReadIdent(&out)) - return tokenizer_.ErrorExpected("output file list", err); - outs.push_back(out); + out.Clear(); + if (!lexer_.ReadPath(&out, err)) + return false; + } while (!out.empty()); } - // XXX check outs not empty + + if (!ExpectToken(Lexer::COLON, err)) + return false; string rule_name; - if (!tokenizer_.ReadIdent(&rule_name)) - return tokenizer_.ErrorExpected("build command name", err); + if (!lexer_.ReadIdent(&rule_name)) + return lexer_.Error("expected build command name", err); const Rule* rule = state_->LookupRule(rule_name); if (!rule) - return tokenizer_.Error("unknown build rule '" + rule_name + "'", err); + return lexer_.Error("unknown build rule '" + rule_name + "'", err); for (;;) { - string in; - if (!tokenizer_.ReadIdent(&in)) + // XXX should we require one path here? + EvalString in; + if (!lexer_.ReadPath(&in, err)) + return false; + if (in.empty()) break; ins.push_back(in); } // Add all order-only deps, counting how many as we go. int implicit = 0; - if (tokenizer_.PeekToken() == Token::PIPE) { - tokenizer_.ConsumeToken(); + if (lexer_.PeekToken(Lexer::PIPE)) { for (;;) { - string in; - if (!tokenizer_.ReadIdent(&in)) + EvalString in; + if (!lexer_.ReadPath(&in, err)) + return err; + if (in.empty()) break; ins.push_back(in); ++implicit; @@ -497,97 +222,95 @@ bool ManifestParser::ParseEdge(string* err) { // Add all order-only deps, counting how many as we go. int order_only = 0; - if (tokenizer_.PeekToken() == Token::PIPE2) { - tokenizer_.ConsumeToken(); + if (lexer_.PeekToken(Lexer::PIPE2)) { for (;;) { - string in; - if (!tokenizer_.ReadIdent(&in)) + EvalString in; + if (!lexer_.ReadPath(&in, err)) + return false; + if (in.empty()) break; ins.push_back(in); ++order_only; } } - if (!tokenizer_.Newline(err)) + if (!ExpectToken(Lexer::NEWLINE, err)) return false; // Default to using outer env. 
BindingEnv* env = env_; - // But use a nested env if there are variables in scope. - if (tokenizer_.PeekToken() == Token::INDENT) { - tokenizer_.ConsumeToken(); - + // But create and fill a nested env if there are variables in scope. + if (lexer_.PeekToken(Lexer::INDENT)) { // XXX scoped_ptr to handle error case. env = new BindingEnv; env->parent_ = env_; - while (tokenizer_.PeekToken() != Token::OUTDENT) { - string key, val; + do { + string key; + EvalString val; if (!ParseLet(&key, &val, err)) return false; - env->AddBinding(key, val); - } - tokenizer_.ConsumeToken(); - } - - // Evaluate all variables in paths. - // XXX: fast path skip the eval parse if there's no $ in the path? - vector* paths[2] = { &ins, &outs }; - for (int p = 0; p < 2; ++p) { - for (vector::iterator i = paths[p]->begin(); - i != paths[p]->end(); ++i) { - EvalString eval; - string eval_err; - if (!eval.Parse(*i, &eval_err)) - return tokenizer_.Error(eval_err, err); - string path = eval.Evaluate(env); - if (!CanonicalizePath(&path, &eval_err)) - return tokenizer_.Error(eval_err, err); - *i = path; - } + env->AddBinding(key, val.Evaluate(env_)); + } while (lexer_.PeekToken(Lexer::INDENT)); } Edge* edge = state_->AddEdge(rule); edge->env_ = env; - for (vector::iterator i = ins.begin(); i != ins.end(); ++i) - state_->AddIn(edge, *i); - for (vector::iterator i = outs.begin(); i != outs.end(); ++i) - state_->AddOut(edge, *i); + for (vector::iterator i = ins.begin(); i != ins.end(); ++i) { + string path = i->Evaluate(env); + string path_err; + if (!CanonicalizePath(&path, &path_err)) + return lexer_.Error(path_err, err); + state_->AddIn(edge, path); + } + for (vector::iterator i = outs.begin(); i != outs.end(); ++i) { + string path = i->Evaluate(env); + string path_err; + if (!CanonicalizePath(&path, &path_err)) + return lexer_.Error(path_err, err); + state_->AddOut(edge, path); + } edge->implicit_deps_ = implicit; edge->order_only_deps_ = order_only; return true; } -bool 
ManifestParser::ParseFileInclude(string* err) { - string type; - tokenizer_.ReadIdent(&type); - - string path; - if (!tokenizer_.ReadIdent(&path)) - return tokenizer_.ErrorExpected("path to ninja file", err); +bool ManifestParser::ParseFileInclude(bool new_scope, string* err) { + // XXX this should use ReadPath! + EvalString eval; + if (!lexer_.ReadPath(&eval, err)) + return false; + string path = eval.Evaluate(env_); string contents; string read_err; if (!file_reader_->ReadFile(path, &contents, &read_err)) - return tokenizer_.Error("loading " + path + ": " + read_err, err); + return lexer_.Error("loading '" + path + "': " + read_err, err); ManifestParser subparser(state_, file_reader_); - if (type == "subninja") { - // subninja: Construct a new scope for the new parser. + if (new_scope) { subparser.env_ = new BindingEnv; subparser.env_->parent_ = env_; } else { - // include: Reuse the current scope. subparser.env_ = env_; } - string sub_err; - if (!subparser.Parse(contents, &sub_err)) - return tokenizer_.Error("in '" + path + "': " + sub_err, err); + if (!subparser.Parse(path, contents, err)) + return false; - if (!tokenizer_.Newline(err)) + if (!ExpectToken(Lexer::NEWLINE, err)) return false; return true; } + +bool ManifestParser::ExpectToken(Lexer::Token expected, string* err) { + Lexer::Token token = lexer_.ReadToken(); + if (token != expected) { + string message = string("expected ") + Lexer::TokenName(expected); + message += string(", got ") + Lexer::TokenName(token); + return lexer_.Error(message, err); + } + return true; +} diff --git a/src/parsers.h b/src/parsers.h index 101b278..f889156 100644 --- a/src/parsers.h +++ b/src/parsers.h @@ -21,74 +21,10 @@ using namespace std; +#include "lexer.h" #include "string_piece.h" struct BindingEnv; - -/// A single parsed token in an input stream. 
-struct Token { - enum Type { - NONE, - UNKNOWN, - IDENT, - NEWLINE, - EQUALS, - COLON, - PIPE, - PIPE2, - INDENT, - OUTDENT, - TEOF - }; - explicit Token(Type type) : type_(type) {} - - void Clear() { type_ = NONE; } - string AsString() const; - - Type type_; - const char* pos_; - const char* end_; -}; - -/// Processes an input stream into Tokens. -struct Tokenizer { - Tokenizer() : - token_(Token::NONE), - last_indent_(0), cur_indent_(-1) {} - - void Start(const char* start, const char* end); - /// Report an error at a particular location. - bool ErrorAt(const char* pos, const string& message, string* err); - /// Report an error with a location pointing at the current token. - bool Error(const string& message, string* err) { - return ErrorAt(token_.pos_, message, err); - } - /// Call Error() with "expected foo, got bar". - bool ErrorExpected(const string& expected, string* err); - - const Token& token() const { return token_; } - - void SkipWhitespace(bool newline=false); - bool Newline(string* err); - bool ExpectToken(Token::Type expected, string* err); - bool ExpectIdent(const char* expected, string* err); - bool ReadIdent(StringPiece* out); - bool ReadIdent(string* out); - bool ReadToNewline(string* text, string* err, - size_t max_length=std::numeric_limits::max()); - - Token::Type PeekToken(); - void ConsumeToken(); - - const char* start_; /// Start of the input. - const char* cur_; /// Current position within the input. - const char* end_; /// End of the input. - - const char* cur_line_; /// Start of current line. - Token token_; - int last_indent_, cur_indent_; -}; - struct EvalString; struct State; @@ -101,30 +37,35 @@ struct ManifestParser { ManifestParser(State* state, FileReader* file_reader); + /// Load and parse a file. bool Load(const string& filename, string* err); - bool Parse(const string& input, string* err); + /// Parse a text string of input. Used by tests. 
+ bool ParseTest(const string& input, string* err) { + return Parse("input", input, err); + } + +private: + /// Parse a file, given its contents as a string. + bool Parse(const string& filename, const string& input, string* err); + + /// Parse various statement types. bool ParseRule(string* err); - /// Parse a key=val statement, expanding $vars in the value with the - /// current env. - bool ParseLet(string* key, string* val, string* err); + bool ParseLet(string* key, EvalString* val, string* err); bool ParseEdge(string* err); - bool ParseDefaults(string* err); + bool ParseDefault(string* err); /// Parse either a 'subninja' or 'include' line. - bool ParseFileInclude(string* err); - + bool ParseFileInclude(bool new_scope, string* err); - /// Parse the "key=" half of a key=val statement. - bool ParseLetKey(string* key, string* err); - /// Parse the val half of a key=val statement, writing and parsing - /// output into an EvalString (ready for expansion). - bool ParseLetValue(EvalString* eval, string* err); + /// If the next token is not \a expected, produce an error string + /// saying "expected foo, got bar". 
+ bool ExpectToken(Lexer::Token expected, string* err); State* state_; BindingEnv* env_; FileReader* file_reader_; - Tokenizer tokenizer_; + Lexer lexer_; }; #endif // NINJA_PARSERS_H_ diff --git a/src/parsers_test.cc b/src/parsers_test.cc index 53b4e92..9d46beb 100644 --- a/src/parsers_test.cc +++ b/src/parsers_test.cc @@ -24,7 +24,7 @@ struct ParserTest : public testing::Test, void AssertParse(const char* input) { ManifestParser parser(&state, this); string err; - ASSERT_TRUE(parser.Parse(input, &err)) << err; + ASSERT_TRUE(parser.ParseTest(input, &err)) << err; ASSERT_EQ("", err); } @@ -61,7 +61,7 @@ TEST_F(ParserTest, Rules) { ASSERT_EQ(3u, state.rules_.size()); const Rule* rule = state.rules_.begin()->second; EXPECT_EQ("cat", rule->name()); - EXPECT_EQ("cat $in > $out", rule->command().unparsed()); + EXPECT_EQ("[cat ][$in][ > ][$out]", rule->command().Serialize()); } TEST_F(ParserTest, Variables) { @@ -118,7 +118,7 @@ TEST_F(ParserTest, Continuation) { ASSERT_EQ(2u, state.rules_.size()); const Rule* rule = state.rules_.begin()->second; EXPECT_EQ("link", rule->name()); - EXPECT_EQ("foo bar baz", rule->command().unparsed()); + EXPECT_EQ("[foo bar baz]", rule->command().Serialize()); } TEST_F(ParserTest, Backslash) { @@ -151,9 +151,9 @@ TEST_F(ParserTest, Dollars) { TEST_F(ParserTest, EscapeSpaces) { ASSERT_NO_FATAL_FAILURE(AssertParse( -"rule has$ spaces\n" +"rule spaces\n" " command = something\n" -"build foo$ bar: has$ spaces $$one two$$$ three\n" +"build foo$ bar: spaces $$one two$$$ three\n" )); EXPECT_TRUE(state.LookupNode("foo bar")); EXPECT_EQ(state.edges_[0]->outputs_[0]->path(), "foo bar"); @@ -211,98 +211,131 @@ TEST_F(ParserTest, Errors) { { ManifestParser parser(NULL, NULL); string err; - EXPECT_FALSE(parser.Parse("foobar", &err)); - EXPECT_EQ("line 1, col 7: expected '=', got eof", err); + EXPECT_FALSE(parser.ParseTest("foobar", &err)); + EXPECT_EQ("input:1: expected '=', got eof\n" + "foobar\n" + " ^ near here\n" + , err); } { ManifestParser 
parser(NULL, NULL); string err; - EXPECT_FALSE(parser.Parse("x 3", &err)); - EXPECT_EQ("line 1, col 3: expected '=', got '3'", err); + EXPECT_FALSE(parser.ParseTest("x 3", &err)); + EXPECT_EQ("input:1: expected '=', got identifier\n" + "x 3\n" + " ^ near here\n" + , err); } { ManifestParser parser(NULL, NULL); string err; - EXPECT_FALSE(parser.Parse("x = 3", &err)); - EXPECT_EQ("line 1, col 6: expected newline, got eof", err); + EXPECT_FALSE(parser.ParseTest("x = 3", &err)); + EXPECT_EQ("input:1: unexpected EOF\n" + "x = 3\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("x = 3\ny 2", &err)); - EXPECT_EQ("line 2, col 3: expected '=', got '2'", err); + EXPECT_FALSE(parser.ParseTest("x = 3\ny 2", &err)); + EXPECT_EQ("input:2: expected '=', got identifier\n" + "y 2\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("x = $", &err)); - EXPECT_EQ("line 1, col 3: unexpected eof", err); + EXPECT_FALSE(parser.ParseTest("x = $", &err)); + EXPECT_EQ("input:1: bad $-escape (literal $ must be written as $$)\n" + "x = $\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("x = $\n $[\n", &err)); - EXPECT_EQ("line 2, col 3: expected variable after $", err); + EXPECT_FALSE(parser.ParseTest("x = $\n $[\n", &err)); + EXPECT_EQ("input:2: bad $-escape (literal $ must be written as $$)\n" + " $[\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("x = a$\n b$\n $\n", &err)); - EXPECT_EQ("line 4, col 1: expected newline, got eof", err); + EXPECT_FALSE(parser.ParseTest("x = a$\n b$\n $\n", &err)); + EXPECT_EQ("input:4: unexpected EOF\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("build x: y z\n", &err)); - EXPECT_EQ("line 1, col 10: 
unknown build rule 'y'", err); + EXPECT_FALSE(parser.ParseTest("build x: y z\n", &err)); + EXPECT_EQ("input:1: unknown build rule 'y'\n" + "build x: y z\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("build x:: y z\n", &err)); - EXPECT_EQ("line 1, col 9: expected build command name, got ':'", err); + EXPECT_FALSE(parser.ParseTest("build x:: y z\n", &err)); + EXPECT_EQ("input:1: expected build command name\n" + "build x:: y z\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("rule cat\n command = cat ok\n" - "build x: cat $\n :\n", - &err)); - EXPECT_EQ("line 4, col 2: expected newline, got ':'", err); + EXPECT_FALSE(parser.ParseTest("rule cat\n command = cat ok\n" + "build x: cat $\n :\n", + &err)); + EXPECT_EQ("input:4: expected newline, got ':'\n" + " :\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("rule cat\n", - &err)); - EXPECT_EQ("line 2, col 1: expected 'command =' line", err); + EXPECT_FALSE(parser.ParseTest("rule cat\n", + &err)); + EXPECT_EQ("input:2: expected 'command =' line\n", err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("rule cat\n command = ${fafsd\n foo = bar\n", - &err)); - EXPECT_EQ("line 2, col 20: expected closing curly after ${", err); + EXPECT_FALSE(parser.ParseTest("rule cat\n" + " command = ${fafsd\n" + "foo = bar\n", + &err)); + EXPECT_EQ("input:2: bad $-escape (literal $ must be written as $$)\n" + " command = ${fafsd\n" + " ^ near here\n" + , err); } @@ -310,87 +343,110 @@ TEST_F(ParserTest, Errors) { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("rule cat\n command = cat\nbuild $: cat foo\n", - &err)); - // XXX EXPECT_EQ("line 3, col 7: expected variable after $", err); - EXPECT_EQ("line 4, col 1: expected 
variable after $", err); + EXPECT_FALSE(parser.ParseTest("rule cat\n" + " command = cat\nbuild $: cat foo\n", + &err)); + EXPECT_EQ("input:3: bad $-escape (literal $ must be written as $$)\n" + "build $: cat foo\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("rule %foo\n", - &err)); - EXPECT_EQ("line 1, col 6: expected rule name, got unknown '%'", err); + EXPECT_FALSE(parser.ParseTest("rule %foo\n", + &err)); + EXPECT_EQ("input:1: expected rule name\n", err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("rule cc\n command = foo\n othervar = bar\n", - &err)); - EXPECT_EQ("line 3, col 3: unexpected variable 'othervar'", err); + EXPECT_FALSE(parser.ParseTest("rule cc\n" + " command = foo\n" + " othervar = bar\n", + &err)); + EXPECT_EQ("input:3: unexpected variable 'othervar'\n" + " othervar = bar\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("rule cc\n command = foo\n" - "build $: cc bar.cc\n", - &err)); - EXPECT_EQ("line 4, col 1: expected variable after $", err); + EXPECT_FALSE(parser.ParseTest("rule cc\n command = foo\n" + "build $: cc bar.cc\n", + &err)); + EXPECT_EQ("input:3: bad $-escape (literal $ must be written as $$)\n" + "build $: cc bar.cc\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("default\n", - &err)); - EXPECT_EQ("line 1, col 8: expected target name, got newline", err); + EXPECT_FALSE(parser.ParseTest("default\n", + &err)); + EXPECT_EQ("input:1: expected target name\n" + "default\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("default nonexistent\n", - &err)); - EXPECT_EQ("line 1, col 9: unknown target 'nonexistent'", err); + EXPECT_FALSE(parser.ParseTest("default nonexistent\n", + &err)); 
+ EXPECT_EQ("input:1: unknown target 'nonexistent'\n" + "default nonexistent\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("rule r\n command = r\n" - "build b: r\n" - "default b:\n", - &err)); - EXPECT_EQ("line 4, col 10: expected newline, got ':'", err); + EXPECT_FALSE(parser.ParseTest("rule r\n command = r\n" + "build b: r\n" + "default b:\n", + &err)); + EXPECT_EQ("input:4: expected newline, got ':'\n" + "default b:\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("default $a\n", &err)); - EXPECT_EQ("line 1, col 9: empty path", err); + EXPECT_FALSE(parser.ParseTest("default $a\n", &err)); + EXPECT_EQ("input:1: empty path\n" + "default $a\n" + " ^ near here\n" + , err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("rule r\n" - " command = r\n" - "build $a: r $c\n", &err)); + EXPECT_FALSE(parser.ParseTest("rule r\n" + " command = r\n" + "build $a: r $c\n", &err)); // XXX the line number is wrong; we should evaluate paths in ParseEdge // as we see them, not after we've read them all! 
- EXPECT_EQ("line 4, col 1: empty path", err); + EXPECT_EQ("input:4: empty path\n", err); } } @@ -399,9 +455,9 @@ TEST_F(ParserTest, MultipleOutputs) State state; ManifestParser parser(&state, NULL); string err; - EXPECT_TRUE(parser.Parse("rule cc\n command = foo\n depfile = bar\n" - "build a.o b.o: cc c.cc\n", - &err)); + EXPECT_TRUE(parser.ParseTest("rule cc\n command = foo\n depfile = bar\n" + "build a.o b.o: cc c.cc\n", + &err)); EXPECT_EQ("", err); } @@ -433,9 +489,11 @@ TEST_F(ParserTest, SubNinja) { TEST_F(ParserTest, MissingSubNinja) { ManifestParser parser(&state, this); string err; - EXPECT_FALSE(parser.Parse("subninja foo.ninja\n", &err)); - EXPECT_EQ("line 1, col 10: loading foo.ninja: No such file or directory", - err); + EXPECT_FALSE(parser.ParseTest("subninja foo.ninja\n", &err)); + EXPECT_EQ("input:1: loading 'foo.ninja': No such file or directory\n" + "subninja foo.ninja\n" + " ^ near here\n" + , err); } TEST_F(ParserTest, Include) { @@ -451,7 +509,8 @@ TEST_F(ParserTest, Include) { TEST_F(ParserTest, Implicit) { ASSERT_NO_FATAL_FAILURE(AssertParse( -"rule cat\n command = cat $in > $out\n" +"rule cat\n" +" command = cat $in > $out\n" "build foo: cat bar | baz\n")); Edge* edge = state.LookupNode("foo")->in_edge(); diff --git a/src/state_test.cc b/src/state_test.cc index b9e55cb..ca4e60c 100644 --- a/src/state_test.cc +++ b/src/state_test.cc @@ -21,11 +21,14 @@ namespace { TEST(State, Basic) { State state; + Rule* rule = new Rule("cat"); - string err; - EXPECT_TRUE(rule->command().Parse("cat $in > $out", &err)); - ASSERT_EQ("", err); + rule->command_.Add(EvalString::RAW, "cat "); + rule->command_.Add(EvalString::SPECIAL, "in"); + rule->command_.Add(EvalString::RAW, " > "); + rule->command_.Add(EvalString::SPECIAL, "out"); state.AddRule(rule); + Edge* edge = state.AddEdge(rule); state.AddIn(edge, "in1"); state.AddIn(edge, "in2"); diff --git a/src/test.cc b/src/test.cc index 719cec3..20b55b3 100644 --- a/src/test.cc +++ b/src/test.cc @@ -29,7 +29,7 @@ 
Node* StateTestWithBuiltinRules::GetNode(const string& path) { void AssertParse(State* state, const char* input) { ManifestParser parser(state, NULL); string err; - ASSERT_TRUE(parser.Parse(input, &err)) << err; + ASSERT_TRUE(parser.ParseTest(input, &err)) << err; ASSERT_EQ("", err); } -- cgit v0.12