Diffstat (limited to 'src/parsers.cc')
-rw-r--r--   src/parsers.cc   617
1 file changed, 170 insertions, 447 deletions
diff --git a/src/parsers.cc b/src/parsers.cc
index 44c3711..095e93f 100644
--- a/src/parsers.cc
+++ b/src/parsers.cc
@@ -23,242 +23,6 @@
#include "state.h"
#include "util.h"
-string Token::AsString() const {
- switch (type_) {
- case IDENT: return "'" + string(pos_, end_ - pos_) + "'";
- case UNKNOWN: return "unknown '" + string(pos_, end_ - pos_) + "'";
- case NEWLINE: return "newline";
- case EQUALS: return "'='";
- case COLON: return "':'";
- case PIPE: return "'|'";
- case PIPE2: return "'||'";
- case TEOF: return "eof";
- case INDENT: return "indenting in";
- case OUTDENT: return "indenting out";
- case NONE: break;
- }
- assert(false);
- return "";
-}
-
-bool Tokenizer::ErrorAt(const char* pos, const string& message, string* err) {
- // Re-scan the input, counting newlines so that we can compute the
- // correct position.
- int line = 1;
- const char* line_start = start_;
- for (const char* p = start_; p < pos; ++p) {
- if (*p == '\n') {
- ++line;
- line_start = p + 1;
- }
- }
- int col = pos - line_start + 1;
-
- char buf[1024];
- snprintf(buf, sizeof(buf),
- "line %d, col %d: %s", line, col, message.c_str());
- err->assign(buf);
- return false;
-}
-
-void Tokenizer::Start(const char* start, const char* end) {
- cur_line_ = cur_ = start_ = start;
- end_ = end;
-}
-
-bool Tokenizer::ErrorExpected(const string& expected, string* err) {
- return Error("expected " + expected + ", got " + token_.AsString(), err);
-}
-
-void Tokenizer::SkipWhitespace(bool newline) {
- if (token_.type_ == Token::NEWLINE && newline)
- Newline(NULL);
-
- while (cur_ < end_) {
- if (*cur_ == ' ') {
- ++cur_;
- } else if (newline && *cur_ == '\n') {
- Newline(NULL);
- } else if (*cur_ == '$' && cur_ + 1 < end_ && cur_[1] == '\n') {
- ++cur_; ++cur_;
- } else if (*cur_ == '#' && cur_ == cur_line_) {
- while (cur_ < end_ && *cur_ != '\n')
- ++cur_;
- if (cur_ < end_ && *cur_ == '\n')
- ++cur_;
- cur_line_ = cur_;
- } else {
- break;
- }
- }
-}
-
-bool Tokenizer::Newline(string* err) {
- if (!ExpectToken(Token::NEWLINE, err))
- return false;
-
- return true;
-}
-
-/// Return true if |c| is part of an identifier.
-static bool IsIdentChar(char c) {
- // This function shows up hot on profiles. Instead of the natural
- // 'if' statement, use a table as generated by this Python script:
- // import string
- // cs = set()
- // for c in string.ascii_letters + string.digits + r'+,-./\_$':
- // cs.add(ord(c))
- // for i in range(128):
- // if i in cs:
- // print '1,',
- // else:
- // print '0,',
- // if i % 16 == 15:
- // print
- static const bool kIdents[] = {
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
- 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1,
- 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
- 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
- };
- return kIdents[(int)c];
-}
-
-bool Tokenizer::ExpectToken(Token::Type expected, string* err) {
- PeekToken();
- if (token_.type_ != expected)
- return ErrorExpected(Token(expected).AsString(), err);
- ConsumeToken();
- return true;
-}
-
-bool Tokenizer::ExpectIdent(const char* expected, string* err) {
- PeekToken();
- if (token_.type_ != Token::IDENT ||
- strncmp(token_.pos_, expected, token_.end_ - token_.pos_) != 0) {
- return ErrorExpected(string("'") + expected + "'", err);
- }
- ConsumeToken();
- return true;
-}
-
-bool Tokenizer::ReadIdent(StringPiece* out) {
- PeekToken();
- if (token_.type_ != Token::IDENT)
- return false;
- out->str_ = token_.pos_;
- out->len_ = token_.end_ - token_.pos_;
- ConsumeToken();
- return true;
-}
-
-bool Tokenizer::ReadIdent(string* out) {
- StringPiece token;
- if (!ReadIdent(&token))
- return false;
- out->assign(token.str_, token.len_);
- return true;
-}
-
-bool Tokenizer::ReadToNewline(string *text, string* err, size_t max_length) {
- // XXX token_.clear();
- while (cur_ < end_ && *cur_ != '\n') {
- if (*cur_ == '$') {
- // Might be a line continuation; peek ahead to check.
- if (cur_ + 1 >= end_)
- return Error("unexpected eof", err);
- if (*(cur_ + 1) == '\n') {
- // Let SkipWhitespace handle the continuation logic.
- SkipWhitespace();
- continue;
- }
-
- // Otherwise, just treat it like a normal character.
- text->push_back(*cur_);
- ++cur_;
- } else {
- text->push_back(*cur_);
- ++cur_;
- }
- if (text->size() >= max_length) {
- token_.pos_ = cur_;
- return false;
- }
- }
- return Newline(err);
-}
-
-Token::Type Tokenizer::PeekToken() {
- if (token_.type_ != Token::NONE)
- return token_.type_;
-
- token_.pos_ = cur_;
- if (cur_indent_ == -1) {
- cur_indent_ = cur_ - cur_line_;
- if (cur_indent_ != last_indent_) {
- if (cur_indent_ > last_indent_) {
- token_.type_ = Token::INDENT;
- } else if (cur_indent_ < last_indent_) {
- token_.type_ = Token::OUTDENT;
- }
- last_indent_ = cur_indent_;
- return token_.type_;
- }
- }
-
- if (cur_ >= end_) {
- token_.type_ = Token::TEOF;
- return token_.type_;
- }
-
- if (IsIdentChar(*cur_)) {
- while (cur_ < end_ && IsIdentChar(*cur_)) {
- if (*cur_ == '$' && cur_ + 1 < end_ && cur_[1] == ' ') {
- ++cur_;
- }
- ++cur_;
- }
- token_.end_ = cur_;
- token_.type_ = Token::IDENT;
- } else if (*cur_ == ':') {
- token_.type_ = Token::COLON;
- ++cur_;
- } else if (*cur_ == '=') {
- token_.type_ = Token::EQUALS;
- ++cur_;
- } else if (*cur_ == '|') {
- if (cur_ + 1 < end_ && cur_[1] == '|') {
- token_.type_ = Token::PIPE2;
- cur_ += 2;
- } else {
- token_.type_ = Token::PIPE;
- ++cur_;
- }
- } else if (*cur_ == '\n') {
- token_.type_ = Token::NEWLINE;
- ++cur_;
- cur_line_ = cur_;
- cur_indent_ = -1;
- }
-
- SkipWhitespace();
-
- if (token_.type_ == Token::NONE) {
- token_.type_ = Token::UNKNOWN;
- token_.end_ = cur_ + 1;
- }
-
- return token_.type_;
-}
-
-void Tokenizer::ConsumeToken() {
- token_.Clear();
-}
-
ManifestParser::ManifestParser(State* state, FileReader* file_reader)
: state_(state), file_reader_(file_reader) {
env_ = &state->bindings_;
@@ -267,58 +31,66 @@ bool ManifestParser::Load(const string& filename, string* err) {
string contents;
if (!file_reader_->ReadFile(filename, &contents, err))
return false;
- return Parse(contents, err);
+ contents.resize(contents.size() + 10);
+ return Parse(filename, contents, err);
}
-bool ManifestParser::Parse(const string& input, string* err) {
- tokenizer_.Start(input.data(), input.data() + input.size());
-
- tokenizer_.SkipWhitespace(true);
-
- while (tokenizer_.token().type_ != Token::TEOF) {
- switch (tokenizer_.PeekToken()) {
- case Token::IDENT: {
- const Token& token = tokenizer_.token();
- int len = token.end_ - token.pos_;
- if (len == 4 && memcmp(token.pos_, "rule", 4) == 0) {
- if (!ParseRule(err))
- return false;
- } else if (len == 5 && memcmp(token.pos_, "build", 5) == 0) {
- if (!ParseEdge(err))
- return false;
- } else if (len == 7 && memcmp(token.pos_, "default", 7) == 0) {
- if (!ParseDefaults(err))
- return false;
- } else if ((len == 7 && memcmp(token.pos_, "include", 7) == 0) ||
- (len == 8 && memcmp(token.pos_, "subninja", 8) == 0)) {
- if (!ParseFileInclude(err))
- return false;
- } else {
- string name, value;
- if (!ParseLet(&name, &value, err))
- return false;
- env_->AddBinding(name, value);
- }
- break;
- }
- case Token::TEOF:
- continue;
- default:
- return tokenizer_.Error("unhandled " + tokenizer_.token().AsString(), err);
+bool ManifestParser::Parse(const string& filename, const string& input,
+ string* err) {
+ lexer_.Start(filename, input);
+
+ for (;;) {
+ Lexer::Token token = lexer_.ReadToken();
+ switch (token) {
+ case Lexer::BUILD:
+ if (!ParseEdge(err))
+ return false;
+ break;
+ case Lexer::RULE:
+ if (!ParseRule(err))
+ return false;
+ break;
+ case Lexer::DEFAULT:
+ if (!ParseDefault(err))
+ return false;
+ break;
+ case Lexer::IDENT: {
+ lexer_.UnreadToken();
+ string name;
+ EvalString value;
+ if (!ParseLet(&name, &value, err))
+ return false;
+ env_->AddBinding(name, value.Evaluate(env_));
+ break;
+ }
+ case Lexer::INCLUDE:
+ if (!ParseFileInclude(false, err))
+ return false;
+ break;
+ case Lexer::SUBNINJA:
+ if (!ParseFileInclude(true, err))
+ return false;
+ break;
+ case Lexer::ERROR:
+ return lexer_.Error("lexing error", err);
+ case Lexer::TEOF:
+ return true;
+ case Lexer::NEWLINE:
+ break;
+ default:
+      return lexer_.Error(string("unexpected ") + Lexer::TokenName(token),
+ err);
}
- tokenizer_.SkipWhitespace(true);
}
-
- return true;
+ return false; // not reached
}
bool ManifestParser::ParseRule(string* err) {
- if (!tokenizer_.ExpectIdent("rule", err))
- return false;
string name;
- if (!tokenizer_.ReadIdent(&name))
- return tokenizer_.ErrorExpected("rule name", err);
- if (!tokenizer_.Newline(err))
+ if (!lexer_.ReadIdent(&name))
+ return lexer_.Error("expected rule name", err);
+
+ if (!ExpectToken(Lexer::NEWLINE, err))
return false;
if (state_->LookupRule(name) != NULL) {
@@ -328,167 +100,120 @@ bool ManifestParser::ParseRule(string* err) {
Rule* rule = new Rule(name); // XXX scoped_ptr
- if (tokenizer_.PeekToken() == Token::INDENT) {
- tokenizer_.ConsumeToken();
-
- while (tokenizer_.PeekToken() != Token::OUTDENT) {
- const char* let_loc = tokenizer_.token_.pos_;
-
- string key;
- if (!ParseLetKey(&key, err))
- return false;
+ while (lexer_.PeekToken(Lexer::INDENT)) {
+ string key;
+ EvalString value;
+ if (!ParseLet(&key, &value, err))
+ return false;
- EvalString* eval_target = NULL;
- if (key == "command") {
- eval_target = &rule->command_;
- } else if (key == "depfile") {
- eval_target = &rule->depfile_;
- } else if (key == "description") {
- eval_target = &rule->description_;
- } else if (key == "generator") {
- rule->generator_ = true;
- string dummy;
- if (!tokenizer_.ReadToNewline(&dummy, err))
- return false;
- continue;
- } else if (key == "restat") {
- rule->restat_ = true;
- string dummy;
- if (!tokenizer_.ReadToNewline(&dummy, err))
- return false;
- continue;
- } else {
- // Die on other keyvals for now; revisit if we want to add a
- // scope here.
- return tokenizer_.ErrorAt(let_loc, "unexpected variable '" + key + "'",
- err);
- }
-
- if (!ParseLetValue(eval_target, err))
- return false;
+ if (key == "command") {
+ rule->command_ = value;
+ } else if (key == "depfile") {
+ rule->depfile_ = value;
+ } else if (key == "description") {
+ rule->description_ = value;
+ } else if (key == "generator") {
+ rule->generator_ = true;
+ } else if (key == "restat") {
+ rule->restat_ = true;
+ } else {
+ // Die on other keyvals for now; revisit if we want to add a
+ // scope here.
+ return lexer_.Error("unexpected variable '" + key + "'", err);
}
- tokenizer_.ConsumeToken();
}
- if (rule->command_.unparsed().empty())
- return tokenizer_.Error("expected 'command =' line", err);
+ if (rule->command_.empty())
+ return lexer_.Error("expected 'command =' line", err);
state_->AddRule(rule);
return true;
}
-bool ManifestParser::ParseLet(string* key, string* value, string* err) {
- if (!ParseLetKey(key, err))
+bool ManifestParser::ParseLet(string* key, EvalString* value, string* err) {
+ if (!lexer_.ReadIdent(key))
return false;
-
- EvalString eval;
- if (!ParseLetValue(&eval, err))
+ if (!ExpectToken(Lexer::EQUALS, err))
return false;
-
- *value = eval.Evaluate(env_);
-
- return true;
-}
-
-bool ManifestParser::ParseLetKey(string* key, string* err) {
- if (!tokenizer_.ReadIdent(key))
- return tokenizer_.ErrorExpected("variable name", err);
- if (!tokenizer_.ExpectToken(Token::EQUALS, err))
- return false;
- return true;
-}
-
-bool ManifestParser::ParseLetValue(EvalString* eval, string* err) {
- // Backup the tokenizer state prior to consuming the line, for reporting
- // the source location in case of a parse error later.
- Tokenizer tokenizer_backup = tokenizer_;
-
- // XXX should we tokenize here? it means we'll need to understand
- // command syntax, though...
- string value;
- if (!tokenizer_.ReadToNewline(&value, err))
+ if (!lexer_.ReadVarValue(value, err))
return false;
-
- string eval_err;
- size_t err_index;
- if (!eval->Parse(value, &eval_err, &err_index)) {
- value.clear();
- // Advance the saved tokenizer state up to the error index to report the
- // error at the correct source location.
- tokenizer_backup.ReadToNewline(&value, err, err_index);
- return tokenizer_backup.Error(eval_err, err);
- }
-
return true;
}
-bool ManifestParser::ParseDefaults(string* err) {
- if (!tokenizer_.ExpectIdent("default", err))
+bool ManifestParser::ParseDefault(string* err) {
+ EvalString eval;
+ if (!lexer_.ReadPath(&eval, err))
return false;
-
- string target;
- if (!tokenizer_.ReadIdent(&target))
- return tokenizer_.ErrorExpected("target name", err);
+ if (eval.empty())
+ return lexer_.Error("expected target name", err);
do {
- EvalString eval;
- string eval_err;
- if (!eval.Parse(target, &eval_err))
- return tokenizer_.Error(eval_err, err);
string path = eval.Evaluate(env_);
- if (!CanonicalizePath(&path, &eval_err))
- return tokenizer_.Error(eval_err, err);
- if (!state_->AddDefault(path, &eval_err))
- return tokenizer_.Error(eval_err, err);
- } while (tokenizer_.ReadIdent(&target));
+ string path_err;
+ if (!CanonicalizePath(&path, &path_err))
+ return lexer_.Error(path_err, err);
+ if (!state_->AddDefault(path, &path_err))
+ return lexer_.Error(path_err, err);
+
+ eval.Clear();
+ if (!lexer_.ReadPath(&eval, err))
+ return false;
+ } while (!eval.empty());
- if (!tokenizer_.Newline(err))
+ if (!ExpectToken(Lexer::NEWLINE, err))
return false;
return true;
}
bool ManifestParser::ParseEdge(string* err) {
- vector<string> ins, outs;
+ vector<EvalString> ins, outs;
- if (!tokenizer_.ExpectIdent("build", err))
- return false;
+ {
+ EvalString out;
+ if (!lexer_.ReadPath(&out, err))
+ return false;
+ if (out.empty())
+ return lexer_.Error("expected path", err);
- for (;;) {
- if (tokenizer_.PeekToken() == Token::COLON) {
- tokenizer_.ConsumeToken();
- break;
- }
+ do {
+ outs.push_back(out);
- string out;
- if (!tokenizer_.ReadIdent(&out))
- return tokenizer_.ErrorExpected("output file list", err);
- outs.push_back(out);
+ out.Clear();
+ if (!lexer_.ReadPath(&out, err))
+ return false;
+ } while (!out.empty());
}
- // XXX check outs not empty
+
+ if (!ExpectToken(Lexer::COLON, err))
+ return false;
string rule_name;
- if (!tokenizer_.ReadIdent(&rule_name))
- return tokenizer_.ErrorExpected("build command name", err);
+ if (!lexer_.ReadIdent(&rule_name))
+ return lexer_.Error("expected build command name", err);
const Rule* rule = state_->LookupRule(rule_name);
if (!rule)
- return tokenizer_.Error("unknown build rule '" + rule_name + "'", err);
+ return lexer_.Error("unknown build rule '" + rule_name + "'", err);
for (;;) {
- string in;
- if (!tokenizer_.ReadIdent(&in))
+ // XXX should we require one path here?
+ EvalString in;
+ if (!lexer_.ReadPath(&in, err))
+ return false;
+ if (in.empty())
break;
ins.push_back(in);
}
  // Add all implicit deps, counting how many as we go.
int implicit = 0;
- if (tokenizer_.PeekToken() == Token::PIPE) {
- tokenizer_.ConsumeToken();
+ if (lexer_.PeekToken(Lexer::PIPE)) {
for (;;) {
- string in;
- if (!tokenizer_.ReadIdent(&in))
+ EvalString in;
+ if (!lexer_.ReadPath(&in, err))
+        return false;
+ if (in.empty())
break;
ins.push_back(in);
++implicit;
@@ -497,97 +222,95 @@ bool ManifestParser::ParseEdge(string* err) {
// Add all order-only deps, counting how many as we go.
int order_only = 0;
- if (tokenizer_.PeekToken() == Token::PIPE2) {
- tokenizer_.ConsumeToken();
+ if (lexer_.PeekToken(Lexer::PIPE2)) {
for (;;) {
- string in;
- if (!tokenizer_.ReadIdent(&in))
+ EvalString in;
+ if (!lexer_.ReadPath(&in, err))
+ return false;
+ if (in.empty())
break;
ins.push_back(in);
++order_only;
}
}
- if (!tokenizer_.Newline(err))
+ if (!ExpectToken(Lexer::NEWLINE, err))
return false;
// Default to using outer env.
BindingEnv* env = env_;
- // But use a nested env if there are variables in scope.
- if (tokenizer_.PeekToken() == Token::INDENT) {
- tokenizer_.ConsumeToken();
-
+ // But create and fill a nested env if there are variables in scope.
+ if (lexer_.PeekToken(Lexer::INDENT)) {
// XXX scoped_ptr to handle error case.
env = new BindingEnv;
env->parent_ = env_;
- while (tokenizer_.PeekToken() != Token::OUTDENT) {
- string key, val;
+ do {
+ string key;
+ EvalString val;
if (!ParseLet(&key, &val, err))
return false;
- env->AddBinding(key, val);
- }
- tokenizer_.ConsumeToken();
- }
-
- // Evaluate all variables in paths.
- // XXX: fast path skip the eval parse if there's no $ in the path?
- vector<string>* paths[2] = { &ins, &outs };
- for (int p = 0; p < 2; ++p) {
- for (vector<string>::iterator i = paths[p]->begin();
- i != paths[p]->end(); ++i) {
- EvalString eval;
- string eval_err;
- if (!eval.Parse(*i, &eval_err))
- return tokenizer_.Error(eval_err, err);
- string path = eval.Evaluate(env);
- if (!CanonicalizePath(&path, &eval_err))
- return tokenizer_.Error(eval_err, err);
- *i = path;
- }
+ env->AddBinding(key, val.Evaluate(env_));
+ } while (lexer_.PeekToken(Lexer::INDENT));
}
Edge* edge = state_->AddEdge(rule);
edge->env_ = env;
- for (vector<string>::iterator i = ins.begin(); i != ins.end(); ++i)
- state_->AddIn(edge, *i);
- for (vector<string>::iterator i = outs.begin(); i != outs.end(); ++i)
- state_->AddOut(edge, *i);
+ for (vector<EvalString>::iterator i = ins.begin(); i != ins.end(); ++i) {
+ string path = i->Evaluate(env);
+ string path_err;
+ if (!CanonicalizePath(&path, &path_err))
+ return lexer_.Error(path_err, err);
+ state_->AddIn(edge, path);
+ }
+ for (vector<EvalString>::iterator i = outs.begin(); i != outs.end(); ++i) {
+ string path = i->Evaluate(env);
+ string path_err;
+ if (!CanonicalizePath(&path, &path_err))
+ return lexer_.Error(path_err, err);
+ state_->AddOut(edge, path);
+ }
edge->implicit_deps_ = implicit;
edge->order_only_deps_ = order_only;
return true;
}
-bool ManifestParser::ParseFileInclude(string* err) {
- string type;
- tokenizer_.ReadIdent(&type);
-
- string path;
- if (!tokenizer_.ReadIdent(&path))
- return tokenizer_.ErrorExpected("path to ninja file", err);
+bool ManifestParser::ParseFileInclude(bool new_scope, string* err) {
+ // XXX this should use ReadPath!
+ EvalString eval;
+ if (!lexer_.ReadPath(&eval, err))
+ return false;
+ string path = eval.Evaluate(env_);
string contents;
string read_err;
if (!file_reader_->ReadFile(path, &contents, &read_err))
- return tokenizer_.Error("loading " + path + ": " + read_err, err);
+ return lexer_.Error("loading '" + path + "': " + read_err, err);
ManifestParser subparser(state_, file_reader_);
- if (type == "subninja") {
- // subninja: Construct a new scope for the new parser.
+ if (new_scope) {
subparser.env_ = new BindingEnv;
subparser.env_->parent_ = env_;
} else {
- // include: Reuse the current scope.
subparser.env_ = env_;
}
- string sub_err;
- if (!subparser.Parse(contents, &sub_err))
- return tokenizer_.Error("in '" + path + "': " + sub_err, err);
+ if (!subparser.Parse(path, contents, err))
+ return false;
- if (!tokenizer_.Newline(err))
+ if (!ExpectToken(Lexer::NEWLINE, err))
return false;
return true;
}
+
+bool ManifestParser::ExpectToken(Lexer::Token expected, string* err) {
+ Lexer::Token token = lexer_.ReadToken();
+ if (token != expected) {
+ string message = string("expected ") + Lexer::TokenName(expected);
+ message += string(", got ") + Lexer::TokenName(token);
+ return lexer_.Error(message, err);
+ }
+ return true;
+}
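
The rewritten parser above leans on a one-token-pushback lexer: ReadToken() advances, UnreadToken() pushes the last token back (Parse() uses it after seeing an IDENT so ParseLet() can re-read the name), and PeekToken(expected) consumes the next token only when it matches (the INDENT-driven loops in ParseRule() and ParseEdge(), and the PIPE/PIPE2 checks). The new lexer itself is not part of this diff, so the snippet below is only a minimal, self-contained sketch of that pushback pattern over a canned token stream; it is a toy illustration, not ninja's actual Lexer, which also provides Error(), ReadIdent(), ReadPath(), and ReadVarValue() as called throughout the hunks above.

#include <cstddef>
#include <cstdio>
#include <vector>

// Toy stand-in for the lexer interface the new ManifestParser expects.
// Tokens come from a canned list instead of a real input buffer.
struct ToyLexer {
  enum Token { IDENT, EQUALS, INDENT, NEWLINE, TEOF };

  explicit ToyLexer(std::vector<Token> tokens) : tokens_(tokens) {}

  // Return the next token, serving a pushed-back token first if there is one.
  Token ReadToken() {
    if (has_unread_) {
      has_unread_ = false;
      return last_;
    }
    last_ = pos_ < tokens_.size() ? tokens_[pos_++] : TEOF;
    return last_;
  }

  // Make the next ReadToken() return the last token again, mirroring
  // lexer_.UnreadToken() in ManifestParser::Parse().
  void UnreadToken() { has_unread_ = true; }

  // Consume the next token only if it matches |expected|, the pattern behind
  // "while (lexer_.PeekToken(Lexer::INDENT))" in ParseRule() and ParseEdge().
  bool PeekToken(Token expected) {
    if (ReadToken() == expected)
      return true;
    UnreadToken();
    return false;
  }

  std::vector<Token> tokens_;
  std::size_t pos_ = 0;
  Token last_ = TEOF;
  bool has_unread_ = false;
};

int main() {
  // Two indented "key = value" lines followed by EOF, roughly the shape of a
  // rule body in a .ninja file.
  ToyLexer lexer({ToyLexer::INDENT, ToyLexer::IDENT, ToyLexer::EQUALS,
                  ToyLexer::NEWLINE, ToyLexer::INDENT, ToyLexer::IDENT,
                  ToyLexer::EQUALS, ToyLexer::NEWLINE, ToyLexer::TEOF});
  int bindings = 0;
  while (lexer.PeekToken(ToyLexer::INDENT)) {
    lexer.ReadToken();  // IDENT (the variable name)
    lexer.ReadToken();  // EQUALS
    lexer.ReadToken();  // NEWLINE
    ++bindings;
  }
  std::printf("parsed %d bindings\n", bindings);  // prints: parsed 2 bindings
  return 0;
}

With lookahead handled this way at the lexer level, the new ExpectToken() helper at the end of the diff can simply call ReadToken() and compare, producing the "expected X, got Y" message on mismatch.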