diff options
Diffstat (limited to 'src/parsers.cc')
-rw-r--r-- | src/parsers.cc | 617 |
1 files changed, 170 insertions, 447 deletions
diff --git a/src/parsers.cc b/src/parsers.cc index 44c3711..095e93f 100644 --- a/src/parsers.cc +++ b/src/parsers.cc @@ -23,242 +23,6 @@ #include "state.h" #include "util.h" -string Token::AsString() const { - switch (type_) { - case IDENT: return "'" + string(pos_, end_ - pos_) + "'"; - case UNKNOWN: return "unknown '" + string(pos_, end_ - pos_) + "'"; - case NEWLINE: return "newline"; - case EQUALS: return "'='"; - case COLON: return "':'"; - case PIPE: return "'|'"; - case PIPE2: return "'||'"; - case TEOF: return "eof"; - case INDENT: return "indenting in"; - case OUTDENT: return "indenting out"; - case NONE: break; - } - assert(false); - return ""; -} - -bool Tokenizer::ErrorAt(const char* pos, const string& message, string* err) { - // Re-scan the input, counting newlines so that we can compute the - // correct position. - int line = 1; - const char* line_start = start_; - for (const char* p = start_; p < pos; ++p) { - if (*p == '\n') { - ++line; - line_start = p + 1; - } - } - int col = pos - line_start + 1; - - char buf[1024]; - snprintf(buf, sizeof(buf), - "line %d, col %d: %s", line, col, message.c_str()); - err->assign(buf); - return false; -} - -void Tokenizer::Start(const char* start, const char* end) { - cur_line_ = cur_ = start_ = start; - end_ = end; -} - -bool Tokenizer::ErrorExpected(const string& expected, string* err) { - return Error("expected " + expected + ", got " + token_.AsString(), err); -} - -void Tokenizer::SkipWhitespace(bool newline) { - if (token_.type_ == Token::NEWLINE && newline) - Newline(NULL); - - while (cur_ < end_) { - if (*cur_ == ' ') { - ++cur_; - } else if (newline && *cur_ == '\n') { - Newline(NULL); - } else if (*cur_ == '$' && cur_ + 1 < end_ && cur_[1] == '\n') { - ++cur_; ++cur_; - } else if (*cur_ == '#' && cur_ == cur_line_) { - while (cur_ < end_ && *cur_ != '\n') - ++cur_; - if (cur_ < end_ && *cur_ == '\n') - ++cur_; - cur_line_ = cur_; - } else { - break; - } - } -} - -bool Tokenizer::Newline(string* err) { - if (!ExpectToken(Token::NEWLINE, err)) - return false; - - return true; -} - -/// Return true if |c| is part of an identifier. -static bool IsIdentChar(char c) { - // This function shows up hot on profiles. Instead of the natural - // 'if' statement, use a table as generated by this Python script: - // import string - // cs = set() - // for c in string.ascii_letters + string.digits + r'+,-./\_$': - // cs.add(ord(c)) - // for i in range(128): - // if i in cs: - // print '1,', - // else: - // print '0,', - // if i % 16 == 15: - // print - static const bool kIdents[] = { - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, - }; - return kIdents[(int)c]; -} - -bool Tokenizer::ExpectToken(Token::Type expected, string* err) { - PeekToken(); - if (token_.type_ != expected) - return ErrorExpected(Token(expected).AsString(), err); - ConsumeToken(); - return true; -} - -bool Tokenizer::ExpectIdent(const char* expected, string* err) { - PeekToken(); - if (token_.type_ != Token::IDENT || - strncmp(token_.pos_, expected, token_.end_ - token_.pos_) != 0) { - return ErrorExpected(string("'") + expected + "'", err); - } - ConsumeToken(); - return true; -} - -bool Tokenizer::ReadIdent(StringPiece* out) { - PeekToken(); - if (token_.type_ != Token::IDENT) - return false; - out->str_ = token_.pos_; - out->len_ = token_.end_ - token_.pos_; - ConsumeToken(); - return true; -} - -bool Tokenizer::ReadIdent(string* out) { - StringPiece token; - if (!ReadIdent(&token)) - return false; - out->assign(token.str_, token.len_); - return true; -} - -bool Tokenizer::ReadToNewline(string *text, string* err, size_t max_length) { - // XXX token_.clear(); - while (cur_ < end_ && *cur_ != '\n') { - if (*cur_ == '$') { - // Might be a line continuation; peek ahead to check. - if (cur_ + 1 >= end_) - return Error("unexpected eof", err); - if (*(cur_ + 1) == '\n') { - // Let SkipWhitespace handle the continuation logic. - SkipWhitespace(); - continue; - } - - // Otherwise, just treat it like a normal character. - text->push_back(*cur_); - ++cur_; - } else { - text->push_back(*cur_); - ++cur_; - } - if (text->size() >= max_length) { - token_.pos_ = cur_; - return false; - } - } - return Newline(err); -} - -Token::Type Tokenizer::PeekToken() { - if (token_.type_ != Token::NONE) - return token_.type_; - - token_.pos_ = cur_; - if (cur_indent_ == -1) { - cur_indent_ = cur_ - cur_line_; - if (cur_indent_ != last_indent_) { - if (cur_indent_ > last_indent_) { - token_.type_ = Token::INDENT; - } else if (cur_indent_ < last_indent_) { - token_.type_ = Token::OUTDENT; - } - last_indent_ = cur_indent_; - return token_.type_; - } - } - - if (cur_ >= end_) { - token_.type_ = Token::TEOF; - return token_.type_; - } - - if (IsIdentChar(*cur_)) { - while (cur_ < end_ && IsIdentChar(*cur_)) { - if (*cur_ == '$' && cur_ + 1 < end_ && cur_[1] == ' ') { - ++cur_; - } - ++cur_; - } - token_.end_ = cur_; - token_.type_ = Token::IDENT; - } else if (*cur_ == ':') { - token_.type_ = Token::COLON; - ++cur_; - } else if (*cur_ == '=') { - token_.type_ = Token::EQUALS; - ++cur_; - } else if (*cur_ == '|') { - if (cur_ + 1 < end_ && cur_[1] == '|') { - token_.type_ = Token::PIPE2; - cur_ += 2; - } else { - token_.type_ = Token::PIPE; - ++cur_; - } - } else if (*cur_ == '\n') { - token_.type_ = Token::NEWLINE; - ++cur_; - cur_line_ = cur_; - cur_indent_ = -1; - } - - SkipWhitespace(); - - if (token_.type_ == Token::NONE) { - token_.type_ = Token::UNKNOWN; - token_.end_ = cur_ + 1; - } - - return token_.type_; -} - -void Tokenizer::ConsumeToken() { - token_.Clear(); -} - ManifestParser::ManifestParser(State* state, FileReader* file_reader) : state_(state), file_reader_(file_reader) { env_ = &state->bindings_; @@ -267,58 +31,66 @@ bool ManifestParser::Load(const string& filename, string* err) { string contents; if (!file_reader_->ReadFile(filename, &contents, err)) return false; - return Parse(contents, err); + contents.resize(contents.size() + 10); + return Parse(filename, contents, err); } -bool ManifestParser::Parse(const string& input, string* err) { - tokenizer_.Start(input.data(), input.data() + input.size()); - - tokenizer_.SkipWhitespace(true); - - while (tokenizer_.token().type_ != Token::TEOF) { - switch (tokenizer_.PeekToken()) { - case Token::IDENT: { - const Token& token = tokenizer_.token(); - int len = token.end_ - token.pos_; - if (len == 4 && memcmp(token.pos_, "rule", 4) == 0) { - if (!ParseRule(err)) - return false; - } else if (len == 5 && memcmp(token.pos_, "build", 5) == 0) { - if (!ParseEdge(err)) - return false; - } else if (len == 7 && memcmp(token.pos_, "default", 7) == 0) { - if (!ParseDefaults(err)) - return false; - } else if ((len == 7 && memcmp(token.pos_, "include", 7) == 0) || - (len == 8 && memcmp(token.pos_, "subninja", 8) == 0)) { - if (!ParseFileInclude(err)) - return false; - } else { - string name, value; - if (!ParseLet(&name, &value, err)) - return false; - env_->AddBinding(name, value); - } - break; - } - case Token::TEOF: - continue; - default: - return tokenizer_.Error("unhandled " + tokenizer_.token().AsString(), err); +bool ManifestParser::Parse(const string& filename, const string& input, + string* err) { + lexer_.Start(filename, input); + + for (;;) { + Lexer::Token token = lexer_.ReadToken(); + switch (token) { + case Lexer::BUILD: + if (!ParseEdge(err)) + return false; + break; + case Lexer::RULE: + if (!ParseRule(err)) + return false; + break; + case Lexer::DEFAULT: + if (!ParseDefault(err)) + return false; + break; + case Lexer::IDENT: { + lexer_.UnreadToken(); + string name; + EvalString value; + if (!ParseLet(&name, &value, err)) + return false; + env_->AddBinding(name, value.Evaluate(env_)); + break; + } + case Lexer::INCLUDE: + if (!ParseFileInclude(false, err)) + return false; + break; + case Lexer::SUBNINJA: + if (!ParseFileInclude(true, err)) + return false; + break; + case Lexer::ERROR: + return lexer_.Error("lexing error", err); + case Lexer::TEOF: + return true; + case Lexer::NEWLINE: + break; + default: + return lexer_.Error(string("unexpected") + Lexer::TokenName(token), + err); } - tokenizer_.SkipWhitespace(true); } - - return true; + return false; // not reached } bool ManifestParser::ParseRule(string* err) { - if (!tokenizer_.ExpectIdent("rule", err)) - return false; string name; - if (!tokenizer_.ReadIdent(&name)) - return tokenizer_.ErrorExpected("rule name", err); - if (!tokenizer_.Newline(err)) + if (!lexer_.ReadIdent(&name)) + return lexer_.Error("expected rule name", err); + + if (!ExpectToken(Lexer::NEWLINE, err)) return false; if (state_->LookupRule(name) != NULL) { @@ -328,167 +100,120 @@ bool ManifestParser::ParseRule(string* err) { Rule* rule = new Rule(name); // XXX scoped_ptr - if (tokenizer_.PeekToken() == Token::INDENT) { - tokenizer_.ConsumeToken(); - - while (tokenizer_.PeekToken() != Token::OUTDENT) { - const char* let_loc = tokenizer_.token_.pos_; - - string key; - if (!ParseLetKey(&key, err)) - return false; + while (lexer_.PeekToken(Lexer::INDENT)) { + string key; + EvalString value; + if (!ParseLet(&key, &value, err)) + return false; - EvalString* eval_target = NULL; - if (key == "command") { - eval_target = &rule->command_; - } else if (key == "depfile") { - eval_target = &rule->depfile_; - } else if (key == "description") { - eval_target = &rule->description_; - } else if (key == "generator") { - rule->generator_ = true; - string dummy; - if (!tokenizer_.ReadToNewline(&dummy, err)) - return false; - continue; - } else if (key == "restat") { - rule->restat_ = true; - string dummy; - if (!tokenizer_.ReadToNewline(&dummy, err)) - return false; - continue; - } else { - // Die on other keyvals for now; revisit if we want to add a - // scope here. - return tokenizer_.ErrorAt(let_loc, "unexpected variable '" + key + "'", - err); - } - - if (!ParseLetValue(eval_target, err)) - return false; + if (key == "command") { + rule->command_ = value; + } else if (key == "depfile") { + rule->depfile_ = value; + } else if (key == "description") { + rule->description_ = value; + } else if (key == "generator") { + rule->generator_ = true; + } else if (key == "restat") { + rule->restat_ = true; + } else { + // Die on other keyvals for now; revisit if we want to add a + // scope here. + return lexer_.Error("unexpected variable '" + key + "'", err); } - tokenizer_.ConsumeToken(); } - if (rule->command_.unparsed().empty()) - return tokenizer_.Error("expected 'command =' line", err); + if (rule->command_.empty()) + return lexer_.Error("expected 'command =' line", err); state_->AddRule(rule); return true; } -bool ManifestParser::ParseLet(string* key, string* value, string* err) { - if (!ParseLetKey(key, err)) +bool ManifestParser::ParseLet(string* key, EvalString* value, string* err) { + if (!lexer_.ReadIdent(key)) return false; - - EvalString eval; - if (!ParseLetValue(&eval, err)) + if (!ExpectToken(Lexer::EQUALS, err)) return false; - - *value = eval.Evaluate(env_); - - return true; -} - -bool ManifestParser::ParseLetKey(string* key, string* err) { - if (!tokenizer_.ReadIdent(key)) - return tokenizer_.ErrorExpected("variable name", err); - if (!tokenizer_.ExpectToken(Token::EQUALS, err)) - return false; - return true; -} - -bool ManifestParser::ParseLetValue(EvalString* eval, string* err) { - // Backup the tokenizer state prior to consuming the line, for reporting - // the source location in case of a parse error later. - Tokenizer tokenizer_backup = tokenizer_; - - // XXX should we tokenize here? it means we'll need to understand - // command syntax, though... - string value; - if (!tokenizer_.ReadToNewline(&value, err)) + if (!lexer_.ReadVarValue(value, err)) return false; - - string eval_err; - size_t err_index; - if (!eval->Parse(value, &eval_err, &err_index)) { - value.clear(); - // Advance the saved tokenizer state up to the error index to report the - // error at the correct source location. - tokenizer_backup.ReadToNewline(&value, err, err_index); - return tokenizer_backup.Error(eval_err, err); - } - return true; } -bool ManifestParser::ParseDefaults(string* err) { - if (!tokenizer_.ExpectIdent("default", err)) +bool ManifestParser::ParseDefault(string* err) { + EvalString eval; + if (!lexer_.ReadPath(&eval, err)) return false; - - string target; - if (!tokenizer_.ReadIdent(&target)) - return tokenizer_.ErrorExpected("target name", err); + if (eval.empty()) + return lexer_.Error("expected target name", err); do { - EvalString eval; - string eval_err; - if (!eval.Parse(target, &eval_err)) - return tokenizer_.Error(eval_err, err); string path = eval.Evaluate(env_); - if (!CanonicalizePath(&path, &eval_err)) - return tokenizer_.Error(eval_err, err); - if (!state_->AddDefault(path, &eval_err)) - return tokenizer_.Error(eval_err, err); - } while (tokenizer_.ReadIdent(&target)); + string path_err; + if (!CanonicalizePath(&path, &path_err)) + return lexer_.Error(path_err, err); + if (!state_->AddDefault(path, &path_err)) + return lexer_.Error(path_err, err); + + eval.Clear(); + if (!lexer_.ReadPath(&eval, err)) + return false; + } while (!eval.empty()); - if (!tokenizer_.Newline(err)) + if (!ExpectToken(Lexer::NEWLINE, err)) return false; return true; } bool ManifestParser::ParseEdge(string* err) { - vector<string> ins, outs; + vector<EvalString> ins, outs; - if (!tokenizer_.ExpectIdent("build", err)) - return false; + { + EvalString out; + if (!lexer_.ReadPath(&out, err)) + return false; + if (out.empty()) + return lexer_.Error("expected path", err); - for (;;) { - if (tokenizer_.PeekToken() == Token::COLON) { - tokenizer_.ConsumeToken(); - break; - } + do { + outs.push_back(out); - string out; - if (!tokenizer_.ReadIdent(&out)) - return tokenizer_.ErrorExpected("output file list", err); - outs.push_back(out); + out.Clear(); + if (!lexer_.ReadPath(&out, err)) + return false; + } while (!out.empty()); } - // XXX check outs not empty + + if (!ExpectToken(Lexer::COLON, err)) + return false; string rule_name; - if (!tokenizer_.ReadIdent(&rule_name)) - return tokenizer_.ErrorExpected("build command name", err); + if (!lexer_.ReadIdent(&rule_name)) + return lexer_.Error("expected build command name", err); const Rule* rule = state_->LookupRule(rule_name); if (!rule) - return tokenizer_.Error("unknown build rule '" + rule_name + "'", err); + return lexer_.Error("unknown build rule '" + rule_name + "'", err); for (;;) { - string in; - if (!tokenizer_.ReadIdent(&in)) + // XXX should we require one path here? + EvalString in; + if (!lexer_.ReadPath(&in, err)) + return false; + if (in.empty()) break; ins.push_back(in); } // Add all order-only deps, counting how many as we go. int implicit = 0; - if (tokenizer_.PeekToken() == Token::PIPE) { - tokenizer_.ConsumeToken(); + if (lexer_.PeekToken(Lexer::PIPE)) { for (;;) { - string in; - if (!tokenizer_.ReadIdent(&in)) + EvalString in; + if (!lexer_.ReadPath(&in, err)) + return err; + if (in.empty()) break; ins.push_back(in); ++implicit; @@ -497,97 +222,95 @@ bool ManifestParser::ParseEdge(string* err) { // Add all order-only deps, counting how many as we go. int order_only = 0; - if (tokenizer_.PeekToken() == Token::PIPE2) { - tokenizer_.ConsumeToken(); + if (lexer_.PeekToken(Lexer::PIPE2)) { for (;;) { - string in; - if (!tokenizer_.ReadIdent(&in)) + EvalString in; + if (!lexer_.ReadPath(&in, err)) + return false; + if (in.empty()) break; ins.push_back(in); ++order_only; } } - if (!tokenizer_.Newline(err)) + if (!ExpectToken(Lexer::NEWLINE, err)) return false; // Default to using outer env. BindingEnv* env = env_; - // But use a nested env if there are variables in scope. - if (tokenizer_.PeekToken() == Token::INDENT) { - tokenizer_.ConsumeToken(); - + // But create and fill a nested env if there are variables in scope. + if (lexer_.PeekToken(Lexer::INDENT)) { // XXX scoped_ptr to handle error case. env = new BindingEnv; env->parent_ = env_; - while (tokenizer_.PeekToken() != Token::OUTDENT) { - string key, val; + do { + string key; + EvalString val; if (!ParseLet(&key, &val, err)) return false; - env->AddBinding(key, val); - } - tokenizer_.ConsumeToken(); - } - - // Evaluate all variables in paths. - // XXX: fast path skip the eval parse if there's no $ in the path? - vector<string>* paths[2] = { &ins, &outs }; - for (int p = 0; p < 2; ++p) { - for (vector<string>::iterator i = paths[p]->begin(); - i != paths[p]->end(); ++i) { - EvalString eval; - string eval_err; - if (!eval.Parse(*i, &eval_err)) - return tokenizer_.Error(eval_err, err); - string path = eval.Evaluate(env); - if (!CanonicalizePath(&path, &eval_err)) - return tokenizer_.Error(eval_err, err); - *i = path; - } + env->AddBinding(key, val.Evaluate(env_)); + } while (lexer_.PeekToken(Lexer::INDENT)); } Edge* edge = state_->AddEdge(rule); edge->env_ = env; - for (vector<string>::iterator i = ins.begin(); i != ins.end(); ++i) - state_->AddIn(edge, *i); - for (vector<string>::iterator i = outs.begin(); i != outs.end(); ++i) - state_->AddOut(edge, *i); + for (vector<EvalString>::iterator i = ins.begin(); i != ins.end(); ++i) { + string path = i->Evaluate(env); + string path_err; + if (!CanonicalizePath(&path, &path_err)) + return lexer_.Error(path_err, err); + state_->AddIn(edge, path); + } + for (vector<EvalString>::iterator i = outs.begin(); i != outs.end(); ++i) { + string path = i->Evaluate(env); + string path_err; + if (!CanonicalizePath(&path, &path_err)) + return lexer_.Error(path_err, err); + state_->AddOut(edge, path); + } edge->implicit_deps_ = implicit; edge->order_only_deps_ = order_only; return true; } -bool ManifestParser::ParseFileInclude(string* err) { - string type; - tokenizer_.ReadIdent(&type); - - string path; - if (!tokenizer_.ReadIdent(&path)) - return tokenizer_.ErrorExpected("path to ninja file", err); +bool ManifestParser::ParseFileInclude(bool new_scope, string* err) { + // XXX this should use ReadPath! + EvalString eval; + if (!lexer_.ReadPath(&eval, err)) + return false; + string path = eval.Evaluate(env_); string contents; string read_err; if (!file_reader_->ReadFile(path, &contents, &read_err)) - return tokenizer_.Error("loading " + path + ": " + read_err, err); + return lexer_.Error("loading '" + path + "': " + read_err, err); ManifestParser subparser(state_, file_reader_); - if (type == "subninja") { - // subninja: Construct a new scope for the new parser. + if (new_scope) { subparser.env_ = new BindingEnv; subparser.env_->parent_ = env_; } else { - // include: Reuse the current scope. subparser.env_ = env_; } - string sub_err; - if (!subparser.Parse(contents, &sub_err)) - return tokenizer_.Error("in '" + path + "': " + sub_err, err); + if (!subparser.Parse(path, contents, err)) + return false; - if (!tokenizer_.Newline(err)) + if (!ExpectToken(Lexer::NEWLINE, err)) return false; return true; } + +bool ManifestParser::ExpectToken(Lexer::Token expected, string* err) { + Lexer::Token token = lexer_.ReadToken(); + if (token != expected) { + string message = string("expected ") + Lexer::TokenName(expected); + message += string(", got ") + Lexer::TokenName(token); + return lexer_.Error(message, err); + } + return true; +} |