From ed07eb9f2f25ddee464e786f0f2f82e9e8a33e0a Mon Sep 17 00:00:00 2001 From: Evan Martin Date: Thu, 2 Aug 2012 15:24:32 -0700 Subject: reject tabs (and CRs) in input files more aggressively --- src/lexer.cc | 37 +++++++++++++++++++------------------ src/lexer.h | 7 +++++-- src/lexer.in.cc | 37 +++++++++++++++++++------------------ src/lexer_test.cc | 10 ++++++++++ src/manifest_parser.cc | 5 +++-- src/manifest_parser_test.cc | 2 +- 6 files changed, 57 insertions(+), 41 deletions(-) diff --git a/src/lexer.cc b/src/lexer.cc index ca6f367..f4036d4 100644 --- a/src/lexer.cc +++ b/src/lexer.cc @@ -90,24 +90,25 @@ const char* Lexer::TokenName(Token t) { return NULL; // not reached } -const char* Lexer::TokenErrorHint(Token t) { - switch (t) { - case ERROR: return ""; - case BUILD: return ""; - case COLON: return " ($ also escapes ':')"; - case DEFAULT: return ""; - case EQUALS: return ""; - case IDENT: return ""; - case INCLUDE: return ""; - case INDENT: return ""; - case NEWLINE: return ""; - case PIPE2: return ""; - case PIPE: return ""; - case RULE: return ""; - case SUBNINJA: return ""; - case TEOF: return ""; +const char* Lexer::TokenErrorHint(Token expected) { + switch (expected) { + case COLON: + return " ($ also escapes ':')"; + default: + return ""; + } +} + +string Lexer::DescribeLastError() { + if (last_token_) { + switch (last_token_[0]) { + case '\r': + return "carriage returns are not allowed, use newlines"; + case '\t': + return "tabs are not allowed, use spaces"; + } } - return ""; + return "lexing error"; } void Lexer::UnreadToken() { @@ -689,7 +690,7 @@ yy94: yy95: { last_token_ = start; - return Error("lexing error", err); + return Error(DescribeLastError(), err); } yy96: ++p; diff --git a/src/lexer.h b/src/lexer.h index 19008d7..03c59f2 100644 --- a/src/lexer.h +++ b/src/lexer.h @@ -49,9 +49,12 @@ struct Lexer { /// Return a human-readable form of a token, used in error messages. static const char* TokenName(Token t); - /// Return a human-readable token hint, used in error messages. - static const char* TokenErrorHint(Token t); + static const char* TokenErrorHint(Token expected); + + /// If the last token read was an ERROR token, provide more info + /// or the empty string. + string DescribeLastError(); /// Start parsing some input. void Start(StringPiece filename, StringPiece input); diff --git a/src/lexer.in.cc b/src/lexer.in.cc index 852d6e9..ec3ad6b 100644 --- a/src/lexer.in.cc +++ b/src/lexer.in.cc @@ -89,24 +89,25 @@ const char* Lexer::TokenName(Token t) { return NULL; // not reached } -const char* Lexer::TokenErrorHint(Token t) { - switch (t) { - case ERROR: return ""; - case BUILD: return ""; - case COLON: return " ($ also escapes ':')"; - case DEFAULT: return ""; - case EQUALS: return ""; - case IDENT: return ""; - case INCLUDE: return ""; - case INDENT: return ""; - case NEWLINE: return ""; - case PIPE2: return ""; - case PIPE: return ""; - case RULE: return ""; - case SUBNINJA: return ""; - case TEOF: return ""; +const char* Lexer::TokenErrorHint(Token expected) { + switch (expected) { + case COLON: + return " ($ also escapes ':')"; + default: + return ""; + } +} + +string Lexer::DescribeLastError() { + if (last_token_) { + switch (last_token_[0]) { + case '\r': + return "carriage returns are not allowed, use newlines"; + case '\t': + return "tabs are not allowed, use spaces"; + } } - return ""; + return "lexing error"; } void Lexer::UnreadToken() { @@ -248,7 +249,7 @@ bool Lexer::ReadEvalString(EvalString* eval, bool path, string* err) { } [^] { last_token_ = start; - return Error("lexing error", err); + return Error(DescribeLastError(), err); } */ } diff --git a/src/lexer_test.cc b/src/lexer_test.cc index 5795e5e..e8a1642 100644 --- a/src/lexer_test.cc +++ b/src/lexer_test.cc @@ -85,3 +85,13 @@ TEST(Lexer, CommentEOF) { Lexer::Token token = lexer.ReadToken(); EXPECT_EQ(Lexer::ERROR, token); } + +TEST(Lexer, Tabs) { + // Verify we print a useful error on a disallowed character. + Lexer lexer(" \tfoobar"); + Lexer::Token token = lexer.ReadToken(); + EXPECT_EQ(Lexer::INDENT, token); + token = lexer.ReadToken(); + EXPECT_EQ(Lexer::ERROR, token); + EXPECT_EQ("tabs are not allowed, use spaces", lexer.DescribeLastError()); +} diff --git a/src/manifest_parser.cc b/src/manifest_parser.cc index 057e12c..405e244 100644 --- a/src/manifest_parser.cc +++ b/src/manifest_parser.cc @@ -76,8 +76,9 @@ bool ManifestParser::Parse(const string& filename, const string& input, if (!ParseFileInclude(true, err)) return false; break; - case Lexer::ERROR: - return lexer_.Error("lexing error", err); + case Lexer::ERROR: { + return lexer_.Error(lexer_.DescribeLastError(), err); + } case Lexer::TEOF: return true; case Lexer::NEWLINE: diff --git a/src/manifest_parser_test.cc b/src/manifest_parser_test.cc index 3261d39..a48c99e 100644 --- a/src/manifest_parser_test.cc +++ b/src/manifest_parser_test.cc @@ -697,7 +697,7 @@ TEST_F(ParserTest, CRLF) { EXPECT_FALSE(parser.ParseTest("foo = foo\nbar = bar\r\n", &err)); - EXPECT_EQ("input:2: lexing error\n" + EXPECT_EQ("input:2: carriage returns are not allowed, use newlines\n" "bar = bar\r\n" " ^ near here", err); -- cgit v0.12