From 55fd4232d35de1f163310333d216d81a35cc0ef1 Mon Sep 17 00:00:00 2001 From: Evan Martin Date: Sun, 22 May 2011 10:15:11 -0700 Subject: switch to $ as the line continuation char This means that backslashes are passed through without interpretation, allowing us to support Windows paths without worrying about escaping. --- doc/manual.asciidoc | 2 +- misc/ninja.py | 2 +- src/parsers.cc | 34 ++++++++++++++++++++++++---------- src/parsers.h | 17 ++++++++++++----- src/parsers_test.cc | 16 ++++++++-------- 5 files changed, 46 insertions(+), 25 deletions(-) diff --git a/doc/manual.asciidoc b/doc/manual.asciidoc index 4752c1c..9ef972e 100644 --- a/doc/manual.asciidoc +++ b/doc/manual.asciidoc @@ -374,7 +374,7 @@ A file is a series of declarations. A declaration can be one of: Comments begin with `#` and extend to the end of the line. -Newlines are significant, but they can be escaped by putting a `\` +Newlines are significant, but they can be escaped by putting a `$` before them. Other whitespace is only significant if it's at the beginning of a diff --git a/misc/ninja.py b/misc/ninja.py index e1abbf7..38b13c9 100644 --- a/misc/ninja.py +++ b/misc/ninja.py @@ -58,7 +58,7 @@ class Writer(object): while len(text) > self.width: space = text.rfind(' ', 0, self.width - 4) assert space != -1 # TODO: handle if no space found. - self.output.write(text[0:space] + ' \\\n') + self.output.write(text[0:space] + ' $\n') text = ' ' * (indent+2) + text[space:].lstrip() self.output.write(text + '\n') diff --git a/src/parsers.cc b/src/parsers.cc index 4f04e42..2599b07 100644 --- a/src/parsers.cc +++ b/src/parsers.cc @@ -66,12 +66,14 @@ void Tokenizer::SkipWhitespace(bool newline) { if (token_.type_ == Token::NEWLINE && newline) Newline(NULL); + const char kContinuation = makefile_flavor_ ? '\\' : '$'; + while (cur_ < end_) { if (*cur_ == ' ') { ++cur_; } else if (newline && *cur_ == '\n') { Newline(NULL); - } else if (*cur_ == '\\' && cur_ + 1 < end_ && cur_[1] == '\n') { + } else if (*cur_ == kContinuation && cur_ + 1 < end_ && cur_[1] == '\n') { ++cur_; ++cur_; cur_line_ = cur_; ++line_number_; @@ -101,7 +103,7 @@ static bool IsIdentChar(char c) { ('a' <= c && c <= 'z') || ('+' <= c && c <= '9') || // +,-./ and numbers ('A' <= c && c <= 'Z') || - (c == '_') || (c == '$'); + (c == '_') || (c == '$') || (c == '\\'); } bool Tokenizer::ExpectToken(Token::Type expected, string* err) { @@ -131,10 +133,23 @@ bool Tokenizer::ReadIdent(string* out) { return true; } +// A note on backslashes in Makefiles, from reading the docs: +// Backslash-newline is the line continuation character. +// Backslash-# escapes a # (otherwise meaningful as a comment start). +// Backslash-% escapes a % (otherwise meaningful as a special). +// Finally, quoting the GNU manual, "Backslashes that are not in danger +// of quoting ‘%’ characters go unmolested." +// How do you end a line with a backslash? The netbsd Make docs suggest +// reading the result of a shell command echoing a backslash! +// +// Rather than implement the above, we do the simpler thing here. +// If anyone actually has depfiles that rely on the more complicated +// behavior we can adjust this. bool Tokenizer::ReadToNewline(string *text, string* err, size_t max_length) { // XXX token_.clear(); + const char kContinuation = makefile_flavor_ ? '\\' : '$'; while (cur_ < end_ && *cur_ != '\n') { - if (*cur_ == '\\') { + if (*cur_ == kContinuation) { // Might be a line continuation; peek ahead to check. if (cur_ + 1 >= end_) return Error("unexpected eof", err); @@ -144,10 +159,7 @@ bool Tokenizer::ReadToNewline(string *text, string* err, size_t max_length) { continue; } - // XXX we just let other backslashes through verbatim now. - // This may not be wise. - text->push_back(*cur_); - ++cur_; + // Otherwise, just treat it like a normal character. text->push_back(*cur_); ++cur_; } else { @@ -167,7 +179,7 @@ Token::Type Tokenizer::PeekToken() { return token_.type_; token_.pos_ = cur_; - if (whitespace_significant_ && cur_indent_ == -1) { + if (!makefile_flavor_ && cur_indent_ == -1) { cur_indent_ = cur_ - cur_line_; if (cur_indent_ != last_indent_) { if (cur_indent_ > last_indent_) { @@ -227,7 +239,9 @@ void Tokenizer::ConsumeToken() { token_.Clear(); } -MakefileParser::MakefileParser() : tokenizer_(false) {} +MakefileParser::MakefileParser() { + tokenizer_.SetMakefileFlavor(); +} bool MakefileParser::Parse(const string& input, string* err) { tokenizer_.Start(input.data(), input.data() + input.size()); @@ -252,7 +266,7 @@ bool MakefileParser::Parse(const string& input, string* err) { } ManifestParser::ManifestParser(State* state, FileReader* file_reader) - : state_(state), file_reader_(file_reader), tokenizer_(true) { + : state_(state), file_reader_(file_reader) { env_ = &state->bindings_; } bool ManifestParser::Load(const string& filename, string* err) { diff --git a/src/parsers.h b/src/parsers.h index b67a861..eb50523 100644 --- a/src/parsers.h +++ b/src/parsers.h @@ -63,10 +63,17 @@ struct SourceLocation { /// Processes an input stream into Tokens. struct Tokenizer { - Tokenizer(bool whitespace_significant) - : whitespace_significant_(whitespace_significant), - token_(Token::NONE), line_number_(0), - last_indent_(0), cur_indent_(-1) {} + Tokenizer() + : makefile_flavor_(false), + token_(Token::NONE), line_number_(0), + last_indent_(0), cur_indent_(-1) {} + + /// Tokenization differs slightly between ninja files and Makefiles. + /// By default we tokenize as ninja files; calling this changes to + /// Makefile-style tokenization. + void SetMakefileFlavor() { + makefile_flavor_ = true; + } void Start(const char* start, const char* end); /// Report an error with a location pointing at the current token. @@ -91,7 +98,7 @@ struct Tokenizer { return SourceLocation(line_number_ + 1, token_.pos_ - cur_line_ + 1); } - bool whitespace_significant_; + bool makefile_flavor_; const char* cur_; const char* end_; diff --git a/src/parsers_test.cc b/src/parsers_test.cc index 7acfe33..abbfb2f 100644 --- a/src/parsers_test.cc +++ b/src/parsers_test.cc @@ -109,10 +109,10 @@ TEST_F(ParserTest, VariableScope) { TEST_F(ParserTest, Continuation) { ASSERT_NO_FATAL_FAILURE(AssertParse( "rule link\n" -" command = foo bar \\\n" +" command = foo bar $\n" " baz\n" "\n" -"build a: link c \\\n" +"build a: link c $\n" " d e f\n")); ASSERT_EQ(2u, state.rules_.size()); @@ -215,15 +215,15 @@ TEST_F(ParserTest, Errors) { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("x = $\n", &err)); - EXPECT_EQ("line 1, col 6: expected variable after $", err); + EXPECT_FALSE(parser.Parse("x = $", &err)); + EXPECT_EQ("line 1, col 3: unexpected eof", err); } { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("x = \\\n $[\n", &err)); + EXPECT_FALSE(parser.Parse("x = $\n $[\n", &err)); EXPECT_EQ("line 2, col 3: expected variable after $", err); } @@ -231,8 +231,8 @@ TEST_F(ParserTest, Errors) { State state; ManifestParser parser(&state, NULL); string err; - EXPECT_FALSE(parser.Parse("x = a\\\n b\\\n $\n", &err)); - EXPECT_EQ("line 3, col 3: expected variable after $", err); + EXPECT_FALSE(parser.Parse("x = a$\n b$\n $\n", &err)); + EXPECT_EQ("line 4, col 1: expected newline, got eof", err); } { @@ -256,7 +256,7 @@ TEST_F(ParserTest, Errors) { ManifestParser parser(&state, NULL); string err; EXPECT_FALSE(parser.Parse("rule cat\n command = cat ok\n" - "build x: cat \\\n :\n", + "build x: cat $\n :\n", &err)); EXPECT_EQ("line 4, col 2: expected newline, got ':'", err); } -- cgit v0.12