From 52649de2c56b63f42bc59513d51286531c595b44 Mon Sep 17 00:00:00 2001 From: zero9178 Date: Wed, 20 May 2020 14:38:36 +0200 Subject: bugfix: Process escaped colon in GCC depfiles. (#1774) * Added ability to parse escaped colons in GCC Dep files enabling ninja to parse dep files of GCC 10 on Windows * Added generated depfile_parser.cc * Addressed formatting * Added extra tests with real world examples of paths produced by both GCC 10 and Clang and GCC pre 10. Adjusted one test so it doesn't fail * Adjusted regular expression to not match \: if the character following the : is either EOF or whitespace * Fixed typo in regex (should be 0x20 for space not 0xa) * Changed regular expression form using lookahead to instead matching a separate expression. This was needed as re2c pre version 1.17 is broken when using lookaheads. Also added tests for \: followed by whitespace * Addressed formatting * Forgot a missing std:: * Fixed formatting for spaces after , as well as respecting column width --- src/depfile_parser.cc | 87 +++++++++++++++++++++++++++++++++------------- src/depfile_parser.in.cc | 23 ++++++++++++ src/depfile_parser_test.cc | 35 +++++++++++++++++++ 3 files changed, 121 insertions(+), 24 deletions(-) diff --git a/src/depfile_parser.cc b/src/depfile_parser.cc index 90d4a8a..0b7dce1 100644 --- a/src/depfile_parser.cc +++ b/src/depfile_parser.cc @@ -1,4 +1,4 @@ -/* Generated by re2c 1.1.1 */ +/* Generated by re2c 1.3 */ // Copyright 2011 Google Inc. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); @@ -166,22 +166,23 @@ yy12: goto yy5; yy13: yych = *(yymarker = ++in); - if (yych <= 0x1F) { + if (yych <= ' ') { if (yych <= '\n') { if (yych <= 0x00) goto yy5; if (yych <= '\t') goto yy16; goto yy17; } else { if (yych == '\r') goto yy19; - goto yy16; + if (yych <= 0x1F) goto yy16; + goto yy21; } } else { - if (yych <= '#') { - if (yych <= ' ') goto yy21; - if (yych <= '"') goto yy16; - goto yy23; + if (yych <= '9') { + if (yych == '#') goto yy23; + goto yy16; } else { - if (yych == '\\') goto yy25; + if (yych <= ':') goto yy25; + if (yych == '\\') goto yy27; goto yy16; } } @@ -231,26 +232,63 @@ yy23: } yy25: yych = *++in; - if (yych <= 0x1F) { + if (yych <= '\f') { + if (yych <= 0x00) goto yy28; + if (yych <= 0x08) goto yy26; + if (yych <= '\n') goto yy28; + } else { + if (yych <= '\r') goto yy28; + if (yych == ' ') goto yy28; + } +yy26: + { + // De-escape colon sign, but preserve other leading backslashes. + // Regular expression uses lookahead to make sure that no whitespace + // nor EOF follows. In that case it'd be the : at the end of a target + int len = (int)(in - start); + if (len > 2 && out < start) + memset(out, '\\', len - 2); + out += len - 2; + *out++ = ':'; + continue; + } +yy27: + yych = *++in; + if (yych <= ' ') { if (yych <= '\n') { if (yych <= 0x00) goto yy11; if (yych <= '\t') goto yy16; goto yy11; } else { if (yych == '\r') goto yy11; - goto yy16; + if (yych <= 0x1F) goto yy16; + goto yy30; } } else { - if (yych <= '#') { - if (yych <= ' ') goto yy26; - if (yych <= '"') goto yy16; - goto yy23; + if (yych <= '9') { + if (yych == '#') goto yy23; + goto yy16; } else { - if (yych == '\\') goto yy28; + if (yych <= ':') goto yy25; + if (yych == '\\') goto yy32; goto yy16; } } -yy26: +yy28: + ++in; + { + // Backslash followed by : and whitespace. + // It is therefore normal text and not an escaped colon + int len = (int)(in - start - 1); + // Need to shift it over if we're overwriting backslashes. + if (out < start) + memmove(out, start, len); + out += len; + if (*(in - 1) == '\n') + have_newline = true; + break; + } +yy30: ++in; { // 2N backslashes plus space -> 2N backslashes, end of filename. @@ -260,24 +298,25 @@ yy26: out += len - 1; break; } -yy28: +yy32: yych = *++in; - if (yych <= 0x1F) { + if (yych <= ' ') { if (yych <= '\n') { if (yych <= 0x00) goto yy11; if (yych <= '\t') goto yy16; goto yy11; } else { if (yych == '\r') goto yy11; - goto yy16; + if (yych <= 0x1F) goto yy16; + goto yy21; } } else { - if (yych <= '#') { - if (yych <= ' ') goto yy21; - if (yych <= '"') goto yy16; - goto yy23; + if (yych <= '9') { + if (yych == '#') goto yy23; + goto yy16; } else { - if (yych == '\\') goto yy25; + if (yych <= ':') goto yy25; + if (yych == '\\') goto yy27; goto yy16; } } diff --git a/src/depfile_parser.in.cc b/src/depfile_parser.in.cc index b32b942..95b4346 100644 --- a/src/depfile_parser.in.cc +++ b/src/depfile_parser.in.cc @@ -103,6 +103,29 @@ bool DepfileParser::Parse(string* content, string* err) { *out++ = '#'; continue; } + '\\'+ ':' [\x00\x20\r\n\t] { + // Backslash followed by : and whitespace. + // It is therefore normal text and not an escaped colon + int len = (int)(in - start - 1); + // Need to shift it over if we're overwriting backslashes. + if (out < start) + memmove(out, start, len); + out += len; + if (*(in - 1) == '\n') + have_newline = true; + break; + } + '\\'+ ':' { + // De-escape colon sign, but preserve other leading backslashes. + // Regular expression uses lookahead to make sure that no whitespace + // nor EOF follows. In that case it'd be the : at the end of a target + int len = (int)(in - start); + if (len > 2 && out < start) + memset(out, '\\', len - 2); + out += len - 2; + *out++ = ':'; + continue; + } '$$' { // De-escape dollar character. *out++ = '$'; diff --git a/src/depfile_parser_test.cc b/src/depfile_parser_test.cc index bf1a0bc..8e2cd25 100644 --- a/src/depfile_parser_test.cc +++ b/src/depfile_parser_test.cc @@ -142,6 +142,41 @@ TEST_F(DepfileParserTest, Escapes) { ASSERT_EQ(0u, parser_.ins_.size()); } +TEST_F(DepfileParserTest, EscapedColons) +{ + std::string err; + // Tests for correct parsing of depfiles produced on Windows + // by both Clang, GCC pre 10 and GCC 10 + EXPECT_TRUE(Parse( +"c\\:\\gcc\\x86_64-w64-mingw32\\include\\stddef.o: \\\n" +" c:\\gcc\\x86_64-w64-mingw32\\include\\stddef.h \n", + &err)); + ASSERT_EQ("", err); + ASSERT_EQ(1u, parser_.outs_.size()); + EXPECT_EQ("c:\\gcc\\x86_64-w64-mingw32\\include\\stddef.o", + parser_.outs_[0].AsString()); + ASSERT_EQ(1u, parser_.ins_.size()); + EXPECT_EQ("c:\\gcc\\x86_64-w64-mingw32\\include\\stddef.h", + parser_.ins_[0].AsString()); +} + +TEST_F(DepfileParserTest, EscapedTargetColon) +{ + std::string err; + EXPECT_TRUE(Parse( +"foo1\\: x\n" +"foo1\\:\n" +"foo1\\:\r\n" +"foo1\\:\t\n" +"foo1\\:", + &err)); + ASSERT_EQ("", err); + ASSERT_EQ(1u, parser_.outs_.size()); + EXPECT_EQ("foo1\\", parser_.outs_[0].AsString()); + ASSERT_EQ(1u, parser_.ins_.size()); + EXPECT_EQ("x", parser_.ins_[0].AsString()); +} + TEST_F(DepfileParserTest, SpecialChars) { // See filenames like istreambuf.iterator_op!= in // https://github.com/google/libcxx/tree/master/test/iterators/stream.iterators/istreambuf.iterator/ -- cgit v0.12