From e24d31901fc79aa7348be46bef5dea8d0dce6c4b Mon Sep 17 00:00:00 2001
From: Scott Graham
Date: Fri, 7 Nov 2014 22:20:34 -0800
Subject: track back->forward conversions in a bitmask

---
Review notes (text in this section is ignored when the patch is applied):
 - Every use of |bits| / |bits_offset| now sits inside #ifdef _WIN32, like
   their declarations, so non-Windows builds compile again.
 - *slash_bits is written on every platform (0 where nothing is tracked).
 - ShiftOverBit() and the separator pre-scan no longer shift a signed value
   or shift by a count that can reach the width of unsigned int.
 - The commit id and blob ids in the headers still name the original
   revision of this patch.

 src/util.cc      | 82 +++++++++++++++++++++++++++++++++++++++++++++++++++-----
 src/util.h       |  7 +++++
 src/util_test.cc | 72 +++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 155 insertions(+), 6 deletions(-)

diff --git a/src/util.cc b/src/util.cc
index cb8adf1..6a9079e 100644
--- a/src/util.cc
+++ b/src/util.cc
@@ -86,18 +86,50 @@ void Error(const char* msg, ...) {
 }
 
 bool CanonicalizePath(string* path, string* err) {
+  unsigned int unused;
+  return CanonicalizePath(path, err, &unused);
+}
+
+bool CanonicalizePath(string* path, string* err, unsigned int* slash_bits) {
   METRIC_RECORD("canonicalize str");
   size_t len = path->size();
   char* str = 0;
   if (len > 0)
     str = &(*path)[0];
-  if (!CanonicalizePath(str, &len, err))
+  if (!CanonicalizePath(str, &len, err, slash_bits))
     return false;
   path->resize(len);
   return true;
 }
 
 bool CanonicalizePath(char* path, size_t* len, string* err) {
+  unsigned int unused;
+  return CanonicalizePath(path, len, err, &unused);
+}
+
+/// Removes the bit at position |offset| from |bits|: higher bits move down
+/// by one, lower bits stay where they are.
+unsigned int ShiftOverBit(int offset, unsigned int bits) {
+  // e.g. for |offset| == 2:
+  // | ... 9 8 7 6 5 4 3 2 1 0 |
+  // \_________________/   \_/
+  //        above         below
+  // So we drop the bit at offset and move above "down" into its place.
+  // An out-of-range |offset| names no bit, and shifting by it would be
+  // undefined, so leave |bits| alone.  (Width assumes 8-bit bytes.)
+  const int kBitCount = static_cast<int>(sizeof(bits) * 8);
+  if (offset < 0 || offset >= kBitCount)
+    return bits;
+  // Unsigned mask, and shift-then-mask for |above|, so that no shift count
+  // reaches the word size and no signed value overflows.
+  const unsigned int below_mask = (1u << offset) - 1;
+  unsigned int below = bits & below_mask;
+  unsigned int above = (bits >> 1) & ~below_mask;
+  return above | below;
+}
+
+bool CanonicalizePath(char* path, size_t* len, string* err,
+                      unsigned int* slash_bits) {
   // WARNING: this function is performance-critical; please benchmark
   // any changes you make to it.
   METRIC_RECORD("canonicalize path");
@@ -106,15 +138,27 @@ bool CanonicalizePath(char* path, size_t* len, string* err) {
     return false;
   }
 
-#ifdef _WIN32
-  for (char* c = path; (c = strchr(c, '\\')) != NULL;)
-    *c = '/';
-#endif
-
   const int kMaxPathComponents = 30;
   char* components[kMaxPathComponents];
   int component_count = 0;
 
+#ifdef _WIN32
+  // Turn every separator into '/', recording in |bits| which ones were
+  // backslashes (bit N describes the Nth separator).  This scan runs before
+  // any component has been counted, so kMaxPathComponents cannot bound it;
+  // separators past the width of |bits| are normalized but not recorded.
+  const int kSlashBitCount = static_cast<int>(sizeof(unsigned int) * 8);
+  unsigned int bits = 0;
+  int bits_offset = 0;
+  for (char* c = path; (c = strpbrk(c, "/\\")) != NULL;) {
+    if (bits_offset < kSlashBitCount)
+      bits |= static_cast<unsigned int>(*c == '\\') << bits_offset;
+    *c++ = '/';
+    bits_offset++;
+  }
+  bits_offset = 0;
+#endif
+
   char* start = path;
   char* dst = start;
   const char* src = start;
@@ -122,10 +166,12 @@ bool CanonicalizePath(char* path, size_t* len, string* err) {
 
   if (*src == '/') {
 #ifdef _WIN32
+    bits_offset++;
     // network path starts with //
     if (*len > 1 && *(src + 1) == '/') {
       src += 2;
       dst += 2;
+      bits_offset++;
     } else {
       ++src;
       ++dst;
@@ -141,6 +187,10 @@ bool CanonicalizePath(char* path, size_t* len, string* err) {
       if (src + 1 == end || src[1] == '/') {
         // '.' component; eliminate.
         src += 2;
+#ifdef _WIN32
+        // Drop the bit of the separator skipped together with the '.'.
+        bits = ShiftOverBit(bits_offset, bits);
+#endif
         continue;
       } else if (src[1] == '.' && (src + 2 == end || src[2] == '/')) {
         // '..' component.  Back up if possible.
@@ -148,6 +198,13 @@ bool CanonicalizePath(char* path, size_t* len, string* err) {
           dst = components[component_count - 1];
           src += 3;
           --component_count;
+#ifdef _WIN32
+          // Drop two bits: the separator skipped together with the '..' and
+          // the one belonging to the component that was just backed over.
+          bits = ShiftOverBit(bits_offset, bits);
+          bits_offset--;
+          bits = ShiftOverBit(bits_offset, bits);
+#endif
         } else {
           *dst++ = *src++;
           *dst++ = *src++;
@@ -159,6 +216,9 @@ bool CanonicalizePath(char* path, size_t* len, string* err) {
 
     if (*src == '/') {
       src++;
+#ifdef _WIN32
+      bits_offset++;
+#endif
       continue;
     }
 
@@ -169,6 +229,9 @@ bool CanonicalizePath(char* path, size_t* len, string* err) {
 
     while (*src != '/' && src != end)
       *dst++ = *src++;
+#ifdef _WIN32
+    bits_offset++;
+#endif
     *dst++ = *src++;  // Copy '/' or final \0 character as well.
   }
 
@@ -178,6 +241,13 @@ bool CanonicalizePath(char* path, size_t* len, string* err) {
   }
 
   *len = dst - start - 1;
+#ifdef _WIN32
+  *slash_bits = bits;
+#else
+  // Nothing is tracked on this platform; report an empty mask rather than
+  // leaving the caller's variable unwritten.
+  *slash_bits = 0;
+#endif
   return true;
 }
 
diff --git a/src/util.h b/src/util.h
index 7101770..36f31f3 100644
--- a/src/util.h
+++ b/src/util.h
@@ -45,6 +45,13 @@ bool CanonicalizePath(string* path, string* err);
 
 bool CanonicalizePath(char* path, size_t* len, string* err);
 
+/// |slash_bits| has bits set starting from lowest for a backslash that was
+/// normalized to a forward slash. (only used on Windows; a successful call
+/// stores 0 on other platforms)
+bool CanonicalizePath(string* path, string* err, unsigned int* slash_bits);
+bool CanonicalizePath(char* path, size_t* len, string* err,
+                      unsigned int* slash_bits);
+
 /// Appends |input| to |*result|, escaping according to the whims of either
 /// Bash, or Win32's CommandLineToArgvW().
 /// Appends the string directly to |result| without modification if we can
diff --git a/src/util_test.cc b/src/util_test.cc
index e82f227..36f212e 100644
--- a/src/util_test.cc
+++ b/src/util_test.cc
@@ -148,6 +148,78 @@ TEST(CanonicalizePath, PathSamplesWindows) {
   EXPECT_TRUE(CanonicalizePath(&path, &err));
   EXPECT_EQ("", path);
 }
+
+TEST(CanonicalizePath, SlashTracking) {
+  string path;
+  string err;
+  unsigned int slash_bits;
+
+  path = "foo.h"; err = "";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("foo.h", path);
+  EXPECT_EQ(0, slash_bits);
+
+  path = "a\\foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("a/foo.h", path);
+  EXPECT_EQ(1, slash_bits);
+
+  path = "a/bcd/efh\\foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("a/bcd/efh/foo.h", path);
+  EXPECT_EQ(4, slash_bits);
+
+  path = "a\\bcd/efh\\foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("a/bcd/efh/foo.h", path);
+  EXPECT_EQ(5, slash_bits);
+
+  path = "a\\bcd\\efh\\foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("a/bcd/efh/foo.h", path);
+  EXPECT_EQ(7, slash_bits);
+
+  path = "a/bcd/efh/foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("a/bcd/efh/foo.h", path);
+  EXPECT_EQ(0, slash_bits);
+
+  path = "a\\./efh\\foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("a/efh/foo.h", path);
+  EXPECT_EQ(3, slash_bits);
+
+  path = "a\\../efh\\foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("efh/foo.h", path);
+  EXPECT_EQ(1, slash_bits);
+
+  path = "a\\b\\c\\d\\e\\f\\g\\foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("a/b/c/d/e/f/g/foo.h", path);
+  EXPECT_EQ(127, slash_bits);
+
+  path = "a\\b\\c\\..\\..\\..\\g\\foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("g/foo.h", path);
+  EXPECT_EQ(1, slash_bits);
+
+  path = "a\\b/c\\../../..\\g\\foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("g/foo.h", path);
+  EXPECT_EQ(1, slash_bits);
+
+  path = "a\\b/c\\./../..\\g\\foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("a/g/foo.h", path);
+  EXPECT_EQ(3, slash_bits);
+
+  path = "a\\b/c\\./../..\\g/foo.h";
+  EXPECT_TRUE(CanonicalizePath(&path, &err, &slash_bits));
+  EXPECT_EQ("a/g/foo.h", path);
+  EXPECT_EQ(1, slash_bits);
+}
+
 #endif
 
 TEST(CanonicalizePath, EmptyResult) {
-- 
cgit v0.12