summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNico Weber <thakis@chromium.org>2011-11-10 20:58:00 (GMT)
committerEvan Martin <martine@danga.com>2011-11-13 20:17:53 (GMT)
commitd0a3c5c3735c52aa2fd4ecfb0d2c84dc9ebbb45a (patch)
treedcc66684f8b5b19f902e731034c69a7c7b07ca93
parent04097eb434d96d9c6e6aefd83c9c9d8970c2e84e (diff)
downloadNinja-d0a3c5c3735c52aa2fd4ecfb0d2c84dc9ebbb45a.zip
Ninja-d0a3c5c3735c52aa2fd4ecfb0d2c84dc9ebbb45a.tar.gz
Ninja-d0a3c5c3735c52aa2fd4ecfb0d2c84dc9ebbb45a.tar.bz2
Add an EditDistance() function based on the one in llvm/lib/Support/StringRef.cpp.
-rwxr-xr-xconfigure.py6
-rw-r--r--src/edit_distance.cc68
-rw-r--r--src/edit_distance.h25
-rw-r--r--src/edit_distance_test.cc49
-rw-r--r--src/string_piece.h2
5 files changed, 147 insertions, 3 deletions
diff --git a/configure.py b/configure.py
index 17afea4..be8eea6 100755
--- a/configure.py
+++ b/configure.py
@@ -140,8 +140,9 @@ if platform not in ('mingw'):
n.newline()
n.comment('Core source files all build into ninja library.')
-for name in ['build', 'build_log', 'clean', 'eval_env', 'graph', 'graphviz',
- 'parsers', 'util', 'stat_cache', 'disk_interface', 'state']:
+for name in ['build', 'build_log', 'clean', 'edit_distance', 'eval_env',
+ 'graph', 'graphviz', 'parsers', 'util', 'stat_cache',
+ 'disk_interface', 'state']:
objs += cxx(name)
if platform == 'mingw':
objs += cxx('subprocess-win32')
@@ -176,6 +177,7 @@ for name in ['build_log_test',
'build_test',
'clean_test',
'disk_interface_test',
+ 'edit_distance_test',
'eval_env_test',
'graph_test',
'parsers_test',
diff --git a/src/edit_distance.cc b/src/edit_distance.cc
new file mode 100644
index 0000000..fe05f64
--- /dev/null
+++ b/src/edit_distance.cc
@@ -0,0 +1,68 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "edit_distance.h"
+
+#include <vector>
+
+#include "string_piece.h"
+
+int EditDistance(const StringPiece& s1,
+ const StringPiece& s2,
+ bool allow_replacements,  // false: a substitution is counted as delete + insert (cost 2)
+ int max_edit_distance) {  // 0: no limit; non-zero enables the early exit below
+ // Returns the edit distance between s1 and s2, computed with the
+ // "classic" dynamic-programming algorithm for the Levenshtein
+ // distance, which is described here:
+ //
+ // http://en.wikipedia.org/wiki/Levenshtein_distance
+ //
+ // Although the algorithm is typically described using an m x n
+ // array, only two rows are used at a time, so this implementation
+ // just keeps two separate vectors for those two rows.
+ int m = s1.len_;  // number of rows: one per character of s1
+ int n = s2.len_;  // number of columns: one per character of s2
+
+ std::vector<int> previous(n + 1);
+ std::vector<int> current(n + 1);
+
+ for (int i = 0; i <= n; ++i)  // row 0: empty prefix of s1 vs. first i chars of s2
+ previous[i] = i;
+
+ for (int y = 1; y <= m; ++y) {
+ current[0] = y;  // first y chars of s1 vs. empty prefix of s2
+ int best_this_row = current[0];  // smallest cell in this row, for the early exit
+
+ for (int x = 1; x <= n; ++x) {
+ if (allow_replacements) {
+ current[x] = min(previous[x-1] + (s1.str_[y-1] == s2.str_[x-1] ? 0 : 1),
+ min(current[x-1], previous[x])+1);  // diagonal (match/substitute) vs. left/up + 1
+ }
+ else {
+ if (s1.str_[y-1] == s2.str_[x-1])
+ current[x] = previous[x-1];  // characters match: carry the diagonal cost over
+ else
+ current[x] = min(current[x-1], previous[x]) + 1;  // mismatch: only left/up + 1, no diagonal
+ }
+ best_this_row = min(best_this_row, current[x]);  // NOTE(review): unqualified min presumably comes from a using-directive in an included header -- confirm
+ }
+
+ if (max_edit_distance && best_this_row > max_edit_distance)  // every cell of this row is over the limit
+ return max_edit_distance + 1;  // row minimum never decreases, so the result must exceed the limit
+
+ current.swap(previous);  // the finished row becomes "previous" for the next iteration
+ }
+
+ return previous[n];  // NOTE: can still be greater than max_edit_distance; the early exit only fires when a whole row is over the limit
+}
diff --git a/src/edit_distance.h b/src/edit_distance.h
new file mode 100644
index 0000000..186a0d7
--- /dev/null
+++ b/src/edit_distance.h
@@ -0,0 +1,25 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef NINJA_EDIT_DISTANCE_H_
+#define NINJA_EDIT_DISTANCE_H_
+
+struct StringPiece;
+
+int EditDistance(const StringPiece& s1,  // Returns the edit (Levenshtein) distance between s1 and s2.
+ const StringPiece& s2,
+ bool allow_replacements = true,  // false: a substitution is counted as delete + insert (cost 2)
+ int max_edit_distance = 0);  // 0: no limit; otherwise returns max_edit_distance + 1 once a whole row exceeds it
+
+#endif // NINJA_EDIT_DISTANCE_H_
diff --git a/src/edit_distance_test.cc b/src/edit_distance_test.cc
new file mode 100644
index 0000000..a4c0486
--- /dev/null
+++ b/src/edit_distance_test.cc
@@ -0,0 +1,49 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "edit_distance.h"
+
+#include "string_piece.h"
+#include "test.h"
+
+TEST(EditDistanceTest, TestEmpty) {  // Against an empty string the distance is the other string's length.
+ EXPECT_EQ(5, EditDistance("", "ninja"));
+ EXPECT_EQ(5, EditDistance("ninja", ""));
+ EXPECT_EQ(0, EditDistance("", ""));  // two empty strings are identical
+}
+
+TEST(EditDistanceTest, TestMaxDistance) {  // With a limit set, a far-apart pair yields limit + 1.
+ const bool allow_replacements = true;
+ for (int max_distance = 1; max_distance < 7; ++max_distance) {  // limits 1 through 6
+ EXPECT_EQ(max_distance + 1,
+ EditDistance("abcdefghijklmnop", "ponmlkjihgfedcba",  // second string is the first one reversed
+ allow_replacements, max_distance));
+ }
+}
+
+TEST(EditDistanceTest, TestAllowReplacements) {  // "ninja" and "njnja" differ in exactly one character.
+ bool allow_replacements = true;
+ EXPECT_EQ(1, EditDistance("ninja", "njnja", allow_replacements));  // one substitution
+ EXPECT_EQ(1, EditDistance("njnja", "ninja", allow_replacements));  // same in the other direction
+
+ allow_replacements = false;
+ EXPECT_EQ(2, EditDistance("ninja", "njnja", allow_replacements));  // one deletion plus one insertion
+ EXPECT_EQ(2, EditDistance("njnja", "ninja", allow_replacements));
+}
+
+TEST(EditDistanceTest, TestBasics) {  // Identical strings and strings differing by one trailing character.
+ EXPECT_EQ(0, EditDistance("browser_tests", "browser_tests"));
+ EXPECT_EQ(1, EditDistance("browser_test", "browser_tests"));  // one character appended
+ EXPECT_EQ(1, EditDistance("browser_tests", "browser_test"));  // one character removed
+}
diff --git a/src/string_piece.h b/src/string_piece.h
index 0e55afb..3b94ce3 100644
--- a/src/string_piece.h
+++ b/src/string_piece.h
@@ -48,4 +48,4 @@ struct StringPiece {
int len_;
};
-#endif // NINJA_BROWSE_H_
+#endif // NINJA_STRINGPIECE_H_