diff options
author | Nico Weber <thakis@chromium.org> | 2011-11-10 20:58:00 (GMT) |
---|---|---|
committer | Evan Martin <martine@danga.com> | 2011-11-13 20:17:53 (GMT) |
commit | d0a3c5c3735c52aa2fd4ecfb0d2c84dc9ebbb45a (patch) | |
tree | dcc66684f8b5b19f902e731034c69a7c7b07ca93 /src/edit_distance.cc | |
parent | 04097eb434d96d9c6e6aefd83c9c9d8970c2e84e (diff) | |
download | Ninja-d0a3c5c3735c52aa2fd4ecfb0d2c84dc9ebbb45a.zip Ninja-d0a3c5c3735c52aa2fd4ecfb0d2c84dc9ebbb45a.tar.gz Ninja-d0a3c5c3735c52aa2fd4ecfb0d2c84dc9ebbb45a.tar.bz2 |
Add an EditDistance() function based on the one in llvm/lib/Support/StringRef.cpp.
Diffstat (limited to 'src/edit_distance.cc')
-rw-r--r-- | src/edit_distance.cc | 68 |
1 files changed, 68 insertions, 0 deletions
diff --git a/src/edit_distance.cc b/src/edit_distance.cc new file mode 100644 index 0000000..fe05f64 --- /dev/null +++ b/src/edit_distance.cc @@ -0,0 +1,68 @@ +// Copyright 2011 Google Inc. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "edit_distance.h" + +#include <vector> + +#include "string_piece.h" + +int EditDistance(const StringPiece& s1, + const StringPiece& s2, + bool allow_replacements, + int max_edit_distance) { + // The algorithm implemented below is the "classic" + // dynamic-programming algorithm for computing the Levenshtein + // distance, which is described here: + // + // http://en.wikipedia.org/wiki/Levenshtein_distance + // + // Although the algorithm is typically described using an m x n + // array, only two rows are used at a time, so this implemenation + // just keeps two separate vectors for those two rows. + int m = s1.len_; + int n = s2.len_; + + std::vector<int> previous(n + 1); + std::vector<int> current(n + 1); + + for (int i = 0; i <= n; ++i) + previous[i] = i; + + for (int y = 1; y <= m; ++y) { + current[0] = y; + int best_this_row = current[0]; + + for (int x = 1; x <= n; ++x) { + if (allow_replacements) { + current[x] = min(previous[x-1] + (s1.str_[y-1] == s2.str_[x-1] ? 
0 : 1), + min(current[x-1], previous[x])+1); + } + else { + if (s1.str_[y-1] == s2.str_[x-1]) + current[x] = previous[x-1]; + else + current[x] = min(current[x-1], previous[x]) + 1; + } + best_this_row = min(best_this_row, current[x]); + } + + if (max_edit_distance && best_this_row > max_edit_distance) + return max_edit_distance + 1; + + current.swap(previous); + } + + return previous[n]; +} |