Fix lev_distance

This commit is contained in:
Till Hoeppner 2014-10-03 14:59:47 +02:00
parent 9a2286d3a1
commit 3aea7f1889

View File

@ -778,13 +778,11 @@ pub trait StrAllocating: Str {
/// Returns the Levenshtein Distance between two strings.
fn lev_distance(&self, t: &str) -> uint {
let me = self.as_slice();
let slen = me.len();
let tlen = t.len();
if me.is_empty() { return t.char_len(); }
if t.is_empty() { return me.char_len(); }
if slen == 0 { return tlen; }
if tlen == 0 { return slen; }
let mut dcol = Vec::from_fn(tlen + 1, |x| x);
let mut dcol = Vec::from_fn(t.len() + 1, |x| x);
let mut t_last = 0;
for (i, sc) in me.chars().enumerate() {
@ -799,15 +797,15 @@ pub trait StrAllocating: Str {
*dcol.get_mut(j + 1) = current;
} else {
*dcol.get_mut(j + 1) = cmp::min(current, next);
*dcol.get_mut(j + 1) = cmp::min(dcol[j + 1],
dcol[j]) + 1;
*dcol.get_mut(j + 1) = cmp::min(dcol[j + 1], dcol[j]) + 1;
}
current = next;
t_last = j;
}
}
return dcol[tlen];
dcol[t_last + 1]
}
/// Returns an iterator over the string in Unicode Normalization Form D
@ -1878,6 +1876,27 @@ mod tests {
assert_eq!(words, vec!["Märy", "häd", "ä", "little", "lämb", "Little", "lämb"])
}
// Checks `lev_distance` on single-character strings and on sentences that
// contain non-ASCII characters.
#[test]
fn test_lev_distance() {
use std::char::{ from_u32, MAX };
// Byte-length agnosticity: for every `u32` in the range that `from_u32`
// accepts as a `char`, build a string from that char and check that the
// distance from the string to itself is 0.
// NOTE(review): `range` is presumably half-open, so `MAX` itself would not
// be covered - confirm whether that is intended.
for c in range(0u32, MAX as u32)
.filter_map(|i| from_u32(i))
// presumably a string holding one copy of the char - verify `from_char` argument order
.map(|i| String::from_char(1, i)) {
assert_eq!(c[].lev_distance(c[]), 0);
}
// `a` and `b` differ in one character ("Märy" vs "Mary").
// `c` is `b` without the leading newline.
let a = "\nMäry häd ä little lämb\n\nLittle lämb\n";
let b = "\nMary häd ä little lämb\n\nLittle lämb\n";
let c = "Mary häd ä little lämb\n\nLittle lämb\n";
// Each pair is checked in both directions, so the expected distance must
// not depend on which string is the receiver.
assert_eq!(a.lev_distance(b), 1);
assert_eq!(b.lev_distance(a), 1);
// `a` -> `c`: the character change plus the missing leading newline.
assert_eq!(a.lev_distance(c), 2);
assert_eq!(c.lev_distance(a), 2);
// `b` -> `c`: only the leading newline differs.
assert_eq!(b.lev_distance(c), 1);
assert_eq!(c.lev_distance(b), 1);
}
#[test]
fn test_nfd_chars() {
macro_rules! t {