Java判断两个String的相似度

如何判断两个String的相似度

下面的代码基于编辑距离（Levenshtein 距离）计算两个字符串的相似度：先求出两个字符串之间的编辑距离，再用较长字符串的长度进行归一化。
/**
 * Computes a similarity score for two strings from their edit distance.
 *
 * <p>The score is {@code (L - d) / L}, where {@code L} is the length of the
 * longer of the two strings and {@code d} is the value returned by
 * {@code editDistance(longer, shorter)}. When both strings are empty the
 * score is defined as {@code 1.0}.
 *
 * @param s1 the first string; must not be {@code null}
 * @param s2 the second string; must not be {@code null}
 * @return the similarity score as a {@code double}
 * @throws NullPointerException if either argument is {@code null}
 */
public static double similarity(String s1, String s2) {
    final String longer;
    final String shorter;
    // On equal lengths s1 is treated as the longer string.
    if (s1.length() < s2.length()) {
        longer = s2;
        shorter = s1;
    } else {
        longer = s1;
        shorter = s2;
    }

    final int longerLength = longer.length();
    if (longerLength == 0) {
        // Both strings are empty: avoid dividing by zero.
        return 1.0;
    }

    final int distance = editDistance(longer, shorter);
    return (longerLength - distance) / (double) longerLength;
}

/**
 * Computes the Levenshtein edit distance between two strings: the minimum
 * number of single-character insertions, deletions and substitutions needed
 * to turn one string into the other.
 *
 * <p>Only one row of the dynamic-programming table is kept, sized by
 * {@code short_str}. The computation itself does not depend on which
 * argument is actually longer. Characters are compared as {@code char}
 * values via {@link String#charAt(int)}, and the comparison is
 * case-sensitive.
 *
 * @param long_str  the string whose characters index the table rows; must
 *                  not be {@code null}
 * @param short_str the string whose characters index the table columns;
 *                  must not be {@code null}
 * @return the edit distance between the two strings
 * @throws NullPointerException if either argument is {@code null}
 */
public static int editDistance(String long_str, String short_str) {
    final int rowCount = long_str.length();
    final int colCount = short_str.length();

    // Row 0: distance from the empty prefix to each prefix of short_str.
    final int[] row = new int[colCount + 1];
    for (int col = 0; col <= colCount; col++) {
        row[col] = col;
    }

    for (int r = 1; r <= rowCount; r++) {
        final char rowChar = long_str.charAt(r - 1);
        // Value for column 0 of the new row; written back one step late so
        // that row[c - 1] still holds the previous row's (diagonal) value.
        int pending = r;
        for (int c = 1; c <= colCount; c++) {
            final int diagonal = row[c - 1];
            final int current;
            if (rowChar == short_str.charAt(c - 1)) {
                current = diagonal;
            } else {
                current = Math.min(Math.min(diagonal, pending), row[c]) + 1;
            }
            row[c - 1] = pending;
            pending = current;
        }
        row[colCount] = pending;
    }
    return row[colCount];
}