-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Longest common substring & Edit distance added
- Loading branch information
Showing
3 changed files
with
144 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,68 @@ | ||
/**
 * An implementation of the edit distance algorithm with configurable costs for insertion,
 * deletion and substitution.
 *
 * <p>Time Complexity: O(nm)
 *
 * @author Micah Stairs
 */
public class EditDistance {

  /**
   * Computes the minimum total cost of converting string {@code a} into string {@code b} using
   * dynamic programming.
   *
   * <p>Three operations are available: inserting a character of {@code b}, deleting a character
   * of {@code a}, and substituting a character of {@code a} with a character of {@code b}.
   * Characters that already match are carried over at no cost. Runs in O(nm) time and uses an
   * (n + 1) x (m + 1) table, where n and m are the lengths of {@code a} and {@code b}.
   *
   * @param a the source string
   * @param b the target string
   * @param insertionCost the cost of inserting one character
   * @param deletionCost the cost of deleting one character
   * @param substitutionCost the cost of replacing one character with a different one
   * @return the cheapest cost of turning {@code a} into {@code b}
   * @throws NullPointerException if {@code a} or {@code b} is {@code null}
   */
  public static int editDistance(
      String a, String b, int insertionCost, int deletionCost, int substitutionCost) {

    if (a == null) {
      throw new NullPointerException("a must not be null");
    }
    if (b == null) {
      throw new NullPointerException("b must not be null");
    }

    final int aLen = a.length();
    final int bLen = b.length();

    // cost[i][j] = cheapest way to turn the first i characters of a
    // into the first j characters of b. cost[0][0] is 0 by default.
    int[][] cost = new int[aLen + 1][bLen + 1];

    // Empty prefix of a -> prefix of b: only insertions are possible.
    for (int j = 1; j <= bLen; j++) {
      cost[0][j] = cost[0][j - 1] + insertionCost;
    }

    // Prefix of a -> empty prefix of b: only deletions are possible.
    for (int i = 1; i <= aLen; i++) {
      cost[i][0] = cost[i - 1][0] + deletionCost;
    }

    for (int i = 1; i <= aLen; i++) {
      final char fromChar = a.charAt(i - 1);
      for (int j = 1; j <= bLen; j++) {

        // Substitution (free when the two characters already match)
        int replaceStep = (fromChar == b.charAt(j - 1)) ? 0 : substitutionCost;
        int best = cost[i - 1][j - 1] + replaceStep;

        // Deletion of a's i-th character
        best = Math.min(best, cost[i - 1][j] + deletionCost);

        // Insertion of b's j-th character
        best = Math.min(best, cost[i][j - 1] + insertionCost);

        cost[i][j] = best;
      }
    }

    return cost[aLen][bLen];
  }

  public static void main(String[] args) {

    String a = "abcdefg";
    String b = "abcdefg";

    // The strings are the same so the cost is zero
    System.out.println(editDistance(a, b, 10, 10, 10));

    a = "aaa";
    b = "aaabbb";

    // 10*3 = 30 because of three insertions
    System.out.println(editDistance(a, b, 10, 2, 3));

    a = "1023";
    b = "10101010";

    // Outputs 2*2 + 4*5 = 24 for 2 substitutions and 4 insertions
    System.out.println(editDistance(a, b, 5, 7, 2));

    a = "923456789";
    b = "12345";

    // Outputs 4*4 + 1 = 17 because we need to delete 4
    // characters and perform one substitution
    System.out.println(editDistance(a, b, 2, 4, 1));
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
/**
 * This file contains an implementation of finding a Longest Common Subsequence (LCS) between two
 * character sequences using dynamic programming.
 *
 * <p>NOTE: despite the class name, the recurrence implemented here is the one for the longest
 * common <em>subsequence</em> (matched characters do not have to be contiguous), not the longest
 * common substring. The class name is kept as is so existing callers continue to compile.
 *
 * <p>Time Complexity: O(nm)
 *
 * @author William Fiset, [email protected]
 */
public class LongestCommonSubstring {

  /**
   * Returns one (not necessarily unique) longest common subsequence of {@code A} and {@code B}
   * in O(nm) time, where n and m are the lengths of {@code A} and {@code B}.
   *
   * @param A the first character sequence
   * @param B the second character sequence
   * @return a longest common subsequence (the empty string when both inputs are non-empty but
   *     share no character), or {@code null} if either input is {@code null} or has length zero
   */
  public static String lcs(char[] A, char[] B) {

    if (A == null || B == null) {
      return null;
    }

    final int n = A.length;
    final int m = B.length;

    if (n == 0 || m == 0) {
      return null;
    }

    // dp[i][j] = length of an LCS of the first i characters of A
    // and the first j characters of B. Row 0 and column 0 stay 0.
    int[][] dp = new int[n + 1][m + 1];

    // Suppose A = a1a2..an and B = b1b2..bm
    for (int i = 1; i <= n; i++) {
      for (int j = 1; j <= m; j++) {
        if (A[i - 1] == B[j - 1]) {
          // If the ends match: LCS(a1..ai, b1..bj) = LCS(a1..ai-1, b1..bj-1) + 1
          dp[i][j] = dp[i - 1][j - 1] + 1;
        } else {
          // If the ends do not match, drop one end from either side and keep the better:
          // max( LCS(a1..ai-1, b1..bj), LCS(a1..ai, b1..bj-1) )
          dp[i][j] = Math.max(dp[i - 1][j], dp[i][j - 1]);
        }
      }
    }

    final int lcsLen = dp[n][m];
    char[] result = new char[lcsLen];
    int written = 0;

    // Backtrack to recover an LCS, filling 'result' from its last position to its first.
    // A character was included at a cell (i, j) whose value differs from both
    // dp[i-1][j] and dp[i][j-1].
    int i = n;
    int j = m;
    while (i >= 1 && j >= 1) {

      final int v = dp[i][j];

      // Slide up, then left, while the LCS length is unchanged.
      // The order of these two loops may change which LCS is produced.
      while (i > 1 && dp[i - 1][j] == v) {
        i--;
      }
      while (j > 1 && dp[i][j - 1] == v) {
        j--;
      }

      // Make sure there is a match before adding (v == 0 means nothing is left to recover)
      if (v > 0) {
        result[lcsLen - written - 1] = A[i - 1]; // or B[j - 1]
        written++;
      }

      i--;
      j--;
    }

    return new String(result, 0, lcsLen);
  }

  public static void main(String[] args) {

    char[] A = {'A', 'X', 'B', 'C', 'Y'};
    char[] B = {'Z', 'A', 'Y', 'W', 'B', 'C'};
    System.out.println(lcs(A, B)); // ABC

    A = new char[] {'3', '9', '8', '3', '9', '7', '9', '7', '0'};
    B = new char[] {'3', '3', '9', '9', '9', '1', '7', '2', '0', '6'};
    System.out.println(lcs(A, B)); // 339970
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters