changed
README.md
|
@@ -16,7 +16,7 @@ The package can be installed by adding `leven` to your list of dependencies in
|
16
16
|
```elixir
|
17
17
|
def deps do
|
18
18
|
[
|
19
|
- {:leven, "~> 1.0.0"}
|
19
|
+ {:leven, "~> 1.0"}
|
20
20
|
]
|
21
21
|
end
|
22
22
|
```
|
|
@@ -35,22 +35,7 @@ Two single-character edits are required to get from "house" to "horses":
|
35
35
|
|
36
36
|
## Benchmarks
|
37
37
|
|
38
|
- These benchmarks show how many operations per second Leven can perform for two
|
39
|
- random strings of length `N` on my machine (Ryzen 7 / 16 Core / 32 GB).
|
40
|
-
|
41
|
- | String Length | ops/sec |
|
42
|
- |---------------|---------|
|
43
|
- | Empty LHS | 2.5 M |
|
44
|
- | Empty RHS | 2.47 M |
|
45
|
- | N=4 | 244.5 K |
|
46
|
- | N=8 | 86.57 K |
|
47
|
- | N=16 | 22.51 K |
|
48
|
- | N=32 | 4.67 K |
|
49
|
- | N=64 | 863 |
|
50
|
- | N=128 | 152 |
|
51
|
- | N=256 | 23 |
|
52
|
-
|
53
|
- To benchmark Leven on your own machine, clone the repo and `mix bench`.
|
38
|
+ To benchmark Leven on your machine, clone the repo and run `mix bench`.
|
54
39
|
|
55
40
|
## License
|
changed
hex_metadata.config
|
@@ -1,18 +1,18 @@
|
1
|
- {<<"app">>,<<"leven">>}.
|
2
|
- {<<"build_tools">>,[<<"mix">>]}.
|
1
|
+ {<<"links">>,[{<<"GitHub">>,<<"https://github.com/elliotekj/leven">>}]}.
|
2
|
+ {<<"name">>,<<"leven">>}.
|
3
|
+ {<<"version">>,<<"1.0.1">>}.
|
3
4
|
{<<"description">>,
|
4
5
|
<<"An efficient, tabulated implementation of the Levenshtein distance algorithm\nused to compute the edit distance between two strings.">>}.
|
5
6
|
{<<"elixir">>,<<"~> 1.14">>}.
|
7
|
+ {<<"app">>,<<"leven">>}.
|
8
|
+ {<<"licenses">>,[<<"Apache-2.0">>]}.
|
9
|
+ {<<"requirements">>,
|
10
|
+ [[{<<"name">>,<<"benchee">>},
|
11
|
+ {<<"app">>,<<"benchee">>},
|
12
|
+ {<<"optional">>,true},
|
13
|
+ {<<"requirement">>,<<"~> 1.0">>},
|
14
|
+ {<<"repository">>,<<"hexpm">>}]]}.
|
6
15
|
{<<"files">>,
|
7
16
|
[<<"lib">>,<<"lib/leven.ex">>,<<".formatter.exs">>,<<"mix.exs">>,
|
8
17
|
<<"README.md">>,<<"LICENSE">>]}.
|
9
|
- {<<"licenses">>,[<<"Apache-2.0">>]}.
|
10
|
- {<<"links">>,[{<<"GitHub">>,<<"https://github.com/elliotekj/leven">>}]}.
|
11
|
- {<<"name">>,<<"leven">>}.
|
12
|
- {<<"requirements">>,
|
13
|
- [[{<<"app">>,<<"benchee">>},
|
14
|
- {<<"name">>,<<"benchee">>},
|
15
|
- {<<"optional">>,true},
|
16
|
- {<<"repository">>,<<"hexpm">>},
|
17
|
- {<<"requirement">>,<<"~> 1.0">>}]]}.
|
18
|
- {<<"version">>,<<"1.0.0">>}.
|
18
|
+ {<<"build_tools">>,[<<"mix">>]}.
|
changed
lib/leven.ex
|
@@ -25,47 +25,35 @@ defmodule Leven do
|
25
25
|
source_len = length(source)
|
26
26
|
target_len = length(target)
|
27
27
|
|
28
|
- matrix = get_distance_table(source_len, target_len)
|
29
|
- filled_matrix = process_rows(source_len, target_len, matrix, source, target)
|
30
|
- get_distance(filled_matrix, source_len, target_len)
|
28
|
+ matrix = build_matrix(source_len, target_len, source, target)
|
29
|
+ get_distance(matrix, source_len, target_len)
|
31
30
|
end
|
32
31
|
|
33
|
- defp get_distance_table(source_len, target_len) do
|
34
|
- first_row = Enum.reduce(0..target_len, {}, fn i, acc -> Tuple.append(acc, i) end)
|
35
|
- rest_zeros = List.duplicate(0, target_len)
|
36
|
- rest_rows = Enum.map(1..source_len, fn i -> List.to_tuple([i | rest_zeros]) end)
|
37
|
- List.to_tuple([first_row | rest_rows])
|
32
|
+ defp build_matrix(source_len, target_len, source, target) do
|
33
|
+ Enum.reduce(1..source_len, %{}, fn y, acc ->
|
34
|
+ Enum.reduce(1..target_len, acc, fn x, acc ->
|
35
|
+ source_char = Enum.at(source, y - 1)
|
36
|
+ target_char = Enum.at(target, x - 1)
|
37
|
+
|
38
|
+ calculate_new_cell(x, y, acc, source_char, target_char)
|
39
|
+ end)
|
40
|
+ end)
|
38
41
|
end
|
39
42
|
|
40
|
- defp process_rows(rows, columns, matrix, source, target) do
|
41
|
- Enum.reduce(1..rows, matrix, &process_columns(&1, columns, &2, source, target))
|
43
|
+ defp calculate_new_cell(x, y, acc, char, char) do
|
44
|
+ diag = Map.get(acc, {x - 1, y - 1}, 0)
|
45
|
+ Map.put(acc, {x, y}, diag)
|
42
46
|
end
|
43
47
|
|
44
|
- defp process_columns(i, columns, matrix, source, target) do
|
45
|
- Enum.reduce(1..columns, matrix, &process_cell(i, &1, &2, source, target))
|
46
|
- end
|
47
|
-
|
48
|
- defp process_cell(i, j, matrix, source, target) do
|
49
|
- source_char = Enum.at(source, i - 1)
|
50
|
- target_char = Enum.at(target, j - 1)
|
51
|
- new_cell = calculate_new_value(i, j, matrix, source_char, target_char)
|
52
|
- new_row = put_elem(elem(matrix, i), j, new_cell)
|
53
|
- put_elem(matrix, i, new_row)
|
54
|
- end
|
55
|
-
|
56
|
- defp calculate_new_value(i, j, matrix, source_char, target_char)
|
57
|
- when source_char == target_char do
|
58
|
- matrix |> elem(i - 1) |> elem(j - 1)
|
59
|
- end
|
60
|
-
|
61
|
- defp calculate_new_value(i, j, matrix, _source_char, _target_char) do
|
62
|
- delete = (elem(matrix, i - 1) |> elem(j)) + 1
|
63
|
- insert = (elem(matrix, i) |> elem(j - 1)) + 1
|
64
|
- substitute = (elem(matrix, i - 1) |> elem(j - 1)) + 1
|
65
|
- Enum.min([delete, insert, substitute])
|
48
|
+ defp calculate_new_cell(x, y, acc, _source_char, _target_char) do
|
49
|
+ delete = Map.get(acc, {x, y - 1}, 0) |> Kernel.+(1)
|
50
|
+ insert = Map.get(acc, {x - 1, y}, 0) |> Kernel.+(1)
|
51
|
+ sub = Map.get(acc, {x - 1, y - 1}, 0) |> Kernel.+(1)
|
52
|
+ min = Enum.min([delete, insert, sub])
|
53
|
+ Map.put(acc, {x, y}, min)
|
66
54
|
end
|
67
55
|
|
68
56
|
defp get_distance(matrix, source_len, target_len) do
|
69
|
- matrix |> elem(source_len) |> elem(target_len)
|
57
|
+ Map.get(matrix, {target_len, source_len})
|
70
58
|
end
|
71
59
|
end
|
changed
mix.exs
|
@@ -1,7 +1,7 @@
|
1
1
|
defmodule Leven.MixProject do
|
2
2
|
use Mix.Project
|
3
3
|
|
4
|
- @version "1.0.0"
|
4
|
+ @version "1.0.1"
|
5
5
|
@repo_url "https://github.com/elliotekj/leven"
|
6
6
|
|
7
7
|
def project do
|
|
@@ -35,7 +35,7 @@ defmodule Leven.MixProject do
|
35
35
|
|
36
36
|
defp aliases do
|
37
37
|
[
|
38
|
- bench: "run bench/benchmarks.exs"
|
38
|
+ bench: "run bench.exs"
|
39
39
|
]
|
40
40
|
end
|