Skip to content

Commit

Permalink
Adding nlp.Hunspell
Browse files Browse the repository at this point in the history
  • Loading branch information
marigostra committed Jun 20, 2022
1 parent 015eedc commit 4f083d0
Show file tree
Hide file tree
Showing 2 changed files with 108 additions and 1 deletion.
27 changes: 26 additions & 1 deletion src/main/java/org/luwrain/controls/EditSpellChecking.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,34 @@ public EditSpellChecking(Luwrain luwrain )
luwrain.executeBkg(()->check(editArea, text));
}

/**
 * Schedules a background spell check of the whole content of the given area.
 *
 * Copies every line of the area into a map keyed by line index and passes
 * that map to {@code check()} through {@code luwrain.executeBkg()}.
 *
 * @param editArea The area whose lines must be checked
 */
public void initialChecking(EditArea editArea)
{
    final Lines content = editArea.getContent();
    final SortedMap<Integer, String> linesByIndex = new TreeMap<>();
    int index = 0;
    // The line count is re-read on every iteration, exactly as a plain index loop would do
    while (index < content.getLineCount())
    {
        linesByIndex.put(Integer.valueOf(index), content.getLine(index));
        index++;
    }
    luwrain.executeBkg(()->check(editArea, linesByIndex));
}

/**
 * Removes all spelling marks from every line of the given area.
 *
 * A mark is dropped when its mark object is an instance of
 * {@code SpellProblem}; marks without a mark object and marks carrying any
 * other object are kept. Lines without marks are left untouched.
 *
 * @param editArea The area to clean up
 */
public void eraseSpellingMarks(EditArea editArea)
{
    editArea.update((lines, hotPoint)->{
        final int lineCount = lines.getLineCount();
        for(int lineIndex = 0;lineIndex < lineCount;lineIndex++)
        {
            final LineMarks lineMarks = lines.getLineMarks(lineIndex);
            if (lineMarks != null)
                lines.setLineMarks(lineIndex, lineMarks.filter((mark)->{
                    // Marks without an attached object cannot be spelling marks
                    if (mark.getMarkObject() == null)
                        return true;
                    return !(mark.getMarkObject() instanceof SpellProblem);
                }));
        }
        // The update callback always reports false
        return false;
    });
}


private void check(EditArea editArea, SortedMap<Integer, String> text)
{
Log.debug(LOG_COMPONENT, "Checking lines: " + text.size());
// Log.debug(LOG_COMPONENT, "Checking lines: " + text.size());
final List<String> textLines = new ArrayList<>();
for(Map.Entry<Integer, String> e: text.entrySet())
textLines.add(e.getValue());
Expand Down
82 changes: 82 additions & 0 deletions src/main/java/org/luwrain/nlp/Hunspell.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
// * Copyright (C) 2021 Pavel Bakhvalov

package org.luwrain.io.nlp;

import dumonts.hunspell.bindings.HunspellLibrary;
import org.bridj.Pointer;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Collectors;

/**
 * A wrapper around a native Hunspell handle created through the BridJ
 * bindings in {@code HunspellLibrary}.
 *
 * An instance owns one native handle from construction until {@link #close()}
 * is called. Every operation invoked after closing fails with
 * {@link IllegalStateException}. The class performs no synchronization of
 * its own.
 */
public final class Hunspell implements AutoCloseable
{
    // Set to null by close(), so a released handle is never passed to native code again
    private Pointer<HunspellLibrary.Hunhandle> handle;
    private final Charset charset;

    /**
     * Creates a native Hunspell instance for the given dictionary.
     *
     * @param dictionary The path to the dictionary ({@code .dic}) file
     * @param affix The path to the affix ({@code .aff}) file
     * @throws RuntimeException If the native library cannot be linked or the native instance cannot be created
     */
    public Hunspell(Path dictionary, Path affix)
    {
        final Pointer<HunspellLibrary.Hunhandle> created;
        final Charset dictionaryCharset;
        try {
            final Pointer<Byte> aff = Pointer.pointerToCString(affix.toString());
            final Pointer<Byte> dic = Pointer.pointerToCString(dictionary.toString());
            created = HunspellLibrary.Hunspell_create(aff, dic);
            // The handle must be validated before it is handed to any other native call
            if (created == null) {
                throw new RuntimeException("Unable to create Hunspell instance");
            }
            dictionaryCharset = Charset.forName(HunspellLibrary.Hunspell_get_dic_encoding(created).getCString());
        } catch (UnsatisfiedLinkError e) {
            throw new RuntimeException("Could not create hunspell instance. Please note that the Hunspell native bindings support only 64-bit platforms " +
                "(Linux, Windows, Mac) and that they require a 64-bit JVM (Java).", e);
        }
        this.handle = created;
        this.charset = dictionaryCharset;
    }

    /**
     * Checks the spelling of a word.
     *
     * @param word The word to check
     * @return {@code true} if the native call returns a non-zero result, {@code false} otherwise
     * @throws IllegalStateException If this instance is already closed
     */
    public boolean spell(String word) {
        final Pointer<HunspellLibrary.Hunhandle> h = openHandle();
        final int result = HunspellLibrary.Hunspell_spell(h, toNative(word));
        return result != 0;
    }

    /**
     * Passes a word to the native {@code Hunspell_add} call of this instance.
     *
     * @param word The word to add
     * @throws IllegalStateException If this instance is already closed
     */
    public void add(String word) {
        final Pointer<HunspellLibrary.Hunhandle> h = openHandle();
        HunspellLibrary.Hunspell_add(h, toNative(word));
    }

    /**
     * Requests suggestions for a word.
     *
     * @param word The word to get suggestions for
     * @return A new list with the suggestions, empty if there are none
     * @throws IllegalStateException If this instance is already closed
     */
    public List<String> suggest(String word) {
        final Pointer<HunspellLibrary.Hunhandle> h = openHandle();
        final Pointer<Byte> str = toNative(word);
        // Create pointer to native string array
        final Pointer<Pointer<Pointer<Byte>>> nativeSuggestionArray = Pointer.allocatePointerPointer(Byte.class);
        // Hunspell will allocate the array and fill it with suggestions
        final int suggestionCount = HunspellLibrary.Hunspell_suggest(h, nativeSuggestionArray, str);
        if (suggestionCount <= 0) {
            // Return early and don't try to free the array
            return new ArrayList<>();
        }
        try {
            // Ask bridj for a `java.util.List` that wraps `nativeSuggestionArray`
            final List<Pointer<Byte>> nativeSuggestionList = nativeSuggestionArray.get().validElements(suggestionCount).asList();
            // Convert C strings to Java strings
            return nativeSuggestionList.stream()
                .map(p -> p.getStringAtOffset(0, Pointer.StringType.C, charset))
                .collect(Collectors.toList());
        } finally {
            // The Java strings own their memory, so the native list is released
            // here, including the case when the conversion above fails
            HunspellLibrary.Hunspell_free_list(h, nativeSuggestionArray, suggestionCount);
        }
    }

    /**
     * Destroys the native instance. Calling this method more than once has
     * no effect after the first call.
     *
     * @throws IOException Declared for compatibility with existing callers; this implementation does not throw it
     */
    @Override
    public void close() throws IOException {
        final Pointer<HunspellLibrary.Hunhandle> h = handle;
        if (h != null) {
            // Clear the field first so that a repeated close() cannot destroy the handle twice
            handle = null;
            HunspellLibrary.Hunspell_destroy(h);
        }
    }

    // Returns the native handle or fails if this instance is already closed
    private Pointer<HunspellLibrary.Hunhandle> openHandle() {
        final Pointer<HunspellLibrary.Hunhandle> h = handle;
        if (h == null) {
            throw new IllegalStateException("Attempt to use hunspell instance after closing");
        }
        return h;
    }

    // Encodes a word as a native C string in the charset of the dictionary
    @SuppressWarnings("unchecked")
    private Pointer<Byte> toNative(String word) {
        return (Pointer<Byte>) Pointer.pointerToString(word, Pointer.StringType.C, charset);
    }
}

0 comments on commit 4f083d0

Please sign in to comment.