Skip to content

Commit

Permalink
Fixing SpellExclusion
Browse files Browse the repository at this point in the history
  • Loading branch information
marigostra committed Jun 23, 2022
1 parent dc5c527 commit 635f30e
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 4 deletions.
2 changes: 1 addition & 1 deletion src/main/java/org/luwrain/nlp/SpellChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,9 @@

import java.util.*;


/**
 * Contract for a language-specific spell checker.
 */
public interface SpellChecker
{
/**
 * Checks the given text and returns the problems found in it.
 * NOTE(review): callers appear to treat the problems' start/end values as
 * offsets into {@code text} - confirm against SpellProblem.
 */
List<SpellProblem> check(String text);
/**
 * Returns a list of suggested replacements for the given word.
 */
List<String> suggestCorrections(String word);
/**
 * Returns the exclusions associated with this checker.
 * An implementation may return {@code null} when no exclusions are available,
 * so callers must check for {@code null} before using the result.
 */
SpellExclusion getExclusion();
}
15 changes: 13 additions & 2 deletions src/main/java/org/luwrain/nlp/SpellText.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@ public class SpellText
//Presumably the component name used for log messages of this class - usage is not visible here, confirm
static public final String
LOG_COMPONENT = "spelling";

//Exclusions taken from the checker in the constructor; may be null, in which case no filtering is applied
final SpellExclusion exclusion;
//The text being checked; an empty string when the constructor receives an empty array
final String text;
//Fragments of the text; starts empty (where it is filled is not visible in this excerpt)
final List<Fragment> fragments = new ArrayList<>();
//Spell problems for the text - NOTE(review): presumably the list returned by getProblems(), confirm
final List<SpellProblem> problems;

public SpellText(String[] text, SpellChecker checker)
{
this.exclusion = checker.getExclusion();
if (text.length == 0)
{
this.text = "";
Expand Down Expand Up @@ -93,12 +95,21 @@ public List<SpellProblem> getProblems()

private List<SpellProblem> filterExclusions(String text, List<SpellProblem> source)
{
if (this.exclusion == null)
return source;
//FIXME: Better to do this on class creation
final Set<String> wordsToExclude = new HashSet<>();
for(SpellExclusion.Exclusion e: exclusion.getExclusions())
{
final String word = e.getText().trim();
if (!word.isEmpty())
wordsToExclude.add(word.toUpperCase());
}
final List<SpellProblem> res = new ArrayList<>();
for(SpellProblem p: source)
{
final String f = text.substring(p.getStart(), p.getEnd()).toUpperCase();
Log.debug("proba", "fragment " + f);
if (f.equals(""))
if (wordsToExclude.contains(f))
continue;
res.add(p);
}
Expand Down
9 changes: 8 additions & 1 deletion src/main/java/org/luwrain/nlp/ru/spell/RuSpellChecker.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ public final class RuSpellChecker implements SpellChecker
{
//LanguageTool instance; the unit-test constructor builds it for the Russian language
final JLanguageTool langTool;
//Hunspell dictionary used for suggestions; null when created through the unit-test constructor
final Hunspell hunspell;
//Exclusions loaded in the main constructor; null when created through the unit-test constructor
final SpellExclusion exclusion;

public RuSpellChecker(Luwrain luwrain)
{
Expand All @@ -40,13 +41,16 @@ public RuSpellChecker(Luwrain luwrain)
dictFile = new File(hunspellDataDir, "ru_RU.dic"),
affFile = new File(hunspellDataDir, "ru_RU.aff");
this.hunspell = new Hunspell(dictFile.getAbsolutePath(), affFile.getAbsolutePath());
this.exclusion = new SpellExclusion(luwrain);
this.exclusion.load();
}

//Just for unit tests
RuSpellChecker()
{
this.langTool = new JLanguageTool(new Russian());
this.hunspell = null;
this.exclusion = null;
}

@Override public List<SpellProblem> check(String text)
Expand All @@ -71,5 +75,8 @@ public RuSpellChecker(Luwrain luwrain)
return hunspell.suggest(word);
}


/**
 * Returns the exclusions held by this checker.
 *
 * @return the exclusions object, or {@code null} if this instance was
 *         created through the unit-test constructor
 */
@Override
public SpellExclusion getExclusion()
{
    return this.exclusion;
}
}

0 comments on commit 635f30e

Please sign in to comment.