Adding SpellExclusion

luwrain · Jun 23, 2022 · dc5c527 · dc5c527
1 parent d96c85f
commit dc5c527
Show file tree

Hide file tree

Showing 2 changed files with 114 additions and 2 deletions.
diff --git a/src/main/java/org/luwrain/nlp/SpellExclusion.java b/src/main/java/org/luwrain/nlp/SpellExclusion.java
@@ -0,0 +1,98 @@
+/*
+ Copyright 2012-2022 Michael Pozhidaev <[email protected]>
+
+ This file is part of LUWRAIN.
+
+ LUWRAIN is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public
+ License as published by the Free Software Foundation; either
+ version 3 of the License, or (at your option) any later version.
+
+ LUWRAIN is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+*/
+
+package org.luwrain.nlp;
+
+import java.util.*;
+import java.io.*;
+import java.lang.reflect.*;
+
+import com.google.gson.*;
+import com.google.gson.reflect.*;
+
+import org.luwrain.core.*;
+import org.luwrain.controls.*;
+import org.luwrain.controls.DefaultLineMarks.*;
+import static org.luwrain.util.RangeUtils.*;
+
+/**
+ * Keeps the list of spelling exclusions and stores it as JSON.
+ *
+ * The list is read from and written to the file {@code spelling-exclusion.json}
+ * located in the directory returned by
+ * {@code luwrain.getAppDataDir("luwrain.nlp")}. The content is encoded in UTF-8.
+ */
+public class SpellExclusion
+{
+    static private final String
+        NLP_DIR = "luwrain.nlp",
+        EXCLUSION_FILE = "spelling-exclusion.json";
+    static final Type
+        EXCLUSION_LIST_TYPE = new TypeToken<List<Exclusion>>(){}.getType();
+    // The encoding used both for reading and for writing the exclusions file
+    static private final java.nio.charset.Charset
+        CHARSET = java.nio.charset.StandardCharsets.UTF_8;
+
+    private final Luwrain luwrain;
+    private final List<Exclusion> exclusions = new ArrayList<>();
+
+    /**
+     * Creates an instance with an empty list of exclusions; nothing is read
+     * from disk until {@link #load()} is called.
+     *
+     * @param luwrain The core object used to locate the data directory, checked with {@code NullCheck.notNull()}
+     */
+    public SpellExclusion(Luwrain luwrain)
+    {
+        NullCheck.notNull(luwrain, "luwrain");
+        this.luwrain = luwrain;
+    }
+
+    /**
+     * Returns the list of exclusions held by this object.
+     *
+     * @return The internal list itself, not a copy: changes made through it are what {@link #save()} writes
+     */
+    public List<Exclusion> getExclusions()
+    {
+        return this.exclusions;
+    }
+
+    /**
+     * Replaces the current exclusions with the content of the exclusions file.
+     *
+     * If the file does not exist, the list simply becomes empty. If reading
+     * fails, the current list is left untouched.
+     *
+     * @throws RuntimeException If the file exists but cannot be read, the original {@link IOException} is the cause
+     */
+    public void load()
+    {
+        final File file = getFile();
+        // Nothing has been saved yet, so there are no exclusions to read
+        if (!file.exists())
+        {
+            exclusions.clear();
+            return;
+        }
+        final List<Exclusion> loaded;
+        try(final BufferedReader r = new BufferedReader(new InputStreamReader(new FileInputStream(file), CHARSET)))
+        {
+            loaded = new Gson().fromJson(r, EXCLUSION_LIST_TYPE);
+        }
+        catch(IOException e)
+        {
+            throw new RuntimeException(e);
+        }
+        exclusions.clear();
+        // Gson returns null for an empty document
+        if (loaded == null)
+            return;
+        // A JSON array may contain null items, they carry no exclusion
+        for(Exclusion e: loaded)
+            if (e != null)
+                exclusions.add(e);
+    }
+
+    /**
+     * Writes the current exclusions to the exclusions file as JSON,
+     * overwriting the previous content.
+     *
+     * @throws RuntimeException If the file cannot be written, the original {@link IOException} is the cause
+     */
+    public void save()
+    {
+        try(final BufferedWriter w = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(getFile()), CHARSET)))
+        {
+            new Gson().toJson(exclusions, w);
+            w.flush();
+        }
+        catch(IOException e)
+        {
+            throw new RuntimeException(e);
+        }
+    }
+
+    // Builds the location of the exclusions file inside the NLP data directory
+    private File getFile()
+    {
+        return new File(luwrain.getAppDataDir(NLP_DIR).toFile(), EXCLUSION_FILE);
+    }
+
+    /**
+     * A single exclusion, described by its type and its text. The getters
+     * never return {@code null}: an unset value is reported as an empty string.
+     */
+    static public final class Exclusion
+    {
+        String type = null, text = null;
+        public String getType() { return type != null?type:""; }
+        public void setType(String type) { this.type = type; }
+        public String getText() { return text != null?text:""; }
+        public void setText(String text) { this.text = text; }
+    }
+}
diff --git a/src/main/java/org/luwrain/nlp/SpellText.java b/src/main/java/org/luwrain/nlp/SpellText.java
@@ -45,7 +45,7 @@ public SpellText(String[] text, SpellChecker checker)
  this.text = text[0];
  fragments.add(new Fragment(0, text[0].length()));
  Log.debug(LOG_COMPONENT, "Checking '" + this.text + "'");
-  this.problems = checker.check(this.text);
+ this.problems = filterExclusions(this.text, checker.check(this.text));
  return;
  }
  final StringBuilder b = new StringBuilder();
@@ -60,7 +60,7 @@ public SpellText(String[] text, SpellChecker checker)
  if (fragments.size() != text.length)
  throw new IllegalStateException("the fragments and text arrays have different length");
  Log.debug(LOG_COMPONENT, "Checking '" + this.text + "'");
- this.problems = checker.check(this.text);
+  this.problems = filterExclusions(this.text, checker.check(this.text));
  }
 
  public List<List<LineMarks.Mark>> buildMarks()
@@ -91,6 +91,20 @@ public List<SpellProblem> getProblems()
  return problems;
  }
 
+ /**
+  * Builds a new list with the problems which cover a non-empty fragment of
+  * the text, keeping their order.
+  *
+  * The upper-cased fragment of every problem is written to the debug log.
+  * No exclusion list is consulted in this method; only the problems with an
+  * empty fragment are dropped.
+  *
+  * @param text The text the problem positions refer to
+  * @param source The problems to filter, this list is not modified
+  * @return A newly created list with the remaining problems
+  */
+ private List<SpellProblem> filterExclusions(String text, List<SpellProblem> source)
+ {
+     final List<SpellProblem> kept = new ArrayList<>();
+     for(final SpellProblem problem: source)
+     {
+         final int from = problem.getStart();
+         final int to = problem.getEnd();
+         final String fragment = text.substring(from, to).toUpperCase();
+         Log.debug("proba", "fragment " + fragment);
+         // A problem covering no characters has nothing to report
+         if (!fragment.isEmpty())
+             kept.add(problem);
+     }
+     return kept;
+ }
+
  static public final class Fragment
  {
  final int posFrom, posTo;