Skip to content

Commit

Permalink
Adding SpellCheckerFactory
Browse files Browse the repository at this point in the history
  • Loading branch information
marigostra committed Jun 10, 2022
1 parent 06768e9 commit 10e7c9f
Show file tree
Hide file tree
Showing 7 changed files with 244 additions and 1 deletion.
38 changes: 38 additions & 0 deletions src/main/java/org/luwrain/nlp/SpellCheckerFactory.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
/*
Copyright 2012-2022 Michael Pozhidaev <[email protected]>
This file is part of LUWRAIN.
LUWRAIN is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
LUWRAIN is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
*/

package org.luwrain.nlp;

import org.luwrain.nlp.ru.spell.*;

/**
 * Creates spell checkers by language code.
 *
 * Only Russian ("ru", matched case-insensitively) is supported here. The
 * Russian checker is created on first request and the same instance is
 * returned by every later call, including calls made through other factory
 * instances, because it is cached in a static field.
 */
public final class SpellCheckerFactory
{
    // Lazily created Russian checker shared by all factory instances.
    static private RuSpellChecker ru = null;

    /**
     * Returns the spell checker for the given language.
     *
     * @param lang The language code, case-insensitive (e.g. "ru")
     * @return The checker for the requested language
     * @throws NullPointerException if {@code lang} is null
     * @throws IllegalArgumentException if the language is not supported
     */
    public SpellChecker newChecker(String lang)
    {
        java.util.Objects.requireNonNull(lang, "lang can't be null");
        // Locale.ROOT keeps the comparison independent of the default locale of the JVM.
        switch(lang.toUpperCase(java.util.Locale.ROOT))
        {
        case "RU": {
            if (ru == null)
                ru = new RuSpellChecker();
            return ru;
        }
        default:
            throw new IllegalArgumentException("Unknown language: " + lang);
        }
    }
}
6 changes: 5 additions & 1 deletion src/main/java/org/luwrain/nlp/SpellProblem.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,12 @@

package org.luwrain.nlp;

public interface SpellProblem
import org.luwrain.core.*;

/**
 * A single problem reported by a spell checker. Extends
 * {@code LineMarks.MarkObject}, so an instance can be attached to a line mark.
 */
public interface SpellProblem extends LineMarks.MarkObject
{
/** Returns the full comment describing the problem. */
String getComment();
/** Returns the short form of the comment describing the problem. */
String getShortComment();
/**
 * Returns the position where the problem starts.
 * NOTE(review): presumably a character offset in the checked text; confirm against implementations.
 */
int getStart();
/**
 * Returns the position where the problem ends.
 * NOTE(review): whether this bound is inclusive or exclusive is not visible here; confirm against implementations.
 */
int getEnd();
}
23 changes: 23 additions & 0 deletions src/main/java/org/luwrain/nlp/SpellText.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,11 @@

import java.util.*;

import org.luwrain.core.*;
import org.luwrain.controls.*;
import org.luwrain.controls.DefaultLineMarks.*;
import static org.luwrain.util.RangeUtils.*;

public class SpellText
{
final String text;
Expand Down Expand Up @@ -51,6 +56,24 @@ public SpellText(String[] text, SpellChecker checker)
this.problems = checker.check(this.text);
}

/**
 * Builds line marks for every fragment of this text.
 *
 * For each fragment, every known problem is intersected with the fragment
 * bounds using {@code commonRangeByBounds()}; a problem with no common range
 * is skipped for that fragment. The positions of the created marks are
 * relative to the beginning of the fragment.
 *
 * @return One list of marks per fragment, in the order of the fragments; a list is empty if the fragment has no problems
 */
public List<List<LineMarks.Mark>> buildMarks()
{
    final List<List<LineMarks.Mark>> res = new ArrayList<>();
    for(Fragment f: fragments)
    {
        // The diamond operator instead of the raw type keeps the list type-checked.
        final List<LineMarks.Mark> a = new ArrayList<>();
        for(SpellProblem p: problems)
        {
            final int[] range = commonRangeByBounds(p.getStart(), p.getEnd(), f.posFrom, f.posTo);
            if (range == null)
                continue;
            // Shifting the range from text positions to positions inside the fragment
            a.add(new MarkImpl(LineMarks.Mark.Type.WEAK, range[0] - f.posFrom, range[1] - f.posFrom, p));
        }
        res.add(a);
    }
    return res;
}

static public final class Fragment
{
final int posFrom, posTo;
Expand Down
47 changes: 47 additions & 0 deletions src/main/java/org/luwrain/nlp/ru/spell/Problem.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/*
Copyright 2012-2022 Michael Pozhidaev <[email protected]>
This file is part of LUWRAIN.
LUWRAIN is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
LUWRAIN is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
*/

package org.luwrain.nlp.ru.spell;

import org.languagetool.rules.*;

import org.luwrain.nlp.*;

/**
 * A spelling problem backed by a LanguageTool {@code RuleMatch}.
 *
 * The messages and the position of the match are copied at construction
 * time, so the instance does not keep a reference to the match.
 */
final class Problem implements SpellProblem
{
    private final String message, shortMessage;
    // Bounds of the match as reported by LanguageTool for the checked text.
    private final int start, end;

    /**
     * @param match The LanguageTool match to take the messages and the position from
     */
    Problem(RuleMatch match)
    {
        this.message = match.getMessage();
        this.shortMessage = match.getShortMessage();
        // Previously the bounds were hard-coded to zero, which made every problem
        // point to the very beginning of the text.
        this.start = match.getFromPos();
        this.end = match.getToPos();
    }

    @Override public String getComment() { return message; }
    @Override public String getShortComment(){ return shortMessage; }
    /** Returns the start position of the match in the checked text. */
    @Override public int getStart(){ return start; }
    /**
     * Returns the end position of the match in the checked text.
     * NOTE(review): taken as is from {@code RuleMatch.getToPos()}; confirm that consumers expect the same bound convention.
     */
    @Override public int getEnd(){ return end;}

    @Override public String toString()
    {
        return message;
    }
}
54 changes: 54 additions & 0 deletions src/main/java/org/luwrain/nlp/ru/spell/RuSpellChecker.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/*
Copyright 2012-2022 Michael Pozhidaev <[email protected]>
This file is part of LUWRAIN.
LUWRAIN is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
LUWRAIN is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
*/

package org.luwrain.nlp.ru.spell;

import java.util.*;
import java.io.*;

import org.languagetool.*;
import org.languagetool.language.*;
import org.languagetool.rules.*;
import org.languagetool.rules.spelling.*;

import org.luwrain.core.*;
import org.luwrain.nlp.*;

/**
 * The spell checker for Russian, delegating to LanguageTool.
 *
 * NOTE(review): a single {@code JLanguageTool} instance is reused for all
 * calls; confirm its thread-safety before sharing this checker between threads.
 */
public final class RuSpellChecker implements SpellChecker
{
    final JLanguageTool langTool;

    /** Creates the checker with a LanguageTool instance configured for Russian. */
    public RuSpellChecker()
    {
        this.langTool = new JLanguageTool(new Russian());
    }

    /**
     * Checks the text and returns one problem per LanguageTool match.
     *
     * @param text The text to check
     * @return The list of found problems, empty if there are no matches
     * @throws UncheckedIOException if LanguageTool fails with an I/O error
     */
    @Override public List<SpellProblem> check(String text)
    {
        final List<SpellProblem> res = new ArrayList<>();
        try {
            // No console output here: a library class must not write debug data to stdout.
            for(RuleMatch m: langTool.check(text))
                res.add(new Problem(m));
            return res;
        }
        catch(IOException e)
        {
            // Still a RuntimeException for the callers, but the I/O nature of the failure is kept.
            throw new UncheckedIOException("Unable to check the text with LanguageTool", e);
        }
    }
}
35 changes: 35 additions & 0 deletions src/test/java/org/luwrain/io/nlp/SpellTextTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
/*
Copyright 2012-2022 Michael Pozhidaev <[email protected]>
This file is part of LUWRAIN.
LUWRAIN is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
LUWRAIN is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
*/

package org.luwrain.nlp;

import org.junit.*;

import org.luwrain.core.*;

/**
 * Tests for {@code SpellText}. Currently only a skeleton: a Russian checker
 * is obtained before each test, but no assertions are made yet.
 */
public class SpellTextTest extends Assert
{
    // Recreated through the factory before every test method.
    private SpellChecker checker;

    @Before public void create()
    {
        final SpellCheckerFactory factory = new SpellCheckerFactory();
        this.checker = factory.newChecker("ru");
    }

    @Test public void main()
    {
        // Intentionally empty so far
    }
}
42 changes: 42 additions & 0 deletions src/test/java/org/luwrain/io/nlp/ru/spell/RuSpellCheckerTest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/*
Copyright 2012-2022 Michael Pozhidaev <[email protected]>
This file is part of LUWRAIN.
LUWRAIN is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
LUWRAIN is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
*/


package org.luwrain.nlp.ru.spell;

//https://languagetool.org/development/api/org/languagetool/rules/spelling/SpellingCheckRule.html


import java.io.*;
import java.net.*;

import org.junit.*;

import org.luwrain.core.*;

/**
 * Tests for {@code RuSpellChecker}. Currently only a skeleton: a new checker
 * is constructed before each test, but no assertions are made yet.
 */
public class RuSpellCheckerTest extends Assert
{
    // A fresh checker instance for every test method.
    private RuSpellChecker c;

    @Before public void create()
    {
        this.c = new RuSpellChecker();
    }

    @Test public void main()
    {
        // Intentionally empty so far
    }
}

0 comments on commit 10e7c9f

Please sign in to comment.