Skip to content

Commit

Permalink
Add a test of the icepahc operations Stanza will use to prepare the I…
Browse files Browse the repository at this point in the history
…celandic treebank
  • Loading branch information
AngledLuffa committed May 18, 2024
1 parent bb4d17f commit ef31e6b
Showing 1 changed file with 20 additions and 0 deletions.
20 changes: 20 additions & 0 deletions test/src/edu/stanford/nlp/trees/tregex/tsurgeon/TsurgeonTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -430,6 +430,26 @@ public void testRelabel() {
"(barfoo (curlew 0) (avocet 1))");
}

/**
 * Exercises two chained relabel operations on a sentence taken from icepahc.
 * <br>
 * Stage one rewrites every "form-lemma" leaf so that only the form remains.
 * Stage two takes a noun leaf ending in "$" whose N- parent is immediately
 * followed by a D- sibling with a leaf starting with "$", and relabels the
 * noun leaf with the two captured pieces joined together.  Only the noun
 * leaf is relabeled, so the determiner node is still present in the
 * expected tree.
 */
public void testRelabelICE() {
  // Input as it appears in the treebank: each leaf is written as form-lemma
  final String withLemmas = "( (IP-MAT (NP-SBJ (PRO-N Það-það)) (BEPI er-vera) (ADVP (ADV eiginlega-eiginlega)) (ADJP (NEG ekki-ekki) (ADJ-N hægt-hægur)) (IP-INF (TO að-að) (VB lýsa-lýsa)) (NP-OB1 (N-D tilfinningu$-tilfinning) (D-D $nni-hinn)) (IP-INF (TO að-að) (VB fá-fá)) (IP-INF (TO að-að) (VB taka-taka)) (NP-OB1 (N-A þátt-þáttur)) (PP (P í-í) (NP (D-D þessu-þessi))) (, ,-,) (VBPI segir-segja) (NP-SBJ (NPR-N Sverrir-sverrir) (NPR-N Ingi-ingi)) (. .-.)))";

  // Expected result of stage one, which also serves as the input to stage two
  final String withoutLemmas = "( (IP-MAT (NP-SBJ (PRO-N Það)) (BEPI er) (ADVP (ADV eiginlega)) (ADJP (NEG ekki) (ADJ-N hægt)) (IP-INF (TO að) (VB lýsa)) (NP-OB1 (N-D tilfinningu$) (D-D $nni)) (IP-INF (TO að) (VB fá)) (IP-INF (TO að) (VB taka)) (NP-OB1 (N-A þátt)) (PP (P í) (NP (D-D þessu))) (, ,) (VBPI segir) (NP-SBJ (NPR-N Sverrir) (NPR-N Ingi)) (. .)))";

  // Expected result of stage two: the noun leaf now reads "tilfinningunni"
  final String withJoinedNoun = "( (IP-MAT (NP-SBJ (PRO-N Það)) (BEPI er) (ADVP (ADV eiginlega)) (ADJP (NEG ekki) (ADJ-N hægt)) (IP-INF (TO að) (VB lýsa)) (NP-OB1 (N-D tilfinningunni) (D-D $nni)) (IP-INF (TO að) (VB fá)) (IP-INF (TO að) (VB taka)) (NP-OB1 (N-A þátt)) (PP (P í) (NP (D-D þessu))) (, ,) (VBPI segir) (NP-SBJ (NPR-N Sverrir) (NPR-N Ingi)) (. .)))";

  // Stage one: drop the lemma from each childless node
  final TregexPattern lemmaMatcher =
      TregexPattern.compile("/^(.+)-.+$/#1%form=word !< __");
  final TsurgeonPattern dropLemma =
      Tsurgeon.parseOperation("relabel word /^(.+)-.+$/%{form}/");
  runTest(lemmaMatcher, dropLemma, withLemmas, withoutLemmas);

  // Stage two: glue the detached determiner text onto the preceding noun
  final TregexPattern splitNounMatcher =
      TregexPattern.compile("/^N-/ < /^([^$]+)[$]$/#1%noun=noun $+ (/^D-/ < /^[$]([^$]+)$/#1%det=det)");
  final TsurgeonPattern joinNoun =
      Tsurgeon.parseOperation("relabel noun /^.+$/%{noun}%{det}/");
  runTest(splitNounMatcher, joinNoun, withoutLemmas, withJoinedNoun);
}

public void testReplaceNode() {
TsurgeonPattern tsurgeon = Tsurgeon.parseOperation("replace foo blah");
TregexPattern tregex = TregexPattern.compile("B=foo : C=blah");
Expand Down

0 comments on commit ef31e6b

Please sign in to comment.