Add an Eval which checks the accuracy of the top layer

dzelemba · Jun 16, 2014 · 1400a9e · 1400a9e
1 parent 53dfcf2
commit 1400a9e
Show file tree

Hide file tree

Showing 3 changed files with 67 additions and 5 deletions.
diff --git a/src/edu/stanford/nlp/parser/lexparser/EvaluateTreebank.java b/src/edu/stanford/nlp/parser/lexparser/EvaluateTreebank.java
@@ -31,6 +31,7 @@
 import edu.stanford.nlp.parser.metrics.LeafAncestorEval;
 import edu.stanford.nlp.parser.metrics.ParserQueryEval;
 import edu.stanford.nlp.parser.metrics.TaggingEval;
+import edu.stanford.nlp.parser.metrics.TopMatchEval;
 import edu.stanford.nlp.parser.metrics.UnlabeledAttachmentEval;
 import edu.stanford.nlp.trees.LeftHeadFinder;
 import edu.stanford.nlp.trees.Tree;
@@ -204,6 +205,9 @@ public EvaluateTreebank(Options op, Lexicon lex, ParserGrammar pqFactory, Functi
     if (Boolean.parseBoolean(op.testOptions.evals.getProperty("factLL"))) {
       factLL = new AbstractEval.ScoreEval("factLL", runningAverages);
     }
+    if (Boolean.parseBoolean(op.testOptions.evals.getProperty("topMatch"))) {
+      evals.add(new TopMatchEval("topMatch", runningAverages));
+    }
     // this one is for the various k Good/Best options.  Just for individual results
     kGoodLB = new Evalb("kGood LP/LR", false);
 

diff --git a/src/edu/stanford/nlp/parser/metrics/TopMatchEval.java b/src/edu/stanford/nlp/parser/metrics/TopMatchEval.java
@@ -0,0 +1,37 @@
+package edu.stanford.nlp.parser.metrics;
+
+import java.util.Collections;
+import java.util.Set;
+
+import edu.stanford.nlp.trees.Constituent;
+import edu.stanford.nlp.trees.ConstituentFactory;
+import edu.stanford.nlp.trees.LabeledScoredConstituentFactory;
+import edu.stanford.nlp.trees.Tree;
+
+/**
+ * Measures accuracy using only the very top of each parse tree, e.g. whether it is S, SINV, etc.
+ * Objects to score come from <code>Tree.constituents(cf, 0)</code>; a null tree yields an empty set.
+ * @author John Bauer
+ */
+public class TopMatchEval extends AbstractEval {
+
+  private final ConstituentFactory cf;
+
+  public TopMatchEval(String name, boolean runningAverages) {
+    super(name, runningAverages);
+    cf = new LabeledScoredConstituentFactory();
+  }
+
+  @Override
+  protected Set<Constituent> makeObjects(Tree tree) {
+    if (tree == null) {
+      return Collections.emptySet();
+    }
+    // The eval trees have no separate root level; they start directly with the
+    // S/SINV/FRAG/whatever node, so only collect constituents at depth 0 (maxDepth = 0)
+    Set<Constituent> result = tree.constituents(cf, 0);
+    return result;
+  }
+
+}
+
diff --git a/src/edu/stanford/nlp/trees/Tree.java b/src/edu/stanford/nlp/trees/Tree.java
@@ -452,6 +452,24 @@ public Set<Constituent> constituents(ConstituentFactory cf) {
     return constituents(cf,false);
   }
 
+  /**
+   * Returns the Constituents generated by the parse tree, down to a maximum depth.
+   * Documented as including preterminal categories but not leaves -- NOTE(review): the
+   * recursive helper seems to return on preterminals without adding them; confirm.
+   *
+   * @param cf ConstituentFactory used to build the Constituent objects
+   * @param maxDepth The maximum depth at which to add constituents,
+   *                 where 0 is the node this method is called on.  Negative
+   *                 maxDepth indicates no maximum.
+   * @return a Set of the constituents, as built by <code>cf</code>
+   *         (in the current implementation, created by <code>Generics.newHashSet()</code>)
+   */
+  public Set<Constituent> constituents(ConstituentFactory cf, int maxDepth) {
+    Set<Constituent> constituentsSet = Generics.newHashSet();
+    constituents(constituentsSet, 0, cf, false, null, maxDepth, 0);
+    return constituentsSet;
+  }
+
   /**
    * Returns the Constituents generated by the parse tree.
    * The Constituents of a sentence include the preterminal categories
@@ -464,13 +482,13 @@ public Set<Constituent> constituents(ConstituentFactory cf) {
    */
   public Set<Constituent> constituents(ConstituentFactory cf, boolean charLevel) {
     Set<Constituent> constituentsSet = Generics.newHashSet();
-    constituents(constituentsSet, 0, cf, charLevel, null);
+    constituents(constituentsSet, 0, cf, charLevel, null, -1, 0);
     return constituentsSet;
   }
 
   public Set<Constituent> constituents(ConstituentFactory cf, boolean charLevel, Filter<Tree> filter) {
     Set<Constituent> constituentsSet = Generics.newHashSet();
-    constituents(constituentsSet, 0, cf, charLevel, filter);
+    constituents(constituentsSet, 0, cf, charLevel, filter, -1, 0);
     return constituentsSet;
   }
 
@@ -521,9 +539,11 @@ private int constituentsNodes(int left) {
    * @param cf              ConstituentFactory used to build the Constituent objects
    * @param charLevel       If true, compute constituents without respect to whitespace. Otherwise, preserve whitespace boundaries.
    * @param filter          A filter to use to decide whether or not to add a tree as a constituent.
+   * @param maxDepth        The maximum depth at which to allow constituents.  Set to negative to indicate all depths allowed.
+   * @param depth           The current depth
    * @return Index of right frontier of Constituent
    */
-  private int constituents(Set<Constituent> constituentsSet, int left, ConstituentFactory cf, boolean charLevel, Filter<Tree> filter) {
+  private int constituents(Set<Constituent> constituentsSet, int left, ConstituentFactory cf, boolean charLevel, Filter<Tree> filter, int maxDepth, int depth) {
 
     if(isPreTerminal())
       return left + ((charLevel) ? firstChild().value().length() : 1);
@@ -535,11 +555,12 @@ private int constituents(Set<Constituent> constituentsSet, int left, Constituent
     //                       "; num daughters: " + children().length);
     Tree[] kids = children();
     for (Tree kid : kids) {
-      position = kid.constituents(constituentsSet, position, cf, charLevel, filter);
+      position = kid.constituents(constituentsSet, position, cf, charLevel, filter, maxDepth, depth + 1);
       // System.err.println("  position went to " + position);
     }
 
-    if (filter == null || filter.accept(this)) {
+    if ((filter == null || filter.accept(this)) &&
+        (maxDepth < 0 || depth <= maxDepth)) {
       //Compute span of entire tree at the end of recursion
       constituentsSet.add(cf.newConstituent(left, position - 1, label(), score()));
     }