From e4ffa9e6a3afcac88f9bdae9eb79cdf3625aa66e Mon Sep 17 00:00:00 2001
From: Spence Green <spence@spencegreen.com>
Date: Fri, 13 Sep 2013 10:50:03 -0700
Subject: [PATCH] Merge branch 'master' into prefix-decode

---
 .../CRFLogConditionalObjectiveFunction.java   | 14 ++++++---
 ...nditionalObjectiveFunctionWithDropout.java | 15 ++++-----
 src/edu/stanford/nlp/math/ArrayMath.java      | 31 ++++++++++++++++---
 .../AbstractCachingDiffFunction.java          |  6 ++--
 .../ColumnDocumentReaderAndWriter.java        |  2 +-
 .../nlp/sequences/SeqClassifierFlags.java     | 24 ++++++++++++++
 src/edu/stanford/nlp/trees/Tree.java          |  3 +-
 test/src/edu/stanford/nlp/trees/TreeTest.java | 10 ++++++
 8 files changed, 86 insertions(+), 19 deletions(-)
diff --git a/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunction.java b/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunction.java
index d6899a067c..8c4277b499 100644
--- a/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunction.java
+++ b/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunction.java
@@ -46,9 +46,9 @@ public class CRFLogConditionalObjectiveFunction extends AbstractStochasticCachin
   protected final int numClasses;
   public static Index<String> featureIndex;
   protected final int[] map;
-  protected final int[][][][] data;  // data[docIndex][tokenIndex][][]
-  protected final double[][][][] featureVal;  // featureVal[docIndex][tokenIndex][][]
-  protected final int[][] labels;    // labels[docIndex][tokenIndex]
+  protected int[][][][] data;  // data[docIndex][tokenIndex][][]
+  protected double[][][][] featureVal;  // featureVal[docIndex][tokenIndex][][]
+  protected int[][] labels;    // labels[docIndex][tokenIndex]
   protected final int domainDimension;
   protected double[][] eHat4Update, e4Update;
 
@@ -760,7 +760,13 @@ protected Pair<double[][][], double[][][]> getCondProbs(CRFCliqueTree cTree, int
     return new Pair<double[][][], double[][][]>(prevGivenCurr, nextGivenCurr);
   }
 
-  protected static void combine2DArr(double[][] combineInto, double[][] toBeCombined) {
+  protected void combine2DArr(double[][] combineInto, double[][] toBeCombined, double scale) {
+    for (int row = 0; row < toBeCombined.length; row++)  // combineInto += scale * toBeCombined, cell by cell
+      for (int col = 0; col < toBeCombined[row].length; col++)
+        combineInto[row][col] += scale * toBeCombined[row][col];
+  }
+
+  protected void combine2DArr(double[][] combineInto, double[][] toBeCombined) {
     for (int i = 0; i < toBeCombined.length; i++)
       for (int j = 0; j < toBeCombined[i].length; j++)
         combineInto[i][j] += toBeCombined[i][j];
diff --git a/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunctionWithDropout.java b/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunctionWithDropout.java
index 9a68180777..25033faae9 100644
--- a/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunctionWithDropout.java
+++ b/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunctionWithDropout.java
@@ -21,7 +21,7 @@ public class CRFLogConditionalObjectiveFunctionWithDropout extends CRFLogConditi
 
   private final double delta;
   private final double dropoutScale;
-  private double[][] dropoutPriorGrad;
+  private double[][] dropoutPriorGradTotal;
   private final boolean dropoutApprox;
   private double[][] weightSquare;
 
@@ -57,7 +57,7 @@ public ThreadsafeProcessor<Pair<Integer, Boolean>, Quadruple<Integer, Double, Ma
     this.delta = delta;
     this.dropoutScale = dropoutScale;
     this.dropoutApprox = dropoutApprox;
-    dropoutPriorGrad = empty2D();
+    dropoutPriorGradTotal = empty2D();
     this.unsupDropoutStartIndex = data.length;
     this.unsupDropoutScale = unsupDropoutScale;
     if (unsupDropoutData != null) {
@@ -727,6 +727,7 @@ public void calculate(double[] x) {
     // first index is feature index, second index is of possible labeling
     // double[][] E = empty2D();
     clear2D(E);
+    clear2D(dropoutPriorGradTotal);
 
     MulticoreWrapper<Pair<Integer, Boolean>, Quadruple<Integer, Double, Map<Integer, double[]>, Map<Integer, double[]>>> wrapper =
       new MulticoreWrapper<Pair<Integer, Boolean>, Quadruple<Integer, Double, Map<Integer, double[]>, Map<Integer, double[]>>>(multiThreadGrad, dropoutPriorThreadProcessor); 
@@ -747,9 +748,9 @@ public void calculate(double[] x) {
         Map<Integer, double[]> partialDropout = result.fourth();
         if (partialDropout != null) {
           if (isUnsup) {
-            combine2DArr(dropoutPriorGrad, partialDropout, unsupDropoutScale);
+            combine2DArr(dropoutPriorGradTotal, partialDropout, unsupDropoutScale);
           } else {
-            combine2DArr(dropoutPriorGrad, partialDropout);
+            combine2DArr(dropoutPriorGradTotal, partialDropout);
           }
         }
 
@@ -774,9 +775,9 @@ public void calculate(double[] x) {
       Map<Integer, double[]> partialDropout = result.fourth();
       if (partialDropout != null) {
         if (isUnsup) {
-          combine2DArr(dropoutPriorGrad, partialDropout, unsupDropoutScale);
+          combine2DArr(dropoutPriorGradTotal, partialDropout, unsupDropoutScale);
         } else {
-          combine2DArr(dropoutPriorGrad, partialDropout);
+          combine2DArr(dropoutPriorGradTotal, partialDropout);
         }
       }
 
@@ -805,7 +806,7 @@ public void calculate(double[] x) {
       for (int j = 0; j < E[i].length; j++) {
         // because we minimize -L(\theta)
         derivative[index] = (E[i][j] - Ehat[i][j]);
-        derivative[index] += dropoutScale * dropoutPriorGrad[i][j];
+        derivative[index] += dropoutScale * dropoutPriorGradTotal[i][j];
         if (VERBOSE) {
           System.err.println("deriv(" + i + "," + j + ") = " + E[i][j] + " - " + Ehat[i][j] + " = " + derivative[index]);
         }
diff --git a/src/edu/stanford/nlp/math/ArrayMath.java b/src/edu/stanford/nlp/math/ArrayMath.java
index a55a2b930f..4a9189e52c 100644
--- a/src/edu/stanford/nlp/math/ArrayMath.java
+++ b/src/edu/stanford/nlp/math/ArrayMath.java
@@ -434,6 +434,20 @@ public static void pairwiseMultiply(float[] a, float[] b, float[] result) {
     }
   }
 
+  /**
+   * Divides {@code a} by {@code b} elementwise, storing each quotient back into {@code a}.
+   * {@code b} is not checked for zeros: a zero divisor yields Infinity or NaN, not an exception.
+   * @throws IllegalArgumentException if the two arrays differ in length
+   */
+  public static void pairwiseDivideInPlace(double[] a, double[] b) {
+    if (a.length != b.length) {
+      throw new IllegalArgumentException("Array lengths differ: " + a.length + " vs " + b.length);
+    }
+    for (int i = 0; i < a.length; i++) {
+      a[i] /= b[i];
+    }
+  }
+
   // ERROR CHECKING
 
   public static boolean hasNaN(double[] a) {
@@ -2011,11 +2025,21 @@ public static void multiplyInto(double[] a, double[] b, double c) {
    * @param newSize
    */
   public static double[] copyOf(double[] original, int newSize) {
-     double[] a = new double[newSize];
-     System.arraycopy(original, 0, a, 0, original.length);
-     return a;
+    double[] a = new double[newSize];
+    System.arraycopy(original, 0, a, 0, Math.min(original.length, newSize));  // truncate when shrinking, like java.util.Arrays.copyOf
+    return a;
   }
 
+  /** Returns the sum of {@code -p * ln(p)} over {@code probs} (entropy in nats); zero entries are skipped. */
+  public static double entropy(double[] probs) {
+    double sum = 0.0;
+    for (double p : probs) {
+      if (p != 0.0) {  // 0 * log(0) would be NaN, so zeros contribute nothing
+        sum -= p * Math.log(p);
+      }
+    }
+    return sum;
+  }
 
   public static void assertFinite(double[] vector, String vectorName) throws InvalidElementException {
     for(int i=0; i<vector.length; i++){
@@ -2027,7 +2051,6 @@ public static void assertFinite(double[] vector, String vectorName) throws Inval
     }
   }
 
-
   public static class InvalidElementException extends RuntimeException {
 
     private static final long serialVersionUID = 1647150702529757545L;
diff --git a/src/edu/stanford/nlp/optimization/AbstractCachingDiffFunction.java b/src/edu/stanford/nlp/optimization/AbstractCachingDiffFunction.java
index 9b76e7fd71..53b5273310 100644
--- a/src/edu/stanford/nlp/optimization/AbstractCachingDiffFunction.java
+++ b/src/edu/stanford/nlp/optimization/AbstractCachingDiffFunction.java
@@ -2,8 +2,10 @@
 
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Random;
+import java.util.Set;
 
 /** A differentiable function that caches the last evaluation of its value and
  *  derivative.
@@ -35,7 +37,7 @@ public boolean gradientCheck(int numOfChecks, int numOfRandomChecks, double[] x)
     System.arraycopy(derivative, 0, savedDeriv, 0, derivative.length); 
     double oldX, plusVal, minusVal, appDeriv, calcDeriv, diff, pct = 0;
     int interval = Math.max(1, x.length / numOfChecks);
-    List<Integer> indicesToCheck = new ArrayList<Integer>();
+    Set<Integer> indicesToCheck = new HashSet<Integer>();
     for (int paramIndex = 0; paramIndex < xLen; paramIndex+=interval) {
       indicesToCheck.add(paramIndex);
     }
@@ -110,7 +112,7 @@ protected static void copy(double[] copy, double[] orig) {
     System.arraycopy(orig, 0, copy, 0, orig.length);
   }
 
-  void ensure(double[] x) {
+  public void ensure(double[] x) {
     if (Arrays.equals(x, lastX)) {
       return;
     }
diff --git a/src/edu/stanford/nlp/sequences/ColumnDocumentReaderAndWriter.java b/src/edu/stanford/nlp/sequences/ColumnDocumentReaderAndWriter.java
index b3633d128d..7f2a53525b 100644
--- a/src/edu/stanford/nlp/sequences/ColumnDocumentReaderAndWriter.java
+++ b/src/edu/stanford/nlp/sequences/ColumnDocumentReaderAndWriter.java
@@ -45,7 +45,7 @@ public void init(SeqClassifierFlags flags) {
   public void init(String map) {
 //    this.flags = null;
     this.map = StringUtils.mapStringToArray(map);
-    factory = DelimitRegExIterator.getFactory("\n(\\s*\n)+", new ColumnDocParser());
+    factory = DelimitRegExIterator.getFactory("\n(?:\\s*\n)+", new ColumnDocParser());  // regex: a newline followed by one or more whitespace-only lines; (?:...) is non-capturing
   }
 
   @Override
diff --git a/src/edu/stanford/nlp/sequences/SeqClassifierFlags.java b/src/edu/stanford/nlp/sequences/SeqClassifierFlags.java
index 5292472eb2..77f6753b9a 100644
--- a/src/edu/stanford/nlp/sequences/SeqClassifierFlags.java
+++ b/src/edu/stanford/nlp/sequences/SeqClassifierFlags.java
@@ -988,6 +988,14 @@ public class SeqClassifierFlags implements Serializable {
   public transient String serializeFeatureIndexTo = null;
   public String loadFeatureIndexFromEN = null;
   public String loadFeatureIndexFromCH = null;
+  public double lambdaEN = 1.0;
+  public double lambdaCH = 1.0;
+  public boolean alternateTraining = false;
+  public boolean weightByEntropy = false;
+  public boolean useKL = false;
+  public boolean useHardGE = false;
+  public boolean useCRFforUnsup = false;
+  public boolean useGEforSup = false;
 
   // "ADD VARIABLES ABOVE HERE"
 
@@ -2454,6 +2462,22 @@ public void setProperties(Properties props, boolean printProps) {
         loadFeatureIndexFromEN = val;
       } else if (key.equalsIgnoreCase("loadFeatureIndexFromCH")){
         loadFeatureIndexFromCH = val;
+      } else if (key.equalsIgnoreCase("lambdaEN")){
+        lambdaEN = Double.parseDouble(val);
+      } else if (key.equalsIgnoreCase("lambdaCH")){
+        lambdaCH = Double.parseDouble(val);
+      } else if (key.equalsIgnoreCase("alternateTraining")){
+        alternateTraining = Boolean.parseBoolean(val);
+      } else if (key.equalsIgnoreCase("weightByEntropy")){
+        weightByEntropy = Boolean.parseBoolean(val);
+      } else if (key.equalsIgnoreCase("useKL")){
+        useKL = Boolean.parseBoolean(val);
+      } else if (key.equalsIgnoreCase("useHardGE")){
+        useHardGE = Boolean.parseBoolean(val);
+      } else if (key.equalsIgnoreCase("useCRFforUnsup")){
+        useCRFforUnsup = Boolean.parseBoolean(val);
+      } else if (key.equalsIgnoreCase("useGEforSup")){
+        useGEforSup = Boolean.parseBoolean(val);
 
         // ADD VALUE ABOVE HERE
       } else if (key.length() > 0 && !key.equals("prop")) {
diff --git a/src/edu/stanford/nlp/trees/Tree.java b/src/edu/stanford/nlp/trees/Tree.java
index adf65d9410..636cb6a420 100644
--- a/src/edu/stanford/nlp/trees/Tree.java
+++ b/src/edu/stanford/nlp/trees/Tree.java
@@ -2313,7 +2313,8 @@ public Tree setChild(int i, Tree t) {
    * t.dominates(t) returns false.
    */
   public boolean dominates(Tree t) {
-    return !(dominationPath(t) == null);
+    List<Tree> path = dominationPath(t);  // local renamed so it no longer shadows the method name
+    return path != null && path.size() > 1;  // NOTE(review): a size-1 path presumably means t is this node; confirm in dominationPath
   }
 
   /**
diff --git a/test/src/edu/stanford/nlp/trees/TreeTest.java b/test/src/edu/stanford/nlp/trees/TreeTest.java
index 64e5addde8..efe047b220 100644
--- a/test/src/edu/stanford/nlp/trees/TreeTest.java
+++ b/test/src/edu/stanford/nlp/trees/TreeTest.java
@@ -77,4 +77,14 @@ public void testRemove() {
     assertEquals("ROOT", t.toString());
   }
 
+  /** Checks that dominates() is false for a node and itself, true from the root to each child, and false from child to root. */
+  public void testDominates() {
+    Tree t = Tree.valueOf("(A (B this) (C (D is) (E a) (F small)) (G test))");
+    assertFalse(t.dominates(t));
+    // Only the root's direct children are visited; deeper descendants are not checked here.
+    for (Tree child : t.children()) {
+      assertTrue(t.dominates(child));
+      assertFalse(child.dominates(t));
+    }
+  }
 }