Skip to content

Commit

Permalink
Merge branch 'master' of jamie:/u/nlp/git/javanlp
Browse files Browse the repository at this point in the history
  • Loading branch information
gangeli authored and Stanford NLP committed Jun 11, 2014
1 parent f794734 commit 420dc65
Show file tree
Hide file tree
Showing 18 changed files with 213 additions and 120 deletions.
1 change: 1 addition & 0 deletions commonbuildjsp.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
</fileset>
<pathelement location="${build.path}"/>
<pathelement location="${project.core}/lib/commons-logging.jar"/>
<pathelement location="${project.core}/lib/javax.servlet.jar"/>
</path>

<target name="jsp" depends="classpath,compile">
Expand Down
Binary file modified lib/javax.servlet.jar
Binary file not shown.
Binary file removed lib/tomcat/servlet-api.jar
Binary file not shown.
15 changes: 7 additions & 8 deletions src/edu/stanford/nlp/dcoref/SieveCoreferenceSystem.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.SystemUtils;
import edu.stanford.nlp.util.logging.NewlineLogFormatter;
import edu.stanford.nlp.util.logging.Redwood;


/**
* Multi-pass Sieve coreference resolution system (see EMNLP 2010 paper).
Expand Down Expand Up @@ -343,6 +343,7 @@ public static void main(String[] args) throws Exception {
initializeAndRunCoref(props);
}

/** Returns the name of the log file that this method writes. */
public static String initializeAndRunCoref(Properties props) throws Exception {
String timeStamp = Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-");

Expand Down Expand Up @@ -1036,19 +1037,17 @@ public static List<List<Mention>> filterMentionsWithSingletonClusters(Document d
return res;
}
public static void runConllEval(String conllMentionEvalScript,
String goldFile, String predictFile, String evalFile, String errFile) throws IOException
{
String goldFile, String predictFile, String evalFile, String errFile) throws IOException {
ProcessBuilder process = new ProcessBuilder(conllMentionEvalScript, "all", goldFile, predictFile);
PrintWriter out = new PrintWriter(new FileOutputStream(evalFile));
PrintWriter err = new PrintWriter(new FileOutputStream(errFile));
SystemUtils.run(process, out, err);
out.close();
err.close();
}
}

public static String getConllEvalSummary(String conllMentionEvalScript,
String goldFile, String predictFile) throws IOException
{
String goldFile, String predictFile) throws IOException {
ProcessBuilder process = new ProcessBuilder(conllMentionEvalScript, "all", goldFile, predictFile, "none");
StringOutputStream errSos = new StringOutputStream();
StringOutputStream outSos = new StringOutputStream();
Expand All @@ -1059,11 +1058,11 @@ public static String getConllEvalSummary(String conllMentionEvalScript,
err.close();
String summary = outSos.toString();
String errStr = errSos.toString();
if (errStr.length() > 0) {
if ( ! errStr.isEmpty()) {
summary += "\nERROR: " + errStr;
}
return summary;
}
}

/** Print logs for error analysis */
public void printTopK(Logger logger, Document document, Semantics semantics) {
Expand Down
7 changes: 6 additions & 1 deletion src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java
Original file line number Diff line number Diff line change
Expand Up @@ -1795,11 +1795,16 @@ protected void printFeatures(IN wi, Collection<String> features) {
writtenNum++;
}

/** Print the String features generated from a token */
/** Print the String features generated from a token. */
protected void printFeatureLists(IN wi, Collection<List<String>> features) {
  // Cheap gate: only delegate when feature printing is switched on and the
  // write limit has not been reached. The actual writing lives in the helper
  // so that this frequently-called method stays tiny.
  final boolean shouldPrint = flags.printFeatures != null && writtenNum < flags.printFeaturesUpto;
  if (shouldPrint) {
    printFeatureListsHelper(wi, features);
  }
}

// Separating this method out lets printFeatureLists be inlined, which is good since it is usually a no-op.
private void printFeatureListsHelper(IN wi, Collection<List<String>> features) {
if (cliqueWriter == null) {
cliqueWriter = IOUtils.getPrintWriterOrDie("feats-" + flags.printFeatures + ".txt");
writtenNum = 0;
Expand Down
90 changes: 36 additions & 54 deletions src/edu/stanford/nlp/ie/NERFeatureFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -389,79 +389,61 @@ public void init(SeqClassifierFlags flags) {
@Override
public Collection<String> getCliqueFeatures(PaddedList<IN> cInfo, int loc, Clique clique) {
Collection<String> features = Generics.newHashSet();

boolean doFE = cInfo.get(0).containsKey(CoreAnnotations.DomainAnnotation.class);
String domain = (doFE ? cInfo.get(0).get(CoreAnnotations.DomainAnnotation.class) : null);
String domain = cInfo.get(0).get(CoreAnnotations.DomainAnnotation.class);
final boolean doFE = domain != null;

// System.err.println(doFE+"\t"+domain);

// there are two special cases below, because 2 cliques have 2 names
Collection<String> c;
String suffix;
if (clique == cliqueC) {
//200710: tried making this clique null; didn't improve performance (rafferty)
Collection<String> c = featuresC(cInfo, loc);
addAllInterningAndSuffixing(features, c, "C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-C");
}
c = featuresC(cInfo, loc);
suffix = "C";
} else if (clique == cliqueCpC) {
Collection<String> c = featuresCpC(cInfo, loc);
addAllInterningAndSuffixing(features, c, "CpC");
c = featuresCpC(cInfo, loc);
suffix = "CpC";
addAllInterningAndSuffixing(features, c, suffix);
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-CpC");
addAllInterningAndSuffixing(features, c, domain + '-' + suffix);
}

c = featuresCnC(cInfo, loc-1);
addAllInterningAndSuffixing(features, c, "CnC");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-CnC");
}
suffix = "CnC";
} else if (clique == cliqueCp2C) {
Collection<String> c = featuresCp2C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "Cp2C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-Cp2C");
}
c = featuresCp2C(cInfo, loc);
suffix = "Cp2C";
} else if (clique == cliqueCp3C) {
Collection<String> c = featuresCp3C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "Cp3C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-Cp3C");
}
c = featuresCp3C(cInfo, loc);
suffix = "Cp3C";
} else if (clique == cliqueCp4C) {
Collection<String> c = featuresCp4C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "Cp4C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-Cp4C");
}
c = featuresCp4C(cInfo, loc);
suffix = "Cp4C";
} else if (clique == cliqueCp5C) {
Collection<String> c = featuresCp5C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "Cp5C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-Cp5C");
}
c = featuresCp5C(cInfo, loc);
suffix = "Cp5C";
} else if (clique == cliqueCpCp2C) {
Collection<String> c = featuresCpCp2C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "CpCp2C");
c = featuresCpCp2C(cInfo, loc);
suffix = "CpCp2C";
addAllInterningAndSuffixing(features, c, suffix);
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-CpCp2C");
addAllInterningAndSuffixing(features, c, domain+ '-' + suffix);
}

c = featuresCpCnC(cInfo, loc-1);
addAllInterningAndSuffixing(features, c, "CpCnC");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-CpCnC");
}
suffix = "CpCnC";
} else if (clique == cliqueCpCp2Cp3C) {
Collection<String> c = featuresCpCp2Cp3C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "CpCp2Cp3C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-CpCp2Cp3C");
}
c = featuresCpCp2Cp3C(cInfo, loc);
suffix = "CpCp2Cp3C";
} else if (clique == cliqueCpCp2Cp3Cp4C) {
Collection<String> c = featuresCpCp2Cp3Cp4C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "CpCp2Cp3Cp4C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-CpCp2Cp3Cp4C");
}
c = featuresCpCp2Cp3Cp4C(cInfo, loc);
suffix = "CpCp2Cp3Cp4C";
} else {
throw new IllegalArgumentException("Unknown clique: " + clique);
}

addAllInterningAndSuffixing(features, c, suffix);
if (doFE) {
addAllInterningAndSuffixing(features, c, domain + '-' + suffix);
}

// System.err.println(StringUtils.join(features,"\n")+"\n");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ public class CRFLogConditionalObjectiveFunction extends AbstractStochasticCachin
protected final int numClasses;
public static Index<String> featureIndex;
protected final int[] map;
protected final int[][][][] data; // data[docIndex][tokenIndex][][]
protected final double[][][][] featureVal; // featureVal[docIndex][tokenIndex][][]
protected final int[][] labels; // labels[docIndex][tokenIndex]
protected int[][][][] data; // data[docIndex][tokenIndex][][]
protected double[][][][] featureVal; // featureVal[docIndex][tokenIndex][][]
protected int[][] labels; // labels[docIndex][tokenIndex]
protected final int domainDimension;
protected double[][] eHat4Update, e4Update;

Expand Down Expand Up @@ -760,7 +760,13 @@ protected Pair<double[][][], double[][][]> getCondProbs(CRFCliqueTree cTree, int
return new Pair<double[][][], double[][][]>(prevGivenCurr, nextGivenCurr);
}

protected static void combine2DArr(double[][] combineInto, double[][] toBeCombined) {
/**
 * Adds {@code toBeCombined}, multiplied elementwise by {@code scale}, into
 * {@code combineInto} in place. Iteration is driven by the dimensions of
 * {@code toBeCombined}, so {@code combineInto} must be at least as large in
 * every row.
 *
 * @param combineInto  the accumulator array; modified in place
 * @param toBeCombined the values to add; not modified
 * @param scale        the factor applied to each added value
 */
protected void combine2DArr(double[][] combineInto, double[][] toBeCombined, double scale) {
  for (int row = 0; row < toBeCombined.length; row++) {
    final double[] source = toBeCombined[row];
    for (int col = 0; col < source.length; col++) {
      combineInto[row][col] += source[col] * scale;
    }
  }
}

protected void combine2DArr(double[][] combineInto, double[][] toBeCombined) {
for (int i = 0; i < toBeCombined.length; i++)
for (int j = 0; j < toBeCombined[i].length; j++)
combineInto[i][j] += toBeCombined[i][j];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class CRFLogConditionalObjectiveFunctionWithDropout extends CRFLogConditi

private final double delta;
private final double dropoutScale;
private double[][] dropoutPriorGrad;
private double[][] dropoutPriorGradTotal;
private final boolean dropoutApprox;
private double[][] weightSquare;

Expand Down Expand Up @@ -57,7 +57,7 @@ public ThreadsafeProcessor<Pair<Integer, Boolean>, Quadruple<Integer, Double, Ma
this.delta = delta;
this.dropoutScale = dropoutScale;
this.dropoutApprox = dropoutApprox;
dropoutPriorGrad = empty2D();
dropoutPriorGradTotal = empty2D();
this.unsupDropoutStartIndex = data.length;
this.unsupDropoutScale = unsupDropoutScale;
if (unsupDropoutData != null) {
Expand Down Expand Up @@ -727,6 +727,7 @@ public void calculate(double[] x) {
// first index is feature index, second index is of possible labeling
// double[][] E = empty2D();
clear2D(E);
clear2D(dropoutPriorGradTotal);

MulticoreWrapper<Pair<Integer, Boolean>, Quadruple<Integer, Double, Map<Integer, double[]>, Map<Integer, double[]>>> wrapper =
new MulticoreWrapper<Pair<Integer, Boolean>, Quadruple<Integer, Double, Map<Integer, double[]>, Map<Integer, double[]>>>(multiThreadGrad, dropoutPriorThreadProcessor);
Expand All @@ -747,9 +748,9 @@ public void calculate(double[] x) {
Map<Integer, double[]> partialDropout = result.fourth();
if (partialDropout != null) {
if (isUnsup) {
combine2DArr(dropoutPriorGrad, partialDropout, unsupDropoutScale);
combine2DArr(dropoutPriorGradTotal, partialDropout, unsupDropoutScale);
} else {
combine2DArr(dropoutPriorGrad, partialDropout);
combine2DArr(dropoutPriorGradTotal, partialDropout);
}
}

Expand All @@ -774,9 +775,9 @@ public void calculate(double[] x) {
Map<Integer, double[]> partialDropout = result.fourth();
if (partialDropout != null) {
if (isUnsup) {
combine2DArr(dropoutPriorGrad, partialDropout, unsupDropoutScale);
combine2DArr(dropoutPriorGradTotal, partialDropout, unsupDropoutScale);
} else {
combine2DArr(dropoutPriorGrad, partialDropout);
combine2DArr(dropoutPriorGradTotal, partialDropout);
}
}

Expand Down Expand Up @@ -805,7 +806,7 @@ public void calculate(double[] x) {
for (int j = 0; j < E[i].length; j++) {
// because we minimize -L(\theta)
derivative[index] = (E[i][j] - Ehat[i][j]);
derivative[index] += dropoutScale * dropoutPriorGrad[i][j];
derivative[index] += dropoutScale * dropoutPriorGradTotal[i][j];
if (VERBOSE) {
System.err.println("deriv(" + i + "," + j + ") = " + E[i][j] + " - " + Ehat[i][j] + " = " + derivative[index]);
}
Expand Down
31 changes: 27 additions & 4 deletions src/edu/stanford/nlp/math/ArrayMath.java
Original file line number Diff line number Diff line change
Expand Up @@ -434,6 +434,20 @@ public static void pairwiseMultiply(float[] a, float[] b, float[] result) {
}
}

/**
 * Divides the first array by the second elementwise and stores the
 * quotients back into the first array. The second array is not modified.
 * Division follows ordinary {@code double} arithmetic, so a zero divisor
 * yields an infinity or NaN rather than an exception.
 *
 * @param a the dividends; overwritten with {@code a[i] / b[i]}
 * @param b the divisors; must have the same length as {@code a}
 * @throws IllegalArgumentException if the two arrays differ in length
 */
public static void pairwiseDivideInPlace(double[] a, double[] b) {
  if (a.length != b.length) {
    // IllegalArgumentException is a RuntimeException, so existing handlers still match.
    throw new IllegalArgumentException(
        "Array lengths differ: a.length=" + a.length + ", b.length=" + b.length);
  }
  for (int i = 0; i < a.length; i++) {
    a[i] /= b[i];
  }
}

// ERROR CHECKING

public static boolean hasNaN(double[] a) {
Expand Down Expand Up @@ -2011,11 +2025,21 @@ public static void multiplyInto(double[] a, double[] b, double c) {
* @param newSize
*/
public static double[] copyOf(double[] original, int newSize) {
  // A freshly allocated double[] is zero-filled, so any tail beyond the
  // copied prefix is already padded with 0.0.
  double[] a = new double[newSize];
  // Copy only as many elements as fit: shrinking truncates instead of
  // making System.arraycopy throw when newSize < original.length.
  System.arraycopy(original, 0, a, 0, Math.min(original.length, newSize));
  return a;
}

/**
 * Computes the sum of {@code -p * ln(p)} over the given values, using the
 * natural logarithm. Entries equal to zero are skipped, so they contribute
 * nothing to the sum. The input is not checked for being normalized or
 * non-negative.
 *
 * @param probs the values to sum over
 * @return the accumulated sum; {@code 0} for an empty array
 */
public static double entropy(double[] probs) {
  double total = 0;
  for (double prob : probs) {
    if (prob == 0.0) {
      continue;  // skip zeros: Math.log(0) is -Infinity and 0 * -Infinity is NaN
    }
    total -= prob * Math.log(prob);
  }
  return total;
}

public static void assertFinite(double[] vector, String vectorName) throws InvalidElementException {
for(int i=0; i<vector.length; i++){
Expand All @@ -2027,7 +2051,6 @@ public static void assertFinite(double[] vector, String vectorName) throws Inval
}
}


public static class InvalidElementException extends RuntimeException {

private static final long serialVersionUID = 1647150702529757545L;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

/** A differentiable function that caches the last evaluation of its value and
* derivative.
Expand Down Expand Up @@ -35,7 +37,7 @@ public boolean gradientCheck(int numOfChecks, int numOfRandomChecks, double[] x)
System.arraycopy(derivative, 0, savedDeriv, 0, derivative.length);
double oldX, plusVal, minusVal, appDeriv, calcDeriv, diff, pct = 0;
int interval = Math.max(1, x.length / numOfChecks);
List<Integer> indicesToCheck = new ArrayList<Integer>();
Set<Integer> indicesToCheck = new HashSet<Integer>();
for (int paramIndex = 0; paramIndex < xLen; paramIndex+=interval) {
indicesToCheck.add(paramIndex);
}
Expand Down Expand Up @@ -110,7 +112,7 @@ protected static void copy(double[] copy, double[] orig) {
System.arraycopy(orig, 0, copy, 0, orig.length);
}

void ensure(double[] x) {
public void ensure(double[] x) {
if (Arrays.equals(x, lastX)) {
return;
}
Expand Down
2 changes: 2 additions & 0 deletions src/edu/stanford/nlp/optimization/SGDMinimizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ public static void main(String[] args) {
final double[] grads = new double[dim];

final DiffFunction f = new DiffFunction() {
@Override
public double[] derivativeAt(double[] x) {
double val = Math.PI * valuePow(x, Math.PI - 1);
for (int i = 0; i < dim; i++) {
Expand All @@ -139,6 +140,7 @@ private double valuePow(double[] x, double pow) {
return Math.pow(val * 0.5, pow);
}

@Override
public int domainDimension() {
return dim;
}
Expand Down
Loading

0 comments on commit 420dc65

Please sign in to comment.