Skip to content

Commit

Permalink
Reading data works for every KBP year
Browse files Browse the repository at this point in the history
  • Loading branch information
gangeli authored and Stanford NLP committed Jun 11, 2014
1 parent 420dc65 commit 9bd03e8
Show file tree
Hide file tree
Showing 19 changed files with 134 additions and 214 deletions.
1 change: 0 additions & 1 deletion commonbuildjsp.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
</fileset>
<pathelement location="${build.path}"/>
<pathelement location="${project.core}/lib/commons-logging.jar"/>
<pathelement location="${project.core}/lib/javax.servlet.jar"/>
</path>

<target name="jsp" depends="classpath,compile">
Expand Down
Binary file modified lib/javax.servlet.jar
Binary file not shown.
Binary file added lib/tomcat/servlet-api.jar
Binary file not shown.
15 changes: 8 additions & 7 deletions src/edu/stanford/nlp/dcoref/SieveCoreferenceSystem.java
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.SystemUtils;
import edu.stanford.nlp.util.logging.NewlineLogFormatter;

import edu.stanford.nlp.util.logging.Redwood;

/**
* Multi-pass Sieve coreference resolution system (see EMNLP 2010 paper).
Expand Down Expand Up @@ -343,7 +343,6 @@ public static void main(String[] args) throws Exception {
initializeAndRunCoref(props);
}

/** Returns the name of the log file that this method writes. */
public static String initializeAndRunCoref(Properties props) throws Exception {
String timeStamp = Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-");

Expand Down Expand Up @@ -1037,17 +1036,19 @@ public static List<List<Mention>> filterMentionsWithSingletonClusters(Document d
return res;
}
public static void runConllEval(String conllMentionEvalScript,
String goldFile, String predictFile, String evalFile, String errFile) throws IOException {
String goldFile, String predictFile, String evalFile, String errFile) throws IOException
{
ProcessBuilder process = new ProcessBuilder(conllMentionEvalScript, "all", goldFile, predictFile);
PrintWriter out = new PrintWriter(new FileOutputStream(evalFile));
PrintWriter err = new PrintWriter(new FileOutputStream(errFile));
SystemUtils.run(process, out, err);
out.close();
err.close();
}
}

public static String getConllEvalSummary(String conllMentionEvalScript,
String goldFile, String predictFile) throws IOException {
String goldFile, String predictFile) throws IOException
{
ProcessBuilder process = new ProcessBuilder(conllMentionEvalScript, "all", goldFile, predictFile, "none");
StringOutputStream errSos = new StringOutputStream();
StringOutputStream outSos = new StringOutputStream();
Expand All @@ -1058,11 +1059,11 @@ public static String getConllEvalSummary(String conllMentionEvalScript,
err.close();
String summary = outSos.toString();
String errStr = errSos.toString();
if ( ! errStr.isEmpty()) {
if (errStr.length() > 0) {
summary += "\nERROR: " + errStr;
}
return summary;
}
}

/** Print logs for error analysis */
public void printTopK(Logger logger, Document document, Semantics semantics) {
Expand Down
7 changes: 1 addition & 6 deletions src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java
Original file line number Diff line number Diff line change
Expand Up @@ -1795,16 +1795,11 @@ protected void printFeatures(IN wi, Collection<String> features) {
writtenNum++;
}

/** Print the String features generated from a token. */
/** Print the String features generated from a token */
protected void printFeatureLists(IN wi, Collection<List<String>> features) {
if (flags.printFeatures == null || writtenNum >= flags.printFeaturesUpto) {
return;
}
printFeatureListsHelper(wi, features);
}

// Separating this method out lets printFeatureLists be inlined, which is good since it is usually a no-op.
private void printFeatureListsHelper(IN wi, Collection<List<String>> features) {
if (cliqueWriter == null) {
cliqueWriter = IOUtils.getPrintWriterOrDie("feats-" + flags.printFeatures + ".txt");
writtenNum = 0;
Expand Down
90 changes: 54 additions & 36 deletions src/edu/stanford/nlp/ie/NERFeatureFactory.java
Original file line number Diff line number Diff line change
Expand Up @@ -389,61 +389,79 @@ public void init(SeqClassifierFlags flags) {
@Override
public Collection<String> getCliqueFeatures(PaddedList<IN> cInfo, int loc, Clique clique) {
Collection<String> features = Generics.newHashSet();
String domain = cInfo.get(0).get(CoreAnnotations.DomainAnnotation.class);
final boolean doFE = domain != null;

boolean doFE = cInfo.get(0).containsKey(CoreAnnotations.DomainAnnotation.class);
String domain = (doFE ? cInfo.get(0).get(CoreAnnotations.DomainAnnotation.class) : null);

// System.err.println(doFE+"\t"+domain);

// there are two special cases below, because 2 cliques have 2 names
Collection<String> c;
String suffix;
if (clique == cliqueC) {
//200710: tried making this clique null; didn't improve performance (rafferty)
c = featuresC(cInfo, loc);
suffix = "C";
Collection<String> c = featuresC(cInfo, loc);
addAllInterningAndSuffixing(features, c, "C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-C");
}
} else if (clique == cliqueCpC) {
c = featuresCpC(cInfo, loc);
suffix = "CpC";
addAllInterningAndSuffixing(features, c, suffix);
Collection<String> c = featuresCpC(cInfo, loc);
addAllInterningAndSuffixing(features, c, "CpC");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain + '-' + suffix);
addAllInterningAndSuffixing(features, c, domain+"-CpC");
}

c = featuresCnC(cInfo, loc-1);
suffix = "CnC";
addAllInterningAndSuffixing(features, c, "CnC");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-CnC");
}
} else if (clique == cliqueCp2C) {
c = featuresCp2C(cInfo, loc);
suffix = "Cp2C";
Collection<String> c = featuresCp2C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "Cp2C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-Cp2C");
}
} else if (clique == cliqueCp3C) {
c = featuresCp3C(cInfo, loc);
suffix = "Cp3C";
Collection<String> c = featuresCp3C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "Cp3C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-Cp3C");
}
} else if (clique == cliqueCp4C) {
c = featuresCp4C(cInfo, loc);
suffix = "Cp4C";
Collection<String> c = featuresCp4C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "Cp4C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-Cp4C");
}
} else if (clique == cliqueCp5C) {
c = featuresCp5C(cInfo, loc);
suffix = "Cp5C";
Collection<String> c = featuresCp5C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "Cp5C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-Cp5C");
}
} else if (clique == cliqueCpCp2C) {
c = featuresCpCp2C(cInfo, loc);
suffix = "CpCp2C";
addAllInterningAndSuffixing(features, c, suffix);
Collection<String> c = featuresCpCp2C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "CpCp2C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+ '-' + suffix);
addAllInterningAndSuffixing(features, c, domain+"-CpCp2C");
}

c = featuresCpCnC(cInfo, loc-1);
suffix = "CpCnC";
addAllInterningAndSuffixing(features, c, "CpCnC");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-CpCnC");
}
} else if (clique == cliqueCpCp2Cp3C) {
c = featuresCpCp2Cp3C(cInfo, loc);
suffix = "CpCp2Cp3C";
Collection<String> c = featuresCpCp2Cp3C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "CpCp2Cp3C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-CpCp2Cp3C");
}
} else if (clique == cliqueCpCp2Cp3Cp4C) {
c = featuresCpCp2Cp3Cp4C(cInfo, loc);
suffix = "CpCp2Cp3Cp4C";
} else {
throw new IllegalArgumentException("Unknown clique: " + clique);
}

addAllInterningAndSuffixing(features, c, suffix);
if (doFE) {
addAllInterningAndSuffixing(features, c, domain + '-' + suffix);
Collection<String> c = featuresCpCp2Cp3Cp4C(cInfo, loc);
addAllInterningAndSuffixing(features, c, "CpCp2Cp3Cp4C");
if (doFE) {
addAllInterningAndSuffixing(features, c, domain+"-CpCp2Cp3Cp4C");
}
}

// System.err.println(StringUtils.join(features,"\n")+"\n");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@ public class CRFLogConditionalObjectiveFunction extends AbstractStochasticCachin
protected final int numClasses;
public static Index<String> featureIndex;
protected final int[] map;
protected int[][][][] data; // data[docIndex][tokenIndex][][]
protected double[][][][] featureVal; // featureVal[docIndex][tokenIndex][][]
protected int[][] labels; // labels[docIndex][tokenIndex]
protected final int[][][][] data; // data[docIndex][tokenIndex][][]
protected final double[][][][] featureVal; // featureVal[docIndex][tokenIndex][][]
protected final int[][] labels; // labels[docIndex][tokenIndex]
protected final int domainDimension;
protected double[][] eHat4Update, e4Update;

Expand Down Expand Up @@ -760,13 +760,7 @@ protected Pair<double[][][], double[][][]> getCondProbs(CRFCliqueTree cTree, int
return new Pair<double[][][], double[][][]>(prevGivenCurr, nextGivenCurr);
}

/**
 * Accumulates a scaled copy of one 2D array into another, element by element:
 * {@code combineInto[i][j] += toBeCombined[i][j] * scale}.
 * The iteration bounds come from {@code toBeCombined}; {@code combineInto} is
 * indexed at the same positions and is modified in place.
 *
 * @param combineInto  the accumulator array that is updated in place
 * @param toBeCombined the array whose entries are scaled and added
 * @param scale        the factor each entry of {@code toBeCombined} is multiplied by
 */
protected void combine2DArr(double[][] combineInto, double[][] toBeCombined, double scale) {
  for (int row = 0; row < toBeCombined.length; row++) {
    final double[] source = toBeCombined[row];
    for (int col = 0; col < source.length; col++) {
      combineInto[row][col] += source[col] * scale;
    }
  }
}

protected void combine2DArr(double[][] combineInto, double[][] toBeCombined) {
protected static void combine2DArr(double[][] combineInto, double[][] toBeCombined) {
for (int i = 0; i < toBeCombined.length; i++)
for (int j = 0; j < toBeCombined[i].length; j++)
combineInto[i][j] += toBeCombined[i][j];
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ public class CRFLogConditionalObjectiveFunctionWithDropout extends CRFLogConditi

private final double delta;
private final double dropoutScale;
private double[][] dropoutPriorGradTotal;
private double[][] dropoutPriorGrad;
private final boolean dropoutApprox;
private double[][] weightSquare;

Expand Down Expand Up @@ -57,7 +57,7 @@ public ThreadsafeProcessor<Pair<Integer, Boolean>, Quadruple<Integer, Double, Ma
this.delta = delta;
this.dropoutScale = dropoutScale;
this.dropoutApprox = dropoutApprox;
dropoutPriorGradTotal = empty2D();
dropoutPriorGrad = empty2D();
this.unsupDropoutStartIndex = data.length;
this.unsupDropoutScale = unsupDropoutScale;
if (unsupDropoutData != null) {
Expand Down Expand Up @@ -727,7 +727,6 @@ public void calculate(double[] x) {
// first index is feature index, second index is of possible labeling
// double[][] E = empty2D();
clear2D(E);
clear2D(dropoutPriorGradTotal);

MulticoreWrapper<Pair<Integer, Boolean>, Quadruple<Integer, Double, Map<Integer, double[]>, Map<Integer, double[]>>> wrapper =
new MulticoreWrapper<Pair<Integer, Boolean>, Quadruple<Integer, Double, Map<Integer, double[]>, Map<Integer, double[]>>>(multiThreadGrad, dropoutPriorThreadProcessor);
Expand All @@ -748,9 +747,9 @@ public void calculate(double[] x) {
Map<Integer, double[]> partialDropout = result.fourth();
if (partialDropout != null) {
if (isUnsup) {
combine2DArr(dropoutPriorGradTotal, partialDropout, unsupDropoutScale);
combine2DArr(dropoutPriorGrad, partialDropout, unsupDropoutScale);
} else {
combine2DArr(dropoutPriorGradTotal, partialDropout);
combine2DArr(dropoutPriorGrad, partialDropout);
}
}

Expand All @@ -775,9 +774,9 @@ public void calculate(double[] x) {
Map<Integer, double[]> partialDropout = result.fourth();
if (partialDropout != null) {
if (isUnsup) {
combine2DArr(dropoutPriorGradTotal, partialDropout, unsupDropoutScale);
combine2DArr(dropoutPriorGrad, partialDropout, unsupDropoutScale);
} else {
combine2DArr(dropoutPriorGradTotal, partialDropout);
combine2DArr(dropoutPriorGrad, partialDropout);
}
}

Expand Down Expand Up @@ -806,7 +805,7 @@ public void calculate(double[] x) {
for (int j = 0; j < E[i].length; j++) {
// because we minimize -L(\theta)
derivative[index] = (E[i][j] - Ehat[i][j]);
derivative[index] += dropoutScale * dropoutPriorGradTotal[i][j];
derivative[index] += dropoutScale * dropoutPriorGrad[i][j];
if (VERBOSE) {
System.err.println("deriv(" + i + "," + j + ") = " + E[i][j] + " - " + Ehat[i][j] + " = " + derivative[index]);
}
Expand Down
15 changes: 14 additions & 1 deletion src/edu/stanford/nlp/ie/machinereading/structure/Span.java
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,20 @@ public static Span fromValues(int val1, int val2) {
return new Span(val2, val1);
}
}


/**
 * Builds a span from two loosely-typed endpoint values and delegates to
 * {@code fromValues(int, int)}.
 * Each element may be a {@link Number} (converted with {@code intValue()}) or a
 * {@link String} (parsed with {@link Integer#parseInt(String)}).
 *
 * @param values exactly two elements, each a Number or a String
 * @return the span produced by {@code fromValues(int, int)} for the two converted values
 * @throws IllegalArgumentException if {@code values} does not hold exactly two elements,
 *         or if an element is neither a Number nor a String
 * @throws NumberFormatException if a String element is not a parsable integer
 */
public static Span fromValues(Object... values) {
  if (values.length != 2) { throw new IllegalArgumentException("fromValues() must take an array with 2 elements"); }
  // Both endpoints go through the same conversion, so the type check is always
  // made against the element actually being converted (the previous inline code
  // tested values[0] when deciding how to convert values[1]).
  int val1 = spanValueToInt(values[0]);
  int val2 = spanValueToInt(values[1]);
  return fromValues(val1, val2);
}

/** Converts a single span endpoint (a Number or a String) to an int. */
private static int spanValueToInt(Object value) {
  if (value instanceof Number) { return ((Number) value).intValue(); }
  if (value instanceof String) { return Integer.parseInt((String) value); }
  throw new IllegalArgumentException("Unknown value for span: " + value);
}

public int start() { return start; }
public int end() { return end; }

Expand Down
31 changes: 4 additions & 27 deletions src/edu/stanford/nlp/math/ArrayMath.java
Original file line number Diff line number Diff line change
Expand Up @@ -434,20 +434,6 @@ public static void pairwiseMultiply(float[] a, float[] b, float[] result) {
}
}

/**
 * Divides the first array by the second elementwise and stores the
 * quotients back into the first array.
 * No special handling is applied to zero divisors; the result follows
 * ordinary {@code double} division.
 *
 * @param a the dividend array; each {@code a[i]} is overwritten with {@code a[i] / b[i]}
 * @param b the divisor array; must have the same length as {@code a}
 * @throws IllegalArgumentException if the two arrays differ in length
 */
public static void pairwiseDivideInPlace(double[] a, double[] b) {
  if (a.length != b.length) {
    // IllegalArgumentException is a RuntimeException, so callers that caught
    // the previous bare RuntimeException keep working, but now get a diagnosis.
    throw new IllegalArgumentException(
        "Arrays must have the same length: " + a.length + " != " + b.length);
  }
  for (int i = 0; i < a.length; i++) {
    a[i] /= b[i];
  }
}

// ERROR CHECKING

public static boolean hasNaN(double[] a) {
Expand Down Expand Up @@ -2025,21 +2011,11 @@ public static void multiplyInto(double[] a, double[] b, double c) {
* @param newSize
*/
public static double[] copyOf(double[] original, int newSize) {
double[] a = new double[newSize];
System.arraycopy(original, 0, a, 0, original.length);
return a;
double[] a = new double[newSize];
System.arraycopy(original, 0, a, 0, original.length);
return a;
}

/**
 * Computes the sum of {@code -p * ln(p)} over the entries of the array,
 * using the natural logarithm. Entries equal to zero are skipped, so they
 * contribute nothing to the total.
 *
 * @param probs the values to sum over
 * @return the accumulated {@code -p * ln(p)} total; 0 for an empty array
 */
public static double entropy(double[] probs) {
  double total = 0;
  for (double prob : probs) {
    if (prob == 0.0) {
      continue;  // skip zeros: Math.log(0) is -Infinity and 0 * -Infinity is NaN
    }
    total -= prob * Math.log(prob);
  }
  return total;
}

public static void assertFinite(double[] vector, String vectorName) throws InvalidElementException {
for(int i=0; i<vector.length; i++){
Expand All @@ -2051,6 +2027,7 @@ public static void assertFinite(double[] vector, String vectorName) throws Inval
}
}


public static class InvalidElementException extends RuntimeException {

private static final long serialVersionUID = 1647150702529757545L;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Random;
import java.util.Set;

/** A differentiable function that caches the last evaluation of its value and
* derivative.
Expand Down Expand Up @@ -37,7 +35,7 @@ public boolean gradientCheck(int numOfChecks, int numOfRandomChecks, double[] x)
System.arraycopy(derivative, 0, savedDeriv, 0, derivative.length);
double oldX, plusVal, minusVal, appDeriv, calcDeriv, diff, pct = 0;
int interval = Math.max(1, x.length / numOfChecks);
Set<Integer> indicesToCheck = new HashSet<Integer>();
List<Integer> indicesToCheck = new ArrayList<Integer>();
for (int paramIndex = 0; paramIndex < xLen; paramIndex+=interval) {
indicesToCheck.add(paramIndex);
}
Expand Down Expand Up @@ -112,7 +110,7 @@ protected static void copy(double[] copy, double[] orig) {
System.arraycopy(orig, 0, copy, 0, orig.length);
}

public void ensure(double[] x) {
void ensure(double[] x) {
if (Arrays.equals(x, lastX)) {
return;
}
Expand Down
Loading

0 comments on commit 9bd03e8

Please sign in to comment.