diff --git a/commonbuildjsp.xml b/commonbuildjsp.xml index 67097feaf1..9604c97de3 100644 --- a/commonbuildjsp.xml +++ b/commonbuildjsp.xml @@ -9,7 +9,6 @@ - diff --git a/lib/javax.servlet.jar b/lib/javax.servlet.jar index 6b34e5f45b..2aa9f27e5e 100644 Binary files a/lib/javax.servlet.jar and b/lib/javax.servlet.jar differ diff --git a/lib/tomcat/servlet-api.jar b/lib/tomcat/servlet-api.jar new file mode 100644 index 0000000000..6b34e5f45b Binary files /dev/null and b/lib/tomcat/servlet-api.jar differ diff --git a/src/edu/stanford/nlp/dcoref/SieveCoreferenceSystem.java b/src/edu/stanford/nlp/dcoref/SieveCoreferenceSystem.java index 8c83b21ba5..b6b2d6839f 100644 --- a/src/edu/stanford/nlp/dcoref/SieveCoreferenceSystem.java +++ b/src/edu/stanford/nlp/dcoref/SieveCoreferenceSystem.java @@ -76,7 +76,7 @@ import edu.stanford.nlp.util.StringUtils; import edu.stanford.nlp.util.SystemUtils; import edu.stanford.nlp.util.logging.NewlineLogFormatter; - +import edu.stanford.nlp.util.logging.Redwood; /** * Multi-pass Sieve coreference resolution system (see EMNLP 2010 paper). @@ -343,7 +343,6 @@ public static void main(String[] args) throws Exception { initializeAndRunCoref(props); } - /** Returns the name of the log file that this method writes. 
*/ public static String initializeAndRunCoref(Properties props) throws Exception { String timeStamp = Calendar.getInstance().getTime().toString().replaceAll("\\s", "-").replaceAll(":", "-"); @@ -1037,17 +1036,19 @@ public static List> filterMentionsWithSingletonClusters(Document d return res; } public static void runConllEval(String conllMentionEvalScript, - String goldFile, String predictFile, String evalFile, String errFile) throws IOException { + String goldFile, String predictFile, String evalFile, String errFile) throws IOException + { ProcessBuilder process = new ProcessBuilder(conllMentionEvalScript, "all", goldFile, predictFile); PrintWriter out = new PrintWriter(new FileOutputStream(evalFile)); PrintWriter err = new PrintWriter(new FileOutputStream(errFile)); SystemUtils.run(process, out, err); out.close(); err.close(); - } + } public static String getConllEvalSummary(String conllMentionEvalScript, - String goldFile, String predictFile) throws IOException { + String goldFile, String predictFile) throws IOException + { ProcessBuilder process = new ProcessBuilder(conllMentionEvalScript, "all", goldFile, predictFile, "none"); StringOutputStream errSos = new StringOutputStream(); StringOutputStream outSos = new StringOutputStream(); @@ -1058,11 +1059,11 @@ public static String getConllEvalSummary(String conllMentionEvalScript, err.close(); String summary = outSos.toString(); String errStr = errSos.toString(); - if ( ! 
errStr.isEmpty()) { + if (errStr.length() > 0) { summary += "\nERROR: " + errStr; } return summary; - } + } /** Print logs for error analysis */ public void printTopK(Logger logger, Document document, Semantics semantics) { diff --git a/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java b/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java index 596ebfce3c..e87ea3af72 100644 --- a/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java +++ b/src/edu/stanford/nlp/ie/AbstractSequenceClassifier.java @@ -1795,16 +1795,11 @@ protected void printFeatures(IN wi, Collection features) { writtenNum++; } - /** Print the String features generated from a token. */ + /** Print the String features generated from a token */ protected void printFeatureLists(IN wi, Collection> features) { if (flags.printFeatures == null || writtenNum >= flags.printFeaturesUpto) { return; } - printFeatureListsHelper(wi, features); - } - - // Separating this method out lets printFeatureLists be inlined, which is good since it is usually a no-op. - private void printFeatureListsHelper(IN wi, Collection> features) { if (cliqueWriter == null) { cliqueWriter = IOUtils.getPrintWriterOrDie("feats-" + flags.printFeatures + ".txt"); writtenNum = 0; diff --git a/src/edu/stanford/nlp/ie/NERFeatureFactory.java b/src/edu/stanford/nlp/ie/NERFeatureFactory.java index 153163801c..acc0ed78a8 100644 --- a/src/edu/stanford/nlp/ie/NERFeatureFactory.java +++ b/src/edu/stanford/nlp/ie/NERFeatureFactory.java @@ -389,61 +389,79 @@ public void init(SeqClassifierFlags flags) { @Override public Collection getCliqueFeatures(PaddedList cInfo, int loc, Clique clique) { Collection features = Generics.newHashSet(); - String domain = cInfo.get(0).get(CoreAnnotations.DomainAnnotation.class); - final boolean doFE = domain != null; + + boolean doFE = cInfo.get(0).containsKey(CoreAnnotations.DomainAnnotation.class); + String domain = (doFE ? 
cInfo.get(0).get(CoreAnnotations.DomainAnnotation.class) : null); // System.err.println(doFE+"\t"+domain); - // there are two special cases below, because 2 cliques have 2 names - Collection c; - String suffix; if (clique == cliqueC) { //200710: tried making this clique null; didn't improve performance (rafferty) - c = featuresC(cInfo, loc); - suffix = "C"; + Collection c = featuresC(cInfo, loc); + addAllInterningAndSuffixing(features, c, "C"); + if (doFE) { + addAllInterningAndSuffixing(features, c, domain+"-C"); + } } else if (clique == cliqueCpC) { - c = featuresCpC(cInfo, loc); - suffix = "CpC"; - addAllInterningAndSuffixing(features, c, suffix); + Collection c = featuresCpC(cInfo, loc); + addAllInterningAndSuffixing(features, c, "CpC"); if (doFE) { - addAllInterningAndSuffixing(features, c, domain + '-' + suffix); + addAllInterningAndSuffixing(features, c, domain+"-CpC"); } + c = featuresCnC(cInfo, loc-1); - suffix = "CnC"; + addAllInterningAndSuffixing(features, c, "CnC"); + if (doFE) { + addAllInterningAndSuffixing(features, c, domain+"-CnC"); + } } else if (clique == cliqueCp2C) { - c = featuresCp2C(cInfo, loc); - suffix = "Cp2C"; + Collection c = featuresCp2C(cInfo, loc); + addAllInterningAndSuffixing(features, c, "Cp2C"); + if (doFE) { + addAllInterningAndSuffixing(features, c, domain+"-Cp2C"); + } } else if (clique == cliqueCp3C) { - c = featuresCp3C(cInfo, loc); - suffix = "Cp3C"; + Collection c = featuresCp3C(cInfo, loc); + addAllInterningAndSuffixing(features, c, "Cp3C"); + if (doFE) { + addAllInterningAndSuffixing(features, c, domain+"-Cp3C"); + } } else if (clique == cliqueCp4C) { - c = featuresCp4C(cInfo, loc); - suffix = "Cp4C"; + Collection c = featuresCp4C(cInfo, loc); + addAllInterningAndSuffixing(features, c, "Cp4C"); + if (doFE) { + addAllInterningAndSuffixing(features, c, domain+"-Cp4C"); + } } else if (clique == cliqueCp5C) { - c = featuresCp5C(cInfo, loc); - suffix = "Cp5C"; + Collection c = featuresCp5C(cInfo, loc); + 
addAllInterningAndSuffixing(features, c, "Cp5C"); + if (doFE) { + addAllInterningAndSuffixing(features, c, domain+"-Cp5C"); + } } else if (clique == cliqueCpCp2C) { - c = featuresCpCp2C(cInfo, loc); - suffix = "CpCp2C"; - addAllInterningAndSuffixing(features, c, suffix); + Collection c = featuresCpCp2C(cInfo, loc); + addAllInterningAndSuffixing(features, c, "CpCp2C"); if (doFE) { - addAllInterningAndSuffixing(features, c, domain+ '-' + suffix); + addAllInterningAndSuffixing(features, c, domain+"-CpCp2C"); } + c = featuresCpCnC(cInfo, loc-1); - suffix = "CpCnC"; + addAllInterningAndSuffixing(features, c, "CpCnC"); + if (doFE) { + addAllInterningAndSuffixing(features, c, domain+"-CpCnC"); + } } else if (clique == cliqueCpCp2Cp3C) { - c = featuresCpCp2Cp3C(cInfo, loc); - suffix = "CpCp2Cp3C"; + Collection c = featuresCpCp2Cp3C(cInfo, loc); + addAllInterningAndSuffixing(features, c, "CpCp2Cp3C"); + if (doFE) { + addAllInterningAndSuffixing(features, c, domain+"-CpCp2Cp3C"); + } } else if (clique == cliqueCpCp2Cp3Cp4C) { - c = featuresCpCp2Cp3Cp4C(cInfo, loc); - suffix = "CpCp2Cp3Cp4C"; - } else { - throw new IllegalArgumentException("Unknown clique: " + clique); - } - - addAllInterningAndSuffixing(features, c, suffix); - if (doFE) { - addAllInterningAndSuffixing(features, c, domain + '-' + suffix); + Collection c = featuresCpCp2Cp3Cp4C(cInfo, loc); + addAllInterningAndSuffixing(features, c, "CpCp2Cp3Cp4C"); + if (doFE) { + addAllInterningAndSuffixing(features, c, domain+"-CpCp2Cp3Cp4C"); + } } // System.err.println(StringUtils.join(features,"\n")+"\n"); diff --git a/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunction.java b/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunction.java index 8c4277b499..d6899a067c 100644 --- a/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunction.java +++ b/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunction.java @@ -46,9 +46,9 @@ public class CRFLogConditionalObjectiveFunction extends 
AbstractStochasticCachin protected final int numClasses; public static Index featureIndex; protected final int[] map; - protected int[][][][] data; // data[docIndex][tokenIndex][][] - protected double[][][][] featureVal; // featureVal[docIndex][tokenIndex][][] - protected int[][] labels; // labels[docIndex][tokenIndex] + protected final int[][][][] data; // data[docIndex][tokenIndex][][] + protected final double[][][][] featureVal; // featureVal[docIndex][tokenIndex][][] + protected final int[][] labels; // labels[docIndex][tokenIndex] protected final int domainDimension; protected double[][] eHat4Update, e4Update; @@ -760,13 +760,7 @@ protected Pair getCondProbs(CRFCliqueTree cTree, int return new Pair(prevGivenCurr, nextGivenCurr); } - protected void combine2DArr(double[][] combineInto, double[][] toBeCombined, double scale) { - for (int i = 0; i < toBeCombined.length; i++) - for (int j = 0; j < toBeCombined[i].length; j++) - combineInto[i][j] += toBeCombined[i][j] * scale; - } - - protected void combine2DArr(double[][] combineInto, double[][] toBeCombined) { + protected static void combine2DArr(double[][] combineInto, double[][] toBeCombined) { for (int i = 0; i < toBeCombined.length; i++) for (int j = 0; j < toBeCombined[i].length; j++) combineInto[i][j] += toBeCombined[i][j]; diff --git a/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunctionWithDropout.java b/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunctionWithDropout.java index 25033faae9..9a68180777 100644 --- a/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunctionWithDropout.java +++ b/src/edu/stanford/nlp/ie/crf/CRFLogConditionalObjectiveFunctionWithDropout.java @@ -21,7 +21,7 @@ public class CRFLogConditionalObjectiveFunctionWithDropout extends CRFLogConditi private final double delta; private final double dropoutScale; - private double[][] dropoutPriorGradTotal; + private double[][] dropoutPriorGrad; private final boolean dropoutApprox; private double[][] weightSquare; @@ 
-57,7 +57,7 @@ public ThreadsafeProcessor, Quadruple, Quadruple, Map>> wrapper = new MulticoreWrapper, Quadruple, Map>>(multiThreadGrad, dropoutPriorThreadProcessor); @@ -748,9 +747,9 @@ public void calculate(double[] x) { Map partialDropout = result.fourth(); if (partialDropout != null) { if (isUnsup) { - combine2DArr(dropoutPriorGradTotal, partialDropout, unsupDropoutScale); + combine2DArr(dropoutPriorGrad, partialDropout, unsupDropoutScale); } else { - combine2DArr(dropoutPriorGradTotal, partialDropout); + combine2DArr(dropoutPriorGrad, partialDropout); } } @@ -775,9 +774,9 @@ public void calculate(double[] x) { Map partialDropout = result.fourth(); if (partialDropout != null) { if (isUnsup) { - combine2DArr(dropoutPriorGradTotal, partialDropout, unsupDropoutScale); + combine2DArr(dropoutPriorGrad, partialDropout, unsupDropoutScale); } else { - combine2DArr(dropoutPriorGradTotal, partialDropout); + combine2DArr(dropoutPriorGrad, partialDropout); } } @@ -806,7 +805,7 @@ public void calculate(double[] x) { for (int j = 0; j < E[i].length; j++) { // because we minimize -L(\theta) derivative[index] = (E[i][j] - Ehat[i][j]); - derivative[index] += dropoutScale * dropoutPriorGradTotal[i][j]; + derivative[index] += dropoutScale * dropoutPriorGrad[i][j]; if (VERBOSE) { System.err.println("deriv(" + i + "," + j + ") = " + E[i][j] + " - " + Ehat[i][j] + " = " + derivative[index]); } diff --git a/src/edu/stanford/nlp/ie/machinereading/structure/Span.java b/src/edu/stanford/nlp/ie/machinereading/structure/Span.java index ef39d7423b..5700a9129e 100644 --- a/src/edu/stanford/nlp/ie/machinereading/structure/Span.java +++ b/src/edu/stanford/nlp/ie/machinereading/structure/Span.java @@ -53,7 +53,20 @@ public static Span fromValues(int val1, int val2) { return new Span(val2, val1); } } - + + public static Span fromValues(Object... 
values) { + if (values.length != 2) { throw new IllegalArgumentException("fromValues() must take an array with 2 elements"); } + int val1; + if (values[0] instanceof Number) { val1 = ((Number) values[0]).intValue(); } + else if (values[0] instanceof String) { val1 = Integer.parseInt((String) values[0]); } + else { throw new IllegalArgumentException("Unknown value for span: " + values[0]); } + int val2; + if (values[1] instanceof Number) { val2 = ((Number) values[1]).intValue(); } + else if (values[1] instanceof String) { val2 = Integer.parseInt((String) values[1]); } + else { throw new IllegalArgumentException("Unknown value for span: " + values[1]); } + return fromValues(val1, val2); + } + public int start() { return start; } public int end() { return end; } diff --git a/src/edu/stanford/nlp/math/ArrayMath.java b/src/edu/stanford/nlp/math/ArrayMath.java index 4a9189e52c..a55a2b930f 100644 --- a/src/edu/stanford/nlp/math/ArrayMath.java +++ b/src/edu/stanford/nlp/math/ArrayMath.java @@ -434,20 +434,6 @@ public static void pairwiseMultiply(float[] a, float[] b, float[] result) { } } - /** - * Divide the first array by the second elementwise, - * and store results in place. 
Assume arrays have - * the same length - */ - public static void pairwiseDivideInPlace(double[] a, double[] b) { - if (a.length != b.length) { - throw new RuntimeException(); - } - for (int i = 0; i < a.length; i++) { - a[i] = a[i] / b[i]; - } - } - // ERROR CHECKING public static boolean hasNaN(double[] a) { @@ -2025,21 +2011,11 @@ public static void multiplyInto(double[] a, double[] b, double c) { * @param newSize */ public static double[] copyOf(double[] original, int newSize) { - double[] a = new double[newSize]; - System.arraycopy(original, 0, a, 0, original.length); - return a; + double[] a = new double[newSize]; + System.arraycopy(original, 0, a, 0, original.length); + return a; } - public static double entropy(double[] probs) { - double e = 0; - double p = 0; - for (int i = 0; i < probs.length; i++) { - p = probs[i]; - if (p != 0.0) - e -= p * Math.log(p); - } - return e; - } public static void assertFinite(double[] vector, String vectorName) throws InvalidElementException { for(int i=0; i indicesToCheck = new HashSet(); + List indicesToCheck = new ArrayList(); for (int paramIndex = 0; paramIndex < xLen; paramIndex+=interval) { indicesToCheck.add(paramIndex); } @@ -112,7 +110,7 @@ protected static void copy(double[] copy, double[] orig) { System.arraycopy(orig, 0, copy, 0, orig.length); } - public void ensure(double[] x) { + void ensure(double[] x) { if (Arrays.equals(x, lastX)) { return; } diff --git a/src/edu/stanford/nlp/optimization/SGDMinimizer.java b/src/edu/stanford/nlp/optimization/SGDMinimizer.java index 2fdc97833c..32320452a8 100644 --- a/src/edu/stanford/nlp/optimization/SGDMinimizer.java +++ b/src/edu/stanford/nlp/optimization/SGDMinimizer.java @@ -118,7 +118,6 @@ public static void main(String[] args) { final double[] grads = new double[dim]; final DiffFunction f = new DiffFunction() { - @Override public double[] derivativeAt(double[] x) { double val = Math.PI * valuePow(x, Math.PI - 1); for (int i = 0; i < dim; i++) { @@ -140,7 +139,6 @@ 
private double valuePow(double[] x, double pow) { return Math.pow(val * 0.5, pow); } - @Override public int domainDimension() { return dim; } diff --git a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.java b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.java index 3aedb107b9..c8a3fef73d 100644 --- a/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.java +++ b/src/edu/stanford/nlp/semgraph/semgrex/SemgrexPattern.java @@ -26,12 +26,12 @@ * * For example, {lemma:slice;tag:/VB.* /} represents any verb nodes * with "slice" as their lemma.

- * + * * The root of the graph can be marked by the $ sign, that is {$} * represents the root node.

- * + * * Relations are defined by a symbol representing the type of relationship and a - * string or regular expression representing the value of the relationship. A + * string, or regular expression representing the value of the relationship. A * relationship string of % means any relationship. It is * also OK simply to omit the relationship symbol altogether. *

@@ -53,12 +53,12 @@ * * In a chain of relations, all relations are relative to the first * node in the chain. For example, "{} >nsubj {} >dobj - * {}" means "any node that is the governor of both a nsubj and + * {}" means "any node that is the governor of both a nsubj and * a dobj relation". If instead what you want is a node that is the * governer of a nsubj relation with a node that is itself the * governer of dobj relation, you should write: "{} >nsubj * ({} >dobj {})".

- * + * If a relation type is specified for the << relation, the * relation type is only used for the first relation in the sequence. * Therefore, if B depends on A with the relation type foo, the @@ -116,7 +116,7 @@ * be stored in a map that maps names to nodes so that if a match is found, the * node corresponding to the named node can be extracted from the map. For * example ({tag:NN}=noun) will match a singular noun node and - * after a match is found, the map can be queried with the name to retrieved the + * after a match is found, the map can be queried with the name to retrieve the * matched node using {@link SemgrexMatcher#getNode(String o)} with (String) * argument "noun" (not "=noun"). Note that you are not allowed to * name a node that is under the scope of a negation operator (the semantics @@ -124,10 +124,10 @@ * Trying to do so will cause a {@link ParseException} to be thrown. Named nodes * can be put within the scope of an optionality operator.

* - * Named nodes that refer back to previously named nodes need not have a node + * Named nodes that refer back to previous named nodes need not have a node * description -- this is known as "backreferencing". In this case, the * expression will match only when all instances of the same name get matched to - * the same node. For example: the pattern + * the same node. For example: the pattern * {} >dobj ({} > {}=foo) >mod ({} > {}=foo) * will match a graph in which there are two nodes, X and * Y, for which X is the grandparent of @@ -138,11 +138,10 @@ * @author Chloe Kiddon */ public abstract class SemgrexPattern implements Serializable { - private static final long serialVersionUID = 1722052832350596732L; private boolean neg = false; private boolean opt = false; - private String patternString; // conceptually final, but can't do because of parsing + private String patternString; // package private constructor SemgrexPattern() { @@ -157,17 +156,17 @@ public abstract class SemgrexPattern implements Serializable { abstract void setChild(SemgrexPattern child); void negate() { - if (opt) { + neg = true; + if (neg && opt) { throw new RuntimeException("Node cannot be both negated and optional."); } - neg = true; } void makeOptional() { - if (neg) { + opt = true; + if (neg && opt) { throw new RuntimeException("Node cannot be both negated and optional."); } - opt = true; } boolean isNegated() { @@ -191,7 +190,7 @@ abstract SemgrexMatcher matcher(SemanticGraph sg, Alignment alignment, SemanticG /** * Get a {@link SemgrexMatcher} for this pattern in this graph. - * + * * @param sg * the SemanticGraph to match on * @return a SemgrexMatcher @@ -211,7 +210,7 @@ public SemgrexMatcher matcher(SemanticGraph sg, Map variabl /** * Get a {@link SemgrexMatcher} for this pattern in this graph. 
- * + * * @param sg * the SemanticGraph to match on * @param ignoreCase @@ -239,7 +238,7 @@ public SemgrexMatcher matcher(SemanticGraph hypGraph, Alignment alignment, Seman /** * Creates a pattern from the given string. - * + * * @param semgrex * the pattern string * @return a SemgrexPattern for the string. @@ -250,7 +249,7 @@ public static SemgrexPattern compile(String semgrex) { try { SemgrexParser parser = new SemgrexParser(new StringReader(semgrex + "\n")); SemgrexPattern newPattern = parser.Root(); - newPattern.patternString = semgrex; + newPattern.setPatternString(semgrex); return newPattern; } catch (ParseException ex) { throw new SemgrexParseException("Error parsing semgrex pattern " + semgrex, ex); @@ -263,6 +262,10 @@ public String pattern() { return patternString; } + public void setPatternString(String patternString) { + this.patternString = patternString; + } + // printing methods // ----------------------------------------------------------- @@ -270,42 +273,43 @@ public String pattern() { * @return A single-line string representation of the pattern */ @Override - public abstract String toString(); + abstract public String toString(); /** - * @param hasPrecedence indicates that this pattern has precedence in terms + * hasPrecedence indicates that this pattern has precedence in terms * of "order of operations", so there is no need to parenthesize the * expression */ - public abstract String toString(boolean hasPrecedence); + abstract public String toString(boolean hasPrecedence); private void prettyPrint(PrintWriter pw, int indent) { for (int i = 0; i < indent; i++) { pw.print(" "); } pw.println(localString()); - for (SemgrexPattern child : getChildren()) { + for (Iterator iter = getChildren().iterator(); iter.hasNext();) { + SemgrexPattern child = iter.next(); child.prettyPrint(pw, indent + 1); } } /** - * Print a multi-line representation of the pattern illustrating its syntax. 
+ * Print a multi-line representation of the pattern illustrating its syntax. */ public void prettyPrint(PrintWriter pw) { prettyPrint(pw, 0); } /** - * Print a multi-line representation of the pattern illustrating its syntax. + * Print a multi-line representation of the pattern illustrating its syntax. */ public void prettyPrint(PrintStream ps) { prettyPrint(new PrintWriter(new OutputStreamWriter(ps), true)); } /** - * Print a multi-line representation of the pattern illustrating its syntax - * to {@code System.out}. + * Print a multi-line representation of the pattern illustrating its syntax + * to System.out. */ public void prettyPrint() { prettyPrint(System.out); @@ -314,13 +318,16 @@ public void prettyPrint() { @Override public boolean equals(Object o) { if (!(o instanceof SemgrexPattern)) return false; - return o.toString().equals(this.toString()); + if (((SemgrexPattern) o).toString().equals(this.toString())) + return true; + else + return false; } @Override public int hashCode() { - // if (this == null) return 0; + if (this == null) return 0; return this.toString().hashCode(); - } + } } diff --git a/src/edu/stanford/nlp/sequences/ColumnDocumentReaderAndWriter.java b/src/edu/stanford/nlp/sequences/ColumnDocumentReaderAndWriter.java index 7f2a53525b..b3633d128d 100644 --- a/src/edu/stanford/nlp/sequences/ColumnDocumentReaderAndWriter.java +++ b/src/edu/stanford/nlp/sequences/ColumnDocumentReaderAndWriter.java @@ -45,7 +45,7 @@ public void init(SeqClassifierFlags flags) { public void init(String map) { // this.flags = null; this.map = StringUtils.mapStringToArray(map); - factory = DelimitRegExIterator.getFactory("\n(?:\\s*\n)+", new ColumnDocParser()); + factory = DelimitRegExIterator.getFactory("\n(\\s*\n)+", new ColumnDocParser()); } @Override diff --git a/src/edu/stanford/nlp/sequences/SeqClassifierFlags.java b/src/edu/stanford/nlp/sequences/SeqClassifierFlags.java index 77f6753b9a..5292472eb2 100644 --- 
a/src/edu/stanford/nlp/sequences/SeqClassifierFlags.java +++ b/src/edu/stanford/nlp/sequences/SeqClassifierFlags.java @@ -988,14 +988,6 @@ public class SeqClassifierFlags implements Serializable { public transient String serializeFeatureIndexTo = null; public String loadFeatureIndexFromEN = null; public String loadFeatureIndexFromCH = null; - public double lambdaEN = 1.0; - public double lambdaCH = 1.0; - public boolean alternateTraining = false; - public boolean weightByEntropy = false; - public boolean useKL = false; - public boolean useHardGE = false; - public boolean useCRFforUnsup = false; - public boolean useGEforSup = false; // "ADD VARIABLES ABOVE HERE" @@ -2462,22 +2454,6 @@ public void setProperties(Properties props, boolean printProps) { loadFeatureIndexFromEN = val; } else if (key.equalsIgnoreCase("loadFeatureIndexFromCH")){ loadFeatureIndexFromCH = val; - } else if (key.equalsIgnoreCase("lambdaEN")){ - lambdaEN = Double.parseDouble(val); - } else if (key.equalsIgnoreCase("lambdaCH")){ - lambdaCH = Double.parseDouble(val); - } else if (key.equalsIgnoreCase("alternateTraining")){ - alternateTraining = Boolean.parseBoolean(val); - } else if (key.equalsIgnoreCase("weightByEntropy")){ - weightByEntropy = Boolean.parseBoolean(val); - } else if (key.equalsIgnoreCase("useKL")){ - useKL = Boolean.parseBoolean(val); - } else if (key.equalsIgnoreCase("useHardGE")){ - useHardGE = Boolean.parseBoolean(val); - } else if (key.equalsIgnoreCase("useCRFforUnsup")){ - useCRFforUnsup = Boolean.parseBoolean(val); - } else if (key.equalsIgnoreCase("useGEforSup")){ - useGEforSup = Boolean.parseBoolean(val); // ADD VALUE ABOVE HERE } else if (key.length() > 0 && !key.equals("prop")) { diff --git a/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java b/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java index befa886484..cf019b0bf5 100644 --- a/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java +++ 
b/src/edu/stanford/nlp/trees/EnglishGrammaticalRelations.java @@ -936,8 +936,8 @@ public static class AdjectivalModifierGRAnnotation extends GrammaticalRelationAn "/^(?:WH)?(?:NP|NX|NML)(?:-TMP|-ADV)?$/ < (ADJP=target <: (QP !< /^[$]$/))", "/^(?:WH)?(?:NP|NX|NML)(?:-TMP|-ADV)?|(?:WH)?ADJP$/ < (QP < QP=target < /^[$]$/)", // Phrases such as $ 100 million get converted from (QP ($ $) (CD 100) (CD million)) to - // (QP ($ $) (QP (CD 100) (CD million))). This next tregex covers those phrases. - // Note that the earlier tregexes are usually enough to cover those phrases, such as when + // (QP ($ $) (QP (CD 100) (CD million))). This next tregex covers those phrases. + // Note that the earlier tregexes are usually enough to cover those phrases, such as when // the QP is by itself in an ADJP or NP, but sometimes it can have other siblings such // as in the phrase "$ 100 million or more". In that case, this next expression is needed. "QP < QP=target < /^[$]$/" @@ -1037,7 +1037,6 @@ public static class NounCompoundModifierGRAnnotation extends GrammaticalRelation // which is the normal case for such a pattern. "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (NP=target !<: CD $- /^,$/ $-- /^(?:WH)?NP/ !$ CC|CONJP)", "WHNP|WHNP-TMP|WHNP-ADV|NP|NP-TMP|NP-ADV < (PRN=target < (NP < /^(?:NN|CD)/ $-- /^-LRB-$/ $+ /^-RRB-$/))", - // Maybe delete '@' in next pattern, since not clearly appositional when NP-ADV or NP-TMP. But then what is it? "@WHNP|NP < (@NP=target !<: CD <, /^-LRB-$/ <` /^-RRB-$/ $-- /^(?:WH)?NP/ !$ CC|CONJP)", // TODO: next pattern with NNP doesn't work because leftmost NNP is deemed head in a // structure like (NP (NNP Norway) (, ,) (NNP Verdens_Gang) (, ,)) diff --git a/src/edu/stanford/nlp/trees/Tree.java b/src/edu/stanford/nlp/trees/Tree.java index 636cb6a420..adf65d9410 100644 --- a/src/edu/stanford/nlp/trees/Tree.java +++ b/src/edu/stanford/nlp/trees/Tree.java @@ -2313,8 +2313,7 @@ public Tree setChild(int i, Tree t) { * t.dominates(t) returns false. 
*/ public boolean dominates(Tree t) { - List dominationPath = dominationPath(t); - return dominationPath != null && dominationPath.size() > 1; + return !(dominationPath(t) == null); } /** diff --git a/src/edu/stanford/nlp/util/Characters.java b/src/edu/stanford/nlp/util/Characters.java index d7748568f2..866e9809f9 100644 --- a/src/edu/stanford/nlp/util/Characters.java +++ b/src/edu/stanford/nlp/util/Characters.java @@ -63,15 +63,4 @@ public static boolean isPunctuation(char c) { cType == Character.INITIAL_QUOTE_PUNCTUATION || cType == Character.FINAL_QUOTE_PUNCTUATION); } - - /** - * Returns true if a character is a control character, and - * false otherwise. - * - * @param c - * @return - */ - public static boolean isControl(char c) { - return Character.getType(c) == Character.CONTROL; - } } diff --git a/test/src/edu/stanford/nlp/trees/EnglishGrammaticalStructureTest.java b/test/src/edu/stanford/nlp/trees/EnglishGrammaticalStructureTest.java index 45c80712d0..71a5d462cc 100644 --- a/test/src/edu/stanford/nlp/trees/EnglishGrammaticalStructureTest.java +++ b/test/src/edu/stanford/nlp/trees/EnglishGrammaticalStructureTest.java @@ -642,49 +642,6 @@ public void testMoreBasicRelations() { } - /** - * Test the various verb "to be" cases in statements, questions, and imperatives. Added as part of the SD reform - * that abolished attr. - */ - public void testToBeRelations() { - // the trees to test - String[] testTrees = { - "(ROOT (S (NP (NNP Sue)) (VP (VBZ is) (VP (VBG speaking))) (. .)))", - "(ROOT (SBARQ (WHNP (WP Who)) (SQ (VBZ is) (VP (VBG speaking))) (. ?)))", - "(ROOT (S (VP (VB Be) (VP (VBG caring))) (. 
!)))", - }; - - // the expected dependency answers (basic) - String[] testAnswers = { - // "dobj(missed-6, Which-1)\n" + "nsubj(realized-4, I-2)\n" + "advmod(realized-4, then-3)\n" + "root(ROOT-0, realized-4)\n" + "nsubj(missed-6, I-5)\n" + "ccomp(realized-4, missed-6)\n", - "nsubj(speaking-3, Sue-1)\n" + - "aux(speaking-3, is-2)\n" + - "root(ROOT-0, speaking-3)\n", - "nsubj(speaking-3, Who-1)\n" + - "aux(speaking-3, is-2)\n" + - "root(ROOT-0, speaking-3)\n", - "aux(caring-2, Be-1)\n" + - "root(ROOT-0, caring-2)\n", - }; - - assertEquals("Test array lengths mismatch!", testTrees.length, testAnswers.length); - // TreeReaderFactory trf = new PennTreeReaderFactory(); - TreeReaderFactory trf = new NPTmpRetainingTreeNormalizer.NPTmpAdvRetainingTreeReaderFactory(); - for (int i = 0; i < testTrees.length; i++) { - String testTree = testTrees[i]; - String testAnswer = testAnswers[i]; - - // specifying our own TreeReaderFactory is vital so that functional - // categories - that is -TMP and -ADV in particular - are not stripped off - Tree tree = Tree.valueOf(testTree, trf); - GrammaticalStructure gs = new EnglishGrammaticalStructure(tree); - - assertEquals("Unexpected basic dependencies for tree " + testTree, - testAnswer, EnglishGrammaticalStructure.dependenciesToString(gs, gs.typedDependencies(false), tree, false, false)); - } - - } - /** * Tests that we can extract the basic grammatical relations correctly from * some hard-coded trees. 
diff --git a/test/src/edu/stanford/nlp/trees/TreeTest.java b/test/src/edu/stanford/nlp/trees/TreeTest.java index efe047b220..64e5addde8 100644 --- a/test/src/edu/stanford/nlp/trees/TreeTest.java +++ b/test/src/edu/stanford/nlp/trees/TreeTest.java @@ -77,14 +77,4 @@ public void testRemove() { assertEquals("ROOT", t.toString()); } - - public void testDominates() { - Tree t = Tree.valueOf("(A (B this) (C (D is) (E a) (F small)) (G test))"); - assertFalse(t.dominates(t)); - - for (Tree child : t.children()) { - assertTrue(t.dominates(child)); - assertFalse(child.dominates(t)); - } - } }