first version of enhanced/enhanced++ UD conversion
sebschu authored and Stanford NLP committed May 26, 2016
1 parent e9529fd commit 19a9a58
Showing 141 changed files with 72,317 additions and 74,395 deletions.
12 changes: 0 additions & 12 deletions README.md
@@ -5,24 +5,12 @@ Stanford CoreNLP provides a set of natural language analysis tools written in Ja

The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute to others.

#### How To Compile (with ant)

1. cd CoreNLP ; ant

#### How To Create A Jar

1. compile the code
2. cd CoreNLP/classes ; jar -cf ../stanford-corenlp.jar edu
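Spelled out, the two steps being removed here amount to the following commands (assuming `ant` and a JDK are on the PATH and the checkout directory is named CoreNLP):

```shell
# Step 1: compile; build.xml's default target writes .class files under CoreNLP/classes
cd CoreNLP
ant

# Step 2: package the compiled edu/... class tree into a jar one directory up
cd classes
jar -cf ../stanford-corenlp.jar edu
```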

You can find releases of Stanford CoreNLP on [Maven Central](https://search.maven.org/#browse%7C11864822).

You can find more explanation and documentation on [the Stanford CoreNLP homepage](https://nlp.stanford.edu/software/corenlp.shtml#Demo).

The most recent models associated with the code in the HEAD of this repository can be found [here](https://nlp.stanford.edu/software/stanford-corenlp-models-current.jar).

Some of the larger (English) models -- like the shift-reduce parser and WikiDict -- are not distributed with our default models jar.
The most recent version of these models can be found [here](https://nlp.stanford.edu/software/stanford-english-corenlp-models-current.jar).

For information about making contributions to Stanford CoreNLP, see the file [CONTRIBUTING.md](CONTRIBUTING.md).

Questions about CoreNLP can either be posted on StackOverflow with the tag [stanford-nlp](https://stackoverflow.com/questions/tagged/stanford-nlp),
7 changes: 1 addition & 6 deletions build.xml
@@ -133,11 +133,6 @@
<exclude name="**/*.java"/>
</fileset>
</copy>
<copy todir="${build.path}/edu/stanford/nlp/pipeline">
<fileset dir="${source.path}/edu/stanford/nlp/pipeline">
<exclude name="**/*.java"/>
</fileset>
</copy>
</target>

<target name="test" depends="classpath,compile"
@@ -178,7 +173,7 @@
<target name="slowitest" depends="classpath,compile"
description="Run really slow integration tests">
<echo message="${ant.project.name}" />
<junit fork="yes" maxmemory="12g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="true">
<junit fork="yes" maxmemory="8g" printsummary="off" outputtoformatters="false" forkmode="perTest" haltonfailure="true">
<classpath refid="classpath"/>
<classpath path="${build.path}"/>
<classpath path="${data.path}"/>
6 changes: 0 additions & 6 deletions doc/corenlp/META-INF/MANIFEST.MF

This file was deleted.

5 changes: 0 additions & 5 deletions doc/corenlp/pom-full.xml
Expand Up @@ -65,11 +65,6 @@
<artifactId>slf4j-api</artifactId>
<version>1.7.12</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>2.6.1</version>
</dependency>
</dependencies>
<build>
<sourceDirectory>src</sourceDirectory>
Expand Down
1,088 changes: 544 additions & 544 deletions doc/releasenotes/v3.6.0/classifier.out

2,297 changes: 1,144 additions & 1,153 deletions doc/releasenotes/v3.6.0/corenlp.out

2,065 changes: 1,032 additions & 1,033 deletions doc/releasenotes/v3.6.0/ner.out

1,944 changes: 972 additions & 972 deletions doc/releasenotes/v3.6.0/parser.out

1,398 changes: 699 additions & 699 deletions doc/releasenotes/v3.6.0/segmenter.out

1,138 changes: 587 additions & 551 deletions doc/releasenotes/v3.6.0/tagger.full.out

1,236 changes: 618 additions & 618 deletions doc/releasenotes/v3.6.0/tagger.out

1,028 changes: 514 additions & 514 deletions doc/releasenotes/v3.6.0/tregex.out

52 changes: 8 additions & 44 deletions itest/src/edu/stanford/nlp/ie/crf/CRFClassifierITest.java
@@ -4,8 +4,6 @@
import java.util.Arrays;
import java.util.List;

import junit.framework.TestCase;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.SentenceUtils;
@@ -17,6 +15,7 @@
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Triple;
import junit.framework.TestCase;


/** Test some of the methods of CRFClassifier.
@@ -26,7 +25,6 @@
public class CRFClassifierITest extends TestCase {

private static final String nerPath = "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz";
private static final String caselessPath = "/u/nlp/data/ner/classifiers-2014-08-31/english.all.3class.caseless.distsim.crf.ser.gz";

/* The extra spaces and tab (after fate) are there to test space preservation.
* Each item of the top level array is an array of 7 Strings:
@@ -252,17 +250,8 @@ public class CRFClassifierITest extends TestCase {
},
};

private static final String[][] caselessTests = {
{ "AISLINN JEWEL Y. CAPAROSO AND REV. CARMELO B. CAPAROSS ARE UPPERCASE NAMES.",
"AISLINN/PERSON JEWEL/PERSON Y./PERSON CAPAROSO/PERSON AND/O REV./O CARMELO/PERSON B./PERSON CAPAROSS/PERSON ARE/O UPPERCASE/O NAMES/O ./O \n" },
{ "Aislinn Jewel Y. Caparoso and Rev. Carmelo B. Capaross are names.",
"Aislinn/PERSON Jewel/PERSON Y./PERSON Caparoso/PERSON and/O Rev./O Carmelo/PERSON B./PERSON Capaross/PERSON are/O names/O ./O \n" },
{ "aislinn jewel y. caparoso and rev. carmelo b. capaross are lowercase names.",
"aislinn/PERSON jewel/PERSON y./PERSON caparoso/PERSON and/O rev./O carmelo/PERSON b./PERSON capaross/PERSON are/O lowercase/O names/O ./O \n" },
};

/** Each of these array entries corresponds to one of the inputs in testTexts,
* and gives the entity output as entity type and character offset triples.
/* Each of these array entries corresponds to one of the inputs in testTexts,
* and gives the entity output as entity type and character offset triples.
*/
@SuppressWarnings({"unchecked"})
private static final Triple[][] testTrip =
@@ -296,34 +285,13 @@ public void testCRF() {
crf = CRFClassifier.getDefaultClassifier();
runCRFTest(crf);

final boolean isStoredAnswer = Boolean.valueOf(System.getProperty("ner.useStoredAnswer", "false"));
String txt1 = "Jenny Finkel works for Mixpanel in San Francisco .";
if (isStoredAnswer) {
crf = CRFClassifier.getClassifierNoExceptions(nerPath2);
}
runKBestTest(crf, txt1, isStoredAnswer);

CRFClassifier<CoreLabel> crfCaseless = CRFClassifier.getClassifierNoExceptions(
System.getProperty("ner.caseless.model", caselessPath));
runSimpleCRFTest(crfCaseless, caselessTests);
runKBestTest(crf, txt1, false);
}


private static void runSimpleCRFTest(CRFClassifier<CoreLabel> crf, String[][] testTexts) {
for (int i = 0; i < testTexts.length; i++) {
String[] testText = testTexts[i];
assertEquals("Wrong array size in test", 2, testText.length);

String out = crf.classifyToString(testText[0], "slashTags", false).replaceAll("\r", "");
// System.out.println("Gold: |" + testText[5] + "|");
// System.out.println("Guess: |" + out + "|");
assertEquals("CRF buggy on classifyToString(slashTags, false)", testText[1], out);

}
}


private static void runCRFTest(CRFClassifier<CoreLabel> crf) {
@SuppressWarnings({"AssertEqualsBetweenInconvertibleTypes"})
public static void runCRFTest(CRFClassifier<CoreLabel> crf) {
for (int i = 0; i < testTexts.length; i++) {
String[] testText = testTexts[i];

@@ -378,9 +346,7 @@ private static void runCRFTest(CRFClassifier<CoreLabel> crf) {
}
}

/** adapt changes from {@code Counter<int[]>} to an ordered {@code List<Pair<CRFLabel, Double>>} to make comparisons
* easier for the asserts.
*/
/** adapt changes from Counter<int[]> to an ordered List<Pair<CRFLabel, Double>> to make comparisons easier for the asserts. */
private static List<Pair<CRFLabel, Double>> adapt(Counter<int[]> in) {
List<Pair<int[], Double>> mid = Counters.toSortedListWithCounts(in);
List<Pair<CRFLabel, Double>> ret = new ArrayList<>();
@@ -390,9 +356,7 @@ private static List<Pair<CRFLabel, Double>> adapt(Counter<int[]> in) {
return ret;
}

/** adapt2 changes from {@code Pair<List<CoreLabel>, Double>} to {@code Pair<List<String>, Double>} to make printout
* better.
*/
/** adapt2 changes from Pair<List<CoreLabel>, Double> to Pair<List<String>, Double> to make printout better. */
private static List<Pair<List<String>, Double>> adapt2(List<Pair<List<CoreLabel>, Double>> in) {
List<Pair<List<String>, Double>> ret = new ArrayList<>();
for (Pair<List<CoreLabel>, Double> pair : in) {
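The removed caselessTests above use CoreNLP's "slashTags" output format, in which `classifyToString` suffixes each token with its entity label (e.g. `Aislinn/PERSON and/O`). As a standalone illustration of that format only — a hypothetical helper, not CoreNLP code — it can be split back into (token, label) pairs like this:

```java
import java.util.ArrayList;
import java.util.List;

/** Minimal sketch (not part of CoreNLP): parse slashTags output such as
 *  "Aislinn/PERSON and/O names/O ./O" into (token, label) pairs. */
public class SlashTags {
    public static List<String[]> parse(String tagged) {
        List<String[]> pairs = new ArrayList<>();
        for (String item : tagged.trim().split("\\s+")) {
            // The label follows the LAST slash, so tokens like "." survive intact.
            int slash = item.lastIndexOf('/');
            pairs.add(new String[] { item.substring(0, slash), item.substring(slash + 1) });
        }
        return pairs;
    }
}
```

Splitting on the last slash rather than the first is what keeps tokens containing `/` from being truncated.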
13 changes: 6 additions & 7 deletions itest/src/edu/stanford/nlp/pipeline/StanfordCoreNLPITest.java
@@ -2,6 +2,7 @@

import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ie.machinereading.structure.RelationMention;
import edu.stanford.nlp.ling.CoreAnnotation;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
@@ -10,8 +11,7 @@
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;

import org.junit.Assert;
import junit.framework.Assert;
import junit.framework.TestCase;

import java.io.*;
@@ -367,7 +367,7 @@ public void testSentenceNewlinesThree() {
Assert.assertEquals("Wrong number of sentTokens: " + sentTokens, 11, sentTokens.size());
}

private static void checkSUTimeAnnotation(String message,
private void checkSUTimeAnnotation(String message,
StanfordCoreNLP pipeline, String text,
int nExpectedSentences, int nExpectedTokens,
Map<Integer,String> expectedNormalizedNER) {
Expand Down Expand Up @@ -397,7 +397,7 @@ public void testSUTimeProperty() {

// CoreNLP without properties
StanfordCoreNLP pipeline1 = new StanfordCoreNLP();
Map<Integer,String> expectedValues1 = new HashMap<>();
Map<Integer,String> expectedValues1 = new HashMap<Integer,String>();
expectedValues1.put(3, "2001-10-02");
expectedValues1.put(9, "OFFSET P1D");
checkSUTimeAnnotation("Default properties", pipeline1, text, nExpectedSentences, nExpectedTokens, expectedValues1);
@@ -407,10 +407,9 @@
props.setProperty("sutime.searchForDocDate", "true");

StanfordCoreNLP pipeline2 = new StanfordCoreNLP(props);
Map<Integer,String> expectedValues2 = new HashMap<>();
Map<Integer,String> expectedValues2 = new HashMap<Integer,String>();
expectedValues2.put(3, "2001-10-02");
expectedValues2.put(9, "2001-10-03");
checkSUTimeAnnotation("With searchForDocDate", pipeline2, text, nExpectedSentences, nExpectedTokens, expectedValues2);
}

}
}
18 changes: 9 additions & 9 deletions itest/src/edu/stanford/nlp/time/SUTimeITest.java
@@ -1004,8 +1004,8 @@ public void testSUTimeDateTime() throws IOException {
"It happened late this afternoon.\n" +
"It happened at 1800 hours.\n" +
"The early nineteen fifties.\n" +
"The story broke in the last week of October.\n" +
"It was 7pm and then 7:20pm.";
"The story broke in the last week of October.\n";
// "It was 7pm and then 7:20pm."; // TODO: re-enable me

// set up expected results
Iterator<Timex> expectedTimexes =
@@ -1021,9 +1021,9 @@
Timex.fromXml("<TIMEX3 tid=\"t12\" alt_value=\"THIS AF\" type=\"DATE\" mod=\"LATE\" temporalFunction=\"true\" valueFromFunction=\"tf2\" anchorTimeID=\"t0\">late this afternoon</TIMEX3>"), // TODO: time
Timex.fromXml("<TIMEX3 tid=\"t13\" value=\"T18:00\" type=\"TIME\">1800 hours</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t14\" value=\"195X\" type=\"DATE\" mod=\"EARLY\">The early nineteen fifties</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t15\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf3\" anchorTimeID=\"t16\">the last week of October</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t17\" value=\"T19:00\" type=\"TIME\">7pm</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t18\" value=\"T19:20\" type=\"TIME\">7:20pm.</TIMEX3>") // TODO: the period should be dropped
Timex.fromXml("<TIMEX3 tid=\"t15\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf3\" anchorTimeID=\"t16\">the last week of October</TIMEX3>")
// Timex.fromXml("<TIMEX3 tid=\"t17\" value=\"T19:00\" type=\"TIME\">7pm</TIMEX3>"),
// Timex.fromXml("<TIMEX3 tid=\"t18\" value=\"T19:20\" type=\"TIME\">7:20pm.</TIMEX3>") // TODO: the period should be dropped
).iterator();

Iterator<Timex> expectedTimexesResolved =
@@ -1039,9 +1039,9 @@
Timex.fromXml("<TIMEX3 tid=\"t10\" value=\"2005-08-12TAF\" type=\"TIME\" mod=\"LATE\">late this afternoon</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t11\" value=\"2005-08-12T18:00\" type=\"TIME\">1800 hours</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t12\" value=\"195X\" type=\"DATE\" mod=\"EARLY\">The early nineteen fifties</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t13\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t14\">the last week of October</TIMEX3>"), // TODO: Resolve
Timex.fromXml("<TIMEX3 tid=\"t15\" value=\"2005-08-12T19:00\" type=\"TIME\">7pm</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t16\" value=\"2005-08-12T19:20\" type=\"TIME\">7:20pm.</TIMEX3>") // TODO: the period should be dropped
Timex.fromXml("<TIMEX3 tid=\"t13\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t14\">the last week of October</TIMEX3>") // TODO: Resolve
// Timex.fromXml("<TIMEX3 tid=\"t15\" value=\"2005-08-12T19:00\" type=\"TIME\">7pm</TIMEX3>"),
// Timex.fromXml("<TIMEX3 tid=\"t16\" value=\"2005-08-12T19:20\" type=\"TIME\">7:20pm.</TIMEX3>") // TODO: the period should be dropped
).iterator();

// create document
@@ -1069,7 +1069,7 @@
}

// TODO: Re-enable me
public void testSUTimeDateTime2() throws IOException {
public void _testSUTimeDateTime2() throws IOException {
// Set up test text
String testText = "The meeting is scheduled for 09/18/05 or 18 Sep '05.\n" +
"1 year ago tomorrow.\n" +
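The expectations disabled above anchor bare clock times such as "7pm" to the document date (2005-08-12), yielding values like `2005-08-12T19:00`. A minimal standalone sketch of that resolution step using java.time — not SUTime code, and the method name is invented for illustration:

```java
import java.time.LocalDate;
import java.time.LocalDateTime;
import java.time.LocalTime;

/** Sketch (not SUTime): resolve a bare clock time like "7pm" or "7:20pm"
 *  against a document date, producing an ISO-8601 local date-time. */
public class ResolveTime {
    public static LocalDateTime resolve(String time, LocalDate docDate) {
        String lower = time.toLowerCase();
        // Strip the am/pm suffix, then split optional minutes off the hour.
        String t = lower.replace("pm", "").replace("am", "").trim();
        String[] parts = t.split(":");
        int hour = Integer.parseInt(parts[0]);
        int minute = parts.length > 1 ? Integer.parseInt(parts[1]) : 0;
        if (lower.contains("pm") && hour < 12) hour += 12;  // 7pm -> 19:00
        return LocalDateTime.of(docDate, LocalTime.of(hour, minute));
    }
}
```

The real SUTime pipeline does considerably more (ranges, offsets like "OFFSET P1D", mod attributes); this only shows the anchoring of a time-of-day to a reference date.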
32 changes: 19 additions & 13 deletions liblocal/README
@@ -13,32 +13,34 @@ DESCRIPTION: ANTLR runtime, for compiled software

URL: https://www.antlr.com

USED BY: The Quickcheck library (not directly used in Stanford NLP code)
USED BY:
The Quickcheck library

LAST UPDATE: 2015/10/5

LAST UPDATE BY: Keenon Werling

-----------------------------------------------------------------------
java-hamcrest.jar
hamcrest-core.jar

ORIGINAL JAR NAME: java-hamcrest-2.0.0.0.jar
ORIGINAL JAR NAME: hamcrest-core-1.3.jar

VERSION: 2.0.0.0
VERSION: 1.3

RELEASE DATE: January 2015
RELEASE DATE: Jul, 2010

SOURCE AVAILABLE: Maven Central

DESCRIPTION: Hamcrest shenanigans, for JUnit

URL: https://www.hamcrest.org

USED BY: The JUnit library (not directly used in Stanford NLP code)
USED BY:
The JUnit library

LAST UPDATE: 2016-04-30
LAST UPDATE: 2015/10/5

LAST UPDATE BY: John Bauer
LAST UPDATE BY: Keenon Werling

-----------------------------------------------------------------------
javaruntype.jar
@@ -55,7 +57,8 @@ DESCRIPTION: Something for Quickcheck

URL: https://www.javaruntype.org

USED BY: The Quickcheck library (not directly used in Stanford NLP code)
USED BY:
The Quickcheck library

LAST UPDATE: 2015/10/5

@@ -76,7 +79,8 @@ DESCRIPTION: Quickcheck, runs random inputs and validates outputs

URL: https://github.com/pholser/junit-quickcheck

USED BY: loglinear package tests
USED BY:
The Quickcheck library

LAST UPDATE: 2015/10/5

@@ -93,7 +97,7 @@ RELEASE DATE: Nov, 2013

SOURCE AVAILABLE: Maven Central

DESCRIPTION: loglinear package tests
DESCRIPTION: Quickcheck, runs random inputs and validates outputs

URL: https://github.com/pholser/junit-quickcheck

@@ -119,7 +123,8 @@ DESCRIPTION: JUnit theories run JUnit against a number of inputs

URL: junit.org

USED BY: loglinear package tests
USED BY:
The Quickcheck library

LAST UPDATE: 2015/10/5

@@ -140,7 +145,8 @@ DESCRIPTION: Object graph navigation library, used by Quickcheck

URL: https://commons.apache.org/proper/commons-ognl/

USED BY: The Quickcheck library (not directly used in Stanford NLP code)
USED BY:
The Quickcheck library

LAST UPDATE: 2015/10/5

Binary file added liblocal/hamcrest-core.jar
Binary file not shown.
Binary file removed liblocal/java-hamcrest.jar
Binary file not shown.
Binary file removed libsrc/java-hamcrest-sources.jar
Binary file not shown.