add boolean to indicate presence of coref info

starcount · Nov 1, 2017 · 94e5776 · 94e5776
1 parent 00dfa60
commit 94e5776
Show file tree

Hide file tree

Showing 11 changed files with 102 additions and 101 deletions.
diff --git a/doc/lexparser/pom.xml b/doc/lexparser/pom.xml
@@ -2,7 +2,7 @@
  <modelVersion>4.0.0</modelVersion>
  <groupId>edu.stanford.nlp</groupId>
  <artifactId>stanford-parser</artifactId>
- <version>3.7.0</version>
+ <version>3.8.0</version>
  <packaging>jar</packaging>
  <name>Stanford Parser</name>
  <description>Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.</description>
@@ -14,8 +14,8 @@
  </license>
  </licenses>
  <scm>
- <url>https://nlp.stanford.edu/software/stanford-parser-2016-10-31.zip</url>
- <connection>https://nlp.stanford.edu/software/stanford-parser-2016-10-31.zip</connection>
+ <url>https://nlp.stanford.edu/software/stanford-parser-2017-06-09.zip</url>
+ <connection>https://nlp.stanford.edu/software/stanford-parser-2017-06-09.zip</connection>
  </scm>
  <developers>
  <developer>
@@ -68,7 +68,7 @@
  <configuration>
  <artifacts>
  <artifact>
- <file>${project.basedir}/stanford-parser-3.7.0-models.jar</file>
+ <file>${project.basedir}/stanford-parser-3.8.0-models.jar</file>
  <type>jar</type>
  <classifier>models</classifier>
  </artifact>

diff --git a/doc/ner/ner-gui.bat b/doc/ner/ner-gui.bat
@@ -1 +1 @@
-java -mx1500m -cp "stanford-ner.jar;lib/*" edu.stanford.nlp.ie.crf.NERGUI
+java -mx1500m -cp "stanford-ner.jar;lib/*" edu.stanford.nlp.ie.crf.NERGUI
diff --git a/doc/ner/ner.bat b/doc/ner/ner.bat
@@ -1 +1 @@
-java -mx1000m -cp stanford-ner.jar;lib/* edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier classifiers\english.all.3class.distsim.crf.ser.gz -textFile %1
+java -mx1000m -cp stanford-ner.jar;lib/* edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier classifiers\english.all.3class.distsim.crf.ser.gz -textFile %1
diff --git a/doc/segmenter/README-Arabic.txt b/doc/segmenter/README-Arabic.txt
@@ -1,4 +1,4 @@
-Stanford Arabic Segmenter - v3.7.0 - 2016-10-31
+Stanford Arabic Segmenter - v3.8.0 - 2017-06-09
 --------------------------------------
 
 (c) 2012 The Board of Trustees of The Leland Stanford Junior University.

diff --git a/doc/segmenter/README-Chinese.txt b/doc/segmenter/README-Chinese.txt
@@ -1,4 +1,4 @@
-Stanford Chinese Segmenter - v3.7.0 - 2016-10-31
+Stanford Chinese Segmenter - v3.8.0 - 2017-06-09
 --------------------------------------------
 
 (c) 2003-2012 The Board of Trustees of The Leland Stanford Junior University.

diff --git a/doc/segmenter/segment-05202008.bat b/doc/segmenter/segment-05202008.bat
@@ -1,84 +1,84 @@
-@echo off
-
-:: Usage: "segment ctb|pk filename encoding kBest"
-:: encoding can be UTF-8 or GB18030 or GB
-
-if "%4"=="" (
- echo Too few arguments
- call :usage %~nx0
- goto :EOF
- )
-if not "%6"=="" (
- echo Too many arguments
- call :usage %~nx0
- goto :EOF
- )
-
-set ARGS=-keepAllWhitespaces false
-if not "%5"=="" (
- if not "%1"=="-k" (
- echo First argument must be "-k"
- call :usage %~nx0
- goto :EOF
- )
- set ARGS=-keepAllWhitespaces true
- set lang=%~2
- set file=%~3
- set enc=%~4
- set kBest=%~5
-) else (
- if not "%4"=="" (
- set lang=%~1
- set file=%~2
- set enc=%~3
- set kBest=%~4
- ) else (
- echo Unknown argument error
- call :usage %~nx0
- goto :EOF
- )
- )
-
-if "%lang%"=="ctb" (
- echo CTB: Chinese Treebank segmentation >&2
-) else (
- if "%lang%"=="pku" (
- echo PKU: Beijing University segmentation >&2
- ) else (
- echo Language argument should be either ctb or pku. Abort
- goto :EOF
- )
- )
-
-echo File: "%file%" >&2
-echo Encoding: "%enc%" >&2
-echo kBest: "%kBest%" >&2
-echo ------------------------------- >&2
-
-set BASEDIR=%~dp0
-set DATADIR=%BASEDIR%data
-:: set LEXDIR=%DATADIR%lexicons
-set JAVACMD=java -mx1024m -cp "%BASEDIR%*;" edu.stanford.nlp.ie.crf.CRFClassifier -sighanCorporaDict "%DATADIR%" -textFile "%file%" -inputEncoding %enc% -sighanPostProcessing true %ARGS%
-set DICTS=%DATADIR%\dict-chris6.ser.gz
-set KBESTCMD=
-if not %kBest%==0 set kBestCmd=-kBest %kBest%
-
-if "%lang%"=="ctb" (
- %JAVACMD% -loadClassifier "%DATADIR%\%lang%.gz" -serDictionary "%DICTS%" "%KBESTCMD%"
- )
-if "%lang%"=="pku" (
- %JAVACMD% -loadClassifier "%DATADIR%\%lang%.gz" -serDictionary "%DICTS%" "%KBESTCMD%"
- )
-
-goto :EOF
-
-:usage
- echo Usage: "%1 [-k] ctb|pku filename encoding kBest" >&2
- echo -k : keep whitespaces >&2
- echo ctb : use Chinese Treebank segmentation >&2
- echo pku : Beijing University segmentation >&2
- echo kBest: print kBest best segmenations; 0 means kBest mode is off. >&2
- echo. >&2
- echo Example: %1 ctb test.simp.utf8 UTF-8 0 >&2
- echo Example: %1 pku test.simp.utf8 UTF-8 0 >&2
- goto :EOF
+@echo off
+
+:: Usage: "segment [-k] ctb|pku filename encoding kBest"
+:: encoding can be UTF-8 or GB18030 or GB
+:: Runs edu.stanford.nlp.ie.crf.CRFClassifier on one file with the ctb or pku model.
+if "%4"=="" (
+ echo Too few arguments
+ call :usage %~nx0
+ goto :EOF
+ )
+if not "%6"=="" (
+ echo Too many arguments
+ call :usage %~nx0
+ goto :EOF
+ )
+:: Default to -keepAllWhitespaces false; the five-argument "-k" form switches it to true.
+set ARGS=-keepAllWhitespaces false
+if not "%5"=="" (
+ if not "%1"=="-k" (
+ echo First argument must be "-k"
+ call :usage %~nx0
+ goto :EOF
+ )
+ set ARGS=-keepAllWhitespaces true
+ set lang=%~2
+ set file=%~3
+ set enc=%~4
+ set kBest=%~5
+) else (
+ if not "%4"=="" (
+ set lang=%~1
+ set file=%~2
+ set enc=%~3
+ set kBest=%~4
+ ) else (
+ echo Unknown argument error
+ call :usage %~nx0
+ goto :EOF
+ )
+ )
+:: Report the selected model on stderr; any language other than ctb or pku aborts.
+if "%lang%"=="ctb" (
+ echo CTB: Chinese Treebank segmentation >&2
+) else (
+ if "%lang%"=="pku" (
+ echo PKU: Beijing University segmentation >&2
+ ) else (
+ echo Language argument should be either ctb or pku. Abort
+ goto :EOF
+ )
+ )
+
+echo File: "%file%" >&2
+echo Encoding: "%enc%" >&2
+echo kBest: "%kBest%" >&2
+echo ------------------------------- >&2
+
+set BASEDIR=%~dp0
+set DATADIR=%BASEDIR%data
+:: set LEXDIR=%DATADIR%lexicons
+set JAVACMD=java -mx1024m -cp "%BASEDIR%*;" edu.stanford.nlp.ie.crf.CRFClassifier -sighanCorporaDict "%DATADIR%" -textFile "%file%" -inputEncoding %enc% -sighanPostProcessing true %ARGS%
+set DICTS=%DATADIR%\dict-chris6.ser.gz
+set KBESTCMD=
+if not "%kBest%"=="0" set KBESTCMD=-kBest %kBest%
+:: KBESTCMD is expanded unquoted below so "-kBest N" arrives as two arguments, and as nothing when kBest is 0.
+if "%lang%"=="ctb" (
+ %JAVACMD% -loadClassifier "%DATADIR%\%lang%.gz" -serDictionary "%DICTS%" %KBESTCMD%
+ )
+if "%lang%"=="pku" (
+ %JAVACMD% -loadClassifier "%DATADIR%\%lang%.gz" -serDictionary "%DICTS%" %KBESTCMD%
+ )
+
+goto :EOF
+
+:usage
+ echo Usage: "%1 [-k] ctb|pku filename encoding kBest" >&2
+ echo -k : keep whitespaces >&2
+ echo ctb : use Chinese Treebank segmentation >&2
+ echo pku : Beijing University segmentation >&2
+ echo kBest: print kBest best segmenations; 0 means kBest mode is off. >&2
+ echo. >&2
+ echo Example: %1 ctb test.simp.utf8 UTF-8 0 >&2
+ echo Example: %1 pku test.simp.utf8 UTF-8 0 >&2
+ goto :EOF
diff --git a/doc/tregex/README-tregex.txt b/doc/tregex/README-tregex.txt
@@ -1,4 +1,4 @@
-Tregex v3.7.0 - 2016-10-31
+Tregex v3.8.0 - 2017-06-09
 ----------------------------------------------
 
 Copyright (c) 2003-2012 The Board of Trustees of 

diff --git a/doc/tsurgeon/README-tsurgeon.txt b/doc/tsurgeon/README-tsurgeon.txt
@@ -1,4 +1,4 @@
-Tsurgeon v3.7.0 - 2016-10-31
+Tsurgeon v3.8.0 - 2017-06-09
 ----------------------------------------------
 
 Copyright (c) 2003-2012 The Board of Trustees of 

diff --git a/scripts/lexparser/lexparser-gui.bat b/scripts/lexparser/lexparser-gui.bat
@@ -1,3 +1,3 @@
-:: runs the parser GUI
-:: usage lexparser-gui [parserDataFilename [textFileName]]
-java -mx800m -cp "*" edu.stanford.nlp.parser.ui.Parser
+:: Runs the parser GUI; any command-line arguments are forwarded to the Parser class.
+:: usage: lexparser-gui [parserDataFilename [textFileName]]
+java -mx800m -cp "*" edu.stanford.nlp.parser.ui.Parser %*
diff --git a/scripts/lexparser/lexparser.bat b/scripts/lexparser/lexparser.bat
@@ -1,4 +1,4 @@
-@echo off
-:: Runs the English PCFG parser on one or more files, printing trees only
-:: usage: lexparser fileToparse
-java -mx150m -cp "*;" edu.stanford.nlp.parser.lexparser.LexicalizedParser -outputFormat "penn,typedDependencies" edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz %1
+@echo off
+:: Runs the English PCFG parser on one or more files, printing Penn trees and typed dependencies
+:: usage: lexparser fileToParse [moreFilesToParse...]
+java -mx150m -cp "*;" edu.stanford.nlp.parser.lexparser.LexicalizedParser -outputFormat "penn,typedDependencies" edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz %*
diff --git a/src/edu/stanford/nlp/pipeline/CoreNLP.proto b/src/edu/stanford/nlp/pipeline/CoreNLP.proto
@@ -59,6 +59,7 @@ message Document {
 
  /** coref mentions for entire document **/
  repeated Mention mentionsForCoref = 14;
+ optional bool hasCorefAnnotation = 15;
 
  extensions 100 to 255;
 }