Skip to content

Commit

Permalink
add boolean to indicate presence of coref info
Browse files Browse the repository at this point in the history
  • Loading branch information
J38 authored and Stanford NLP committed Nov 1, 2017
1 parent 00dfa60 commit 94e5776
Show file tree
Hide file tree
Showing 11 changed files with 102 additions and 101 deletions.
8 changes: 4 additions & 4 deletions doc/lexparser/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
<modelVersion>4.0.0</modelVersion>
<groupId>edu.stanford.nlp</groupId>
<artifactId>stanford-parser</artifactId>
<version>3.7.0</version>
<version>3.8.0</version>
<packaging>jar</packaging>
<name>Stanford Parser</name>
<description>Stanford Parser processes raw text in English, Chinese, German, Arabic, and French, and extracts constituency parse trees.</description>
Expand All @@ -14,8 +14,8 @@
</license>
</licenses>
<scm>
<url>https://nlp.stanford.edu/software/stanford-parser-2016-10-31.zip</url>
<connection>https://nlp.stanford.edu/software/stanford-parser-2016-10-31.zip</connection>
<url>https://nlp.stanford.edu/software/stanford-parser-2017-06-09.zip</url>
<connection>https://nlp.stanford.edu/software/stanford-parser-2017-06-09.zip</connection>
</scm>
<developers>
<developer>
Expand Down Expand Up @@ -68,7 +68,7 @@
<configuration>
<artifacts>
<artifact>
<file>${project.basedir}/stanford-parser-3.7.0-models.jar</file>
<file>${project.basedir}/stanford-parser-3.8.0-models.jar</file>
<type>jar</type>
<classifier>models</classifier>
</artifact>
Expand Down
2 changes: 1 addition & 1 deletion doc/ner/ner-gui.bat
Original file line number Diff line number Diff line change
@@ -1 +1 @@
:: Starts the NER GUI: runs the class edu.stanford.nlp.ie.crf.NERGUI with
:: stanford-ner.jar and every jar under lib on the classpath.
:: The classpath is quoted and uses the Windows ";" separator.
java -mx1500m -cp "stanford-ner.jar;lib/*" edu.stanford.nlp.ie.crf.NERGUI
:: Starts the NER GUI: runs the class edu.stanford.nlp.ie.crf.NERGUI with
:: stanford-ner.jar and every jar under lib on the classpath.
:: The classpath is quoted and uses the Windows ";" separator.
java -mx1500m -cp "stanford-ner.jar;lib/*" edu.stanford.nlp.ie.crf.NERGUI
2 changes: 1 addition & 1 deletion doc/ner/ner.bat
Original file line number Diff line number Diff line change
@@ -1 +1 @@
:: Runs edu.stanford.nlp.ie.crf.CRFClassifier on the text file named by the
:: first script argument, loading the english.all.3class.distsim classifier
:: from the classifiers directory.
:: NOTE(review): the first argument is passed unquoted, so a path containing
:: spaces would reach java as several arguments - confirm whether that matters.
java -mx1000m -cp stanford-ner.jar;lib/* edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier classifiers\english.all.3class.distsim.crf.ser.gz -textFile %1
:: Runs edu.stanford.nlp.ie.crf.CRFClassifier on one text file.
:: Usage: ner filename
:: The classifier english.all.3class.distsim.crf.ser.gz is loaded from the
:: classifiers directory. The file name is re-quoted so that paths containing
:: spaces reach java as a single argument, and the classpath is quoted the
:: same way ner-gui.bat quotes it.
java -mx1000m -cp "stanford-ner.jar;lib/*" edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier classifiers\english.all.3class.distsim.crf.ser.gz -textFile "%~1"
2 changes: 1 addition & 1 deletion doc/segmenter/README-Arabic.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Stanford Arabic Segmenter - v3.7.0 - 2016-10-31
Stanford Arabic Segmenter - v3.8.0 - 2017-06-09
--------------------------------------

(c) 2012 The Board of Trustees of The Leland Stanford Junior University.
Expand Down
2 changes: 1 addition & 1 deletion doc/segmenter/README-Chinese.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Stanford Chinese Segmenter - v3.7.0 - 2016-10-31
Stanford Chinese Segmenter - v3.8.0 - 2017-06-09
--------------------------------------------

(c) 2003-2012 The Board of Trustees of The Leland Stanford Junior University.
Expand Down
168 changes: 84 additions & 84 deletions doc/segmenter/segment-05202008.bat
Original file line number Diff line number Diff line change
@@ -1,84 +1,84 @@
@echo off

:: Runs edu.stanford.nlp.ie.crf.CRFClassifier as a Chinese word segmenter on
:: one text file, using the models found in the data directory next to this
:: script.
:: Usage: "segment [-k] ctb|pku filename encoding kBest"
:: encoding can be UTF-8 or GB18030 or GB

:: Four arguments are required whether or not -k is given.
if "%4"=="" (
echo Too few arguments
call :usage %~nx0
goto :EOF
)
:: Five arguments is the maximum: -k plus the four required ones.
if not "%6"=="" (
echo Too many arguments
call :usage %~nx0
goto :EOF
)

:: Default whitespace flag; replaced below when -k is present.
set ARGS=-keepAllWhitespaces false
if not "%5"=="" (
rem Five arguments were supplied, so the first one has to be the -k switch.
if not "%1"=="-k" (
echo First argument must be "-k"
call :usage %~nx0
goto :EOF
)
set ARGS=-keepAllWhitespaces true
rem With -k in front, the remaining arguments are shifted by one position.
set lang=%~2
set file=%~3
set enc=%~4
set kBest=%~5
) else (
rem NOTE - review: the fourth argument was already checked at the top of the
rem script, so the else branch of this inner test looks unreachable.
if not "%4"=="" (
set lang=%~1
set file=%~2
set enc=%~3
set kBest=%~4
) else (
echo Unknown argument error
call :usage %~nx0
goto :EOF
)
)

:: Only ctb and pku are accepted; anything else aborts the script.
if "%lang%"=="ctb" (
echo CTB: Chinese Treebank segmentation >&2
) else (
if "%lang%"=="pku" (
echo PKU: Beijing University segmentation >&2
) else (
echo Language argument should be either ctb or pku. Abort
goto :EOF
)
)

:: Echo the effective settings to stderr.
echo File: "%file%" >&2
echo Encoding: "%enc%" >&2
echo kBest: "%kBest%" >&2
echo ------------------------------- >&2

:: BASEDIR is the directory holding this script; it ends with a backslash,
:: so DATADIR can be formed by plain concatenation.
set BASEDIR=%~dp0
set DATADIR=%BASEDIR%data
:: set LEXDIR=%DATADIR%lexicons
:: Common part of the java command line; every jar in BASEDIR is on the classpath.
set JAVACMD=java -mx1024m -cp "%BASEDIR%*;" edu.stanford.nlp.ie.crf.CRFClassifier -sighanCorporaDict "%DATADIR%" -textFile "%file%" -inputEncoding %enc% -sighanPostProcessing true %ARGS%
set DICTS=%DATADIR%\dict-chris6.ser.gz
:: KBESTCMD stays empty unless a non-zero kBest was requested. Variable names
:: are case-insensitive, so kBestCmd below is the same variable as KBESTCMD.
set KBESTCMD=
if not %kBest%==0 set kBestCmd=-kBest %kBest%

:: NOTE - review: KBESTCMD is expanded inside double quotes in both launch
:: commands below, so java receives either an empty argument or the option
:: and its value fused into one token - confirm this is intended.
:: The model file is chosen from the language name: ctb.gz or pku.gz.
if "%lang%"=="ctb" (
%JAVACMD% -loadClassifier "%DATADIR%\%lang%.gz" -serDictionary "%DICTS%" "%KBESTCMD%"
)
if "%lang%"=="pku" (
%JAVACMD% -loadClassifier "%DATADIR%\%lang%.gz" -serDictionary "%DICTS%" "%KBESTCMD%"
)

goto :EOF

:: Subroutine: prints the usage text to stderr. Its first argument is the
:: script name to show in the message.
:usage
echo Usage: "%1 [-k] ctb|pku filename encoding kBest" >&2
echo -k : keep whitespaces >&2
echo ctb : use Chinese Treebank segmentation >&2
echo pku : Beijing University segmentation >&2
echo kBest: print kBest best segmenations; 0 means kBest mode is off. >&2
echo. >&2
echo Example: %1 ctb test.simp.utf8 UTF-8 0 >&2
echo Example: %1 pku test.simp.utf8 UTF-8 0 >&2
goto :EOF
@echo off

:: Runs edu.stanford.nlp.ie.crf.CRFClassifier as a Chinese word segmenter on
:: one text file, using the models found in the data directory next to this
:: script.
:: Usage: "segment [-k] ctb|pku filename encoding kBest"
::   -k        keep whitespaces
::   ctb       use Chinese Treebank segmentation
::   pku       use Beijing University segmentation
::   encoding  can be UTF-8 or GB18030 or GB
::   kBest     number of best segmentations to print; 0 turns kBest mode off

:: Four arguments are required whether or not -k is given.
if "%4"=="" (
    echo Too few arguments
    call :usage %~nx0
    goto :EOF
)
:: Five arguments is the maximum: -k plus the four required ones.
if not "%6"=="" (
    echo Too many arguments
    call :usage %~nx0
    goto :EOF
)

:: Default whitespace flag; replaced below when -k is present.
set ARGS=-keepAllWhitespaces false
if not "%5"=="" (
    rem Five arguments were supplied, so the first one has to be the -k switch.
    if not "%1"=="-k" (
        echo First argument must be "-k"
        call :usage %~nx0
        goto :EOF
    )
    set ARGS=-keepAllWhitespaces true
    rem With -k in front, the remaining arguments are shifted by one position.
    set lang=%~2
    set file=%~3
    set enc=%~4
    set kBest=%~5
) else (
    rem Exactly four arguments: the check at the top already rejected fewer,
    rem so no further test of the fourth argument is needed here.
    set lang=%~1
    set file=%~2
    set enc=%~3
    set kBest=%~4
)

:: Only ctb and pku are accepted; anything else aborts the script.
if "%lang%"=="ctb" (
    echo CTB: Chinese Treebank segmentation >&2
) else (
    if "%lang%"=="pku" (
        echo PKU: Beijing University segmentation >&2
    ) else (
        echo Language argument should be either ctb or pku. Abort
        goto :EOF
    )
)

:: Echo the effective settings to stderr.
echo File: "%file%" >&2
echo Encoding: "%enc%" >&2
echo kBest: "%kBest%" >&2
echo ------------------------------- >&2

:: BASEDIR is the directory holding this script; it ends with a backslash,
:: so DATADIR can be formed by plain concatenation.
set BASEDIR=%~dp0
set DATADIR=%BASEDIR%data
:: set LEXDIR=%DATADIR%lexicons
:: Common part of the java command line; every jar in BASEDIR is on the classpath.
set JAVACMD=java -mx1024m -cp "%BASEDIR%*;" edu.stanford.nlp.ie.crf.CRFClassifier -sighanCorporaDict "%DATADIR%" -textFile "%file%" -inputEncoding %enc% -sighanPostProcessing true %ARGS%
set DICTS=%DATADIR%\dict-chris6.ser.gz
:: KBESTCMD stays empty unless a non-zero kBest was requested. Both sides of
:: the comparison are quoted so an unusual value cannot break the if syntax.
set KBESTCMD=
if not "%kBest%"=="0" set KBESTCMD=-kBest %kBest%

:: The language was validated above, so one launch command serves both models;
:: the model file is ctb.gz or pku.gz inside DATADIR.
:: KBESTCMD is expanded WITHOUT surrounding quotes: quoting it would hand java
:: an empty argument when kBest is off, or the option and its value fused into
:: a single token when it is on.
%JAVACMD% -loadClassifier "%DATADIR%\%lang%.gz" -serDictionary "%DICTS%" %KBESTCMD%

goto :EOF

:: Subroutine: prints the usage text to stderr. Its first argument is the
:: script name to show in the message.
:usage
echo Usage: "%1 [-k] ctb|pku filename encoding kBest" >&2
echo -k : keep whitespaces >&2
echo ctb : use Chinese Treebank segmentation >&2
echo pku : Beijing University segmentation >&2
echo kBest: print kBest best segmentations; 0 means kBest mode is off. >&2
echo. >&2
echo Example: %1 ctb test.simp.utf8 UTF-8 0 >&2
echo Example: %1 pku test.simp.utf8 UTF-8 0 >&2
goto :EOF
2 changes: 1 addition & 1 deletion doc/tregex/README-tregex.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Tregex v3.7.0 - 2016-10-31
Tregex v3.8.0 - 2017-06-09
----------------------------------------------

Copyright (c) 2003-2012 The Board of Trustees of
Expand Down
2 changes: 1 addition & 1 deletion doc/tsurgeon/README-tsurgeon.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
Tsurgeon v3.7.0 - 2016-10-31
Tsurgeon v3.8.0 - 2017-06-09
----------------------------------------------

Copyright (c) 2003-2012 The Board of Trustees of
Expand Down
6 changes: 3 additions & 3 deletions scripts/lexparser/lexparser-gui.bat
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
:: Runs the parser GUI: starts edu.stanford.nlp.parser.ui.Parser with every
:: jar in the current directory on the classpath.
:: usage lexparser-gui [parserDataFilename [textFileName]]
:: NOTE(review): the arguments named in the usage line are not forwarded to
:: the java command below - confirm whether they should be.
java -mx800m -cp "*" edu.stanford.nlp.parser.ui.Parser
:: Runs the parser GUI: starts edu.stanford.nlp.parser.ui.Parser with every
:: jar in the current directory on the classpath.
:: usage lexparser-gui [parserDataFilename [textFileName]]
:: All script arguments are forwarded to the Java class so that the optional
:: file names documented in the usage line actually reach it; with no
:: arguments the command is the same as before.
java -mx800m -cp "*" edu.stanford.nlp.parser.ui.Parser %*
8 changes: 4 additions & 4 deletions scripts/lexparser/lexparser.bat
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
@echo off
:: Runs the English PCFG parser (englishPCFG.ser.gz) through
:: edu.stanford.nlp.parser.lexparser.LexicalizedParser, requesting the
:: "penn,typedDependencies" output format.
:: usage: lexparser fileToParse
:: NOTE(review): only the first script argument is forwarded, and unquoted, so
:: additional files are ignored and a path with spaces is split - confirm.
java -mx150m -cp "*;" edu.stanford.nlp.parser.lexparser.LexicalizedParser -outputFormat "penn,typedDependencies" edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz %1
@echo off
:: Runs the English PCFG parser (englishPCFG.ser.gz) on one or more files
:: through edu.stanford.nlp.parser.lexparser.LexicalizedParser, requesting the
:: "penn,typedDependencies" output format.
:: usage: lexparser fileToParse [moreFiles...]
:: Every script argument is forwarded exactly as typed, so several files can
:: be given and a quoted path containing spaces stays a single argument.
java -mx150m -cp "*;" edu.stanford.nlp.parser.lexparser.LexicalizedParser -outputFormat "penn,typedDependencies" edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz %*
1 change: 1 addition & 0 deletions src/edu/stanford/nlp/pipeline/CoreNLP.proto
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ message Document {

/** coref mentions for entire document **/
repeated Mention mentionsForCoref = 14;
optional bool hasCorefAnnotation = 15;

extensions 100 to 255;
}
Expand Down

0 comments on commit 94e5776

Please sign in to comment.