Merge branch 'master' of github.com:StanfordHCI/termite

Conflicts: execute.py pipeline/io_utils.py
StanfordHCI · Feb 15, 2014 · e70bbc1 · e70bbc1
2 parents 1e4303c + 3035c87
commit e70bbc1
Show file tree

Hide file tree

Showing 4 changed files with 33 additions and 4 deletions.
diff --git a/README.md b/README.md
@@ -0,0 +1,29 @@
+Current Development
+===================
+
+Starting in 2014, we have split Termite into two components:
+ * **[Termite Data Server](https://github.com/uwdata/termite-data-server)** for processing the output of topic models and providing the content as a web service
+ * **[Termite Visualizations](https://github.com/uwdata/termite-visualizations)** for visualizing topic model outputs in a web browser
+
+Our goals are to:
+ * support multiple topic modeling tools
+ * reduce the cost of developing new visualizations through shared infrastructure
+ * allow multiple visualizations to interact with any number of topic modeling tools and with other visualizations
+
+Please see the respective repositories for the latest software and additional information.
+
+Termite
+-------
+
+Termite is a visualization tool for inspecting the output of statistical topic models based on the techniques described in the following publication:
+
+ **Termite: Visualization Techniques for Assessing Textual Topic Models** 
+ Jason Chuang, Christopher D. Manning, Jeffrey Heer 
+ Computer Science Dept, Stanford University 
+ https://vis.stanford.edu/papers/termite 
+
+
+More information
+----------------
+
+For more details about this repository, see the file "README.old".
diff --git a/README → README.old b/README → README.old
diff --git a/pipeline/tokenize.py b/pipeline/tokenize.py
@@ -23,7 +23,7 @@ class Tokenize( object ):
  WHITESPACE_TOKENIZATION = r'[^ ]+'
  ALPHANUMERIC_TOKENIZATION = r'[0-9A-Za-z_]*[A-Za-z_]+[0-9A-Za-z_]*'
  ALPHA_TOKENIZATION = r'[A-Za-z_]+'
- UNICODE_TOKENIZATION = r'[\p{L}\p{M}]+'
+ UNICODE_TOKENIZATION = r'[\w]+'
  DEFAULT_TOKENIZATION = ALPHA_TOKENIZATION
 
  def __init__( self, logging_level ):
@@ -52,7 +52,7 @@ def execute( self, corpus_format, corpus_path, data_path, tokenization ):
  self.logger.info( 'Tokenizing source corpus...' )
  self.logger.info( ' corpus_path = %s (%s)', corpus_path, corpus_format )
  self.logger.info( ' data_path = %s', data_path )
- self.logger.info( ' tokenziation = %s', tokenization )
+ self.logger.info( ' tokenization = %s', tokenization )
 
  self.logger.info( 'Connecting to data...' )
  self.documents = DocumentsAPI( corpus_format, corpus_path )

diff --git a/setup.sh b/setup.sh
@@ -31,7 +31,7 @@ fi
 
 echo
 echo "Downloading D3 javascript library..."
-curl --insecure --location http://d3js.org/d3.v3.zip > $LIBRARY/d3.v3.zip
+curl --insecure --location https://github.com/mbostock/d3/releases/download/v3.4.1/d3.v3.zip > $LIBRARY/d3.v3.zip
 
 echo
 echo "Uncompressing D3 javascript library..."
@@ -140,7 +140,7 @@ cp $LIBRARY/LICENSE $LIBRARY/LICENSE-stmt
 
 echo
 echo "Downloading Google Closure Compiler..."
-curl --insecure --location https://closure-compiler.googlecode.com/files/compiler-latest.zip > $LIBRARY/compiler-latest.zip
+curl --insecure --location https://dl.google.com/closure-compiler/compiler-latest.zip > $LIBRARY/compiler-latest.zip
 
 echo
 echo "Uncompressing Google Closure Compiler..."