
Commit

MERGE: with master; ADD: shell script to launch training
Sudnya Diamos committed Mar 21, 2017
2 parents 44139ae + 458243b commit f0aafe6
Showing 13 changed files with 806 additions and 36 deletions.
2 changes: 1 addition & 1 deletion .gitignore
@@ -1,4 +1,4 @@
saved_models/
saved_models

# Byte-compiled / optimized / DLL files
__pycache__/
674 changes: 674 additions & 0 deletions LICENSE

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion README.md
@@ -32,5 +32,5 @@ To view results run:
```
port=8888
log_dir=<directory_of_saved_models>
tensorboard --port $port --log_dir $log_dir
tensorboard --port $port --logdir $log_dir
```
8 changes: 6 additions & 2 deletions configs/train.json
@@ -8,7 +8,7 @@

"optimizer" : {
"epochs" : 50,
"learning_rate" : 1e-1,
"learning_rate" : 1e-3,
"momentum" : 0.95,
"decay_rate" : 1.0,
"decay_steps" : 2000
@@ -78,6 +78,10 @@
"num_filters" : 64,
"stride" : 2
},
{ "kernel_size" : 64,
"num_filters" : 64,
"stride" : 2
},
{ "kernel_size" : 64,
"num_filters" : 64,
"stride" : 2
@@ -86,6 +90,6 @@
},

"io" : {
"save_path" : "saved_models/cnn16"
"save_path" : "saved_models/cnn16-lr-3"
}
}
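For context, a sketch of how these optimizer fields plausibly map onto TF 1.x's schedule API; the actual wiring in train.py is not shown in this diff, so this is an assumption:

```
import tensorflow as tf

# Sketch under the assumption that train.py feeds the "optimizer"
# block into tf.train.exponential_decay (TF 1.x API).
global_step = tf.Variable(0, trainable=False, name="global_step")
learning_rate = tf.train.exponential_decay(
    learning_rate=1e-3,   # the value lowered in this commit (was 1e-1)
    global_step=global_step,
    decay_steps=2000,
    decay_rate=1.0)       # decay_rate = 1.0 => effectively constant LR
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.95)
```

Since decay_rate stays at 1.0, the schedule never actually decays; the substantive change in this commit is the 100x drop in the base learning rate, echoed in the new save_path suffix "cnn16-lr-3".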
2 changes: 2 additions & 0 deletions entry/AUTHORS.txt
@@ -0,0 +1,2 @@
Awni Hannun, Stanford University
Sudnya Diamos, Stanford University
5 changes: 5 additions & 0 deletions entry/dependencies.txt
@@ -0,0 +1,5 @@
# file: dependencies.txt
#
# This file lists additional Debian packages that must be installed
# prior to running your entry's 'setup.sh' and 'next.sh' scripts.
# Each package must be on a separate line.
27 changes: 27 additions & 0 deletions entry/next.sh
@@ -0,0 +1,27 @@
#! /bin/bash
#
# file: next.sh
#
# This bash script analyzes the record named in its command-line
# argument ($1), and writes the answer to the file 'answers.txt'.
# This script is run once for each record in the Challenge test set.
#
# The program should print the record name, followed by a comma,
# followed by one of the following characters:
# N for normal rhythm
# A for atrial fibrillation
# O for other abnormal rhythms
# ~ for records too noisy to classify
#
# For example, if invoked as
# next.sh A00001
# it analyzes record A00001 and (assuming the recording is
# considered to be normal) writes "A00001,N" to answers.txt.

set -e
set -o pipefail

RECORD=$1

# TODO actually eval record and write to answers.txt
echo $RECORD
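The TODO above is the whole entry protocol. A minimal sketch of a helper next.sh could call, reusing predict_record from the reworked eval.py later in this commit; the script name and model path are hypothetical:

```
# answer_record.py -- hypothetical helper; next.sh could run
#   python answer_record.py "$RECORD"
import sys

from eval import predict_record

record = sys.argv[1]
# Model path is an assumption; any directory holding config.json,
# loader.pkl, and a checkpoint should work.
label = predict_record(record, "saved_models/cnn16-lr-3")
with open("answers.txt", "a") as fid:
    fid.write("{},{}\n".format(record, label))
```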
20 changes: 20 additions & 0 deletions entry/prepare-entry.sh
@@ -0,0 +1,20 @@
#!/bin/bash
#
# file: prepare-entry.sh

set -e
set -o pipefail

cp ../LICENSE LICENSE.txt

echo "==== running entry script on validation set ===="
validation=/deep/group/med/alivecor/sample2017/validation

rm -f answers.txt
for r in `cat $validation/RECORDS`; do
echo $r
ln -sf $validation/$r.hea .
ln -sf $validation/$r.mat .
./next.sh $r
rm $r.hea $r.mat
done
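(The symlinks let next.sh find each record's .hea/.mat files in the working directory, which is where Loader.load_preprocess resolves `<record>.mat`; removing them afterwards keeps the entry directory clean.)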
10 changes: 10 additions & 0 deletions entry/setup.sh
@@ -0,0 +1,10 @@
#! /bin/bash
#
# file: setup.sh
#
# This bash script performs any setup necessary in order to test your
# entry. It is run only once, before running any other code belonging
# to your entry.

set -e
set -o pipefail
49 changes: 20 additions & 29 deletions eval.py
@@ -1,19 +1,13 @@
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import json
import numpy as np
import os
import pickle
import tensorflow as tf

import loader
import network

tf.flags.DEFINE_string("save_path", None,
"Path to saved model.")
FLAGS = tf.flags.FLAGS

class Evaler:

def __init__(self, save_path, batch_size=1):
@@ -39,31 +33,28 @@ def probs(self, inputs):

def predict(self, inputs):
probs = self.probs(inputs)
return np.argmax(probs, axis=2)
return np.argmax(probs, axis=1)

def main(argv=None):
assert FLAGS.save_path is not None, \
"Must provide the path to a model directory."
def predict_record(record_id, model_path):
evaler = Evaler(model_path)

config_file = os.path.join(FLAGS.save_path, "config.json")
with open(config_file, 'r') as fid:
config = json.load(fid)
ldr_path = os.path.join(model_path, "loader.pkl")
with open(ldr_path, 'rb') as fid:
ldr = pickle.load(fid)

batch_size = 32
data_loader = loader.Loader(config['data']['path'], batch_size,
seed=config['data']['seed'])
inputs = ldr.load_preprocess(record_id)
outputs = evaler.predict([inputs])
return ldr.int_to_class(outputs[0])

evaler = Evaler(FLAGS.save_path, batch_size=batch_size)

corr = 0.0
total = 0
for inputs, labels in data_loader.batches(data_loader.val):
probs = evaler.probs(inputs)
predictions = np.vstack(predictions)
corr += np.sum(predictions == np.vstack(labels))
total += predictions.size
print("Number {}, Accuracy {:.3f}".format(total, corr / total))
def main():
parser = argparse.ArgumentParser(description="Evaluator Script")
parser.add_argument("model_path")
parser.add_argument("record")

args = parser.parse_args()
prediction = predict_record(args.record, args.model_path)
print(prediction)

if __name__ == "__main__":
tf.app.run()
main()
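With the tf.flags entry point gone, the script now takes positional arguments, e.g. `python eval.py saved_models/cnn16-lr-3 A00001` (paths illustrative): it unpickles the saved Loader, preprocesses the record, and prints the predicted class.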

5 changes: 5 additions & 0 deletions launch.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#!/bin/bash

gpu=3
env CUDA_VISIBLE_DEVICES=$gpu python train.py configs/cnn20-lr-1.json &
echo "Started training at: ", $(date +%m:%d:%y-%H:%M:%S)
35 changes: 32 additions & 3 deletions loader.py
@@ -94,8 +94,8 @@ def compute_mean_std(self):
Estimates the mean and std over the training set.
"""
all_dat = np.hstack(w for w, _ in self._train)
self.mean = np.mean(all_dat)
self.std = np.std(all_dat)
self.mean = np.mean(all_dat, dtype=np.float32)
self.std = np.std(all_dat, dtype=np.float32)

@property
def output_dim(self):
@@ -112,6 +112,32 @@ def val(self):
""" Returns the raw validation set. """
return self._val

def load_preprocess(self, record_id):
ecg = load_ecg_mat(record_id + ".mat")
return self.normalize(ecg)

def int_to_class(self, label_int):
""" Convert integer label to class label. """
return self._int_to_class[label_int]

def __getstate__(self):
"""
For pickling.
"""
return (self.mean,
self.std,
self._int_to_class,
self._class_to_int)

def __setstate__(self, state):
"""
For unpickling.
"""
self.mean = state[0]
self.std = state[1]
self._int_to_class = state[2]
self._class_to_int = state[3]

def load_all_data(data_path, val_frac):
"""
Returns tuple of training and validation sets. Each set
@@ -127,7 +153,7 @@ def load_all_data(data_path, val_frac):
# Load raw ecg
for record, label in records:
ecg_file = os.path.join(data_path, record + ".mat")
ecg = sio.loadmat(ecg_file)['val'].squeeze()
ecg = load_ecg_mat(ecg_file)
all_records.append((ecg, label))

# Shuffle before train/val split
@@ -136,6 +162,9 @@
train, val = all_records[cut:], all_records[:cut]
return train, val

def load_ecg_mat(ecg_file):
return sio.loadmat(ecg_file)['val'].squeeze()

def main():
parser = argparse.ArgumentParser(description="Data Loader")
parser.add_argument("-v", "--verbose",
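Because __getstate__ keeps only the normalization statistics and the label maps, the pickle train.py writes is a lightweight preprocessing artifact rather than a copy of the dataset. A round-trip sketch, with path and record id illustrative:

```
import pickle

# Unpickling bypasses __init__ and restores only (mean, std,
# _int_to_class, _class_to_int) via __setstate__; the raw training
# data is deliberately not serialized.
with open("saved_models/cnn16-lr-3/loader.pkl", "rb") as fid:
    ldr = pickle.load(fid)

inputs = ldr.load_preprocess("A00001")  # expects A00001.mat in the cwd
print(ldr.int_to_class(0))              # map an integer label back to its class
```

This is the loader.pkl that train.py now dumps next to config.json, and that predict_record in eval.py loads back.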
3 changes: 3 additions & 0 deletions train.py
@@ -5,6 +5,7 @@
import json
import numpy as np
import os
import pickle
import random
import tensorflow as tf
import time
@@ -68,6 +69,8 @@ def main(argv=None):
config['model']['output_dim'] = data_loader.output_dim
with open(os.path.join(save_path, "config.json"), 'w') as fid:
json.dump(config, fid)
with open(os.path.join(save_path, "loader.pkl"), 'wb') as fid:
pickle.dump(data_loader, fid)

with tf.Graph().as_default(), tf.Session() as sess:
tf.set_random_seed(config['seed'])
