updated readme with some basic text, updated requirements and put the…

…m into a txt file, fixed print commands so the code can be imported in py3
stanford-futuredata · Jul 9, 2017 · fa098f9 · fa098f9
1 parent 1f18396
commit fa098f9
Show file tree

Hide file tree

Showing 8 changed files with 54 additions and 29 deletions.
diff --git a/README.md b/README.md
@@ -1 +1,23 @@
-# noscope
+# noscope
+
+# Installation
+
+## Requirements
+- python
+The project is a Python 2.7 project (not yet compatible with Python 3), and it is probably easiest to set up with an Anaconda distribution.
+- ffmpeg 
+The ffmpeg utility is required by some of the scripts, which use it to convert video.
+- Python / PIP
+The Python requirements are listed in the requirements.txt file. The most complicated package is opencv3, which requires a number of compilation steps to install on most platforms; if you are using Anaconda, there are prepackaged versions which make setup much easier.
+
+## Steps
+
+```bash
+git clone https://github.com/stanford-futuredata/noscope.git
+cd noscope
+python setup.py install
+```
+
+# Usage
+
+There are a number of scripts in the ```exp/``` and ```scripts/``` directories which can be used to start training and using models on various datasets.
diff --git a/noscope/DataUtils.py b/noscope/DataUtils.py
@@ -64,7 +64,7 @@ def get_binary(csv_fname, OBJECTS=['person'], limit=None, interval=1, start=0, W
     counts = np.where(counts == 0, 0, 1)
     smoothed_counts = np.convolve(np.ones(WINDOW), np.ravel(counts), mode='same') > WINDOW * 0.7
     smoothed_counts = smoothed_counts.reshape(len(counts), 1)
-    print np.sum(smoothed_counts != counts)
+    print(np.sum(smoothed_counts != counts))
     counts = smoothed_counts
     return counts
 
@@ -139,39 +139,39 @@ def get_data(csv_fname, video_fname, binary=False, num_frames=None,
     def print_class_numbers(Y, nb_classes):
         classes = np_utils.probas_to_classes(Y)
         for i in xrange(nb_classes):
-            print 'class %d: %d' % (i, np.sum(classes == i))
+            print('class %d: %d' % (i, np.sum(classes == i)))
 
-    print '\tParsing %s, extracting %s' % (csv_fname, str(OBJECTS))
+    print('\tParsing %s, extracting %s' % (csv_fname, str(OBJECTS)))
     if binary:
         all_counts = get_binary(csv_fname, limit=num_frames, OBJECTS=OBJECTS)
     else:
         all_counts = get_counts(csv_fname, limit=num_frames, OBJECTS=OBJECTS)
-    print '\tRetrieving all frames from %s' % video_fname
+    print('\tRetrieving all frames from %s' % video_fname)
     all_frames = VideoUtils.get_all_frames(
             len(all_counts), video_fname, scale=resol, dtype=dtype)
-    print '\tSplitting data into training and test sets'
+    print('\tSplitting data into training and test sets')
     X_train, X_test, Y_train, Y_test = to_test_train(
             all_frames, all_counts, regression=regression,
             center=center, dtype=dtype, train_ratio=train_ratio)
     if regression:
         nb_classes = 1
-        print '(train) mean, std: %f, %f' % \
-            (np.mean(Y_train), np.std(Y_train))
-        print '(test) mean, std: %f %f' % \
-            (np.mean(Y_test), np.std(Y_test))
+        print('(train) mean, std: %f, %f' % \
+            (np.mean(Y_train), np.std(Y_train)))
+        print('(test) mean, std: %f %f' % \
+            (np.mean(Y_test), np.std(Y_test)))
     else:
         nb_classes = all_counts.max() + 1
-        print '(train) positive examples: %d, total examples: %d' % \
+        print('(train) positive examples: %d, total examples: %d' % \
             (np.count_nonzero(np_utils.probas_to_classes(Y_train)),
-             len(Y_train))
+             len(Y_train)))
         print_class_numbers(Y_train, nb_classes)
-        print '(test) positive examples: %d, total examples: %d' % \
+        print('(test) positive examples: %d, total examples: %d' % \
             (np.count_nonzero(np_utils.probas_to_classes(Y_test)),
-             len(Y_test))
+             len(Y_test)))
         print_class_numbers(Y_test, nb_classes)
 
-    print 'shape of image: ' + str(all_frames[0].shape)
-    print 'number of classes: %d' % (nb_classes)
+    print('shape of image: ' + str(all_frames[0].shape))
+    print('number of classes: %d' % (nb_classes))
 
     data = (X_train, Y_train, X_test, Y_test)
     return data, nb_classes

diff --git a/noscope/Learner.py b/noscope/Learner.py
@@ -85,13 +85,13 @@ def run(self, random_learner, top_n=5):
             n = int(ceil(B / self.max_iter / (s+1) * eta**s))
             r = self.max_iter * eta**(-s)
 
-            print 'Running outer loop loop with: %d %d' % (n, r)
+            print('Running outer loop loop with: %d %d' % (n, r))
             learners = [random_learner() for i in range(n)]
             for i in range(s+1):
                 n_i = n * eta**(-i)
                 r_i = int(r * eta**(i))
-                print 'Running inner loop with: n_i=%d r_i=%d len(learners)=%d' % \
-                    (n_i, r_i, len(learners))
+                print('Running inner loop with: n_i=%d r_i=%d len(learners)=%d' % \
+                    (n_i, r_i, len(learners)))
                 val_losses = [learner.run_iters(r_i) for learner in learners]
 
                 # Unfortunate -_-

diff --git a/noscope/Models.py b/noscope/Models.py
@@ -156,7 +156,7 @@ def run_model(model, data, batch_size=32, nb_epoch=1, patience=2,
         validation_split = 0.33333333
         if len(Y_train) * validation_split > 50000.0:
             validation_split = 50000.0 / float(len(Y_train))
-        print validation_split
+        print(validation_split)
 
         begin_train = time.time()
         model.fit(X_train, Y_train,
@@ -241,7 +241,7 @@ def evaluate_model_regression(model, X_test, Y_test, batch_size=256):
         proba = np.concatenate([1 - predictions, predictions], axis=1)
         metrics = stats_from_proba(proba, Y_classes)
         metrics['cutoff'] = cutoff
-        print 'Cutoff: %f, metrics: %s' % (cutoff, str(metrics))
+        print('Cutoff: %f, metrics: %s' % (cutoff, str(metrics)))
         if metrics['accuracy'] > best['accuracy']:
             best = metrics
 
@@ -347,10 +347,9 @@ def metrics_to_list(metrics):
         model.save(model_fname)
 
         to_write.append(list(param[2:]) + [train_time] + metrics_to_list(metrics))
-        print param
-        print train_time, metrics
-        print
-    print to_write
+        print(param)
+        print(train_time, metrics)
+    print(to_write)
     # First two params don't need to be written out
     param_column_names = map(lambda i: 'param' + str(i), xrange(len(params[0]) - 2))
     column_names = param_column_names + ['train_time'] + metrics_names(metrics)

diff --git a/noscope/SpeedTests.py b/noscope/SpeedTests.py
@@ -34,7 +34,7 @@ def run_speed_test(model, batch_sizes=[16, 32, 64, 128, 256],
         _ = model.predict(data, batch_size=batch_size)
         end = time.time()
         total_time = end - begin
-        print 'Batch size: %d, FPS: %f' % (batch_size, nb_images / total_time)
+        print('Batch size: %d, FPS: %f' % (batch_size, nb_images / total_time))
 
 
 def main():
@@ -43,4 +43,4 @@ def main():
 
 
 if __name__ == '__main__':
-    main()
+    main()
diff --git a/noscope/VideoUtils.py b/noscope/VideoUtils.py
@@ -40,7 +40,7 @@ def VideoHistIterator(video_fname, scale=None, start=0):
 
 def get_all_frames(num_frames, video_fname, scale=None, interval=1, start=0, dtype='float32'):
     true_num_frames = int(ceil((num_frames + 0.0) / interval))
-    print '%d total frames / %d frame interval = %d actual frames' % (num_frames, interval, true_num_frames)
+    print('%d total frames / %d frame interval = %d actual frames' % (num_frames, interval, true_num_frames))
     vid_it = VideoIterator(video_fname, scale=scale, interval=interval, start=start)
 
     _, frame = vid_it.next()

diff --git a/requirements.txt b/requirements.txt
@@ -0,0 +1,4 @@
+numpy
+keras
+opencv3
+pathos
diff --git a/setup.py b/setup.py
@@ -16,4 +16,4 @@
       author='Daniel Kang',
       author_email='[email protected]',
       packages=['noscope'],
-      install_requires=['numpy'])
+      install_requires=open('requirements.txt').read().split('\n'))