add TensorBoard support

AlexHex7 · Feb 16, 2017 · 7ace298 · 7ace298
1 parent 264bf4a
commit 7ace298
Show file tree

Hide file tree

Showing 2 changed files with 55 additions and 13 deletions.
diff --git a/README.md b/README.md
@@ -10,12 +10,12 @@ by Shaoqing Ren, Kaiming He, Ross Girshick, Jian Sun.
 
 ### Progress
 
-- [x] forward pass for detecting
-- [x] using models trained by Tensorflow
-- [x] roi pooling layer implemented by python and pytorch
-- [x] roi pooling layer with C extensions on CPU (only forward)
-- [x] roi pooling layer on GPU (forward and backward)
-- [x] backward pass for training (experimental)
+- [x] Forward for detecting
+- [x] RoI Pooling layer with C extensions on CPU (only forward)
+- [x] RoI Pooling layer on GPU (forward and backward)
+- [x] Training on VOC2007 (experimental)
+- [x] TensorBoard support
+- [ ] Evaluation
 
 ### Installation and demo
 1. Clone the Faster R-CNN repository
@@ -50,4 +50,12 @@ ln -s $VOCdevkit VOCdevkit2007
 
 Then you can set some hyper-parameters in `train.py` and training parameters in the `.yml` file.
 
-You may need to tune the loss function defined in `faster_rcnn/faster_rcnn.py` by yourself.
+You may need to tune the loss function defined in `faster_rcnn/faster_rcnn.py` by yourself.
+
+### Training with TensorBoard
+With the aid of [Crayon](https://github.com/torrvision/crayon),
+we can access the visualisation power of TensorBoard for any 
+deep learning framework.
+
+To use TensorBoard, install Crayon (https://github.com/torrvision/crayon)
+and set `use_tensorboard = True` in `train.py`.
diff --git a/train.py b/train.py
@@ -1,6 +1,7 @@
 import os
 import torch
 import numpy as np
+from datetime import datetime
 
 from faster_rcnn import network
 from faster_rcnn.faster_rcnn import FasterRCNN, RPN
@@ -16,6 +17,11 @@
 except ImportError:
  cprint = None
 
+try:
+ from pycrayon import CrayonClient
+except ImportError:
+ CrayonClient = None
+
 
 def log_print(text, color=None, on_color=None, attrs=None):
  if cprint is not None:
@@ -35,6 +41,8 @@ def log_print(text, color=None, on_color=None, attrs=None):
 max_iters = 100000
 rand_seed = 1024
 _DEBUG = True
+use_tensorboard = True
+remove_all_log = True # remove all historical experiments in TensorBoard
 
 # ------------
 
@@ -46,7 +54,8 @@ def log_print(text, color=None, on_color=None, attrs=None):
 lr = cfg.TRAIN.LEARNING_RATE
 momentum = cfg.TRAIN.MOMENTUM
 weight_decay = cfg.TRAIN.WEIGHT_DECAY
-log_interval = cfg.TRAIN.DISPLAY
+disp_interval = cfg.TRAIN.DISPLAY
+log_interval = cfg.TRAIN.LOG_IMAGE_ITERS
 
 # load data
 imdb = get_imdb(imdb_name)
@@ -73,10 +82,20 @@ def log_print(text, color=None, on_color=None, attrs=None):
 if not os.path.exists(output_dir):
  os.mkdir(output_dir)
 
+# tensorboard
+use_tensorboard = use_tensorboard and CrayonClient is not None
+if use_tensorboard:
+ cc = CrayonClient(hostname='127.0.0.1')
+ if remove_all_log:
+ cc.remove_all_experiments()
+ exp_name = datetime.now().strftime('vgg16_%m-%d_%H-%M')
+ exp = cc.create_experiment(exp_name)
+
 # training
 train_loss = 0
 tp, tf, fg, bg = 0., 0., 0, 0
 step_cnt = 0
+re_cnt = False
 t = Timer()
 t.tic()
 for step in range(0, max_iters+1):
@@ -108,7 +127,7 @@ def log_print(text, color=None, on_color=None, attrs=None):
  network.clip_gradient(net, 10.)
  optimizer.step()
 
- if step % log_interval == 0:
+ if step % disp_interval == 0:
  duration = t.toc(average=False)
  fps = step_cnt / duration
 
@@ -122,10 +141,18 @@ def log_print(text, color=None, on_color=None, attrs=None):
  net.rpn.cross_entropy.data.cpu().numpy()[0], net.rpn.loss_box.data.cpu().numpy()[0],
  net.cross_entropy.data.cpu().numpy()[0], net.loss_box.data.cpu().numpy()[0])
  )
- tp, tf, fg, bg = 0., 0., 0, 0
- train_loss = 0
- step_cnt = 0
- t.tic()
+ re_cnt = True
+
+ if use_tensorboard and step % log_interval == 0:
+ exp.add_scalar_value('train_loss', train_loss / step_cnt, step=step)
+ if _DEBUG:
+ exp.add_scalar_value('true_positive', tp/fg*100., step=step)
+ exp.add_scalar_value('true_negative', tf/bg*100., step=step)
+ losses = {'rpn_cls': float(net.rpn.cross_entropy.data.cpu().numpy()[0]),
+ 'rpn_box': float(net.rpn.loss_box.data.cpu().numpy()[0]),
+ 'rcnn_cls': float(net.cross_entropy.data.cpu().numpy()[0]),
+ 'rcnn_box': float(net.loss_box.data.cpu().numpy()[0])}
+ exp.add_scalar_dict(losses, step=step)
 
  if (step % 10000 == 0) and step > 0:
  save_name = os.path.join(output_dir, 'faster_rcnn_{}.h5'.format(step))
@@ -135,3 +162,10 @@ def log_print(text, color=None, on_color=None, attrs=None):
  lr /= 10.
  optimizer = torch.optim.SGD(params[8:], lr=lr, momentum=momentum, weight_decay=weight_decay)
 
+ if re_cnt:
+ tp, tf, fg, bg = 0., 0., 0, 0
+ train_loss = 0
+ step_cnt = 0
+ t.tic()
+ re_cnt = False
+