diff --git a/benchmark.py b/benchmark.py
index f4f1485f..f55a5535 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -55,15 +55,15 @@ def iteration(input_data):
     target_size = torch.IntTensor(batch_size).fill_(int((seconds * 100) / 2))
     input_percentages = torch.IntTensor(batch_size).fill_(1)

-    inputs = Variable(input_data)
-    target_sizes = Variable(target_size)
-    targets = Variable(target)
+    inputs = Variable(input_data, requires_grad=False)
+    target_sizes = Variable(target_size, requires_grad=False)
+    targets = Variable(target, requires_grad=False)

     start = time.time()
     out = model(inputs)
     out = out.transpose(0, 1)  # TxNxH
     seq_length = out.size(0)
-    sizes = Variable(input_percentages.mul_(int(seq_length)).int())
+    sizes = Variable(input_percentages.mul_(int(seq_length)).int(), requires_grad=False)
     loss = criterion(out, targets, sizes, target_sizes)
     loss = loss / inputs.size(0)  # average the loss by minibatch
     # compute gradient
diff --git a/logger.py b/logger.py
new file mode 100644
index 00000000..4ccd3c96
--- /dev/null
+++ b/logger.py
@@ -0,0 +1,73 @@
+# Code referenced from https://gist.github.com/gyglim/1f8dfb1b5c82627ae3efcfbbadb9f514
+import tensorflow as tf
+import numpy as np
+import scipy.misc
+try:
+    from StringIO import StringIO  # Python 2.7
+except ImportError:
+    from io import BytesIO  # Python 3.x
+
+
+class Logger(object):
+
+    def __init__(self, log_dir):
+        """Create a summary writer logging to log_dir."""
+        self.writer = tf.summary.FileWriter(log_dir)
+
+    def scalar_summary(self, tag, value, step):
+        """Log a scalar variable."""
+        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, simple_value=value)])
+        self.writer.add_summary(summary, step)
+        self.writer.flush()
+
+    def image_summary(self, tag, images, step):
+        """Log a list of images."""
+
+        img_summaries = []
+        for i, img in enumerate(images):
+            # Write the image to a string
+            try:
+                s = StringIO()
+            except:
+                s = BytesIO()
+            scipy.misc.toimage(img).save(s, format="png")
+
+            # Create an Image object
+            img_sum = tf.Summary.Image(encoded_image_string=s.getvalue(),
+                                       height=img.shape[0],
+                                       width=img.shape[1])
+            # Create a Summary value
+            img_summaries.append(tf.Summary.Value(tag='%s/%d' % (tag, i), image=img_sum))
+
+        # Create and write Summary
+        summary = tf.Summary(value=img_summaries)
+        self.writer.add_summary(summary, step)
+        self.writer.flush()
+
+    def histo_summary(self, tag, values, step, bins=1000):
+        """Log a histogram of the tensor of values."""
+
+        # Create a histogram using numpy
+        counts, bin_edges = np.histogram(values, bins=bins)
+
+        # Fill the fields of the histogram proto
+        hist = tf.HistogramProto()
+        hist.min = float(np.min(values))
+        hist.max = float(np.max(values))
+        hist.num = int(np.prod(values.shape))
+        hist.sum = float(np.sum(values))
+        hist.sum_squares = float(np.sum(values**2))
+
+        # Drop the start of the first bin
+        bin_edges = bin_edges[1:]
+
+        # Add bin edges and counts
+        for edge in bin_edges:
+            hist.bucket_limit.append(edge)
+        for c in counts:
+            hist.bucket.append(c)
+
+        # Create and write Summary
+        summary = tf.Summary(value=[tf.Summary.Value(tag=tag, histo=hist)])
+        self.writer.add_summary(summary, step)
+        self.writer.flush()
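The `Logger` above is driven entirely through its three `*_summary` methods and a monotonically increasing step. A minimal usage sketch, assuming TensorFlow 1.x (which provides `tf.summary.FileWriter`); the log directory, tags, and values here are placeholders rather than the ones `train.py` actually emits:

```python
import numpy as np
from logger import Logger

logger = Logger('visualize/example_run')  # hypothetical directory, created on demand
for step in range(1, 4):
    # scalars become line charts in TensorBoard's "Scalars" tab
    logger.scalar_summary('Avg Train Loss', 1.0 / step, step)
    # histo_summary accepts any array-like; random data stands in for a weight tensor
    logger.histo_summary('rnn_0/weight', np.random.randn(1000), step)
```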
diff --git a/predict.py b/predict.py
index fa4bc9d1..5af646d9 100644
--- a/predict.py
+++ b/predict.py
@@ -26,7 +26,7 @@
 parser = SpectrogramParser(audio_conf, normalize=True)
 spect = parser.parse_audio(args.audio_path).contiguous()
 spect = spect.view(1, 1, spect.size(0), spect.size(1))
-out = model(Variable(spect))
+out = model(Variable(spect, volatile=True))
 out = out.transpose(0, 1)  # TxNxH
 decoded_output = decoder.decode(out.data)
 print(decoded_output[0])
diff --git a/test.py b/test.py
index 7283a8ca..28118689 100644
--- a/test.py
+++ b/test.py
@@ -34,7 +34,7 @@
 for i, (data) in enumerate(test_loader):
     inputs, targets, input_percentages, target_sizes = data
-    inputs = Variable(inputs)
+    inputs = Variable(inputs, volatile=True)

     # unflatten targets
     split_targets = []
@@ -49,7 +49,7 @@
     out = model(inputs)
     out = out.transpose(0, 1)  # TxNxH
     seq_length = out.size(0)
-    sizes = Variable(input_percentages.mul_(int(seq_length)).int())
+    sizes = Variable(input_percentages.mul_(int(seq_length)).int(), volatile=True)

     decoded_output = decoder.decode(out.data, sizes)
     target_strings = decoder.process_strings(decoder.convert_to_strings(split_targets))
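The pattern in the hunks above and below is consistent: training-time wrappers get `requires_grad=False`, evaluation-time wrappers get `volatile=True`. In the pre-0.4 `torch.autograd.Variable` API these do different things: `requires_grad=False` only stops gradients from accumulating into that particular tensor, while `volatile=True` propagates through every downstream op and disables graph construction entirely, cutting inference memory. A sketch of the distinction (illustrative shapes; PyTorch 0.4+ replaces `volatile` with `torch.no_grad()`):

```python
import torch
from torch.autograd import Variable

x = torch.randn(4, 10)

# Training-time wrapping: no gradient flows into the input itself, but the
# graph is still recorded so the model parameters receive gradients.
train_input = Variable(x, requires_grad=False)

# Evaluation-time wrapping: volatile infects every downstream op, so no graph
# is built at all; this is the cheapest mode for pure inference.
eval_input = Variable(x, volatile=True)
```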
diff --git a/train.py b/train.py
index 3efe4570..2fbea2fd 100644
--- a/train.py
+++ b/train.py
@@ -49,8 +49,13 @@
                     help='Minimum noise level to sample from. (1.0 means all noise, not original signal)', type=float)
 parser.add_argument('--noise_max', default=0.5,
                     help='Maximum noise levels to sample from. Maximum 1.0', type=float)
-parser.set_defaults(cuda=False, silent=False, checkpoint=False, visdom=False, augment=False)
+parser.add_argument('--tensorboard', dest='tensorboard', action='store_true', help='Turn on tensorboard graphing')
+parser.add_argument('--log_dir', default='visualize/deepspeech_final', help='Location of tensorboard log')
+parser.add_argument('--log_params', dest='log_params', action='store_true', help='Log parameter values and gradients')
+parser.set_defaults(cuda=False, silent=False, checkpoint=False, visdom=False, augment=False, tensorboard=False, log_params=False)

+def to_np(x):
+    return x.data.cpu().numpy()

 class AverageMeter(object):
     """Computes and stores the average and current value"""
@@ -87,6 +92,25 @@ def main():
     loss_results, cer_results, wer_results = torch.Tensor(args.epochs), torch.Tensor(args.epochs), torch.Tensor(
         args.epochs)
     epochs = torch.arange(1, args.epochs + 1)
+    if args.tensorboard:
+        from logger import Logger
+        try:
+            os.makedirs(args.log_dir)
+        except OSError as e:
+            if e.errno == errno.EEXIST:
+                print('Directory already exists.')
+                for file in os.listdir(args.log_dir):
+                    file_path = os.path.join(args.log_dir, file)
+                    try:
+                        if os.path.isfile(file_path):
+                            os.unlink(file_path)
+                    except Exception as e:
+                        raise
+            else:
+                raise
+        loss_results, cer_results, wer_results = torch.Tensor(args.epochs), torch.Tensor(args.epochs), torch.Tensor(
+            args.epochs)
+        logger = Logger(args.log_dir)

     try:
         os.makedirs(save_folder)
@@ -146,7 +170,7 @@ def main():
         if args.visdom and \
                         package['loss_results'] is not None and start_epoch > 0:  # Add previous scores to visdom graph
             epoch = start_epoch
-            loss_results, cer_results, wer_results = package['loss_results'], package['cer_results'], package[
+            loss_results[0:epoch], cer_results[0:epoch], wer_results[0:epoch] = package['loss_results'], package['cer_results'], package[
                 'wer_results']
             x_axis = epochs[0:epoch]
             y_axis = [loss_results[0:epoch], wer_results[0:epoch], cer_results[0:epoch]]
@@ -156,6 +180,18 @@
                     Y=y_axis[x],
                     opts=opts[x],
                 )
+        if args.tensorboard and package['loss_results'] is not None and start_epoch > 0:  # Add previous scores to tensorboard logs
+            epoch = start_epoch
+            loss_results, cer_results, wer_results = package['loss_results'], package['cer_results'], package[
+                'wer_results']
+            for i in range(len(loss_results)):
+                info = {
+                    'Avg Train Loss': loss_results[i],
+                    'Avg WER': wer_results[i],
+                    'Avg CER': cer_results[i]
+                }
+                for tag, val in info.items():
+                    logger.scalar_summary(tag, val, i + 1)
     else:
         avg_loss = 0
         start_epoch = 0
@@ -177,9 +213,9 @@
             inputs, targets, input_percentages, target_sizes = data
             # measure data loading time
             data_time.update(time.time() - end)
-            inputs = Variable(inputs)
-            target_sizes = Variable(target_sizes)
-            targets = Variable(targets)
+            inputs = Variable(inputs, requires_grad=False)
+            target_sizes = Variable(target_sizes, requires_grad=False)
+            targets = Variable(targets, requires_grad=False)

             if args.cuda:
                 inputs = inputs.cuda()
@@ -188,7 +224,7 @@
             out = out.transpose(0, 1)  # TxNxH
             seq_length = out.size(0)
-            sizes = Variable(input_percentages.mul_(int(seq_length)).int())
+            sizes = Variable(input_percentages.mul_(int(seq_length)).int(), requires_grad=False)

             loss = criterion(out, targets, sizes, target_sizes)
             loss = loss / inputs.size(0)  # average the loss by minibatch
@@ -238,12 +274,12 @@
             epoch + 1, loss=avg_loss))

         start_iter = 0  # Reset start iteration for next epoch
         total_cer, total_wer = 0, 0
         model.eval()
         for i, (data) in enumerate(test_loader):  # test
             inputs, targets, input_percentages, target_sizes = data
-            inputs = Variable(inputs)
+            inputs = Variable(inputs, volatile=True)

             # unflatten targets
             split_targets = []
@@ -258,7 +294,7 @@
             out = model(inputs)
             out = out.transpose(0, 1)  # TxNxH
             seq_length = out.size(0)
-            sizes = Variable(input_percentages.mul_(int(seq_length)).int())
+            sizes = Variable(input_percentages.mul_(int(seq_length)).int(), volatile=True)

             decoded_output = decoder.decode(out.data, sizes)
             target_strings = decoder.process_strings(decoder.convert_to_strings(split_targets))
@@ -271,7 +307,6 @@

             if args.cuda:
                 torch.cuda.synchronize()
-
         wer = total_wer / len(test_loader.dataset)
         cer = total_cer / len(test_loader.dataset)
         wer *= 100
@@ -286,9 +321,9 @@
             loss_results[epoch] = avg_loss
             wer_results[epoch] = wer
             cer_results[epoch] = cer
-            epoch += 1
-            x_axis = epochs[0:epoch]
-            y_axis = [loss_results[0:epoch], wer_results[0:epoch], cer_results[0:epoch]]
+            # epoch += 1
+            x_axis = epochs[0:epoch + 1]
+            y_axis = [loss_results[0:epoch + 1], wer_results[0:epoch + 1], cer_results[0:epoch + 1]]
             for x in range(len(viz_windows)):
                 if viz_windows[x] is None:
                     viz_windows[x] = viz.line(
@@ -303,6 +338,23 @@
                         win=viz_windows[x],
                         update='replace',
                     )
+        if args.tensorboard:
+            loss_results[epoch] = avg_loss
+            wer_results[epoch] = wer
+            cer_results[epoch] = cer
+            info = {
+                'Avg Train Loss': avg_loss,
+                'Avg WER': wer,
+                'Avg CER': cer
+            }
+            for tag, val in info.items():
+                logger.scalar_summary(tag, val, epoch + 1)
+            if args.log_params:
+                for tag, value in model.named_parameters():
+                    tag = tag.replace('.', '/')
+                    logger.histo_summary(tag, to_np(value), epoch + 1)
+                    if value.grad is not None:  # guard: batch_norm RNN_0 weight/bias grads can be None; cause not yet understood
+                        logger.histo_summary(tag + '/grad', to_np(value.grad), epoch + 1)
         if args.checkpoint:
             file_path = '%s/deepspeech_%d.pth.tar' % (save_folder, epoch + 1)
             torch.save(DeepSpeech.serialize(model, optimizer=optimizer, epoch=epoch, loss_results=loss_results,
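Once `train.py` has been run with `--tensorboard`, the curves can be viewed with `tensorboard --logdir visualize/deepspeech_final`. For a quick check that the scalars actually landed, the event file can also be read back directly; a sketch assuming TensorFlow 1.x and a single events file in the default log directory:

```python
import glob
import tensorflow as tf

# The FileWriter names its output 'events.out.tfevents.*' inside log_dir.
event_file = glob.glob('visualize/deepspeech_final/events.*')[0]
for event in tf.train.summary_iterator(event_file):
    for value in event.summary.value:
        if value.HasField('simple_value'):  # scalar summaries only
            print(event.step, value.tag, value.simple_value)
```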