Improved W&B integration #2125

Merged: 84 commits, Mar 22, 2021

Changes from 1 commit
Commits (84)
ba39bfd
Init Commit
AyushExel Feb 2, 2021
5fcd1dc
new wandb integration
AyushExel Feb 3, 2021
c540e3b
Update
AyushExel Feb 3, 2021
8253b24
Use data_dict in test
AyushExel Feb 3, 2021
7f89535
Updates
AyushExel Feb 3, 2021
c149930
Update: scope of log_img
AyushExel Feb 3, 2021
49edb90
Update: scope of log_img
AyushExel Feb 3, 2021
7922683
Update
AyushExel Feb 3, 2021
e1e7179
Update: Fix logging conditions
AyushExel Feb 3, 2021
e632514
Add tqdm bar, support for .txt dataset format
AyushExel Feb 9, 2021
3e8f4ae
Improve Result table Logger
AyushExel Feb 21, 2021
cd094f3
Init Commit
AyushExel Feb 2, 2021
aa5231e
new wandb integration
AyushExel Feb 3, 2021
0fdf3d3
Update
AyushExel Feb 3, 2021
37a2ed6
Use data_dict in test
AyushExel Feb 3, 2021
ac7d4b1
Updates
AyushExel Feb 3, 2021
ebc1d18
Update: scope of log_img
AyushExel Feb 3, 2021
745a272
Update: scope of log_img
AyushExel Feb 3, 2021
7679454
Update
AyushExel Feb 3, 2021
b8210a7
Update: Fix logging conditions
AyushExel Feb 3, 2021
ac9a613
Add tqdm bar, support for .txt dataset format
AyushExel Feb 9, 2021
4f7c150
Improve Result table Logger
AyushExel Feb 21, 2021
b8bbfce
Merge branch 'wandb_clean' of https://github.com/AyushExel/yolov5 int…
AyushExel Feb 23, 2021
c1e6697
Add dataset creation in training script
AyushExel Feb 23, 2021
1948562
Change scope: self.wandb_run
AyushExel Feb 23, 2021
8848f3c
Add wandb-artifact:// natively
AyushExel Feb 25, 2021
deca116
Add suuport for logging dataset while training
AyushExel Feb 26, 2021
20185f2
Cleanup
AyushExel Feb 26, 2021
5287a79
Merge branch 'master' into wandb_clean
AyushExel Feb 26, 2021
e13994d
Fix: Merge conflict
AyushExel Feb 26, 2021
1080952
Fix: CI tests
AyushExel Feb 26, 2021
5a859d4
Automatically use wandb config
AyushExel Feb 27, 2021
519cb7d
Fix: Resume
AyushExel Feb 28, 2021
3242f52
Fix: CI
AyushExel Feb 28, 2021
8128216
Enhance: Using val_table
AyushExel Feb 28, 2021
043befa
More resume enhancement
AyushExel Feb 28, 2021
c2d98f0
FIX : CI
AyushExel Feb 28, 2021
dbb69f4
Add alias
AyushExel Feb 28, 2021
8505a58
Get useful opt config data
AyushExel Mar 1, 2021
04f8880
train.py cleanup
AyushExel Mar 2, 2021
27a33dd
Merge remote-tracking branch 'upstream/master' into wandb_clean
AyushExel Mar 2, 2021
54dee24
Cleanup train.py
AyushExel Mar 2, 2021
21a15a5
more cleanup
AyushExel Mar 2, 2021
d38c620
Cleanup| CI fix
AyushExel Mar 2, 2021
e5400ba
Reformat using PEP8
AyushExel Mar 3, 2021
45e2c55
FIX:CI
AyushExel Mar 3, 2021
75f31d0
Merge remote-tracking branch 'upstream/master' into wandb_clean
AyushExel Mar 6, 2021
613b102
rebase
AyushExel Mar 6, 2021
9772645
remove uneccesary changes
AyushExel Mar 6, 2021
cd1237e
remove uneccesary changes
AyushExel Mar 6, 2021
d172ba1
remove uneccesary changes
AyushExel Mar 6, 2021
7af0186
remove unecessary chage from test.py
AyushExel Mar 6, 2021
51dca6d
FIX: resume from local checkpoint
AyushExel Mar 8, 2021
1438483
FIX:resume
AyushExel Mar 8, 2021
e7d18c6
FIX:resume
AyushExel Mar 8, 2021
22d97a7
Reformat
AyushExel Mar 8, 2021
8e97cdf
Performance improvement
AyushExel Mar 9, 2021
2ffb643
Fix local resume
AyushExel Mar 9, 2021
7836d17
Fix local resume
AyushExel Mar 9, 2021
aa785ec
FIX:CI
AyushExel Mar 9, 2021
f97446e
Fix: CI
AyushExel Mar 9, 2021
807a0e1
Imporve image logging
AyushExel Mar 9, 2021
20b4450
(:(:Redo CI tests:):)
AyushExel Mar 9, 2021
db81c64
Remember epochs when resuming
AyushExel Mar 9, 2021
25ff6b8
Remember epochs when resuming
AyushExel Mar 9, 2021
819ebec
Update DDP location
glenn-jocher Mar 10, 2021
b23a902
merge master
glenn-jocher Mar 14, 2021
f742857
PEP8 reformat
glenn-jocher Mar 14, 2021
350b8ab
0.25 confidence threshold
glenn-jocher Mar 14, 2021
395379e
reset train.py plots syntax to previous
glenn-jocher Mar 14, 2021
a06b25c
reset epochs completed syntax to previous
glenn-jocher Mar 14, 2021
cc49f6a
reset space to previous
glenn-jocher Mar 14, 2021
2d56697
remove brackets
glenn-jocher Mar 14, 2021
ba859a6
reset comment to previous
glenn-jocher Mar 14, 2021
52e3e71
Update: is_coco check, remove unused code
AyushExel Mar 14, 2021
ad1ad8f
Remove redundant print statement
AyushExel Mar 14, 2021
72dd23b
Remove wandb imports
AyushExel Mar 14, 2021
ac955ab
remove dsviz logger from test.py
AyushExel Mar 14, 2021
8bded54
Remove redundant change from test.py
AyushExel Mar 14, 2021
1aca390
remove redundant changes from train.py
AyushExel Mar 14, 2021
4c1c9bf
reformat and improvements
AyushExel Mar 20, 2021
f4923b4
Fix typo
AyushExel Mar 21, 2021
af23506
Merge branch 'master' of https://github.com/ultralytics/yolov5 into w…
AyushExel Mar 21, 2021
ca06d31
Add tqdm tqdm progress when scanning files, naming improvements
AyushExel Mar 21, 2021
remove uneccesary changes
AyushExel committed Mar 6, 2021
commit 9772645242fc22f6142fbd1b163ce26db051eb89
train.py — 87 changes: 33 additions & 54 deletions
@@ -1,25 +1,25 @@
import argparse
import logging
import math
import numpy as np
import os
import random
import time
from copy import deepcopy
from pathlib import Path
from threading import Thread

import numpy as np
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data
import yaml
from copy import deepcopy
from pathlib import Path
from threading import Thread
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

import test # import test.py to get mAP after each epoch
from models.experimental import attempt_load
from models.yolo import Model
@@ -38,8 +38,7 @@


def train(hyp, opt, device, tb_writer=None):
logger.info(colorstr('hyperparameters: ') +
', '.join(f'{k}={v}' for k, v in hyp.items()))
logger.info(colorstr('hyperparameters: ') + ', '.join(f'{k}={v}' for k, v in hyp.items()))
save_dir, epochs, batch_size, total_batch_size, weights, rank = \
Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank

@@ -124,18 +123,15 @@ def train(hyp, opt, device, tb_writer=None):
optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
else:
optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)
# add pg1 with weight_decay
optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']})
optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay
optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
del pg0, pg1, pg2

# Scheduler https://arxiv.org/pdf/1812.01187.pdf
# https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
if opt.linear_lr:
def lf(x):
return (1 - x / (epochs - 1)) * \
(1.0 - hyp['lrf']) + hyp['lrf'] # linear
lf = lambda x: (1 - x / (epochs - 1)) * (1.0 - hyp['lrf']) + hyp['lrf'] # linear
else:
lf = one_cycle(1, hyp['lrf'], epochs) # cosine 1->hyp['lrf']
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
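For reference, both schedules above decay the LR multiplier from 1.0 at epoch 0 toward hyp['lrf'] at the final epoch; the linear form is given inline, while one_cycle (imported elsewhere in train.py) is a cosine ramp with the same endpoints. The following is a minimal standalone sketch, not part of this diff, with epochs and lrf chosen only for illustration:

import math

epochs, lrf = 10, 0.2                                 # illustrative values, not from the PR

linear = lambda x: (1 - x / (epochs - 1)) * (1.0 - lrf) + lrf                       # linear 1.0 -> lrf
cosine = lambda x: ((1 - math.cos(x * math.pi / epochs)) / 2) * (lrf - 1.0) + 1.0   # cosine 1.0 -> lrf

for epoch in range(epochs):
    print(f'epoch {epoch}: linear {linear(epoch):.3f}  cosine {cosine(epoch):.3f}')

Either lambda can be handed to lr_scheduler.LambdaLR exactly as done above.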
@@ -165,8 +161,8 @@ def lf(x):
if opt.resume:
assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
if epochs < start_epoch:
logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' % (
weights, ckpt['epoch'], epochs))
logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
(weights, ckpt['epoch'], epochs))
epochs += ckpt['epoch'] # finetune additional epochs

del ckpt, state_dict
@@ -204,6 +200,7 @@ def lf(x):
hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True, rank=-1,
world_size=opt.world_size, workers=opt.workers, pad=0.5,
prefix=colorstr('val: '))[0]

if not opt.resume:
labels = np.concatenate(dataset.labels, 0)
c = torch.tensor(labels[:, 0]) # classes
@@ -222,8 +219,7 @@ def lf(x):
# Model parameters
hyp['box'] *= 3. / nl # scale to layers
hyp['cls'] *= nc / 80. * 3. / nl # scale to classes and layers
# scale to image size and layers
hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl
hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl # scale to image size and layers
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou)
@@ -232,9 +228,8 @@ def lf(x):

# Start training
t0 = time.time()
# number of warmup iterations, max(3 epochs, 1k iterations)
nw = max(round(hyp['warmup_epochs'] * nb),
1000) # nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
scheduler.last_epoch = start_epoch - 1 # do not move
@@ -244,8 +239,7 @@ def lf(x):
f'Using {dataloader.num_workers} dataloader workers\n'
f'Logging results to {save_dir}\n'
f'Starting training for {epochs} epochs...')
# epoch ------------------------------------------------------------------
for epoch in range(start_epoch, epochs):
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
model.train()

# Update image weights (optional)
@@ -274,16 +268,14 @@ def lf(x):
if rank in [-1, 0]:
pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad()
# batch -------------------------------------------------------------
for i, (imgs, targets, paths, _) in pbar:
# number integrated batches (since train start)
ni = i + nb * epoch
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0
# Warmup
if ni <= nw:
xi = [0, nw] # x interp
# model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
accumulate = max(1, np.interp(i, xi, [1, nbs / total_batch_size]).round())
accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
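The warmup block restored here ramps both the gradient-accumulation count and the per-group learning rates with np.interp over the first nw integrated batches. Below is a self-contained sketch of that interpolation; nw, nbs, the batch size and lr0 are assumptions picked for illustration, and the lf(epoch) factor on the target lr is omitted:

import numpy as np

nw, nbs, total_batch_size = 1000, 64, 16              # warmup iterations, nominal and actual batch size (assumed)
lr0, warmup_bias_lr = 0.01, 0.1                        # target lr and starting bias lr (assumed)

for ni in (0, 250, 500, 1000):                         # integrated batches since training start
    xi = [0, nw]                                       # x interp range
    accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
    bias_lr = np.interp(ni, xi, [warmup_bias_lr, lr0])     # bias lr falls 0.1 -> lr0
    other_lr = np.interp(ni, xi, [0.0, lr0])                # other group lrs rise 0.0 -> lr0
    print(f'ni={ni}: accumulate={int(accumulate)}, bias_lr={bias_lr:.4f}, other_lr={other_lr:.4f}')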
@@ -292,12 +284,10 @@ def lf(x):

# Multi-scale
if opt.multi_scale:
sz = random.randrange(
imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
# new shape (stretched to gs-multiple)
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

# Forward
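The multi-scale branch above picks a random size between 0.5x and 1.5x imgsz, snaps it to a multiple of the grid stride gs, and stretches the batch to that shape. A standalone sketch with dummy tensors (sizes are assumptions, and the randrange bounds are cast to int for clarity), separate from the diff:

import math
import random
import torch
import torch.nn.functional as F

imgsz, gs = 640, 32                                    # train size and grid stride (illustrative)
imgs = torch.zeros(2, 3, imgsz, imgsz)                 # dummy normalized batch

sz = random.randrange(int(imgsz * 0.5), int(imgsz * 1.5 + gs)) // gs * gs  # random gs-multiple size
sf = sz / max(imgs.shape[2:])                          # scale factor
if sf != 1:
    ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]]  # new shape, stretched to gs-multiple
    imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)
print(imgs.shape)                                      # e.g. torch.Size([2, 3, 384, 384])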
@@ -331,8 +321,7 @@ def lf(x):
# Plot
if plots and ni < 3:
f = save_dir / f'train_batch{ni}.jpg' # filename
Thread(target=plot_images, args=(
imgs, targets, paths, f), daemon=True).start()
Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
# if tb_writer:
# tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
# tb_writer.add_graph(model, imgs) # add model to tensorboard
@@ -350,8 +339,7 @@ def lf(x):
# DDP process 0 or single-GPU
if rank in [-1, 0]:
# mAP
ema.update_attr(
model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
final_epoch = epoch + 1 == epochs
if not opt.notest or final_epoch: # Calculate mAP
wandb_logger.current_epoch = epoch + 1
@@ -370,8 +358,7 @@ def lf(x):

# Write
with open(results_file, 'a') as f:
# append metrics, val_loss
f.write(s + '%10.4g' * 7 % results + '\n')
f.write(s + '%10.4g' * 7 % results + '\n') # append metrics, val_loss
if len(opt.name) and opt.bucket:
os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))

@@ -387,8 +374,7 @@ def lf(x):
wandb_logger.log({tag: x}) # W&B

# Update best mAP
# weighted combination of [P, R, mAP@.5, mAP@.5-.95]
fi = fitness(np.array(results).reshape(1, -1))
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
if fi > best_fitness:
best_fitness = fi
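fitness() reduces the first four entries of results (P, R, mAP@.5, mAP@.5-.95) to a single scalar; the 0.1/0.9 weighting in the sketch below mirrors what yolov5's metrics utility used around this time, but treat it as an assumption rather than part of this diff:

import numpy as np

def fitness(x):
    w = np.array([0.0, 0.0, 0.1, 0.9])    # weights for [P, R, mAP@.5, mAP@.5-.95] (assumed values)
    return (x[:, :4] * w).sum(1)

results = np.array([0.70, 0.65, 0.60, 0.40, 0.05, 0.03, 0.02])  # P, R, mAP@.5, mAP@.5-.95, val losses
fi = fitness(results.reshape(1, -1))
print(fi)  # [0.42], compared against best_fitness as above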

@@ -441,7 +427,7 @@ def lf(x):
plots=False,
is_coco=is_coco)
wandb_logger.finish_run()

# Strip optimizers
final = best if best.exists() else last # final model
for f in [last, best]:
@@ -548,8 +534,7 @@ def lf(x):
else:
# Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
# final OneCycleLR learning rate (lr0 * lrf)
'lrf': (1, 0.01, 1.0),
'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1
'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok)
@@ -562,22 +547,16 @@ def lf(x):
'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
'iou_t': (0, 0.1, 0.7), # IoU training threshold
'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
# anchors per output grid (0 to ignore)
'anchors': (2, 2.0, 10.0),
# focal loss gamma (efficientDet default gamma=1.5)
'fl_gamma': (0, 0.0, 2.0),
# image HSV-Hue augmentation (fraction)
'hsv_h': (1, 0.0, 0.1),
# image HSV-Saturation augmentation (fraction)
'hsv_s': (1, 0.0, 0.9),
# image HSV-Value augmentation (fraction)
'hsv_v': (1, 0.0, 0.9),
'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore)
'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
'degrees': (1, 0.0, 45.0), # image rotation (+/- deg)
'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
'scale': (1, 0.0, 0.9), # image scale (+/- gain)
'shear': (1, 0.0, 10.0), # image shear (+/- deg)
# image perspective (+/- fraction), range 0-0.001
'perspective': (0, 0.0, 0.001),
'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
'mosaic': (1, 0.0, 1.0), # image mixup (probability)
@@ -588,7 +567,7 @@ def lf(x):
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml' # save best result here
if opt.bucket:
os.system('gsutil cp gs://%s/evolve.txt .' % pt.bucket) # download evolve.txt if exists
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists

for _ in range(300): # generations to evolve
if Path('evolve.txt').exists(): # if evolve.txt exists: select best hyps and mutate
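Each meta entry above is a (mutation scale, lower limit, upper limit) triple; a scale of 0 freezes that hyperparameter during evolution. The following is a rough, simplified sketch of how such triples can drive one mutation step; the real loop in train.py is vectorized and seeded from evolve.txt, and the probability and sigma here are assumptions:

import random
import numpy as np

meta = {'lr0': (1, 1e-5, 1e-1), 'lrf': (1, 0.01, 1.0), 'momentum': (0.3, 0.6, 0.98), 'fl_gamma': (0, 0.0, 2.0)}
hyp = {'lr0': 0.01, 'lrf': 0.2, 'momentum': 0.937, 'fl_gamma': 0.0}

mp, s = 0.8, 0.2                                       # mutation probability and sigma (assumed)
for k, (scale, lo, hi) in meta.items():
    if scale and random.random() < mp:                 # scale == 0 -> never mutated
        hyp[k] = float(np.clip(hyp[k] * (1 + random.gauss(0.0, 1.0) * s * scale), lo, hi))
print(hyp)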