support lightning-1.9.0 (#489)
* support lightning-1.9.0

* remove deprecated args

* add precision to config

* update readme

* update readme

* update readme
RangiLyu committed Jan 20, 2023
1 parent a59db3c commit 0036f94
Showing 12 changed files with 24 additions and 19 deletions.
8 changes: 4 additions & 4 deletions .github/workflows/workflow.yml
@@ -34,16 +34,16 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        torch: [1.9.0, 1.10.1, 1.11.0, 1.12.1]
+        torch: [1.10.1, 1.11.0, 1.12.1, 1.13.1]
         include:
-          - torch: 1.9.0
-            torchvision: 0.10.0
           - torch: 1.10.1
             torchvision: 0.11.2
           - torch: 1.11.0
             torchvision: 0.12.0
           - torch: 1.12.1
             torchvision: 0.13.1
+          - torch: 1.13.1
+            torchvision: 0.14.1
     steps:
       - name: Checkout
         uses: actions/checkout@v2
@@ -56,7 +56,7 @@ jobs:
           python -m pip install -U pip
           python -m pip install ninja opencv-python-headless onnx pytest-xdist codecov
           python -m pip install torch==${{matrix.torch}}+cpu torchvision==${{matrix.torchvision}}+cpu -f https://download.pytorch.org/whl/torch_stable.html
-          python -m pip install Cython termcolor numpy tensorboard pycocotools matplotlib pyaml opencv-python tqdm pytorch-lightning==1.8.0 torchmetrics codecov flake8 pytest timm
+          python -m pip install Cython termcolor numpy tensorboard pycocotools matplotlib pyaml opencv-python tqdm pytorch-lightning torchmetrics codecov flake8 pytest timm
           python -m pip install -r requirements.txt
       - name: Setup
         run: rm -rf .eggs && python setup.py develop
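The matrix above pins each torch release to a matching torchvision build. For reference, a small hypothetical helper that reproduces the workflow's CPU install command for one matrix entry (the version pairs are taken from the matrix above; the helper itself is not part of the repository):

```python
# torch -> torchvision pairs, mirroring the CI matrix above.
TORCH_TO_TORCHVISION = {
    "1.10.1": "0.11.2",
    "1.11.0": "0.12.0",
    "1.12.1": "0.13.1",
    "1.13.1": "0.14.1",
}


def cpu_install_command(torch_version: str) -> str:
    """Build the pip command the workflow runs for one matrix entry."""
    torchvision_version = TORCH_TO_TORCHVISION[torch_version]
    return (
        f"python -m pip install torch=={torch_version}+cpu "
        f"torchvision=={torchvision_version}+cpu "
        "-f https://download.pytorch.org/whl/torch_stable.html"
    )


print(cpu_install_command("1.13.1"))
```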
5 changes: 3 additions & 2 deletions README.md
@@ -73,6 +73,8 @@ MobileDet | 320*320 | 25.6 | - | -
 
 ## NEWS!!!
 
+* [2023.01.20] Upgrade to [pytorch-lightning-1.9](https://github.com/Lightning-AI/lightning/releases/tag/1.9.0). The minimum PyTorch version is upgraded to 1.10. Support FP16 training (Thanks @crisp-snakey). Support ignore label (Thanks @zero0kiriyu).
+
 * [2022.08.26] Upgrade to [pytorch-lightning-1.7](https://lightning.ai/). The minimum PyTorch version is upgraded to 1.9. To use previous version of PyTorch, please install [NanoDet <= v1.0.0-alpha-1](https://github.com/RangiLyu/nanodet/tags)
 
 * [2021.12.25] **NanoDet-Plus** release! Adding **AGM**(Assign Guidance Module) & **DSLA**(Dynamic Soft Label Assigner) to improve **7 mAP** with only a little cost.
@@ -141,8 +143,7 @@ Besides, We provide a notebook [here](./demo/demo-inference-with-pytorch.ipynb)
 * Linux or MacOS
 * CUDA >= 10.0
 * Python >= 3.6
-* Pytorch >= 1.9
-* experimental support Windows (Notice: Windows not support distributed training before pytorch1.7)
+* Pytorch >= 1.10

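A quick way to check an existing environment against these bumped requirements (a minimal sketch, assuming torch and torchvision are already installed):

```python
import sys

import torch
import torchvision

# Parse e.g. "1.13.1+cpu" -> (1, 13); the local build suffix is ignored.
torch_version = tuple(int(v) for v in torch.__version__.split("+")[0].split(".")[:2])

assert sys.version_info >= (3, 6), "NanoDet requires Python >= 3.6"
assert torch_version >= (1, 10), "NanoDet now requires PyTorch >= 1.10"
print(torch.__version__, torchvision.__version__, "CUDA:", torch.cuda.is_available())
```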
### Step

1 change: 1 addition & 0 deletions config/nanodet-plus-m-1.5x_320.yml
@@ -91,6 +91,7 @@ device:
   gpu_ids: [0]
   workers_per_gpu: 10
   batchsize_per_gpu: 96
+  precision: 32 # set to 16 to use AMP training
 schedule:
 #  resume:
 #  load_model:
1 change: 1 addition & 0 deletions config/nanodet-plus-m-1.5x_416.yml
@@ -91,6 +91,7 @@ device:
   gpu_ids: [0]
   workers_per_gpu: 10
   batchsize_per_gpu: 96
+  precision: 32 # set to 16 to use AMP training
 schedule:
 #  resume:
 #  load_model:
1 change: 1 addition & 0 deletions config/nanodet-plus-m_320.yml
@@ -91,6 +91,7 @@ device:
   gpu_ids: [0] # Set like [0, 1, 2, 3] if you have multi-GPUs
   workers_per_gpu: 10
   batchsize_per_gpu: 96
+  precision: 32 # set to 16 to use AMP training
 schedule:
 #  resume:
 #  load_model:
1 change: 1 addition & 0 deletions config/nanodet-plus-m_416.yml
@@ -91,6 +91,7 @@ device:
   gpu_ids: [0]
   workers_per_gpu: 10
   batchsize_per_gpu: 96
+  precision: 32 # set to 16 to use AMP training
 schedule:
 #  resume:
 #  load_model:
1 change: 1 addition & 0 deletions config/nanodet_custom_xml_dataset.yml
@@ -89,6 +89,7 @@ device:
   gpu_ids: [0] # Set like [0, 1, 2, 3] if you have multi-GPUs
   workers_per_gpu: 8
   batchsize_per_gpu: 96
+  precision: 32 # set to 16 to use AMP training
 schedule:
 #  resume:
 #  load_model: YOUR_MODEL_PATH
3 changes: 3 additions & 0 deletions docs/config_file_detail.md
@@ -143,6 +143,7 @@ device:
   gpu_ids: [0]
   workers_per_gpu: 12
   batchsize_per_gpu: 160
+  precision: 32
 ```
 
 `gpu_ids`: CUDA device id. For multi-gpu training, set [0, 1, 2...].
@@ -151,6 +152,8 @@ device:
 
 `batchsize_per_gpu`: amount of images in one batch for each gpu
 
+`precision`: Training precision. The default value `32` means FP32 training. Set to `16` to enable AMP training.
+
 ## schedule
 
 ```yaml
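The new `precision` key maps directly onto Lightning's `Trainer(precision=...)` argument. A minimal sketch of how a loaded config could drive it (config loading is simplified here; only the `precision` field comes from this commit):

```python
import yaml
from pytorch_lightning import Trainer

with open("config/nanodet-plus-m_320.yml") as f:
    cfg = yaml.safe_load(f)

# 32 -> full-precision training (the default); 16 -> native AMP.
precision = cfg["device"].get("precision", 32)
trainer = Trainer(
    accelerator="gpu",
    devices=cfg["device"]["gpu_ids"],
    precision=precision,
)
```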
2 changes: 0 additions & 2 deletions nanodet/trainer/task.py
@@ -245,7 +245,6 @@ def optimizer_step(
         optimizer_idx=None,
         optimizer_closure=None,
         on_tpu=None,
-        using_native_amp=None,
         using_lbfgs=None,
     ):
         """
@@ -257,7 +256,6 @@ def optimizer_step(
             optimizer_idx: If you used multiple optimizers this indexes into that list.
             optimizer_closure: closure for all optimizers
             on_tpu: true if TPU backward is required
-            using_native_amp: True if using native amp
             using_lbfgs: True if the matching optimizer is lbfgs
         """
         # warm up lr
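For reference, an `optimizer_step` override written against the lightning-1.9 hook signature, where `using_native_amp` no longer exists. This is a schematic warm-up only, not the repository's exact implementation; the class body and warm-up constants are invented:

```python
import pytorch_lightning as pl


class TrainingTask(pl.LightningModule):
    warmup_steps = 500  # hypothetical warm-up length
    base_lr = 0.01  # hypothetical target learning rate

    def optimizer_step(
        self,
        epoch,
        batch_idx,
        optimizer,
        optimizer_idx=0,
        optimizer_closure=None,
        on_tpu=False,
        using_lbfgs=False,
    ):
        # Linearly ramp the learning rate during warm-up, then step.
        if self.trainer.global_step < self.warmup_steps:
            scale = (self.trainer.global_step + 1) / self.warmup_steps
            for pg in optimizer.param_groups:
                pg["lr"] = self.base_lr * scale
        optimizer.step(closure=optimizer_closure)
```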
13 changes: 6 additions & 7 deletions nanodet/util/logger.py
@@ -17,10 +17,10 @@
 import time
 
 import numpy as np
+from lightning_fabric.utilities.cloud_io import get_filesystem
 from pytorch_lightning.loggers import Logger as LightningLoggerBase
 from pytorch_lightning.loggers.logger import rank_zero_experiment
 from pytorch_lightning.utilities import rank_zero_only
-from pytorch_lightning.utilities.cloud_io import get_filesystem
 from termcolor import colored
 
 from .path import mkdir
@@ -115,10 +115,10 @@ def __init__(self, save_dir="./", **kwargs):
         super().__init__()
         self._name = "NanoDet"
         self._version = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
-        self.log_dir = os.path.join(save_dir, f"logs-{self._version}")
+        self._save_dir = os.path.join(save_dir, f"logs-{self._version}")
 
         self._fs = get_filesystem(save_dir)
-        self._fs.makedirs(self.log_dir, exist_ok=True)
+        self._fs.makedirs(self._save_dir, exist_ok=True)
         self._init_logger()
 
         self._experiment = None
@@ -154,20 +154,19 @@ def experiment(self):
                 "(applicable to PyTorch 1.1 or higher)"
             ) from None
 
-        self._experiment = SummaryWriter(log_dir=self.log_dir, **self._kwargs)
+        self._experiment = SummaryWriter(log_dir=self._save_dir, **self._kwargs)
         return self._experiment
 
     @property
     def version(self):
         return self._version
 
-    @rank_zero_only
     def _init_logger(self):
         self.logger = logging.getLogger(name=self.name)
         self.logger.setLevel(logging.INFO)
 
         # create file handler
-        fh = logging.FileHandler(os.path.join(self.log_dir, "logs.txt"))
+        fh = logging.FileHandler(os.path.join(self._save_dir, "logs.txt"))
         fh.setLevel(logging.INFO)
         # set file formatter
         f_fmt = "[%(name)s][%(asctime)s]%(levelname)s: %(message)s"
@@ -201,7 +200,7 @@ def log(self, string):
 
     @rank_zero_only
     def dump_cfg(self, cfg_node):
-        with open(os.path.join(self.log_dir, "train_cfg.yml"), "w") as f:
+        with open(os.path.join(self._save_dir, "train_cfg.yml"), "w") as f:
             cfg_node.dump(stream=f)
 
     @rank_zero_only
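Usage is unchanged by the rename: only the attribute backing the output directory moved from `log_dir` to `_save_dir`, presumably because the lightning-1.9 `Logger` base class reserves `save_dir`/`log_dir` as properties. A minimal sketch, assuming the class is exported as `NanoDetLightningLogger`:

```python
from pytorch_lightning import Trainer

from nanodet.util.logger import NanoDetLightningLogger  # class name assumed from the repo

logger = NanoDetLightningLogger(save_dir="./workspace/demo")
logger.log("plain-text messages still go to logs.txt in the run directory")

trainer = Trainer(logger=logger, max_epochs=1)
```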
4 changes: 2 additions & 2 deletions requirements.txt
@@ -7,11 +7,11 @@ onnx-simplifier
 opencv-python
 pyaml
 pycocotools
-pytorch-lightning>=1.7.0,<1.9.0
+pytorch-lightning>=1.9.0
 tabulate
 tensorboard
 termcolor
-torch>=1.9
+torch>=1.10
 torchmetrics
 torchvision
 tqdm
3 changes: 1 addition & 2 deletions tools/train.py
@@ -139,7 +139,6 @@ def main(args):
         devices=devices,
         log_every_n_steps=cfg.log.interval,
         num_sanity_val_steps=0,
-        resume_from_checkpoint=model_resume_path,
         callbacks=[TQDMProgressBar(refresh_rate=0)],  # disable tqdm bar
         logger=logger,
         benchmark=cfg.get("cudnn_benchmark", True),
@@ -148,7 +147,7 @@ def main(args):
         precision=precision,
     )
 
-    trainer.fit(task, train_dataloader, val_dataloader)
+    trainer.fit(task, train_dataloader, val_dataloader, ckpt_path=model_resume_path)
 
 
 if __name__ == "__main__":
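The resume behavior is preserved: lightning-1.9 removed the `resume_from_checkpoint` Trainer argument in favor of `ckpt_path` on `fit()`. Schematically, reusing the names from tools/train.py (their construction is omitted here):

```python
from pytorch_lightning import Trainer

# `task`, the dataloaders, and `model_resume_path` are built earlier in
# main(); shown only to contrast the two resume APIs.

# lightning <= 1.8 (argument removed in 1.9):
#   trainer = Trainer(resume_from_checkpoint=model_resume_path, ...)

# lightning 1.9:
trainer = Trainer(max_epochs=300)  # remaining Trainer kwargs as in main()
trainer.fit(task, train_dataloader, val_dataloader, ckpt_path=model_resume_path)
```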
