Skip to content

Commit

Permalink
Merge pull request #59 from moskomule/dev
Browse files Browse the repository at this point in the history
Accumulated updates
  • Loading branch information
moskomule committed Jul 10, 2021
2 parents 815750e + ed9697a commit 1d6ffa2
Show file tree
Hide file tree
Showing 35 changed files with 166 additions and 53 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ghpage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ jobs:
. venv/bin/activate
pip install -U pip
pip install Sphinx sphinx-rtd-theme
pip install torch==1.8.1+cpu torchvision==0.9.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip install torch==1.9.0+cpu torchvision==0.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html
pip install -U .
- name: build
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ jobs:
matrix:
python: [ '3.9' ]
torch: [ 'torch==1.8.1+cpu torchvision==0.9.1+cpu -f https://download.pytorch.org/whl/torch_stable.html',
'torch==1.9.0+cpu torchvision==0.10.0+cpu -f https://download.pytorch.org/whl/torch_stable.html',
'--pre torch torchvision -f https://download.pytorch.org/whl/nightly/cpu/torch_nightly.html' ]

steps:
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ with trainers.SupervisedTrainer(model,
trainer.run(train_loader, test_loader,
total_iterations=1_000, val_intervals=10)

print(f"Max Accuracy={max(trainer.history['accuracy']['test'])}")
print(f"Max Accuracy={max(trainer.history['accuracy']['tests'])}")
```

You can customize `iteration` of `trainer` as follows.
Expand Down
11 changes: 6 additions & 5 deletions examples/imagenet.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@
from torch.nn import functional as F
from torchvision.models import resnet50

from homura import distributed_ready_main, enable_accimage, get_num_nodes, is_distributed, lr_scheduler, optim, \
from homura import distributed_ready_main, enable_accimage, get_world_size, is_distributed, lr_scheduler, optim, \
reporters
from homura.trainers import SupervisedTrainer
from homura.vision.data import DATASET_REGISTRY


@chika.config
class Config:
base_lr: float = 0.1
epochs: int = 90
batch_size: int = 256
enable_accimage: bool = False
Expand All @@ -30,10 +31,10 @@ def main(cfg: Config):
enable_accimage()

model = resnet50()
optimizer = optim.SGD(lr=1e-1 * cfg.batch_size * get_num_nodes() / 256, momentum=0.9, weight_decay=1e-4)
scheduler = lr_scheduler.MultiStepLR([30, 60, 80])
train_loader, test_loader = DATASET_REGISTRY("fast_imagenet" if cfg.use_fast_collate else
"imagenet")(cfg.batch_size,
optimizer = optim.SGD(lr=cfg.base_lr * cfg.batch_size * get_world_size() / 256, momentum=0.9, weight_decay=1e-4,
multi_tensor=True)
scheduler = lr_scheduler.MultiStepLR([30, 60, 90])
train_loader, test_loader = DATASET_REGISTRY("imagenet")(cfg.batch_size,
train_size=cfg.batch_size * 50 if cfg.debug else None,
test_size=cfg.batch_size * 50 if cfg.debug else None,
num_workers=cfg.num_workers)
Expand Down
8 changes: 4 additions & 4 deletions homura/__init__.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from .register import Registry
from .utils import TensorDataClass, TensorTuple, distributed_print, enable_accimage, get_args, get_environ, \
get_git_hash, get_global_rank, get_local_rank, get_num_nodes, get_world_size, if_is_master, init_distributed, \
is_accimage_available, is_distributed, is_distributed_available, is_faiss_available, is_master, set_deterministic, \
set_seed, distributed_ready_main
from .utils import TensorDataClass, TensorTuple, disable_tf32, disable_tf32_locally, distributed_print, \
distributed_ready_main, enable_accimage, get_args, get_environ, get_git_hash, get_global_rank, get_local_rank, \
get_num_nodes, get_world_size, if_is_master, init_distributed, is_accimage_available, is_distributed, \
is_distributed_available, is_faiss_available, is_master, set_deterministic, set_seed

Registry.import_modules('homura.vision')
# to avoid circular import
Expand Down
15 changes: 12 additions & 3 deletions homura/modules/ema.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,17 @@ def __init__(self,

self._original_model = original_model
self._ema_model = copy.deepcopy(original_model)
for p in self._ema_model.parameters():
for p in self.ema_model.parameters():
p.requires_grad_(False)

def __getattr__(self, item: str):
    """Resolve ``item`` on this module, falling back to the original model.

    The parent class' ``__getattr__`` is tried first; only when it raises
    ``AttributeError`` is the attribute fetched from ``self.original_model``.
    """
    try:
        found = super().__getattr__(item)
    except AttributeError:
        # unknown to this wrapper: delegate the lookup to the original model
        found = getattr(self.original_model, item)
    return found

@property
def original_model(self) -> nn.Module:
    """The module stored in ``self._original_model``."""
    wrapped = self._original_model
    return wrapped
Expand All @@ -53,14 +61,15 @@ def ema_model(self) -> nn.Module:
return self._ema_model

def parameters(self, recurse: bool = True) -> Iterator[nn.Parameter]:
    """Return the parameter iterator of the original model.

    Only ``self._original_model`` is consulted. This keeps things simple,
    but may incur unexpected behavior.
    """
    source = self._original_model
    return source.parameters(recurse)

def requires_grad_(self, requires_grad: bool = True):
def requires_grad_(self, requires_grad: bool = True) -> nn.Module:
return self._original_model.requires_grad_(requires_grad)

@torch.no_grad()
def _update(self):
if torch.cuda.is_available():
torch.cuda.synchronize()
# _foreach_** is n times faster than for loops
o_p = [p.data for p in self._original_model.parameters() if isinstance(p, torch.Tensor)]
e_p = [p.data for p in self._ema_model.parameters() if isinstance(p, torch.Tensor)]
Expand Down
2 changes: 1 addition & 1 deletion homura/reporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,4 +452,4 @@ def _clear_epoch_hist(self
def exit(self
) -> None:
# expected to be used in TrainerBase.exit
self._persistent_hist = defaultdict(list)
ReporterList._persistent_hist = defaultdict(list)
19 changes: 17 additions & 2 deletions homura/trainers.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ def __init__(self,
use_sync_bn: bool = False,
tqdm_ncols: int = 120,
debug: bool = False,
dist_kwargs: Optional[dict] = None,
**kwargs):

if kwargs.get("update_scheduler_by_epoch"):
Expand Down Expand Up @@ -106,7 +107,8 @@ def __init__(self,
self.logger.info(f"cuda: False (torch.cuda.is_available()={torch.cuda.is_available()})")

if is_distributed():
self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[rank])
dist_kwargs = dist_kwargs or {}
self.model = nn.parallel.DistributedDataParallel(self.model, device_ids=[rank], **dist_kwargs)
self.logger.debug(f"model converted to DistributedDataParallel at rank={rank}")

# self.accessible_model is useful for e.g., checkpointing
Expand Down Expand Up @@ -139,7 +141,7 @@ def __init__(self,

# to nest, leave=False (https://github.com/tqdm/tqdm/blob/master/examples/simple_examples.py#L19)
self._tqdm = lambda x: x
if self._verbose:
if self.verbose:
self._tqdm = Partial(tqdm, ncols=tqdm_ncols, leave=False)
set_tqdm_stdout_stderr()
self.logger.debug("verbose: setup tqdm")
Expand All @@ -156,6 +158,11 @@ def __init__(self,
setattr(self, k, v)
self.logger.debug(f"trainer sets {k} as a new attribute")

@property
def verbose(self) -> bool:
    """Read-only view of ``self._verbose``."""
    return self._verbose

@property
def step(self
) -> int:
Expand Down Expand Up @@ -447,6 +454,7 @@ def __init__(self,
use_amp=False,
use_channel_last=False,
report_accuracy_topk: Optional[int or List[int]] = None,
update_scheduler_iter: bool = False,
**kwargs):
if isinstance(model, dict):
raise TypeError(f"{type(self)} does not support dict model")
Expand All @@ -469,6 +477,11 @@ def __init__(self,
if report_accuracy_topk is not None and not isinstance(report_accuracy_topk, Iterable):
report_accuracy_topk = [report_accuracy_topk]
self._report_topk = report_accuracy_topk
self.update_scheduler_iter = update_scheduler_iter & (scheduler is not None)
if self.update_scheduler_iter:
self.logger.info("scheduler is set to be updated after every iteration")
else:
self.logger.debug("self.update_scheduler_iter=False. Update scheduler manually")

def iteration(self,
data: Tuple[Tensor, Tensor]
Expand All @@ -487,6 +500,8 @@ def iteration(self,
else:
loss.backward()
self.optimizer.step()
if self.update_scheduler_iter:
self.scheduler.step()
if self._is_debug and torch.isnan(loss):
self.logger.warning("loss is NaN")

Expand Down
4 changes: 2 additions & 2 deletions homura/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@
from .distributed import (distributed_print, distributed_ready_main, get_global_rank, get_local_rank, get_num_nodes,
get_world_size, if_is_master, init_distributed, is_distributed, is_distributed_available,
is_master)
from .environment import (enable_accimage, get_args, get_environ, get_git_hash, is_accimage_available,
is_faiss_available)
from .environment import (disable_tf32, disable_tf32_locally, enable_accimage, get_args, get_environ, get_git_hash,
is_accimage_available, is_faiss_available)
from .reproducibility import set_deterministic, set_seed
3 changes: 2 additions & 1 deletion homura/utils/distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,8 @@ def distributed_ready_main(func: Callable = None,
""" Wrap a main function to make it distributed ready
"""

init_distributed(backend=backend, init_method=init_method, disable_distributed_print=disable_distributed_print)
if is_distributed():
init_distributed(backend=backend, init_method=init_method, disable_distributed_print=disable_distributed_print)

@wraps(func)
def inner(*args, **kwargs):
Expand Down
49 changes: 49 additions & 0 deletions homura/utils/environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
import sys as python_sys
from typing import Any, Optional

import torch

from homura.liblog import get_logger

logger = get_logger("homura.environment")
Expand Down Expand Up @@ -44,6 +46,53 @@ def is_opteinsum_available() -> bool:
return importlib.util.find_spec("opt_einsum") is not None


# TF32
def _enable_tf32(mode: bool) -> None:
    """Set both TF32 switches under ``torch.backends`` to ``mode``.

    The matmul and cuDNN flags are assigned together and the new state is
    logged. Any exception raised on the way is reported through
    ``logger.exception`` and is not propagated to the caller.
    """
    try:
        torch.backends.cuda.matmul.allow_tf32 = mode
        torch.backends.cudnn.allow_tf32 = mode
        logger.info("TF32 is enabled" if mode else "TF32 is disabled")
    except Exception as err:
        logger.exception(err)


def disable_tf32() -> None:
    """ Globally disable TF32 by calling ``_enable_tf32`` with ``False``
    """
    _enable_tf32(mode=False)


class disable_tf32_locally(object):
    """ Locally disable TF32 and put the previous setting back afterwards

    As a context manager:

    >>> with disable_tf32_locally():
    ...     ...

    or as a decorator:

    >>> @disable_tf32_locally()
    ... def function():
    ...     ...
    """

    def __init__(self):
        # Stack of flags captured by ``__enter__``; a stack (rather than a single slot) keeps
        # nested or recursive use of the same instance restoring the right values.
        self._saved_flags = []

    def __call__(self, func=None):
        """ Decorate ``func`` so that every call runs with TF32 disabled

        :param func: function to wrap. If omitted, TF32 is simply disabled and ``None`` is returned,
            which is what calling the instance without arguments has always done.
        :return: the wrapped function, or ``None`` when ``func`` is omitted
        """

        if func is None:
            _enable_tf32(False)
            return None

        from functools import wraps

        @wraps(func)
        def wrapper(*args, **kwargs):
            # entering ``self`` per call makes the change last only for the duration of the call
            with self:
                return func(*args, **kwargs)

        return wrapper

    def __enter__(self):
        try:
            flags = (torch.backends.cuda.matmul.allow_tf32, torch.backends.cudnn.allow_tf32)
        except AttributeError:
            # the flags cannot be read, so there is nothing to restore on exit
            flags = None
        self._saved_flags.append(flags)
        _enable_tf32(False)

    def __exit__(self, exc_type, exc_val, exc_tb):
        flags = self._saved_flags.pop()
        if flags is None:
            return
        # Restore what was active on entry instead of unconditionally enabling TF32,
        # so that an earlier global ``disable_tf32()`` is not silently undone.
        matmul, cudnn = flags
        torch.backends.cuda.matmul.allow_tf32 = matmul
        torch.backends.cudnn.allow_tf32 = cudnn
        logger.info(f"TF32 is restored (matmul={matmul}, cudnn={cudnn})")


# get environment information

def get_git_hash() -> str:
Expand Down
21 changes: 8 additions & 13 deletions homura/utils/reproducibility.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,24 +47,19 @@ def set_seed(seed: Optional[int] = None,
@contextlib.contextmanager
def set_deterministic(seed: Optional[int] = None,
by_rank: bool = False):
""" Set seed of `torch`, `random` and `numpy` to `seed` for making it deterministic. Because of CUDA's limitation, this
does not make everything deterministic, however.
""" Set seed of `torch`, `random` and `numpy` to `seed` for making it deterministic. Because of CUDA's limitation,
this may not make everything deterministic, however.
"""

has_set_deterministic = hasattr(torch, "set_deterministic")
with set_seed(seed, by_rank):
if seed is not None:
if has_set_deterministic:
torch.set_deterministic(True)
else:
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
torch.set_deterministic(True)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
logger.info("Set deterministic. But some GPU computations might be still non-deterministic. "
"Also, this may affect the performance.")
yield
if has_set_deterministic:
torch.set_deterministic(False)
else:
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
torch.set_deterministic(False)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True
logger.info("Back to non-deterministic.")
Loading

0 comments on commit 1d6ffa2

Please sign in to comment.