[Feature] Lookahead optimizer #3

Merged: 4 commits, Sep 21, 2021
18 changes: 9 additions & 9 deletions Makefile
@@ -1,18 +1,18 @@
.PHONY: init check format requirements

init:
	python3 -m pip install -U pipenv setuptools
	python3 -m pipenv install --dev

check:
	isort --check-only --profile black pytorch_optimizer -l 79
	black -S -l 79 --check pytorch_optimizer
	pylint pytorch_optimizer

format:
	isort --profile black pytorch_optimizer -l 79
	black -S -l 79 pytorch_optimizer

requirements:
	python3 -m pipenv lock -r > requirements.txt
	python3 -m pipenv lock -dr > requirements-dev.txt
12 changes: 12 additions & 0 deletions README.md
@@ -8,10 +8,22 @@ Bunch of optimizer implementations in PyTorch with clean-code.

| Optimizer | Official Code | Paper | Note |
| :---: | :---: | :---: | :---: |
| Lookahead | [github](https://github.com/alphadl/lookahead.pytorch) | [https://arxiv.org/abs/1907.08610v2](https://arxiv.org/abs/1907.08610v2) | |
| RAdam | [github](https://github.com/LiyuanLucasLiu/RAdam) | [https://arxiv.org/abs/1908.03265](https://arxiv.org/abs/1908.03265) | |

## Citations

### Lookahead

```
@article{zhang2019lookahead,
  title={Lookahead optimizer: k steps forward, 1 step back},
  author={Zhang, Michael R and Lucas, James and Hinton, Geoffrey and Ba, Jimmy},
  journal={arXiv preprint arXiv:1907.08610},
  year={2019}
}
```

### RAdam

```
…
```
77 changes: 77 additions & 0 deletions pytorch_optimizer/lookahead.py
@@ -0,0 +1,77 @@
from collections import defaultdict
from typing import Callable, Dict, List, Optional

import torch
from torch.optim import Optimizer


class Lookahead(Optimizer):
    """Lookahead optimizer wrapper: k steps forward, 1 step back.

    Wraps an inner (fast) optimizer and keeps a slow copy of the
    parameters that the fast weights are pulled back toward every
    k steps (https://arxiv.org/abs/1907.08610v2).
    """

    def __init__(self, optimizer: Optimizer, k: int = 5, alpha: float = 0.5):
        self.optimizer = optimizer  # inner (fast) optimizer
        self.k = k  # number of fast steps between slow-weight updates
        self.alpha = alpha  # slow-weight interpolation factor

        self.param_groups: List[Dict] = self.optimizer.param_groups
        self.fast_state: Dict = self.optimizer.state
        self.state = defaultdict(dict)

        for group in self.param_groups:
            group['counter'] = 0

    def update(self, group: Dict):
        """Interpolate the slow weights toward the fast weights, then
        copy the result back into the fast weights."""
        for fast in group['params']:
            param_state = self.state[fast]
            if 'slow_param' not in param_state:
                # Initialize the slow weights from the current fast weights.
                param_state['slow_param'] = torch.zeros_like(fast.data)
                param_state['slow_param'].copy_(fast.data)
            slow = param_state['slow_param']
            # slow <- slow + alpha * (fast - slow)
            slow += (fast.data - slow) * self.alpha
            fast.data.copy_(slow)

    def update_lookahead(self):
        for group in self.param_groups:
            self.update(group)

    def step(self, closure: Optional[Callable] = None) -> float:
        # Take one fast step, then sync with the slow weights once
        # every k steps (per parameter group).
        loss: float = self.optimizer.step(closure)
        for group in self.param_groups:
            if group['counter'] == 0:
                self.update(group)
            group['counter'] += 1
            if group['counter'] >= self.k:
                group['counter'] = 0
        return loss

    def state_dict(self) -> Dict[str, torch.Tensor]:
        fast_state_dict = self.optimizer.state_dict()
        fast_state = fast_state_dict['state']
        param_groups = fast_state_dict['param_groups']

        # self.state is keyed by parameter tensors; replace tensor keys
        # with ids so the returned state dict is serializable.
        slow_state: Dict[int, torch.Tensor] = {
            (id(k) if isinstance(k, torch.Tensor) else k): v
            for k, v in self.state.items()
        }

        return {
            'fast_state': fast_state,
            'slow_state': slow_state,
            'param_groups': param_groups,
        }

    def load_state_dict(self, state_dict: Dict[str, torch.Tensor]):
        slow_state_dict: Dict[str, torch.Tensor] = {
            'state': state_dict['slow_state'],
            'param_groups': state_dict['param_groups'],
        }
        fast_state_dict: Dict[str, torch.Tensor] = {
            'state': state_dict['fast_state'],
            'param_groups': state_dict['param_groups'],
        }
        # Restore the slow weights on this wrapper and the fast state
        # on the inner optimizer.
        super().load_state_dict(slow_state_dict)

        self.optimizer.load_state_dict(fast_state_dict)
        self.fast_state = self.optimizer.state

    def add_param_group(self, param_group: Dict):
        param_group['counter'] = 0
        self.optimizer.add_param_group(param_group)
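
For reference, a minimal usage sketch of the wrapper added in this diff. This snippet is not part of the PR; it assumes the import paths `pytorch_optimizer.lookahead` and `pytorch_optimizer.radam` implied by the file listing above, that `RAdam` accepts the usual `lr` keyword, and uses made-up toy data:

```
import torch
from torch import nn

from pytorch_optimizer.lookahead import Lookahead
from pytorch_optimizer.radam import RAdam

model = nn.Linear(10, 1)
criterion = nn.MSELoss()

# Wrap the fast (inner) optimizer; slow weights sync every k steps.
optimizer = Lookahead(RAdam(model.parameters(), lr=1e-3), k=5, alpha=0.5)

x, y = torch.randn(32, 10), torch.randn(32, 1)
for _ in range(20):
    # Zero grads on the module; the wrapper only delegates step().
    model.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    optimizer.step()
```

With the defaults above, the slow weights are pulled toward the fast weights by `alpha = 0.5` once every `k = 5` calls to `step`.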
4 changes: 2 additions & 2 deletions pytorch_optimizer/radam.py
@@ -65,8 +65,8 @@ def check_valid_parameters(self):
     def __setstate__(self, state: Dict):
         super().__setstate__(state)

-    def step(self, closure: Optional[Callable] = None) -> torch.Tensor:
-        loss: Optional[torch.Tensor] = None
+    def step(self, closure: Optional[Callable] = None) -> float:
+        loss: Optional[float] = None
         if closure is not None:
             loss = closure()
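
The annotation change above matters when `step` is driven by a closure: whatever the closure returns is forwarded back through `RAdam.step` (and through the `Lookahead` wrapper). A small sketch, again not part of the PR and using the same hypothetical setup as the example earlier, of a closure that actually returns a `float`:

```
import torch
from torch import nn

from pytorch_optimizer.lookahead import Lookahead
from pytorch_optimizer.radam import RAdam

model = nn.Linear(4, 1)
criterion = nn.MSELoss()
x, y = torch.randn(8, 4), torch.randn(8, 1)

optimizer = Lookahead(RAdam(model.parameters(), lr=1e-3))


def closure() -> float:
    # Backprop inside the closure and return the loss as a plain float,
    # matching the new `-> float` annotation on step().
    model.zero_grad()
    loss = criterion(model(x), y)
    loss.backward()
    return loss.item()


step_loss = optimizer.step(closure)  # float forwarded from the inner RAdam.step
```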