Revert "[ci/requirements] Upgrade torch to 2.0.1 (ray-project#37128)"
This reverts commit 47b4189.
edoakes committed Jul 12, 2023
1 parent 8f3ca4b commit a8dd150
Showing 17 changed files with 46 additions and 108 deletions.
8 changes: 4 additions & 4 deletions ci/env/install-dependencies.sh
@@ -419,17 +419,17 @@ install_pip_packages() {
     pip install -U "torch==${TORCH_VERSION-1.9.0}" "torchvision==${TORCHVISION_VERSION-0.10.0}"
     # We won't add dl-cpu-requirements.txt as it would otherwise overwrite our custom
     # torch. Thus we also have to install tensorflow manually.
-    TF_PACKAGE=$(grep -ohE "tensorflow==[^ ;]+" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt" | head -n 1)
-    TFPROB_PACKAGE=$(grep -ohE "tensorflow-probability==[^ ;]+" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt" | head -n 1)
+    TF_PACKAGE=$(grep "tensorflow==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt")
+    TFPROB_PACKAGE=$(grep "tensorflow-probability==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt")
 
     # %%;* deletes everything after ; to get rid of e.g. python version specifiers
     pip install -U "${TF_PACKAGE%%;*}" "${TFPROB_PACKAGE%%;*}"
   else
     # Otherwise, use pinned default torch version.
     # Again, install right away, as some dependencies (e.g. torch-spline-conv) need
     # torch to be installed for their own install.
-    TORCH_PACKAGE=$(grep -ohE "torch==[^ ;]+" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt" | head -n 1)
-    TORCHVISION_PACKAGE=$(grep -ohE "torchvision==[^ ;]+" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt" | head -n 1)
+    TORCH_PACKAGE=$(grep "torch==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt")
+    TORCHVISION_PACKAGE=$(grep "torchvision==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt")
 
     # %%;* deletes everything after ; to get rid of e.g. python version specifiers
     pip install "${TORCH_PACKAGE%%;*}" "${TORCHVISION_PACKAGE%%;*}"
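The `${VAR%%;*}` expansion used above strips everything from the first `;` onward, so a pinned line such as `torch==1.13.0; python_version <= '3.7'` becomes a plain `torch==1.13.0` specifier before it is handed to pip. A minimal Python sketch of the same transformation (the helper name is illustrative, not part of the Ray scripts):

    def strip_env_marker(requirement: str) -> str:
        """Drop the environment marker, like bash's ${VAR%%;*}."""
        return requirement.split(";", 1)[0].strip()

    assert strip_env_marker("torch==1.13.0; python_version <= '3.7'") == "torch==1.13.0"
    assert strip_env_marker("torch==1.13.0") == "torch==1.13.0"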
@@ -58,7 +58,7 @@
 " \"transformers>=4.26.0\",\n",
 " \"diffusers>=0.13.1\",\n",
 " \"xformers>=0.0.16\",\n",
-" \"torch<2\",\n",
+" \"torch\",\n",
 " ]\n",
 " }\n",
 ")"
@@ -117,7 +117,7 @@
 " nn.ReLU(),\n",
 " )\n",
 " self.lr = lr\n",
-" self.accuracy = Accuracy(task=\"multiclass\", num_classes=10, top_k=1)\n",
+" self.accuracy = Accuracy(task=\"multiclass\", num_classes=10)\n",
 " self.eval_loss = []\n",
 " self.eval_accuracy = []\n",
 " self.test_accuracy = []\n",
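The `Accuracy` edit above repeats throughout this commit: the upgrade being reverted had added `task=...` (and sometimes `top_k=1`) because newer torchmetrics releases (0.11+, to our understanding) make `task` a required argument, while the 0.9.x line these pins go back to accepts the older constructor. A hedged sketch of code tolerating both, purely for illustration:

    import torchmetrics
    from torchmetrics import Accuracy

    # Newer torchmetrics requires an explicit task; older versions infer it.
    if tuple(int(p) for p in torchmetrics.__version__.split(".")[:2]) >= (0, 11):
        accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
    else:
        accuracy = Accuracy(num_classes=10)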
2 changes: 1 addition & 1 deletion doc/source/tune/examples/tune-pytorch-lightning.ipynb
@@ -112,7 +112,7 @@
 "class MNISTClassifier(pl.LightningModule):\n",
 " def __init__(self, config):\n",
 " super(MNISTClassifier, self).__init__()\n",
-" self.accuracy = Accuracy(task=\"multiclass\", num_classes=10, top_k=1)\n",
+" self.accuracy = Accuracy()\n",
 " self.layer_1_size = config[\"layer_1_size\"]\n",
 " self.layer_2_size = config[\"layer_2_size\"]\n",
 " self.lr = config[\"lr\"]\n",
9 changes: 4 additions & 5 deletions python/ray/train/examples/mosaic_cifar10_example.py
@@ -5,6 +5,8 @@
 import torchvision
 from torchvision import transforms, datasets
 
+from torchmetrics.classification.accuracy import Accuracy
+
 
 import ray
 from ray.air.config import ScalingConfig
@@ -16,7 +18,6 @@ def trainer_init_per_worker(config):
     from composer.core.evaluator import Evaluator
     from composer.models.tasks import ComposerClassifier
     import composer.optim
-    from torchmetrics.classification.accuracy import Accuracy
 
     BATCH_SIZE = 64
     # prepare the model for distributed training and wrap with ComposerClassifier for
@@ -56,9 +57,7 @@ def trainer_init_per_worker(config):
     test_dataloader = train.torch.prepare_data_loader(test_dataloader)
 
     evaluator = Evaluator(
-        dataloader=test_dataloader,
-        label="my_evaluator",
-        metrics=Accuracy(task="multiclass", num_classes=10, top_k=1),
+        dataloader=test_dataloader, label="my_evaluator", metrics=Accuracy()
     )
 
     # prepare optimizer
@@ -116,6 +115,6 @@ def train_mosaic_cifar10(num_workers=2, use_gpu=False, max_duration="5ep"):
 
     args, _ = parser.parse_known_args()
 
-    runtime_env = {"pip": ["mosaicml==0.12.1"]}
+    runtime_env = {"pip": ["mosaicml==0.10.1"]}
     ray.init(address=args.address, runtime_env=runtime_env)
     train_mosaic_cifar10(num_workers=args.num_workers, use_gpu=args.use_gpu)
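The collapsed `Evaluator(...)` call matches the older mosaicml==0.10.1 / torchmetrics==0.9.3 pins, where a bare `Accuracy()` is valid. A self-contained sketch of the same construction on synthetic data (the dataset shape and batch size are ours, for illustration only):

    import torch
    from torch.utils.data import DataLoader, TensorDataset
    from composer.core.evaluator import Evaluator
    from torchmetrics import Accuracy

    # Tiny fake eval set: 8 CIFAR-like images across 10 classes.
    dataset = TensorDataset(torch.randn(8, 3, 32, 32), torch.randint(0, 10, (8,)))
    test_dataloader = DataLoader(dataset, batch_size=4)

    # `label` prefixes the metric names logged for this dataloader.
    evaluator = Evaluator(
        dataloader=test_dataloader,
        label="my_evaluator",
        metrics=Accuracy(),
    )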
2 changes: 1 addition & 1 deletion python/ray/train/lightning/lightning_trainer.py
@@ -274,7 +274,7 @@ def __init__(self, lr, feature_dim):
         self.fc1 = torch.nn.Linear(28 * 28, feature_dim)
         self.fc2 = torch.nn.Linear(feature_dim, 10)
         self.lr = lr
-        self.accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
+        self.accuracy = Accuracy()
         self.val_loss = []
         self.val_acc = []
2 changes: 1 addition & 1 deletion python/ray/train/tests/lightning_test_utils.py
@@ -119,7 +119,7 @@ def __init__(self, lr: float, layer_1: int, layer_2: int):
         self.layer_1 = torch.nn.Linear(28 * 28, layer_1)
         self.layer_2 = torch.nn.Linear(layer_1, layer_2)
         self.layer_3 = torch.nn.Linear(layer_2, 10)
-        self.accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
+        self.accuracy = Accuracy(task="multiclass", num_classes=10)
         self.val_acc_list = []
         self.val_loss_list = []
 
4 changes: 1 addition & 3 deletions python/ray/train/tests/test_mosaic_trainer.py
@@ -60,9 +60,7 @@ def trainer_init_per_worker(config):
     test_dataloader = train.torch.prepare_data_loader(test_dataloader)
 
     evaluator = Evaluator(
-        dataloader=test_dataloader,
-        label="my_evaluator",
-        metrics=Accuracy(task="multiclass", num_classes=10, top_k=1),
+        dataloader=test_dataloader, label="my_evaluator", metrics=Accuracy()
    )
 
     # prepare optimizer
5 changes: 3 additions & 2 deletions python/ray/tune/examples/mlflow_ptl.py
@@ -4,13 +4,14 @@
 import tempfile
 
 import pytorch_lightning as pl
+from pl_bolts.datamodules import MNISTDataModule
 
 import mlflow
 
 from ray import air, tune
 from ray.air.integrations.mlflow import setup_mlflow
 from ray.tune.integration.pytorch_lightning import TuneReportCallback
-from ray.tune.examples.mnist_ptl_mini import LightningMNISTClassifier, MNISTDataModule
+from ray.tune.examples.mnist_ptl_mini import LightningMNISTClassifier
 
 
 def train_mnist_tune(config, data_dir=None, num_epochs=10, num_gpus=0):
@@ -44,7 +45,7 @@ def tune_mnist(
 ):
     data_dir = os.path.join(tempfile.gettempdir(), "mnist_data_")
     # Download data
-    MNISTDataModule(data_dir=data_dir, batch_size=32).prepare_data()
+    MNISTDataModule(data_dir=data_dir).prepare_data()
 
     # Set the MLflow experiment, or create it if it does not exist.
     mlflow.set_tracking_uri(tracking_uri)
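Both Tune examples now get `MNISTDataModule` from `pl_bolts` (restored to the test requirements as `lightning-bolts==0.4.0` further down) rather than the hand-rolled datamodule this commit deletes from `mnist_ptl_mini.py`. A usage sketch, assuming the pl_bolts 0.4.0 signature implied by the calls in this diff:

    from pl_bolts.datamodules import MNISTDataModule

    dm = MNISTDataModule(data_dir="./mnist_data", num_workers=1, batch_size=32)
    dm.prepare_data()      # downloads MNIST if it is not cached yet
    dm.setup()             # builds the train/val/test splits
    train_loader = dm.train_dataloader()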
62 changes: 7 additions & 55 deletions python/ray/tune/examples/mnist_ptl_mini.py
@@ -1,67 +1,17 @@
 import math
 
-import os
 import torch
 from filelock import FileLock
-
-import pytorch_lightning as pl
-
-
 from torch.nn import functional as F
-from torch.utils.data import DataLoader, random_split
 from torchmetrics import Accuracy
-from torchvision import transforms
-from torchvision.datasets import MNIST
+import pytorch_lightning as pl
+from pl_bolts.datamodules.mnist_datamodule import MNISTDataModule
+import os
 from ray.tune.integration.pytorch_lightning import TuneReportCallback
 
 from ray import air, tune
 
 
-PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
-
-
-class MNISTDataModule(pl.LightningDataModule):
-    def __init__(self, batch_size: int, data_dir: str = PATH_DATASETS):
-        super().__init__()
-        self.data_dir = data_dir
-        self.transform = transforms.Compose(
-            [
-                transforms.ToTensor(),
-                transforms.Normalize((0.1307,), (0.3081,)),
-            ]
-        )
-
-        self.batch_size = batch_size
-        self.dims = (1, 28, 28)
-        self.num_classes = 10
-
-    def prepare_data(self):
-        # download
-        MNIST(self.data_dir, train=True, download=True)
-        MNIST(self.data_dir, train=False, download=True)
-
-    def setup(self, stage=None):
-        # Assign train/val datasets for use in dataloaders
-        if stage == "fit" or stage is None:
-            mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
-            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])
-
-        # Assign test dataset for use in dataloader(s)
-        if stage == "test" or stage is None:
-            self.mnist_test = MNIST(
-                self.data_dir, train=False, transform=self.transform
-            )
-
-    def train_dataloader(self):
-        return DataLoader(self.mnist_train, batch_size=self.batch_size)
-
-    def val_dataloader(self):
-        return DataLoader(self.mnist_val, batch_size=self.batch_size)
-
-    def test_dataloader(self):
-        return DataLoader(self.mnist_test, batch_size=self.batch_size)
-
-
 class LightningMNISTClassifier(pl.LightningModule):
     def __init__(self, config, data_dir=None):
         super(LightningMNISTClassifier, self).__init__()
@@ -75,7 +25,7 @@ def __init__(self, config, data_dir=None):
         self.layer_1 = torch.nn.Linear(28 * 28, layer_1)
         self.layer_2 = torch.nn.Linear(layer_1, layer_2)
         self.layer_3 = torch.nn.Linear(layer_2, 10)
-        self.accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
+        self.accuracy = Accuracy()
 
     def forward(self, x):
         batch_size, channels, width, height = x.size()
@@ -118,7 +68,9 @@ def train_mnist_tune(config, num_epochs=10, num_gpus=0):
     data_dir = os.path.abspath("./data")
     model = LightningMNISTClassifier(config, data_dir)
     with FileLock(os.path.expanduser("~/.data.lock")):
-        dm = MNISTDataModule(data_dir=data_dir, batch_size=config["batch_size"])
+        dm = MNISTDataModule(
+            data_dir=data_dir, num_workers=1, batch_size=config["batch_size"]
+        )
     metrics = {"loss": "ptl/val_loss", "acc": "ptl/val_accuracy"}
     trainer = pl.Trainer(
         max_epochs=num_epochs,
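The `metrics` mapping in the context lines above is what ties this example to Tune: keys are the names Tune receives, values are the names the LightningModule logs. A minimal sketch of that wiring against the pinned pytorch-lightning==1.6.5 (Trainer arguments other than `callbacks` are illustrative):

    import pytorch_lightning as pl
    from ray.tune.integration.pytorch_lightning import TuneReportCallback

    metrics = {"loss": "ptl/val_loss", "acc": "ptl/val_accuracy"}
    trainer = pl.Trainer(
        max_epochs=10,
        enable_progress_bar=False,
        # Report the logged values to Tune at the end of each validation pass.
        callbacks=[TuneReportCallback(metrics, on="validation_end")],
    )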
25 changes: 9 additions & 16 deletions python/requirements/ml/dl-cpu-requirements.txt
@@ -11,19 +11,12 @@ tensorflow-datasets
 
 --extra-index-url https://download.pytorch.org/whl/cpu # for CPU versions of torch, torchvision
 --find-links https://data.pyg.org/whl/torch-1.13.0+cpu.html # for CPU versions of torch-scatter, torch-sparse, torch-cluster, torch-spline-conv
---find-links https://data.pyg.org/whl/torch-2.0.1+cpu.html # for CPU versions of torch-scatter, torch-sparse, torch-cluster, torch-spline-conv
-torch==1.13.0; python_version <= '3.7'
-torchmetrics==0.9.3; python_version <= '3.7'
-torchtext==0.14.0; python_version <= '3.7'
-torchvision==0.14.0; python_version <= '3.7'
-
-torch==2.0.1; python_version > '3.7'
-torchmetrics==0.10.3; python_version > '3.7'
-torchtext==0.15.2; python_version > '3.7'
-torchvision==0.15.2; python_version > '3.7'
-
-torch-scatter==2.1.1
-torch-sparse==0.6.17
-torch-cluster==1.6.1
-torch-spline-conv==1.2.2
-torch-geometric==2.3.1
+torch==1.13.0
+torchmetrics==0.9.3
+torchtext==0.14.0
+torchvision==0.14.0
+torch-scatter==2.1.0
+torch-sparse==0.6.16
+torch-cluster==1.6.0
+torch-spline-conv==1.2.1
+torch-geometric==2.1.0
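A quick way to confirm an environment actually picked up the reverted pins (a sketch; expected versions are taken from the lines above, and wheel-index suffixes such as +cpu or +cu116 may or may not appear in `__version__`):

    import torch
    import torchmetrics
    import torchvision

    for module, expected in [
        (torch, "1.13.0"),
        (torchmetrics, "0.9.3"),
        (torchvision, "0.14.0"),
    ]:
        assert module.__version__.startswith(expected), (module.__name__, module.__version__)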
16 changes: 8 additions & 8 deletions python/requirements/ml/dl-gpu-requirements.txt
@@ -1,11 +1,11 @@
 # If you make changes below this line, please also make the corresponding changes to `dl-cpu-requirements.txt`!
 
---extra-index-url https://download.pytorch.org/whl/cu118 # for GPU versions of torch, torchvision
---find-links https://data.pyg.org/whl/torch-2.0.1+cu118.html # for GPU versions of torch-scatter, torch-sparse, torch-cluster, torch-spline-conv
+--extra-index-url https://download.pytorch.org/whl/cu116 # for GPU versions of torch, torchvision
+--find-links https://data.pyg.org/whl/torch-1.13.0+cu116.html # for GPU versions of torch-scatter, torch-sparse, torch-cluster, torch-spline-conv
 # specifying explicit plus-notation below so pip overwrites the existing cpu versions
-torch==2.0.1+cu118
-torchvision==0.15.2+cu118
-torch-scatter==2.1.1+pt20cu118
-torch-sparse==0.6.17+pt20cu118
-torch-cluster==1.6.1+pt20cu118
-torch-spline-conv==1.2.2+pt20cu118
+torch==1.13.0+cu116
+torchvision==0.14.0+cu116
+torch-scatter==2.1.0+pt113cu116
+torch-sparse==0.6.15+pt113cu116
+torch-cluster==1.6.0+pt113cu116
+torch-spline-conv==1.2.1+pt113cu116
1 change: 1 addition & 0 deletions python/requirements/ml/tune-test-requirements.txt
@@ -8,6 +8,7 @@ jupyterlab==3.6.1
 matplotlib!=3.4.3
 
 pytest-remotedata==0.3.2
+lightning-bolts==0.4.0
 pytorch-lightning==1.6.5
 fairscale==0.4.6
 shortuuid==1.0.1
2 changes: 1 addition & 1 deletion release/lightning_tests/workloads/lightning_test_utils.py
@@ -14,7 +14,7 @@ def __init__(self, lr, feature_dim):
         self.fc1 = torch.nn.Linear(28 * 28, feature_dim)
         self.fc2 = torch.nn.Linear(feature_dim, 10)
         self.lr = lr
-        self.accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
+        self.accuracy = Accuracy()
 
     def forward(self, x):
         x = x.view(-1, 28 * 28)
6 changes: 3 additions & 3 deletions rllib/core/learner/tests/test_learner.py
@@ -49,10 +49,10 @@ def test_end_to_end_update(self):
             min_loss = min(loss, min_loss)
             print(f"[iter = {iter_i}] Loss: {loss:.3f}, Min Loss: {min_loss:.3f}")
             # The loss is initially around 0.69 (ln2). When it gets to around
-            # 0.58 the return of the policy gets to around 100.
-            if min_loss < 0.58:
+            # 0.57 the return of the policy gets to around 100.
+            if min_loss < 0.57:
                 break
-        self.assertLess(min_loss, 0.58)
+        self.assertLess(min_loss, 0.57)
 
     def test_compute_gradients(self):
         """Tests the compute_gradients correctness.
4 changes: 0 additions & 4 deletions rllib/core/learner/torch/tests/test_torch_learner_compile.py
@@ -25,8 +25,6 @@ def setUp(cls) -> None:
     def tearDown(cls) -> None:
         ray.shutdown()
 
-    # Todo (rllib-team): Fix for torch 2.0+
-    @unittest.skip("Failing with torch >= 2.0")
     @unittest.skipIf(not _dynamo_is_available(), "torch._dynamo not available")
     def test_torch_compile(self):
         """Test if torch.compile() can be applied and used on the learner.
@@ -77,8 +75,6 @@ def test_torch_compile(self):
 
         learner.remove_module(module_id="another_module")
 
-    # Todo (rllib-team): Fix for torch 2.0+
-    @unittest.skip("Failing with torch >= 2.0")
     @unittest.skipIf(not _dynamo_is_available(), "torch._dynamo not available")
     def test_torch_compile_no_breaks(self):
         """Tests if torch.compile() does encounter too many breaks.
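With torch pinned back to 1.13, the blanket `@unittest.skip("Failing with torch >= 2.0")` markers come off and only the `_dynamo_is_available()` guard remains. For context, a sketch of what such a guard can look like (our reconstruction for illustration, not rllib's actual helper):

    import unittest

    def _dynamo_is_available() -> bool:
        try:
            import torch._dynamo  # noqa: F401  # shipped (experimentally) since torch 1.13
            return True
        except ImportError:
            return False

    class TestCompile(unittest.TestCase):
        @unittest.skipIf(not _dynamo_is_available(), "torch._dynamo not available")
        def test_compile_smoke(self):
            ...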
2 changes: 0 additions & 2 deletions rllib/core/models/tests/test_base_models.py
@@ -215,8 +215,6 @@ def build(self, framework: str):
 
         model({"in_1": [[1]]})
 
-    # Todo (rllib-team): Fix for torch 2.0+
-    @unittest.skip("Failing with torch >= 2.0")
     @unittest.skipIf(not _dynamo_is_available(), "torch._dynamo not available")
     def test_torch_compile_no_breaks(self):
         """Tests if torch.compile() does not encounter any breaks.
