Revert "[ci/requirements] Upgrade torch to 2.0.1 (ray-project#37128)"
This reverts commit 47b4189.
edoakes committed Jul 12, 2023
1 parent 8f3ca4b commit a8dd150
Showing 17 changed files with 46 additions and 108 deletions.
8 changes: 4 additions & 4 deletions ci/env/install-dependencies.sh
@@ -419,17 +419,17 @@ install_pip_packages() {
     pip install -U "torch==${TORCH_VERSION-1.9.0}" "torchvision==${TORCHVISION_VERSION-0.10.0}"
     # We won't add dl-cpu-requirements.txt as it would otherwise overwrite our custom
     # torch. Thus we also have to install tensorflow manually.
-    TF_PACKAGE=$(grep -ohE "tensorflow==[^ ;]+" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt" | head -n 1)
-    TFPROB_PACKAGE=$(grep -ohE "tensorflow-probability==[^ ;]+" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt" | head -n 1)
+    TF_PACKAGE=$(grep "tensorflow==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt")
+    TFPROB_PACKAGE=$(grep "tensorflow-probability==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt")
 
     # %%;* deletes everything after ; to get rid of e.g. python version specifiers
     pip install -U "${TF_PACKAGE%%;*}" "${TFPROB_PACKAGE%%;*}"
   else
     # Otherwise, use pinned default torch version.
     # Again, install right away, as some dependencies (e.g. torch-spline-conv) need
     # torch to be installed for their own install.
-    TORCH_PACKAGE=$(grep -ohE "torch==[^ ;]+" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt" | head -n 1)
-    TORCHVISION_PACKAGE=$(grep -ohE "torchvision==[^ ;]+" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt" | head -n 1)
+    TORCH_PACKAGE=$(grep "torch==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt")
+    TORCHVISION_PACKAGE=$(grep "torchvision==" "${WORKSPACE_DIR}/python/requirements/ml/dl-cpu-requirements.txt")
 
     # %%;* deletes everything after ; to get rid of e.g. python version specifiers
     pip install "${TORCH_PACKAGE%%;*}" "${TORCHVISION_PACKAGE%%;*}"
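The `${VAR%%;*}` expansion used above strips everything from the first `;` onward, so a pinned line such as `torch==1.13.0; python_version <= '3.7'` becomes a plain `torch==1.13.0` specifier before it is handed to pip. A minimal Python sketch of the same transformation (the helper name is illustrative, not part of the Ray scripts):

    def strip_env_marker(requirement: str) -> str:
        """Drop the environment marker, like bash's ${VAR%%;*}."""
        return requirement.split(";", 1)[0].strip()

    assert strip_env_marker("torch==1.13.0; python_version <= '3.7'") == "torch==1.13.0"
    assert strip_env_marker("torch==1.13.0") == "torch==1.13.0"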
@@ -58,7 +58,7 @@
 " \"transformers>=4.26.0\",\n",
 " \"diffusers>=0.13.1\",\n",
 " \"xformers>=0.0.16\",\n",
-" \"torch<2\",\n",
+" \"torch\",\n",
 " ]\n",
 " }\n",
 ")"
@@ -117,7 +117,7 @@
 " nn.ReLU(),\n",
 " )\n",
 " self.lr = lr\n",
-" self.accuracy = Accuracy(task=\"multiclass\", num_classes=10, top_k=1)\n",
+" self.accuracy = Accuracy(task=\"multiclass\", num_classes=10)\n",
 " self.eval_loss = []\n",
 " self.eval_accuracy = []\n",
 " self.test_accuracy = []\n",
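The `Accuracy` edit above repeats throughout this commit: the upgrade being reverted had added `task=...` (and sometimes `top_k=1`) because newer torchmetrics releases (0.11+, to our understanding) make `task` a required argument, while the 0.9.x line these pins go back to accepts the older constructor. A hedged sketch of code tolerating both, purely for illustration:

    import torchmetrics
    from torchmetrics import Accuracy

    # Newer torchmetrics requires an explicit task; older versions infer it.
    if tuple(int(p) for p in torchmetrics.__version__.split(".")[:2]) >= (0, 11):
        accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
    else:
        accuracy = Accuracy(num_classes=10)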
2 changes: 1 addition & 1 deletion doc/source/tune/examples/tune-pytorch-lightning.ipynb
@@ -112,7 +112,7 @@
 "class MNISTClassifier(pl.LightningModule):\n",
 " def __init__(self, config):\n",
 " super(MNISTClassifier, self).__init__()\n",
-" self.accuracy = Accuracy(task=\"multiclass\", num_classes=10, top_k=1)\n",
+" self.accuracy = Accuracy()\n",
 " self.layer_1_size = config[\"layer_1_size\"]\n",
 " self.layer_2_size = config[\"layer_2_size\"]\n",
 " self.lr = config[\"lr\"]\n",
9 changes: 4 additions & 5 deletions python/ray/train/examples/mosaic_cifar10_example.py
@@ -5,6 +5,8 @@
 import torchvision
 from torchvision import transforms, datasets
 
+from torchmetrics.classification.accuracy import Accuracy
+
 
 import ray
 from ray.air.config import ScalingConfig
@@ -16,7 +18,6 @@ def trainer_init_per_worker(config):
     from composer.core.evaluator import Evaluator
     from composer.models.tasks import ComposerClassifier
     import composer.optim
-    from torchmetrics.classification.accuracy import Accuracy
 
     BATCH_SIZE = 64
     # prepare the model for distributed training and wrap with ComposerClassifier for
@@ -56,9 +57,7 @@ def trainer_init_per_worker(config):
     test_dataloader = train.torch.prepare_data_loader(test_dataloader)
 
     evaluator = Evaluator(
-        dataloader=test_dataloader,
-        label="my_evaluator",
-        metrics=Accuracy(task="multiclass", num_classes=10, top_k=1),
+        dataloader=test_dataloader, label="my_evaluator", metrics=Accuracy()
     )
 
     # prepare optimizer
@@ -116,6 +115,6 @@ def train_mosaic_cifar10(num_workers=2, use_gpu=False, max_duration="5ep"):
 
     args, _ = parser.parse_known_args()
 
-    runtime_env = {"pip": ["mosaicml==0.12.1"]}
+    runtime_env = {"pip": ["mosaicml==0.10.1"]}
     ray.init(address=args.address, runtime_env=runtime_env)
     train_mosaic_cifar10(num_workers=args.num_workers, use_gpu=args.use_gpu)
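The collapsed `Evaluator(...)` call matches the older mosaicml==0.10.1 / torchmetrics==0.9.3 pins, where a bare `Accuracy()` is valid. A self-contained sketch of the same construction on synthetic data (the dataset shape and batch size are ours, for illustration only):

    import torch
    from torch.utils.data import DataLoader, TensorDataset
    from composer.core.evaluator import Evaluator
    from torchmetrics import Accuracy

    # Tiny fake eval set: 8 CIFAR-like images across 10 classes.
    dataset = TensorDataset(torch.randn(8, 3, 32, 32), torch.randint(0, 10, (8,)))
    test_dataloader = DataLoader(dataset, batch_size=4)

    # `label` prefixes the metric names logged for this dataloader.
    evaluator = Evaluator(
        dataloader=test_dataloader,
        label="my_evaluator",
        metrics=Accuracy(),
    )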
2 changes: 1 addition & 1 deletion python/ray/train/lightning/lightning_trainer.py
@@ -274,7 +274,7 @@ def __init__(self, lr, feature_dim):
         self.fc1 = torch.nn.Linear(28 * 28, feature_dim)
         self.fc2 = torch.nn.Linear(feature_dim, 10)
         self.lr = lr
-        self.accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
+        self.accuracy = Accuracy()
         self.val_loss = []
         self.val_acc = []
2 changes: 1 addition & 1 deletion python/ray/train/tests/lightning_test_utils.py
@@ -119,7 +119,7 @@ def __init__(self, lr: float, layer_1: int, layer_2: int):
         self.layer_1 = torch.nn.Linear(28 * 28, layer_1)
         self.layer_2 = torch.nn.Linear(layer_1, layer_2)
         self.layer_3 = torch.nn.Linear(layer_2, 10)
-        self.accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
+        self.accuracy = Accuracy(task="multiclass", num_classes=10)
         self.val_acc_list = []
         self.val_loss_list = []
 
4 changes: 1 addition & 3 deletions python/ray/train/tests/test_mosaic_trainer.py
@@ -60,9 +60,7 @@ def trainer_init_per_worker(config):
     test_dataloader = train.torch.prepare_data_loader(test_dataloader)
 
     evaluator = Evaluator(
-        dataloader=test_dataloader,
-        label="my_evaluator",
-        metrics=Accuracy(task="multiclass", num_classes=10, top_k=1),
+        dataloader=test_dataloader, label="my_evaluator", metrics=Accuracy()
    )
 
     # prepare optimizer
5 changes: 3 additions & 2 deletions python/ray/tune/examples/mlflow_ptl.py
@@ -4,13 +4,14 @@
 import tempfile
 
 import pytorch_lightning as pl
+from pl_bolts.datamodules import MNISTDataModule
 
 import mlflow
 
 from ray import air, tune
 from ray.air.integrations.mlflow import setup_mlflow
 from ray.tune.integration.pytorch_lightning import TuneReportCallback
-from ray.tune.examples.mnist_ptl_mini import LightningMNISTClassifier, MNISTDataModule
+from ray.tune.examples.mnist_ptl_mini import LightningMNISTClassifier
 
 
 def train_mnist_tune(config, data_dir=None, num_epochs=10, num_gpus=0):
@@ -44,7 +45,7 @@ def tune_mnist(
 ):
     data_dir = os.path.join(tempfile.gettempdir(), "mnist_data_")
     # Download data
-    MNISTDataModule(data_dir=data_dir, batch_size=32).prepare_data()
+    MNISTDataModule(data_dir=data_dir).prepare_data()
 
     # Set the MLflow experiment, or create it if it does not exist.
     mlflow.set_tracking_uri(tracking_uri)
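Both Tune examples now get `MNISTDataModule` from `pl_bolts` (restored to the test requirements as `lightning-bolts==0.4.0` further down) rather than the hand-rolled datamodule this commit deletes from `mnist_ptl_mini.py`. A usage sketch, assuming the pl_bolts 0.4.0 signature implied by the calls in this diff:

    from pl_bolts.datamodules import MNISTDataModule

    dm = MNISTDataModule(data_dir="./mnist_data", num_workers=1, batch_size=32)
    dm.prepare_data()      # downloads MNIST if it is not cached yet
    dm.setup()             # builds the train/val/test splits
    train_loader = dm.train_dataloader()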
62 changes: 7 additions & 55 deletions python/ray/tune/examples/mnist_ptl_mini.py
@@ -1,67 +1,17 @@
 import math
 
-import os
 import torch
 from filelock import FileLock
-
-import pytorch_lightning as pl
-
-
 from torch.nn import functional as F
-from torch.utils.data import DataLoader, random_split
 from torchmetrics import Accuracy
-from torchvision import transforms
-from torchvision.datasets import MNIST
+import pytorch_lightning as pl
+from pl_bolts.datamodules.mnist_datamodule import MNISTDataModule
+import os
 from ray.tune.integration.pytorch_lightning import TuneReportCallback
 
 from ray import air, tune
 
 
-PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
-
-
-class MNISTDataModule(pl.LightningDataModule):
-    def __init__(self, batch_size: int, data_dir: str = PATH_DATASETS):
-        super().__init__()
-        self.data_dir = data_dir
-        self.transform = transforms.Compose(
-            [
-                transforms.ToTensor(),
-                transforms.Normalize((0.1307,), (0.3081,)),
-            ]
-        )
-
-        self.batch_size = batch_size
-        self.dims = (1, 28, 28)
-        self.num_classes = 10
-
-    def prepare_data(self):
-        # download
-        MNIST(self.data_dir, train=True, download=True)
-        MNIST(self.data_dir, train=False, download=True)
-
-    def setup(self, stage=None):
-        # Assign train/val datasets for use in dataloaders
-        if stage == "fit" or stage is None:
-            mnist_full = MNIST(self.data_dir, train=True, transform=self.transform)
-            self.mnist_train, self.mnist_val = random_split(mnist_full, [55000, 5000])
-
-        # Assign test dataset for use in dataloader(s)
-        if stage == "test" or stage is None:
-            self.mnist_test = MNIST(
-                self.data_dir, train=False, transform=self.transform
-            )
-
-    def train_dataloader(self):
-        return DataLoader(self.mnist_train, batch_size=self.batch_size)
-
-    def val_dataloader(self):
-        return DataLoader(self.mnist_val, batch_size=self.batch_size)
-
-    def test_dataloader(self):
-        return DataLoader(self.mnist_test, batch_size=self.batch_size)
-
-
 class LightningMNISTClassifier(pl.LightningModule):
     def __init__(self, config, data_dir=None):
         super(LightningMNISTClassifier, self).__init__()
@@ -75,7 +25,7 @@ def __init__(self, config, data_dir=None):
         self.layer_1 = torch.nn.Linear(28 * 28, layer_1)
         self.layer_2 = torch.nn.Linear(layer_1, layer_2)
         self.layer_3 = torch.nn.Linear(layer_2, 10)
-        self.accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
+        self.accuracy = Accuracy()
 
     def forward(self, x):
         batch_size, channels, width, height = x.size()
@@ -118,7 +68,9 @@ def train_mnist_tune(config, num_epochs=10, num_gpus=0):
     data_dir = os.path.abspath("./data")
     model = LightningMNISTClassifier(config, data_dir)
     with FileLock(os.path.expanduser("~/.data.lock")):
-        dm = MNISTDataModule(data_dir=data_dir, batch_size=config["batch_size"])
+        dm = MNISTDataModule(
+            data_dir=data_dir, num_workers=1, batch_size=config["batch_size"]
+        )
     metrics = {"loss": "ptl/val_loss", "acc": "ptl/val_accuracy"}
     trainer = pl.Trainer(
         max_epochs=num_epochs,
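The `metrics` mapping in the context lines above is what ties this example to Tune: keys are the names Tune receives, values are the names the LightningModule logs. A minimal sketch of that wiring against the pinned pytorch-lightning==1.6.5 (Trainer arguments other than `callbacks` are illustrative):

    import pytorch_lightning as pl
    from ray.tune.integration.pytorch_lightning import TuneReportCallback

    metrics = {"loss": "ptl/val_loss", "acc": "ptl/val_accuracy"}
    trainer = pl.Trainer(
        max_epochs=10,
        enable_progress_bar=False,
        # Report the logged values to Tune at the end of each validation pass.
        callbacks=[TuneReportCallback(metrics, on="validation_end")],
    )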
25 changes: 9 additions & 16 deletions python/requirements/ml/dl-cpu-requirements.txt
@@ -11,19 +11,12 @@ tensorflow-datasets
 
 --extra-index-url https://download.pytorch.org/whl/cpu # for CPU versions of torch, torchvision
 --find-links https://data.pyg.org/whl/torch-1.13.0+cpu.html # for CPU versions of torch-scatter, torch-sparse, torch-cluster, torch-spline-conv
---find-links https://data.pyg.org/whl/torch-2.0.1+cpu.html # for CPU versions of torch-scatter, torch-sparse, torch-cluster, torch-spline-conv
-torch==1.13.0; python_version <= '3.7'
-torchmetrics==0.9.3; python_version <= '3.7'
-torchtext==0.14.0; python_version <= '3.7'
-torchvision==0.14.0; python_version <= '3.7'
-
-torch==2.0.1; python_version > '3.7'
-torchmetrics==0.10.3; python_version > '3.7'
-torchtext==0.15.2; python_version > '3.7'
-torchvision==0.15.2; python_version > '3.7'
-
-torch-scatter==2.1.1
-torch-sparse==0.6.17
-torch-cluster==1.6.1
-torch-spline-conv==1.2.2
-torch-geometric==2.3.1
+torch==1.13.0
+torchmetrics==0.9.3
+torchtext==0.14.0
+torchvision==0.14.0
+torch-scatter==2.1.0
+torch-sparse==0.6.16
+torch-cluster==1.6.0
+torch-spline-conv==1.2.1
+torch-geometric==2.1.0
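A quick way to confirm an environment actually picked up the reverted pins (a sketch; expected versions are taken from the lines above, and wheel-index suffixes such as +cpu or +cu116 may or may not appear in `__version__`):

    import torch
    import torchmetrics
    import torchvision

    for module, expected in [
        (torch, "1.13.0"),
        (torchmetrics, "0.9.3"),
        (torchvision, "0.14.0"),
    ]:
        assert module.__version__.startswith(expected), (module.__name__, module.__version__)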
16 changes: 8 additions & 8 deletions python/requirements/ml/dl-gpu-requirements.txt
@@ -1,11 +1,11 @@
 # If you make changes below this line, please also make the corresponding changes to `dl-cpu-requirements.txt`!
 
---extra-index-url https://download.pytorch.org/whl/cu118 # for GPU versions of torch, torchvision
---find-links https://data.pyg.org/whl/torch-2.0.1+cu118.html # for GPU versions of torch-scatter, torch-sparse, torch-cluster, torch-spline-conv
+--extra-index-url https://download.pytorch.org/whl/cu116 # for GPU versions of torch, torchvision
+--find-links https://data.pyg.org/whl/torch-1.13.0+cu116.html # for GPU versions of torch-scatter, torch-sparse, torch-cluster, torch-spline-conv
 # specifying explicit plus-notation below so pip overwrites the existing cpu versions
-torch==2.0.1+cu118
-torchvision==0.15.2+cu118
-torch-scatter==2.1.1+pt20cu118
-torch-sparse==0.6.17+pt20cu118
-torch-cluster==1.6.1+pt20cu118
-torch-spline-conv==1.2.2+pt20cu118
+torch==1.13.0+cu116
+torchvision==0.14.0+cu116
+torch-scatter==2.1.0+pt113cu116
+torch-sparse==0.6.15+pt113cu116
+torch-cluster==1.6.0+pt113cu116
+torch-spline-conv==1.2.1+pt113cu116
1 change: 1 addition & 0 deletions python/requirements/ml/tune-test-requirements.txt
@@ -8,6 +8,7 @@ jupyterlab==3.6.1
 matplotlib!=3.4.3
 
 pytest-remotedata==0.3.2
+lightning-bolts==0.4.0
 pytorch-lightning==1.6.5
 fairscale==0.4.6
 shortuuid==1.0.1
2 changes: 1 addition & 1 deletion release/lightning_tests/workloads/lightning_test_utils.py
@@ -14,7 +14,7 @@ def __init__(self, lr, feature_dim):
         self.fc1 = torch.nn.Linear(28 * 28, feature_dim)
         self.fc2 = torch.nn.Linear(feature_dim, 10)
         self.lr = lr
-        self.accuracy = Accuracy(task="multiclass", num_classes=10, top_k=1)
+        self.accuracy = Accuracy()
 
     def forward(self, x):
         x = x.view(-1, 28 * 28)
6 changes: 3 additions & 3 deletions rllib/core/learner/tests/test_learner.py
@@ -49,10 +49,10 @@ def test_end_to_end_update(self):
             min_loss = min(loss, min_loss)
             print(f"[iter = {iter_i}] Loss: {loss:.3f}, Min Loss: {min_loss:.3f}")
             # The loss is initially around 0.69 (ln2). When it gets to around
-            # 0.58 the return of the policy gets to around 100.
-            if min_loss < 0.58:
+            # 0.57 the return of the policy gets to around 100.
+            if min_loss < 0.57:
                 break
-        self.assertLess(min_loss, 0.58)
+        self.assertLess(min_loss, 0.57)
 
     def test_compute_gradients(self):
         """Tests the compute_gradients correctness.
4 changes: 0 additions & 4 deletions rllib/core/learner/torch/tests/test_torch_learner_compile.py
@@ -25,8 +25,6 @@ def setUp(cls) -> None:
     def tearDown(cls) -> None:
         ray.shutdown()
 
-    # Todo (rllib-team): Fix for torch 2.0+
-    @unittest.skip("Failing with torch >= 2.0")
     @unittest.skipIf(not _dynamo_is_available(), "torch._dynamo not available")
     def test_torch_compile(self):
         """Test if torch.compile() can be applied and used on the learner.
@@ -77,8 +75,6 @@ def test_torch_compile(self):
 
         learner.remove_module(module_id="another_module")
 
-    # Todo (rllib-team): Fix for torch 2.0+
-    @unittest.skip("Failing with torch >= 2.0")
     @unittest.skipIf(not _dynamo_is_available(), "torch._dynamo not available")
     def test_torch_compile_no_breaks(self):
         """Tests if torch.compile() does encounter too many breaks.
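With torch pinned back to 1.13, the blanket `@unittest.skip("Failing with torch >= 2.0")` markers come off and only the `_dynamo_is_available()` guard remains. For context, a sketch of what such a guard can look like (our reconstruction for illustration, not rllib's actual helper):

    import unittest

    def _dynamo_is_available() -> bool:
        try:
            import torch._dynamo  # noqa: F401  # shipped (experimentally) since torch 1.13
            return True
        except ImportError:
            return False

    class TestCompile(unittest.TestCase):
        @unittest.skipIf(not _dynamo_is_available(), "torch._dynamo not available")
        def test_compile_smoke(self):
            ...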
2 changes: 0 additions & 2 deletions rllib/core/models/tests/test_base_models.py
@@ -215,8 +215,6 @@ def build(self, framework: str):
 
         model({"in_1": [[1]]})
 
-    # Todo (rllib-team): Fix for torch 2.0+
-    @unittest.skip("Failing with torch >= 2.0")
     @unittest.skipIf(not _dynamo_is_available(), "torch._dynamo not available")
     def test_torch_compile_no_breaks(self):
         """Tests if torch.compile() does not encounter any breaks.
