Revert "[CI] Add Lightning 2.0 compatibility test pipeline (ray-project#34147)" (ray-project#34933)

This reverts commit aeed2b3.
krfricke · May 2, 2023 · 24c3c4f · 24c3c4f
1 parent d50f528
commit 24c3c4f
Show file tree

Hide file tree

Showing 8 changed files with 19 additions and 59 deletions.
diff --git a/.buildkite/pipeline.gpu_large.yml b/.buildkite/pipeline.gpu_large.yml
@@ -49,16 +49,3 @@
  - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt
  - ./ci/env/env_info.sh
  - bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-timeseries_libs,-py37,-post_wheel_build doc/...
-
-- label: ":zap: :python: Lightning 2.0 Train GPU tests"
- conditions:
- ["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
- commands:
- - cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- - NO_DASHBOARD=1 ./ci/env/install-minimal.sh 3.8
- - PYTHON=3.8 DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
- - pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt
- - pip uninstall -y pytorch-lightning
- - pip install lightning==2.0.0
- - ./ci/env/env_info.sh
- - bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=ptl_v2 python/ray/train/...
diff --git a/ci/ci.sh b/ci/ci.sh
@@ -287,8 +287,6 @@ install_npm_project() {
 build_dashboard_front_end() {
  if [ "${OSTYPE}" = msys ]; then
  { echo "WARNING: Skipping dashboard due to NPM incompatibilities with Windows"; } 2> /dev/null
- elif [ "${NO_DASHBOARD-}" = "1" ]; then
- echo "Skipping dashboard build"
  else
  (
  cd ray/dashboard/client

diff --git a/ci/env/install-minimal.sh b/ci/env/install-minimal.sh
@@ -1,7 +1,5 @@
 #!/usr/bin/env bash
 
-set -xe
-
 # Python version can be specified as 3.7, 3.8, 3.9, etc..
 if [ -z "$1" ]; then
  PYTHON_VERSION=${PYTHON-3.7}

diff --git a/python/ray/train/BUILD b/python/ray/train/BUILD
@@ -412,31 +412,31 @@ py_test(
  name = "test_lightning_checkpoint",
  size = "medium",
  srcs = ["tests/test_lightning_checkpoint.py"],
- tags = ["team:ml", "exclusive", "ray_air", "gpu", "ptl_v2"],
+ tags = ["team:ml", "exclusive", "ray_air", "gpu"],
  deps = [":train_lib"]
 )
 
 py_test(
  name = "test_lightning_trainer_restore",
  size = "medium",
  srcs = ["tests/test_lightning_trainer_restore.py"],
- tags = ["team:ml", "exclusive", "ray_air", "gpu", "ptl_v2"],
+ tags = ["team:ml", "exclusive", "ray_air", "gpu"],
  deps = [":train_lib"]
 )
 
 py_test(
  name = "test_lightning_trainer",
  size = "large",
  srcs = ["tests/test_lightning_trainer.py"],
- tags = ["team:ml", "exclusive", "ray_air", "gpu", "ptl_v2"],
+ tags = ["team:ml", "exclusive", "ray_air", "gpu"],
  deps = [":train_lib"]
 )
 
 py_test(
  name = "test_lightning_predictor",
  size = "medium",
  srcs = ["tests/test_lightning_predictor.py"],
- tags = ["team:ml", "exclusive", "ray_air", "gpu", "ptl_v2"],
+ tags = ["team:ml", "exclusive", "ray_air", "gpu"],
  deps = [":train_lib"]
 )
 

diff --git a/python/ray/train/tests/lightning_test_utils.py b/python/ray/train/tests/lightning_test_utils.py
@@ -7,11 +7,9 @@
 
 
 class LinearModule(pl.LightningModule):
- def __init__(self, input_dim, output_dim, strategy="ddp") -> None:
+ def __init__(self, input_dim, output_dim) -> None:
  super().__init__()
  self.linear = nn.Linear(input_dim, output_dim)
- self.loss = []
- self.strategy = strategy
 
  def forward(self, input):
  # Backwards compat for Ray data strict mode.
@@ -27,31 +25,24 @@ def training_step(self, batch):
 
  def validation_step(self, val_batch, batch_idx):
  loss = self.forward(val_batch)
- self.loss.append(loss)
  return {"val_loss": loss}
 
- def on_validation_epoch_end(self) -> None:
- avg_loss = torch.stack(self.loss).mean()
+ def validation_epoch_end(self, outputs) -> None:
+ avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
  self.log("val_loss", avg_loss)
- self.loss.clear()
 
  def predict_step(self, batch, batch_idx):
  return self.forward(batch)
 
  def configure_optimizers(self):
- if self.strategy == "fsdp":
- # Feed FSDP wrapped model parameters to optimizer
- return torch.optim.SGD(self.trainer.model.parameters(), lr=0.1)
- else:
- return torch.optim.SGD(self.parameters(), lr=0.1)
+ return torch.optim.SGD(self.parameters(), lr=0.1)
 
 
 class DoubleLinearModule(pl.LightningModule):
  def __init__(self, input_dim_1, input_dim_2, output_dim) -> None:
  super().__init__()
  self.linear_1 = nn.Linear(input_dim_1, output_dim)
  self.linear_2 = nn.Linear(input_dim_2, output_dim)
- self.loss = []
 
  def forward(self, batch):
  input_1 = batch["input_1"]
@@ -66,14 +57,12 @@ def training_step(self, batch):
 
  def validation_step(self, val_batch, batch_idx):
  loss = self.forward(val_batch)
- self.loss.append(loss)
  return {"val_loss": loss}
 
- def on_validation_epoch_end(self) -> None:
+ def validation_epoch_end(self, outputs) -> None:
  print("Validation Epoch:", self.current_epoch)
- avg_loss = torch.stack(self.loss).mean()
+ avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
  self.log("val_loss", avg_loss)
- self.loss.clear()
 
  def predict_step(self, batch, batch_idx):
  return self.forward(batch)
@@ -105,9 +94,7 @@ def __init__(self, lr: float, layer_1: int, layer_2: int):
  self.layer_1 = torch.nn.Linear(28 * 28, layer_1)
  self.layer_2 = torch.nn.Linear(layer_1, layer_2)
  self.layer_3 = torch.nn.Linear(layer_2, 10)
- self.accuracy = Accuracy(task="multiclass", num_classes=10)
- self.val_acc_list = []
- self.val_loss_list = []
+ self.accuracy = Accuracy()
 
  def forward(self, x):
  batch_size, channels, width, height = x.size()
@@ -137,17 +124,13 @@ def validation_step(self, val_batch, batch_idx):
  logits = self.forward(x)
  loss = F.nll_loss(logits, y)
  acc = self.accuracy(logits, y)
- self.val_acc_list.append(acc)
- self.val_loss_list.append(loss)
  return {"val_loss": loss, "val_accuracy": acc}
 
- def on_validation_epoch_end(self):
- avg_loss = torch.stack(self.val_loss_list).mean()
- avg_acc = torch.stack(self.val_acc_list).mean()
+ def validation_epoch_end(self, outputs):
+ avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
+ avg_acc = torch.stack([x["val_accuracy"] for x in outputs]).mean()
  self.log("ptl/val_loss", avg_loss)
  self.log("ptl/val_accuracy", avg_acc)
- self.val_acc_list.clear()
- self.val_loss_list.clear()
 
  def predict_step(self, batch, batch_idx, dataloader_idx=None):
  x = batch

diff --git a/python/ray/train/tests/test_lightning_checkpoint.py b/python/ray/train/tests/test_lightning_checkpoint.py
@@ -38,10 +38,7 @@ def test_load_from_path():
 
  # Train one epoch and save a checkpoint
  trainer = pl.Trainer(
- max_epochs=1,
- accelerator="cpu",
- enable_progress_bar=False,
- enable_checkpointing=False,
+ max_epochs=1, enable_progress_bar=False, enable_checkpointing=False
  )
  trainer.fit(model=model, train_dataloaders=dataloader)
  ckpt_path = f"{tmpdir}/random_checkpoint_name.ckpt"
@@ -78,10 +75,7 @@ def test_from_directory():
 
  # Train one epoch and save a checkpoint
  trainer = pl.Trainer(
- max_epochs=1,
- accelerator="cpu",
- enable_progress_bar=False,
- enable_checkpointing=False,
+ max_epochs=1, enable_progress_bar=False, enable_checkpointing=False
  )
  trainer.fit(model=model, train_dataloaders=dataloader)
  trainer.save_checkpoint(f"{tmpdir}/{MODEL_KEY}")

diff --git a/python/ray/train/tests/test_lightning_predictor.py b/python/ray/train/tests/test_lightning_predictor.py
@@ -28,7 +28,7 @@ def test_repr():
 
 
 def save_checkpoint(model: pl.LightningModule, ckpt_path: str):
- trainer = pl.Trainer(max_epochs=0, accelerator="cpu")
+ trainer = pl.Trainer(max_epochs=0)
  trainer.fit(model, train_dataloaders=DataLoader(torch.randn(1)))
  trainer.save_checkpoint(ckpt_path)
 

diff --git a/python/ray/train/tests/test_lightning_trainer.py b/python/ray/train/tests/test_lightning_trainer.py
@@ -74,7 +74,7 @@ def test_trainer_with_native_dataloader(
 
  config_builder = (
  LightningConfigBuilder()
- .module(LinearModule, input_dim=32, output_dim=4, strategy=strategy)
+ .module(LinearModule, input_dim=32, output_dim=4)
  .trainer(max_epochs=num_epochs, accelerator=accelerator)
  .strategy(strategy)
  )
@@ -124,7 +124,7 @@ def test_trainer_with_ray_data(ray_start_6_cpus_2_gpus, strategy, accelerator):
 
  lightning_config = (
  LightningConfigBuilder()
- .module(cls=LinearModule, input_dim=32, output_dim=4, strategy=strategy)
+ .module(cls=LinearModule, input_dim=32, output_dim=4)
  .trainer(max_epochs=num_epochs, accelerator=accelerator)
  .strategy(strategy)
  .build()