Skip to content

Commit

Permalink
Revert "[CI] Add Lightning 2.0 compatibility test pipeline (ray-project#34147)" (ray-project#34933)
Browse files Browse the repository at this point in the history

This reverts commit aeed2b3.
  • Loading branch information
scv119 committed May 2, 2023
1 parent d50f528 commit 24c3c4f
Show file tree
Hide file tree
Showing 8 changed files with 19 additions and 59 deletions.
13 changes: 0 additions & 13 deletions .buildkite/pipeline.gpu_large.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,16 +49,3 @@
- pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt
- ./ci/env/env_info.sh
- bazel test --config=ci $(./ci/run/bazel_export_options) --build_tests_only --test_tag_filters=gpu,-timeseries_libs,-py37,-post_wheel_build doc/...

- label: ":zap: :python: Lightning 2.0 Train GPU tests"
conditions:
["NO_WHEELS_REQUIRED", "RAY_CI_TRAIN_AFFECTED"]
commands:
- cleanup() { if [ "${BUILDKITE_PULL_REQUEST}" = "false" ]; then ./ci/build/upload_build_info.sh; fi }; trap cleanup EXIT
- NO_DASHBOARD=1 ./ci/env/install-minimal.sh 3.8
- PYTHON=3.8 DOC_TESTING=1 TRAIN_TESTING=1 TUNE_TESTING=1 ./ci/env/install-dependencies.sh
- pip install -Ur ./python/requirements/ml/requirements_ml_docker.txt
- pip uninstall -y pytorch-lightning
- pip install lightning==2.0.0
- ./ci/env/env_info.sh
- bazel test --config=ci $(./scripts/bazel_export_options) --test_tag_filters=ptl_v2 python/ray/train/...
2 changes: 0 additions & 2 deletions ci/ci.sh
Original file line number Diff line number Diff line change
Expand Up @@ -287,8 +287,6 @@ install_npm_project() {
build_dashboard_front_end() {
if [ "${OSTYPE}" = msys ]; then
{ echo "WARNING: Skipping dashboard due to NPM incompatibilities with Windows"; } 2> /dev/null
elif [ "${NO_DASHBOARD-}" = "1" ]; then
echo "Skipping dashboard build"
else
(
cd ray/dashboard/client
Expand Down
2 changes: 0 additions & 2 deletions ci/env/install-minimal.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
#!/usr/bin/env bash

set -xe

# Python version can be specified as 3.7, 3.8, 3.9, etc..
if [ -z "$1" ]; then
PYTHON_VERSION=${PYTHON-3.7}
Expand Down
8 changes: 4 additions & 4 deletions python/ray/train/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -412,31 +412,31 @@ py_test(
name = "test_lightning_checkpoint",
size = "medium",
srcs = ["tests/test_lightning_checkpoint.py"],
tags = ["team:ml", "exclusive", "ray_air", "gpu", "ptl_v2"],
tags = ["team:ml", "exclusive", "ray_air", "gpu"],
deps = [":train_lib"]
)

py_test(
name = "test_lightning_trainer_restore",
size = "medium",
srcs = ["tests/test_lightning_trainer_restore.py"],
tags = ["team:ml", "exclusive", "ray_air", "gpu", "ptl_v2"],
tags = ["team:ml", "exclusive", "ray_air", "gpu"],
deps = [":train_lib"]
)

py_test(
name = "test_lightning_trainer",
size = "large",
srcs = ["tests/test_lightning_trainer.py"],
tags = ["team:ml", "exclusive", "ray_air", "gpu", "ptl_v2"],
tags = ["team:ml", "exclusive", "ray_air", "gpu"],
deps = [":train_lib"]
)

py_test(
name = "test_lightning_predictor",
size = "medium",
srcs = ["tests/test_lightning_predictor.py"],
tags = ["team:ml", "exclusive", "ray_air", "gpu", "ptl_v2"],
tags = ["team:ml", "exclusive", "ray_air", "gpu"],
deps = [":train_lib"]
)

Expand Down
37 changes: 10 additions & 27 deletions python/ray/train/tests/lightning_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,9 @@


class LinearModule(pl.LightningModule):
def __init__(self, input_dim, output_dim, strategy="ddp") -> None:
def __init__(self, input_dim, output_dim) -> None:
super().__init__()
self.linear = nn.Linear(input_dim, output_dim)
self.loss = []
self.strategy = strategy

def forward(self, input):
# Backwards compat for Ray data strict mode.
Expand All @@ -27,31 +25,24 @@ def training_step(self, batch):

def validation_step(self, val_batch, batch_idx):
loss = self.forward(val_batch)
self.loss.append(loss)
return {"val_loss": loss}

def on_validation_epoch_end(self) -> None:
avg_loss = torch.stack(self.loss).mean()
def validation_epoch_end(self, outputs) -> None:
avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
self.log("val_loss", avg_loss)
self.loss.clear()

def predict_step(self, batch, batch_idx):
return self.forward(batch)

def configure_optimizers(self):
if self.strategy == "fsdp":
# Feed FSDP wrapped model parameters to optimizer
return torch.optim.SGD(self.trainer.model.parameters(), lr=0.1)
else:
return torch.optim.SGD(self.parameters(), lr=0.1)
return torch.optim.SGD(self.parameters(), lr=0.1)


class DoubleLinearModule(pl.LightningModule):
def __init__(self, input_dim_1, input_dim_2, output_dim) -> None:
super().__init__()
self.linear_1 = nn.Linear(input_dim_1, output_dim)
self.linear_2 = nn.Linear(input_dim_2, output_dim)
self.loss = []

def forward(self, batch):
input_1 = batch["input_1"]
Expand All @@ -66,14 +57,12 @@ def training_step(self, batch):

def validation_step(self, val_batch, batch_idx):
loss = self.forward(val_batch)
self.loss.append(loss)
return {"val_loss": loss}

def on_validation_epoch_end(self) -> None:
def validation_epoch_end(self, outputs) -> None:
print("Validation Epoch:", self.current_epoch)
avg_loss = torch.stack(self.loss).mean()
avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
self.log("val_loss", avg_loss)
self.loss.clear()

def predict_step(self, batch, batch_idx):
return self.forward(batch)
Expand Down Expand Up @@ -105,9 +94,7 @@ def __init__(self, lr: float, layer_1: int, layer_2: int):
self.layer_1 = torch.nn.Linear(28 * 28, layer_1)
self.layer_2 = torch.nn.Linear(layer_1, layer_2)
self.layer_3 = torch.nn.Linear(layer_2, 10)
self.accuracy = Accuracy(task="multiclass", num_classes=10)
self.val_acc_list = []
self.val_loss_list = []
self.accuracy = Accuracy()

def forward(self, x):
batch_size, channels, width, height = x.size()
Expand Down Expand Up @@ -137,17 +124,13 @@ def validation_step(self, val_batch, batch_idx):
logits = self.forward(x)
loss = F.nll_loss(logits, y)
acc = self.accuracy(logits, y)
self.val_acc_list.append(acc)
self.val_loss_list.append(loss)
return {"val_loss": loss, "val_accuracy": acc}

def on_validation_epoch_end(self):
avg_loss = torch.stack(self.val_loss_list).mean()
avg_acc = torch.stack(self.val_acc_list).mean()
def validation_epoch_end(self, outputs):
avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
avg_acc = torch.stack([x["val_accuracy"] for x in outputs]).mean()
self.log("ptl/val_loss", avg_loss)
self.log("ptl/val_accuracy", avg_acc)
self.val_acc_list.clear()
self.val_loss_list.clear()

def predict_step(self, batch, batch_idx, dataloader_idx=None):
x = batch
Expand Down
10 changes: 2 additions & 8 deletions python/ray/train/tests/test_lightning_checkpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,7 @@ def test_load_from_path():

# Train one epoch and save a checkpoint
trainer = pl.Trainer(
max_epochs=1,
accelerator="cpu",
enable_progress_bar=False,
enable_checkpointing=False,
max_epochs=1, enable_progress_bar=False, enable_checkpointing=False
)
trainer.fit(model=model, train_dataloaders=dataloader)
ckpt_path = f"{tmpdir}/random_checkpoint_name.ckpt"
Expand Down Expand Up @@ -78,10 +75,7 @@ def test_from_directory():

# Train one epoch and save a checkpoint
trainer = pl.Trainer(
max_epochs=1,
accelerator="cpu",
enable_progress_bar=False,
enable_checkpointing=False,
max_epochs=1, enable_progress_bar=False, enable_checkpointing=False
)
trainer.fit(model=model, train_dataloaders=dataloader)
trainer.save_checkpoint(f"{tmpdir}/{MODEL_KEY}")
Expand Down
2 changes: 1 addition & 1 deletion python/ray/train/tests/test_lightning_predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def test_repr():


def save_checkpoint(model: pl.LightningModule, ckpt_path: str):
trainer = pl.Trainer(max_epochs=0, accelerator="cpu")
trainer = pl.Trainer(max_epochs=0)
trainer.fit(model, train_dataloaders=DataLoader(torch.randn(1)))
trainer.save_checkpoint(ckpt_path)

Expand Down
4 changes: 2 additions & 2 deletions python/ray/train/tests/test_lightning_trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ def test_trainer_with_native_dataloader(

config_builder = (
LightningConfigBuilder()
.module(LinearModule, input_dim=32, output_dim=4, strategy=strategy)
.module(LinearModule, input_dim=32, output_dim=4)
.trainer(max_epochs=num_epochs, accelerator=accelerator)
.strategy(strategy)
)
Expand Down Expand Up @@ -124,7 +124,7 @@ def test_trainer_with_ray_data(ray_start_6_cpus_2_gpus, strategy, accelerator):

lightning_config = (
LightningConfigBuilder()
.module(cls=LinearModule, input_dim=32, output_dim=4, strategy=strategy)
.module(cls=LinearModule, input_dim=32, output_dim=4)
.trainer(max_epochs=num_epochs, accelerator=accelerator)
.strategy(strategy)
.build()
Expand Down

0 comments on commit 24c3c4f

Please sign in to comment.