[air/tuner/docs] Update docs for Tuner() API 1: RSTs, docs, move reuse_actors (ray-project#26930)

Signed-off-by: Kai Fricke [email protected]

Why are these changes needed?
Splitting up ray-project#26884: this PR updates most docs files (RST and Python) to use the Tuner() API instead of tune.run(), and moves the reuse_actors setting into TuneConfig.
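
For illustration (not part of the diffs below), a minimal before/after sketch of the migration; the objective function and search-space values are made up for this example, and the reuse_actors placement follows the description above:

from ray import tune
from ray.air import session


def training_function(config):
    # Toy objective: report one metric computed from the sampled config.
    session.report({"mean_loss": config["alpha"] * config["beta"]})


param_space = {
    "alpha": tune.grid_search([0.001, 0.01, 0.1]),
    "beta": tune.choice([1, 2, 3]),
}

# Old API: tune.run() took the search space via `config=` and execution
# options such as `reuse_actors` as top-level keyword arguments.
# analysis = tune.run(training_function, config=param_space, reuse_actors=True)

# New API: Tuner() takes the search space via `param_space=` and execution
# options via tune.TuneConfig (where `reuse_actors` now lives).
tuner = tune.Tuner(
    training_function,
    param_space=param_space,
    tune_config=tune.TuneConfig(reuse_actors=True),
)
results = tuner.fit()
print(results.get_best_result(metric="mean_loss", mode="min").config)

The same pattern appears in the diffs below, with tune.with_resources() replacing resources_per_trial and air.RunConfig() absorbing run-level options such as name and stop.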

Signed-off-by: klwuibm <[email protected]>
krfricke authored and klwuibm committed Jul 27, 2022
1 parent 25a0a01 commit c1a6729
Showing 39 changed files with 629 additions and 443 deletions.
10 changes: 6 additions & 4 deletions README.rst
@@ -135,17 +135,19 @@ This example runs a parallel grid search to optimize an example objective function.
             session.report({"mean_loss": intermediate_score})

-    analysis = tune.run(
+    tuner = tune.Tuner(
         training_function,
-        config={
+        param_space={
             "alpha": tune.grid_search([0.001, 0.01, 0.1]),
             "beta": tune.choice([1, 2, 3])
         })
+    results = tuner.fit()

-    print("Best config: ", analysis.get_best_config(metric="mean_loss", mode="min"))
+    print("Best config: ", results.get_best_result(metric="mean_loss", mode="min").config)

     # Get a dataframe for analyzing trial results.
-    df = analysis.results_df
+    df = results.get_dataframe()

 If TensorBoard is installed, automatically visualize all trial results:

19 changes: 13 additions & 6 deletions doc/source/data/doc_code/key_concepts.py
@@ -15,7 +15,14 @@ def objective(*args):
 # By setting `max_concurrent_trials=3`, this ensures the cluster will always
 # have a sparse CPU for Datasets. Try setting `max_concurrent_trials=4` here,
 # and notice that the experiment will appear to hang.
-tune.run(objective, num_samples=4, max_concurrent_trials=3)
+tuner = tune.Tuner(
+    tune.with_resources(objective, {"cpu": 1}),
+    tune_config=tune.TuneConfig(
+        num_samples=1,
+        max_concurrent_trials=3
+    )
+)
+tuner.fit()
 # __resource_allocation_1_end__
 # fmt: on

@@ -33,14 +40,14 @@ def objective(*args):

 # This runs smoothly since _max_cpu_fraction_per_node is set to 0.8, effectively
 # reserving 1 CPU for Datasets task execution.
-tune.run(
-    objective,
-    num_samples=4,
-    resources_per_trial=tune.PlacementGroupFactory(
+tuner = tune.Tuner(
+    tune.with_resources(objective, tune.PlacementGroupFactory(
         [{"CPU": 1}],
         _max_cpu_fraction_per_node=0.8,
-    ),
+    )),
+    tune_config=tune.TuneConfig(num_samples=1)
 )
+tuner.fit()
 # __resource_allocation_2_end__
 # fmt: on

59 changes: 39 additions & 20 deletions doc/source/ray-core/examples/dask_xgboost/dask_xgboost.ipynb
@@ -61,7 +61,7 @@
    "from xgboost_ray import RayDMatrix, RayParams, train, predict\n",
    "\n",
    "import ray\n",
-   "from ray import tune\n",
+   "from ray import air, tune\n",
    "from ray.util.dask import ray_dask_get"
   ]
  },
@@ -351,7 +351,7 @@
    "and ``max_depth``. We are using [Tune's samplers to define the search\n",
    "space](https://docs.ray.io/en/latest/tune/user-guide.html#search-space-grid-random).\n",
    "\n",
-   "The experiment configuration is done through ``tune.run``. We set the amount\n",
+   "The experiment configuration is done through ``Tuner()``. We set the amount\n",
    "of resources each trial (hyperparameter combination) requires by using the\n",
    "``get_tune_resources`` method of ``RayParams``. The ``num_samples`` argument\n",
    "controls how many trials will be ran in total. In the end, the best\n",
@@ -387,32 +387,39 @@
    "    )\n",
    "\n",
    "    tune_start_time = time.time()\n",
-   "\n",
-   "    analysis = tune.run(\n",
-   "        tune.with_parameters(\n",
-   "            train_xgboost,\n",
-   "            train_df=train_df,\n",
-   "            test_df=test_df,\n",
-   "            target_column=target_column,\n",
-   "            ray_params=ray_params,\n",
+   "    \n",
+   "    tuner = tune.Tuner(\n",
+   "        tune.with_resources(\n",
+   "            tune.with_parameters(\n",
+   "                train_xgboost,\n",
+   "                train_df=train_df,\n",
+   "                test_df=test_df,\n",
+   "                target_column=target_column,\n",
+   "                ray_params=ray_params,\n",
+   "            ),\n",
+   "            resources=ray_params.get_tune_resources()\n",
+   "        ),\n",
+   "        tune_config=tune.TuneConfig(\n",
+   "            num_samples=10,\n",
+   "            metric=\"eval-error\",\n",
+   "            mode=\"min\",\n",
    "        ),\n",
-   "        # Use the `get_tune_resources` helper function to set the resources.\n",
-   "        resources_per_trial=ray_params.get_tune_resources(),\n",
-   "        config=config,\n",
-   "        num_samples=10,\n",
-   "        metric=\"eval-error\",\n",
-   "        mode=\"min\",\n",
+   "        param_space=config\n",
    "    )\n",
+   "    results = tuner.fit()\n",
+   "    \n",
+   "    best_result = results.get_best_result()\n",
    "\n",
+   "\n",
    "    tune_end_time = time.time()\n",
    "    tune_duration = tune_end_time - tune_start_time\n",
    "    print(f\"Total time taken: {tune_duration} seconds.\")\n",
    "\n",
-   "    accuracy = 1.0 - analysis.best_result[\"eval-error\"]\n",
-   "    print(f\"Best model parameters: {analysis.best_config}\")\n",
+   "    accuracy = 1.0 - best_result.metrics[\"eval-error\"]\n",
+   "    print(f\"Best model parameters: {best_result.config}\")\n",
    "    print(f\"Best model total accuracy: {accuracy:.4f}\")\n",
    "\n",
-   "    return analysis.best_config"
+   "    return best_result.config"
   ]
  },
  {
@@ -470,9 +477,21 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.7"
+  }
  },
  "nbformat": 4,
39 changes: 22 additions & 17 deletions doc/source/ray-core/examples/dask_xgboost/dask_xgboost.py
@@ -251,7 +251,7 @@ def train_xgboost(config, train_df, test_df, target_column, ray_params):
 # and ``max_depth``. We are using [Tune's samplers to define the search
 # space](https://docs.ray.io/en/latest/tune/user-guide.html#search-space-grid-random).
 #
-# The experiment configuration is done through ``tune.run``. We set the amount
+# The experiment configuration is done through ``Tuner()``. We set the amount
 # of resources each trial (hyperparameter combination) requires by using the
 # ``get_tune_resources`` method of ``RayParams``. The ``num_samples`` argument
 # controls how many trials will be ran in total. In the end, the best
@@ -282,31 +282,36 @@ def tune_xgboost(train_df, test_df, target_column):

     tune_start_time = time.time()

-    analysis = tune.run(
-        tune.with_parameters(
-            train_xgboost,
-            train_df=train_df,
-            test_df=test_df,
-            target_column=target_column,
-            ray_params=ray_params,
+    tuner = tune.Tuner(
+        tune.with_resources(
+            tune.with_parameters(
+                train_xgboost,
+                train_df=train_df,
+                test_df=test_df,
+                target_column=target_column,
+                ray_params=ray_params,
+            ),
+            resources=ray_params.get_tune_resources(),
         ),
-        # Use the `get_tune_resources` helper function to set the resources.
-        resources_per_trial=ray_params.get_tune_resources(),
-        config=config,
-        num_samples=10,
-        metric="eval-error",
-        mode="min",
+        tune_config=tune.TuneConfig(
+            num_samples=10,
+            metric="eval-error",
+            mode="min",
+        ),
+        param_space=config,
     )
+    results = tuner.fit()

     tune_end_time = time.time()
     tune_duration = tune_end_time - tune_start_time
     print(f"Total time taken: {tune_duration} seconds.")

-    accuracy = 1.0 - analysis.best_result["eval-error"]
-    print(f"Best model parameters: {analysis.best_config}")
+    best_result = results.get_best_result()
+    accuracy = 1.0 - best_result.metrics["eval-error"]
+    print(f"Best model parameters: {best_result.config}")
     print(f"Best model total accuracy: {accuracy:.4f}")

-    return analysis.best_config
+    return best_result.config


# Hyperparameter optimization may take some time to complete.
39 changes: 23 additions & 16 deletions doc/source/ray-core/examples/using-ray-with-pytorch-lightning.rst
@@ -101,6 +101,7 @@ To report metrics back to Tune after each validation epoch, we can use the ``TuneReportCallback``.

 .. code-block:: python

+    from ray import air, tune
     from ray.tune.integration.pytorch_lightning import TuneReportCallback

     def train_mnist(config):
@@ -123,15 +124,20 @@ To report metrics back to Tune after each validation epoch, we can use the ``TuneReportCallback``.
     }

     # Make sure to specify how many actors each training run will create via the "extra_cpu" field.
-    analysis = tune.run(
-        train_mnist,
+    tuner = tune.Tuner(
+        train_mnist,
+        tune_config=tune.TuneConfig(
         metric="loss",
         mode="min",
-        config=config,
-        num_samples=num_samples,
-        name="tune_mnist")
+            num_samples=num_samples
+        ),
+        param_space=config,
+        run_config=air.RunConfig(name="tune_mnist"),
+    )
+    results = tuner.fit()

-    print("Best hyperparameters found were: ", analysis.best_config)
+    print("Best hyperparameters found were: ", results.get_best_result().config)

 And if you want to add periodic checkpointing as well, you can use the ``TuneReportCheckpointCallback`` instead.
@@ -155,14 +161,15 @@ In this case, you want to use the `Ray Lightning Library's <https://github.com/r

 With this integration, you can run multiple PyTorch Lightning training runs in parallel,
 each with a different hyperparameter configuration, and each training run also parallelized.
-All you have to do is move your training code to a function, pass the function to ``tune.run``, and make sure to add the appropriate callback (Either ``TuneReportCallback`` or ``TuneReportCheckpointCallback``) to your PyTorch Lightning Trainer.
+All you have to do is move your training code to a function, pass the function to ``Tuner()``, and make sure to add the appropriate callback (Either ``TuneReportCallback`` or ``TuneReportCheckpointCallback``) to your PyTorch Lightning Trainer.

 .. warning:: Make sure to use the callbacks from the Ray Lightning library and not the one from the Tune library, i.e. use ``ray_lightning.tune.TuneReportCallback`` and not ``ray.tune.integrations.pytorch_lightning.TuneReportCallback``.

 Example using Ray Lightning with Tune:

 .. code-block:: python

+    from ray import air, tune
     from ray_lightning import RayPlugin
     from ray_lightning.tune import TuneReportCallback
@@ -189,16 +196,16 @@ Example using Ray Lightning with Tune:
     }

     # Make sure to specify how many actors each training run will create via the "extra_cpu" field.
-    analysis = tune.run(
-        train_mnist,
+    tuner = tune.Tuner(
+        tune.with_resources(train_mnist, {"cpu": 1, "extra_cpu": 4}),
+        tune_config=tune.TuneConfig(
         metric="loss",
         mode="min",
-        config=config,
         num_samples=num_samples,
-        resources_per_trial={
-            "cpu": 1,
-            "extra_cpu": 4
-        },
-        name="tune_mnist")
+        ),
+        param_space=config
+    )
+    results = tuner.fit()

-    print("Best hyperparameters found were: ", analysis.best_config)
+    print("Best hyperparameters found were: ", results.get_best_result().config)
13 changes: 9 additions & 4 deletions doc/source/ray-core/gotchas.rst
@@ -95,22 +95,27 @@ of Ray Tasks itself, e.g.

 .. code-block:: python

+    from ray import air, tune

     def create_task_that_uses_resources():
         @ray.remote(num_cpus=10)
         def sample_task():
             print("Hello")
             return

-        return ray.get([my_task.remote() for i in range(10)])
+        return ray.get([sample_task.remote() for i in range(10)])

     def objective(config):
         create_task_that_uses_resources()

-    analysis = tune.run(objective, config=search_space)
+    tuner = tune.Tuner(objective, param_space={"a": 1})
+    tuner.fit()

-This will hang forever.
+This will error with message:
+ValueError: Cannot schedule create_task_that_uses_resources.<locals>.sample_task with the placement group
+because the resource request {'CPU': 10} cannot fit into any bundles for the placement group, [{'CPU': 1.0}].

-**Expected behavior**: The above executes and doesn't hang.
+**Expected behavior**: The above executes.

 **Fix**: In the ``@ray.remote`` declaration of tasks
 called by ``create_task_that_uses_resources()`` , include a
11 changes: 7 additions & 4 deletions doc/source/ray-overview/doc_test/ray_rllib.py
@@ -1,8 +1,11 @@
-from ray import tune
+from ray import air, tune
 from ray.rllib.algorithms.ppo import PPO

-tune.run(
+tuner = tune.Tuner(
     PPO,
-    stop={"episode_len_mean": 20},
-    config={"env": "CartPole-v0", "framework": "torch", "log_level": "INFO"},
+    run_config=air.RunConfig(
+        stop={"episode_len_mean": 20},
+    ),
+    param_space={"env": "CartPole-v0", "framework": "torch", "log_level": "INFO"},
 )
+tuner.fit()
10 changes: 6 additions & 4 deletions doc/source/ray-overview/doc_test/ray_tune.py
@@ -16,15 +16,17 @@ def training_function(config):
         session.report({"mean_loss": intermediate_score})


-analysis = tune.run(
+tuner = tune.Tuner(
     training_function,
-    config={
+    param_space={
         "alpha": tune.grid_search([0.001, 0.01, 0.1]),
         "beta": tune.choice([1, 2, 3]),
     },
 )
+results = tuner.fit()

-print("Best config: ", analysis.get_best_config(metric="mean_loss", mode="min"))
+best_result = results.get_best_result(metric="mean_loss", mode="min")
+print("Best result: ", best_result.metrics)

 # Get a dataframe for analyzing trial results.
-df = analysis.results_df
+df = results.get_dataframe()
6 changes: 3 additions & 3 deletions doc/source/rllib/core-concepts.rst
@@ -85,13 +85,13 @@ which implements the proximal policy optimization algorithm in RLlib.

     from ray import tune

     # Configure.
-    from ray.rllib.algorithms import PPOConfig
-    config = PPOConfig().environment("CartPole-v0").training(train_batch_size=4000)
+    from ray.rllib.algorithms.ppo import PPO, PPOConfig
+    config = PPOConfig().environment(env="CartPole-v0").training(train_batch_size=4000)

     # Train via Ray Tune.
     # Note that Ray Tune does not yet support AlgorithmConfig objects, hence
     # we need to convert back to old-style config dicts.
-    tune.run("PPO", config=config.to_dict())
+    tune.run(PPO, param_space=config.to_dict())

 .. tabbed:: RLlib Command Line
6 changes: 4 additions & 2 deletions doc/source/train/user_guide.rst
@@ -1217,8 +1217,10 @@ Reproducibility

     # Convert this to a trainable.
     trainable = trainer.to_tune_trainable(training_func, dataset=dataset)

-    analysis = tune.run(trainable, config={
-        "lr": tune.uniform(), "batch_size": tune.randint(1, 2, 3)}, num_samples=12)
+    tuner = tune.Tuner(trainable,
+        param_space={"lr": tune.uniform(), "batch_size": tune.randint(1, 2, 3)},
+        tune_config=tune.TuneConfig(num_samples=12))
+    results = tuner.fit()
..
Advanced APIs
-------------