[RLlib] Cleanup examples folder: Cleaned-up PyFlyt example. (ray-proj…
simonsays1980 committed Jun 14, 2024
1 parent d15204f commit af45a89
Showing 1 changed file with 38 additions and 32 deletions.
70 changes: 38 additions & 32 deletions rllib/examples/quadx_waypoints.py
@@ -1,26 +1,35 @@
"""Example using the PyFlyt Gymnasium environment to train a UAV to reach waypoints.
"""An example showing how to use PyFlyt gymnasium environment to train a UAV to
reach waypoints.
For more infos about the PyFlyt gymnasium environment see the GitHub Repository:
https://github.com/jjshoots/PyFlyt/tree/master/PyFlyt
This example
- Runs a single-agent `PyFlyt/QuadX-Waypoints-v1` experiment.
- Uses a gymnasium reward wrapper for reward scaling.
- Stops the experiment, if either `--stop-iters` (default is 200) or
`--stop-reward` (default is 90.0) is reached.
PyFlyt GitHub Repository: https://github.com/jjshoots/PyFlyt/tree/master/PyFlyt
How to run this script
----------------------
`python [script file name].py --enable-new-api-stack`
Control the number of environments per `EnvRunner` via `--num-envs-per-env-runner`.
This will increase sampling speed.
For debugging, use the following additional command line options
`--no-tune --num-env-runners=0`
which should allow you to set breakpoints anywhere in the RLlib code and
have the execution stop there for inspection and debugging.
`--no-tune --num-env-runners=0` which should allow you to set breakpoints
anywhere in the RLlib code and have the execution stop there for inspection
and debugging.
For logging to your WandB account, use:
`--wandb-key=[your WandB API key] --wandb-project=[some project name]
--wandb-run-name=[optional: WandB run name (within the defined project)]`
"""

import os

from ray.tune.registry import get_trainable_cls
import gymnasium as gym
import sys

from ray.rllib.utils.test_utils import (
add_rllib_example_script_args,
run_rllib_example_script_experiment,
@@ -30,6 +39,9 @@
EPISODE_RETURN_MEAN,
TRAINING_ITERATION_TIMER,
)
from ray.tune.registry import get_trainable_cls, register_env

sys.setrecursionlimit(3000)

parser = add_rllib_example_script_args(
default_iters=200,
@@ -40,7 +52,7 @@
"--run", type=str, default="PPO", help="The RLlib-registered algorithm to use."
)
parser.add_argument("--env-name", type=str, default="quadx_waypoints")
parser.add_argument("--num-envs-per-worker", type=int, default=4)
parser.add_argument("--num-envs-per-env-runner", type=int, default=4)


class RewardWrapper(gym.RewardWrapper):
@@ -66,31 +78,23 @@ def create_quadx_waypoints_env(env_config):


if __name__ == "__main__":
from ray.tune.registry import register_env

args = parser.parse_args()
num_gpus = int(os.environ.get("RLLIB_NUM_GPUS", "0"))

# Register the environment with tune.
register_env(args.env_name, env_creator=create_quadx_waypoints_env)

# Get the algorithm class to use for training.
algo_cls = get_trainable_cls(args.run)
config = algo_cls.get_default_config()

config.environment(env=args.env_name).resources(
num_learner_workers=num_gpus,
num_gpus_per_learner_worker=num_gpus,
).rollouts(
num_rollout_workers=args.num_cpus,
num_envs_per_worker=args.num_envs_per_worker,
).framework(
args.framework
).api_stack(
enable_rl_module_and_learner=True,
enable_env_runner_and_connector_v2=True,
).reporting(
min_time_s_per_iteration=0.1
config = (
algo_cls.get_default_config()
.environment(env=args.env_name)
.env_runners(
num_envs_per_env_runner=args.num_envs_per_env_runner,
)
.reporting(min_time_s_per_iteration=0.1)
)
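Note that the old `.resources(num_learner_workers=..., num_gpus_per_learner_worker=...)` call has no counterpart in the rewritten builder above; on the new API stack that knob lives on `.learners()`, as the IMPALA branch below shows with `num_gpus_per_learner`. A hedged sketch of re-enabling GPU learners follows; the `num_learners` kwarg is assumed from the new API stack and worth verifying against the installed Ray version.

import os

# Illustrative only: mirror the old RLLIB_NUM_GPUS escape hatch on the new stack.
num_gpus = int(os.environ.get("RLLIB_NUM_GPUS", "0"))
if num_gpus > 0:
    config.learners(
        num_learners=num_gpus,   # one Learner worker per requested GPU
        num_gpus_per_learner=1,  # each Learner gets a single GPU
    )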

# If PPO set additional configurations.
if args.run == "PPO":
config.rl_module(
model_config_dict={
@@ -101,20 +105,22 @@ def create_quadx_waypoints_env(env_config):
)
config.training(
sgd_minibatch_size=128,
train_batch_size=10000,
train_batch_size_per_learner=10000,
)
# If IMPALA set additional arguments.
elif args.run == "IMPALA":
config.rollouts(num_rollout_workers=2)
config.resources(num_gpus=0)
config.env_runners(num_env_runners=2)
config.learners(num_gpus_per_learner=0)
config.training(vf_loss_coeff=0.01)

# Set the stopping arguments.
EPISODE_RETURN_MEAN_KEY = f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}"

stop = {
TRAINING_ITERATION_TIMER: args.stop_iters,
EPISODE_RETURN_MEAN_KEY: args.stop_reward,
}

# Run the experiment.
run_rllib_example_script_experiment(
config,
args,
