[RLlib] Cleanup examples folder: Cleaned-up PyFlyt example. (ray-proj…
simonsays1980 committed Jun 14, 2024
1 parent d15204f commit af45a89
Showing 1 changed file with 38 additions and 32 deletions.
70 changes: 38 additions & 32 deletions rllib/examples/quadx_waypoints.py
@@ -1,26 +1,35 @@
"""Example using the PyFlyt Gymnasium environment to train a UAV to reach waypoints.
"""An example showing how to use PyFlyt gymnasium environment to train a UAV to
reach waypoints.
For more infos about the PyFlyt gymnasium environment see the GitHub Repository:
https://github.com/jjshoots/PyFlyt/tree/master/PyFlyt
This example
- Runs a single-agent `PyFlyt/QuadX-Waypoints-v1` experiment.
- Uses a gymnasium reward wrapper for reward scaling.
- Stops the experiment, if either `--stop-iters` (default is 200) or
`--stop-reward` (default is 90.0) is reached.
PyFlyt GitHub Repository: https://github.com/jjshoots/PyFlyt/tree/master/PyFlyt
How to run this script
----------------------
`python [script file name].py --enable-new-api-stack`
Control the number of environments per `EnvRunner` via `--num-envs-per-env-runner`.
This will increase sampling speed.
For debugging, use the following additional command line options
`--no-tune --num-env-runners=0`
which should allow you to set breakpoints anywhere in the RLlib code and
have the execution stop there for inspection and debugging.
`--no-tune --num-env-runners=0` which should allow you to set breakpoints
anywhere in the RLlib code and have the execution stop there for inspection
and debugging.
For logging to your WandB account, use:
`--wandb-key=[your WandB API key] --wandb-project=[some project name]
--wandb-run-name=[optional: WandB run name (within the defined project)]`
"""

import os

from ray.tune.registry import get_trainable_cls
import gymnasium as gym
import sys

from ray.rllib.utils.test_utils import (
add_rllib_example_script_args,
run_rllib_example_script_experiment,
@@ -30,6 +39,9 @@
EPISODE_RETURN_MEAN,
TRAINING_ITERATION_TIMER,
)
from ray.tune.registry import get_trainable_cls, register_env

sys.setrecursionlimit(3000)

parser = add_rllib_example_script_args(
default_iters=200,
@@ -40,7 +52,7 @@
"--run", type=str, default="PPO", help="The RLlib-registered algorithm to use."
)
parser.add_argument("--env-name", type=str, default="quadx_waypoints")
parser.add_argument("--num-envs-per-worker", type=int, default=4)
parser.add_argument("--num-envs-per-env-runner", type=int, default=4)


class RewardWrapper(gym.RewardWrapper):
@@ -66,31 +78,23 @@ def create_quadx_waypoints_env(env_config):


if __name__ == "__main__":
from ray.tune.registry import register_env

args = parser.parse_args()
num_gpus = int(os.environ.get("RLLIB_NUM_GPUS", "0"))

# Register the environment with tune.
register_env(args.env_name, env_creator=create_quadx_waypoints_env)

# Get the algorithm class to use for training.
algo_cls = get_trainable_cls(args.run)
config = algo_cls.get_default_config()

config.environment(env=args.env_name).resources(
num_learner_workers=num_gpus,
num_gpus_per_learner_worker=num_gpus,
).rollouts(
num_rollout_workers=args.num_cpus,
num_envs_per_worker=args.num_envs_per_worker,
).framework(
args.framework
).api_stack(
enable_rl_module_and_learner=True,
enable_env_runner_and_connector_v2=True,
).reporting(
min_time_s_per_iteration=0.1
config = (
algo_cls.get_default_config()
.environment(env=args.env_name)
.env_runners(
num_envs_per_env_runner=args.num_envs_per_env_runner,
)
.reporting(min_time_s_per_iteration=0.1)
)
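Note that the old `.resources(num_learner_workers=..., num_gpus_per_learner_worker=...)` call has no counterpart in the rewritten builder above; on the new API stack that knob lives on `.learners()`, as the IMPALA branch below shows with `num_gpus_per_learner`. A hedged sketch of re-enabling GPU learners follows; the `num_learners` kwarg is assumed from the new API stack and worth verifying against the installed Ray version.

import os

# Illustrative only: mirror the old RLLIB_NUM_GPUS escape hatch on the new stack.
num_gpus = int(os.environ.get("RLLIB_NUM_GPUS", "0"))
if num_gpus > 0:
    config.learners(
        num_learners=num_gpus,   # one Learner worker per requested GPU
        num_gpus_per_learner=1,  # each Learner gets a single GPU
    )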

# If PPO set additional configurations.
if args.run == "PPO":
config.rl_module(
model_config_dict={
@@ -101,20 +105,22 @@ def create_quadx_waypoints_env(env_config):
)
config.training(
sgd_minibatch_size=128,
train_batch_size=10000,
train_batch_size_per_learner=10000,
)
# If IMPALA set additional arguments.
elif args.run == "IMPALA":
config.rollouts(num_rollout_workers=2)
config.resources(num_gpus=0)
config.env_runners(num_env_runners=2)
config.learners(num_gpus_per_learner=0)
config.training(vf_loss_coeff=0.01)

# Set the stopping arguments.
EPISODE_RETURN_MEAN_KEY = f"{ENV_RUNNER_RESULTS}/{EPISODE_RETURN_MEAN}"

stop = {
TRAINING_ITERATION_TIMER: args.stop_iters,
EPISODE_RETURN_MEAN_KEY: args.stop_reward,
}

# Run the experiment.
run_rllib_example_script_experiment(
config,
args,
