diff --git a/ci/jenkins_tests/run_rllib_tests.sh b/ci/jenkins_tests/run_rllib_tests.sh index 7de03a6417858..c26335150fe86 100644 --- a/ci/jenkins_tests/run_rllib_tests.sh +++ b/ci/jenkins_tests/run_rllib_tests.sh @@ -1,47 +1,47 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env PongDeterministic-v0 \ --run A3C \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env Pong-ram-v4 \ --run A3C \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env PongDeterministic-v0 \ --run A2C \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 1}' \ --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "model": {"free_log_std": true}}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 1}' \ --config '{"simple_optimizer": false, "num_sgd_iter": 2, "model": {"use_lstm": true}}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 1}' \ --config '{"simple_optimizer": true, "num_sgd_iter": 2, "model": {"use_lstm": true}}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 1}' \ @@ -49,215 +49,215 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ --ray-num-gpus 1 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 1}' \ --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "use_gae": false, "batch_mode": "complete_episodes"}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 1}' \ --config '{"remote_worker_envs": true, "remote_env_batch_wait_ms": 99999999, "num_envs_per_worker": 2, "num_workers": 1, "train_batch_size": 100, "sgd_minibatch_size": 50}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output 
/ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v1 \ --run PPO \ --stop '{"training_iteration": 2}' \ --config '{"remote_worker_envs": true, "num_envs_per_worker": 2, "num_workers": 1, "train_batch_size": 100, "sgd_minibatch_size": 50}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env Pendulum-v0 \ --run APPO \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2, "num_gpus": 0}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env Pendulum-v0 \ --run ES \ --stop '{"training_iteration": 1}' \ --config '{"stepsize": 0.01, "episodes_per_batch": 20, "train_batch_size": 100, "num_workers": 2}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env Pong-v0 \ --run ES \ --stop '{"training_iteration": 1}' \ --config '{"stepsize": 0.01, "episodes_per_batch": 20, "train_batch_size": 100, "num_workers": 2}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run A3C \ --stop '{"training_iteration": 1}' \ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run DQN \ --stop '{"training_iteration": 1}' \ --config '{"lr": 1e-3, "schedule_max_timesteps": 100000, "exploration_fraction": 0.1, "exploration_final_eps": 0.02, "dueling": false, "hiddens": [], "model": {"fcnet_hiddens": [64], "fcnet_activation": "relu"}}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run DQN \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run APEX \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2, "timesteps_per_iteration": 1000, "num_gpus": 0, "min_iter_time_s": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env FrozenLake-v0 \ --run DQN \ --stop '{"training_iteration": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env FrozenLake-v0 \ --run PPO \ --stop '{"training_iteration": 1}' \ --config '{"num_sgd_iter": 10, "sgd_minibatch_size": 64, "train_batch_size": 1000, "num_workers": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env PongDeterministic-v4 \ --run DQN \ --stop '{"training_iteration": 1}' \ --config '{"lr": 1e-4, 
"schedule_max_timesteps": 2000000, "buffer_size": 10000, "exploration_fraction": 0.1, "exploration_final_eps": 0.01, "sample_batch_size": 4, "learning_starts": 10000, "target_network_update_freq": 1000, "gamma": 0.99, "prioritized_replay": true}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env MontezumaRevenge-v0 \ --run PPO \ --stop '{"training_iteration": 1}' \ --config '{"kl_coeff": 1.0, "num_sgd_iter": 10, "lr": 1e-4, "sgd_minibatch_size": 64, "train_batch_size": 2000, "num_workers": 1, "model": {"dim": 40, "conv_filters": [[16, [8, 8], 4], [32, [4, 4], 2], [512, [5, 5], 1]]}}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v1 \ --run A3C \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2, "model": {"use_lstm": true}}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run DQN \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run PG \ --stop '{"training_iteration": 1}' \ --config '{"sample_batch_size": 500, "num_workers": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run PG \ --stop '{"training_iteration": 1}' \ --config '{"sample_batch_size": 500, "use_pytorch": true}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run PG \ --stop '{"training_iteration": 1}' \ --config '{"sample_batch_size": 500, "num_workers": 1, "model": {"use_lstm": true, "max_seq_len": 100}}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run PG \ --stop '{"training_iteration": 1}' \ --config '{"sample_batch_size": 500, "num_workers": 1, "num_envs_per_worker": 10}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env Pong-v0 \ --run PG \ --stop '{"training_iteration": 1}' \ --config '{"sample_batch_size": 500, "num_workers": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env FrozenLake-v0 \ --run PG \ --stop '{"training_iteration": 1}' \ --config '{"sample_batch_size": 500, "num_workers": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env Pendulum-v0 \ --run DDPG \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 1}' docker run --rm 
--shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run IMPALA \ --stop '{"training_iteration": 1}' \ --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run IMPALA \ --stop '{"training_iteration": 1}' \ --config '{"num_gpus": 0, "num_workers": 2, "num_aggregation_workers": 2, "min_iter_time_s": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run IMPALA \ --stop '{"training_iteration": 1}' \ --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "model": {"use_lstm": true}}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run IMPALA \ --stop '{"training_iteration": 1}' \ --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_data_loader_buffers": 2, "replay_buffer_num_slots": 100, "replay_proportion": 1.0}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run IMPALA \ --stop '{"training_iteration": 1}' \ --config '{"num_gpus": 0, "num_workers": 2, "min_iter_time_s": 1, "num_data_loader_buffers": 2, "replay_buffer_num_slots": 100, "replay_proportion": 1.0, "model": {"use_lstm": true}}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env MountainCarContinuous-v0 \ --run DDPG \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env MountainCarContinuous-v0 \ --run DDPG \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env Pendulum-v0 \ --run APEX_DDPG \ --ray-num-cpus 8 \ @@ -265,7 +265,7 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ --config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100, "min_iter_time_s": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env Pendulum-v0 \ --run APEX_DDPG \ --ray-num-cpus 8 \ @@ -273,193 +273,193 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ --config '{"num_workers": 2, "optimizer": {"num_replay_buffer_shards": 1}, "learning_starts": 100, "min_iter_time_s": 1, "batch_mode": "complete_episodes", "parameter_noise": true}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output 
/ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run MARWIL \ --stop '{"training_iteration": 1}' \ - --config '{"input": "/ray/python/ray/rllib/tests/data/cartpole_small", "learning_starts": 0, "input_evaluation": ["wis", "is"], "shuffle_buffer_size": 10}' + --config '{"input": "/ray/rllib/tests/data/cartpole_small", "learning_starts": 0, "input_evaluation": ["wis", "is"], "shuffle_buffer_size": 10}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v0 \ --run DQN \ --stop '{"training_iteration": 1}' \ - --config '{"input": "/ray/python/ray/rllib/tests/data/cartpole_small", "learning_starts": 0, "input_evaluation": ["wis", "is"], "soft_q": true}' + --config '{"input": "/ray/rllib/tests/data/cartpole_small", "learning_starts": 0, "input_evaluation": ["wis", "is"], "soft_q": true}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_local.py + /ray/ci/suppress_output python /ray/rllib/tests/test_local.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_reproducibility.py + /ray/ci/suppress_output python /ray/rllib/tests/test_reproducibility.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_dependency.py + /ray/ci/suppress_output python /ray/rllib/tests/test_dependency.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_legacy.py + /ray/ci/suppress_output python /ray/rllib/tests/test_legacy.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_io.py + /ray/ci/suppress_output python /ray/rllib/tests/test_io.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_checkpoint_restore.py + /ray/ci/suppress_output python /ray/rllib/tests/test_checkpoint_restore.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_rollout_worker.py + /ray/ci/suppress_output python /ray/rllib/tests/test_rollout_worker.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_nested_spaces.py + /ray/ci/suppress_output python /ray/rllib/tests/test_nested_spaces.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_external_env.py + /ray/ci/suppress_output python /ray/rllib/tests/test_external_env.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_external_multi_agent_env.py + /ray/ci/suppress_output python /ray/rllib/tests/test_external_multi_agent_env.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/custom_keras_model.py --run=A2C --stop=50 + /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_model.py 
--run=A2C --stop=50 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/custom_keras_model.py --run=PPO --stop=50 + /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_model.py --run=PPO --stop=50 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/custom_keras_model.py --run=DQN --stop=50 + /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_model.py --run=DQN --stop=50 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv + /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatAfterMeEnv docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv + /ray/ci/suppress_output python /ray/rllib/examples/custom_keras_rnn_model.py --run=PPO --stop=50 --env=RepeatInitialEnv docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/parametric_action_cartpole.py --run=PG --stop=50 + /ray/ci/suppress_output python /ray/rllib/examples/parametric_action_cartpole.py --run=PG --stop=50 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/parametric_action_cartpole.py --run=PPO --stop=50 + /ray/ci/suppress_output python /ray/rllib/examples/parametric_action_cartpole.py --run=PPO --stop=50 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/parametric_action_cartpole.py --run=DQN --stop=50 + /ray/ci/suppress_output python /ray/rllib/examples/parametric_action_cartpole.py --run=DQN --stop=50 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_lstm.py + /ray/ci/suppress_output python /ray/rllib/tests/test_lstm.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=PPO + /ray/ci/suppress_output python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=PPO docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=PG + /ray/ci/suppress_output python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=PG docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=DQN + /ray/ci/suppress_output python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=DQN docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=DDPG + /ray/ci/suppress_output python /ray/rllib/examples/batch_norm_model.py --num-iters=1 --run=DDPG docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - 
/ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_multi_agent_env.py + /ray/ci/suppress_output python /ray/rllib/tests/test_multi_agent_env.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_supported_spaces.py + /ray/ci/suppress_output python /ray/rllib/tests/test_supported_spaces.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_env_with_subprocess.py + /ray/ci/suppress_output python /ray/rllib/tests/test_env_with_subprocess.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/tests/test_rollout.sh + /ray/ci/suppress_output /ray/rllib/tests/test_rollout.sh # Run all single-agent regression tests (3x retry each) -for yaml in $(ls $ROOT_DIR/../../python/ray/rllib/tuned_examples/regression_tests); do +for yaml in $(ls $ROOT_DIR/../../rllib/tuned_examples/regression_tests); do docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/run_regression_tests.py \ - /ray/python/ray/rllib/tuned_examples/regression_tests/$yaml + /ray/ci/suppress_output python /ray/rllib/tests/run_regression_tests.py \ + /ray/rllib/tuned_examples/regression_tests/$yaml done # Try a couple times since it's stochastic docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/multiagent_pendulum.py || \ + /ray/ci/suppress_output python /ray/rllib/tests/multiagent_pendulum.py || \ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/multiagent_pendulum.py || \ + /ray/ci/suppress_output python /ray/rllib/tests/multiagent_pendulum.py || \ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/multiagent_pendulum.py + /ray/ci/suppress_output python /ray/rllib/tests/multiagent_pendulum.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/multiagent_cartpole.py --num-iters=2 + /ray/ci/suppress_output python /ray/rllib/examples/multiagent_cartpole.py --num-iters=2 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/multiagent_cartpole.py --num-iters=2 --simple + /ray/ci/suppress_output python /ray/rllib/examples/multiagent_cartpole.py --num-iters=2 --simple docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/multiagent_two_trainers.py --num-iters=2 + /ray/ci/suppress_output python /ray/rllib/examples/multiagent_two_trainers.py --num-iters=2 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_avail_actions_qmix.py + /ray/ci/suppress_output python /ray/rllib/tests/test_avail_actions_qmix.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/cartpole_lstm.py --run=PPO --stop=200 + /ray/ci/suppress_output python /ray/rllib/examples/cartpole_lstm.py --run=PPO --stop=200 docker run --rm 
--shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/cartpole_lstm.py --run=IMPALA --stop=100 + /ray/ci/suppress_output python /ray/rllib/examples/cartpole_lstm.py --run=IMPALA --stop=100 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/cartpole_lstm.py --stop=200 --use-prev-action-reward + /ray/ci/suppress_output python /ray/rllib/examples/cartpole_lstm.py --stop=200 --use-prev-action-reward docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/custom_loss.py --iters=2 + /ray/ci/suppress_output python /ray/rllib/examples/custom_loss.py --iters=2 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/rollout_worker_custom_workflow.py + /ray/ci/suppress_output python /ray/rllib/examples/rollout_worker_custom_workflow.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/eager_execution.py --iters=2 + /ray/ci/suppress_output python /ray/rllib/examples/eager_execution.py --iters=2 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/custom_tf_policy.py --iters=2 + /ray/ci/suppress_output python /ray/rllib/examples/custom_tf_policy.py --iters=2 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/custom_torch_policy.py --iters=2 + /ray/ci/suppress_output python /ray/rllib/examples/custom_torch_policy.py --iters=2 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/rollout_worker_custom_workflow.py + /ray/ci/suppress_output python /ray/rllib/examples/rollout_worker_custom_workflow.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/custom_metrics_and_callbacks.py --num-iters=2 + /ray/ci/suppress_output python /ray/rllib/examples/custom_metrics_and_callbacks.py --num-iters=2 docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/contrib/random_agent/random_agent.py + /ray/ci/suppress_output python /ray/rllib/contrib/random_agent/random_agent.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/twostep_game.py --stop=2000 --run=PG + /ray/ci/suppress_output python /ray/rllib/examples/twostep_game.py --stop=2000 --run=PG docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/twostep_game.py --stop=2000 --run=QMIX + /ray/ci/suppress_output python /ray/rllib/examples/twostep_game.py --stop=2000 --run=QMIX docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/examples/twostep_game.py --stop=2000 --run=APEX_QMIX + /ray/ci/suppress_output python /ray/rllib/examples/twostep_game.py --stop=2000 --run=APEX_QMIX docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output 
/ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env PongDeterministic-v4 \ --run A3C \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2, "use_pytorch": true, "sample_async": false, "model": {"use_lstm": false, "grayscale": true, "zero_mean": false, "dim": 84}, "preprocessor_pref": "rllib"}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env CartPole-v1 \ --run A3C \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2, "use_pytorch": true, "sample_async": false}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env Pendulum-v0 \ --run A3C \ --stop '{"training_iteration": 1}' \ --config '{"num_workers": 2, "use_pytorch": true, "sample_async": false}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output /ray/python/ray/rllib/train.py \ + /ray/ci/suppress_output /ray/rllib/train.py \ --env PongDeterministic-v4 \ --run IMPALA \ --stop='{"timesteps_total": 40000}' \ @@ -467,7 +467,7 @@ docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ --config '{"num_workers": 1, "num_gpus": 0, "num_envs_per_worker": 32, "sample_batch_size": 50, "train_batch_size": 50, "learner_queue_size": 1}' docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/agents/impala/vtrace_test.py + /ray/ci/suppress_output python /ray/rllib/agents/impala/vtrace_test.py docker run --rm --shm-size=${SHM_SIZE} --memory=${MEMORY_SIZE} $DOCKER_SHA \ - /ray/ci/suppress_output python /ray/python/ray/rllib/tests/test_ignore_worker_failure.py + /ray/ci/suppress_output python /ray/rllib/tests/test_ignore_worker_failure.py diff --git a/doc/source/example-a3c.rst b/doc/source/example-a3c.rst index f8a8bfb4c1f3d..821cd71869d02 100644 --- a/doc/source/example-a3c.rst +++ b/doc/source/example-a3c.rst @@ -9,7 +9,7 @@ View the `code for this example`_. .. _`A3C`: https://arxiv.org/abs/1602.01783 .. _`Universe Starter Agent`: https://github.com/openai/universe-starter-agent -.. _`code for this example`: https://github.com/ray-project/ray/tree/master/python/ray/rllib/agents/a3c +.. _`code for this example`: https://github.com/ray-project/ray/tree/master/rllib/agents/a3c .. note:: diff --git a/doc/source/example-evolution-strategies.rst b/doc/source/example-evolution-strategies.rst index 38268bb4676ed..875750649e527 100644 --- a/doc/source/example-evolution-strategies.rst +++ b/doc/source/example-evolution-strategies.rst @@ -11,7 +11,7 @@ To run the application, first install some dependencies. You can view the `code for this example`_. -.. _`code for this example`: https://github.com/ray-project/ray/tree/master/python/ray/rllib/agents/es +.. _`code for this example`: https://github.com/ray-project/ray/tree/master/rllib/agents/es The script can be run as follows. Note that the configuration is tuned to work on the ``Humanoid-v1`` gym environment. 
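The ``train.py`` entry point being re-pathed throughout this patch is the same script the CI commands above drive via ``--env``, ``--run``, ``--stop``, and ``--config``. As a minimal sketch only (it is not part of the patch; the stopping criterion and worker count are illustrative assumptions), an equivalent ES run on ``Humanoid-v1`` can also be launched programmatically through Tune, mirroring those CLI flags:

.. code-block:: python

    import ray
    from ray import tune

    ray.init()

    # Roughly the programmatic counterpart of:
    #   rllib/train.py --env Humanoid-v1 --run ES --stop '{"training_iteration": 1}'
    # The config keys mirror the ES invocations in the CI script above.
    tune.run(
        "ES",
        stop={"training_iteration": 1},
        config={"env": "Humanoid-v1", "num_workers": 2},
    )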
diff --git a/doc/source/example-policy-gradient.rst b/doc/source/example-policy-gradient.rst index 9b58575044c3b..f21e2cd99df06 100644 --- a/doc/source/example-policy-gradient.rst +++ b/doc/source/example-policy-gradient.rst @@ -39,4 +39,4 @@ Many of the TensorBoard metrics are also printed to the console, but you might find it easier to visualize and compare between runs using the TensorBoard UI. .. _`TensorFlow with GPU support`: https://www.tensorflow.org/install/ -.. _`code for this example`: https://github.com/ray-project/ray/tree/master/python/ray/rllib/agents/ppo +.. _`code for this example`: https://github.com/ray-project/ray/tree/master/rllib/agents/ppo diff --git a/doc/source/rllib-algorithms.rst b/doc/source/rllib-algorithms.rst index 17b1005388bbc..ca3d19bd1459d 100644 --- a/doc/source/rllib-algorithms.rst +++ b/doc/source/rllib-algorithms.rst @@ -7,10 +7,10 @@ High-throughput architectures Distributed Prioritized Experience Replay (Ape-X) ------------------------------------------------- `[paper] `__ -`[implementation] `__ -Ape-X variations of DQN, DDPG, and QMIX (`APEX_DQN `__, `APEX_DDPG `__, `APEX_QMIX `__) use a single GPU learner and many CPU workers for experience collection. Experience collection can scale to hundreds of CPU workers due to the distributed prioritization of experience prior to storage in replay buffers. +`[implementation] `__ +Ape-X variations of DQN, DDPG, and QMIX (`APEX_DQN `__, `APEX_DDPG `__, `APEX_QMIX `__) use a single GPU learner and many CPU workers for experience collection. Experience collection can scale to hundreds of CPU workers due to the distributed prioritization of experience prior to storage in replay buffers. -Tuned examples: `PongNoFrameskip-v4 `__, `Pendulum-v0 `__, `MountainCarContinuous-v0 `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__. +Tuned examples: `PongNoFrameskip-v4 `__, `Pendulum-v0 `__, `MountainCarContinuous-v0 `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__. **Atari results @10M steps**: `more details `__ @@ -40,7 +40,7 @@ SpaceInvaders 646 ~300 **Ape-X specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/dqn/apex.py +.. literalinclude:: ../../rllib/agents/dqn/apex.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ @@ -49,10 +49,10 @@ Importance Weighted Actor-Learner Architecture (IMPALA) ------------------------------------------------------- `[paper] `__ -`[implementation] `__ +`[implementation] `__ In IMPALA, a central learner runs SGD in a tight loop while asynchronously pulling sample batches from many actor processes. RLlib's IMPALA implementation uses DeepMind's reference `V-trace code `__. Note that we do not provide a deep residual network out of the box, but one can be plugged in as a `custom model `__. Multiple learner GPUs and experience replay are also supported. -Tuned examples: `PongNoFrameskip-v4 `__, `vectorized configuration `__, `multi-gpu configuration `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__ +Tuned examples: `PongNoFrameskip-v4 `__, `vectorized configuration `__, `multi-gpu configuration `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__ **Atari results @10M steps**: `more details `__ @@ -83,7 +83,7 @@ SpaceInvaders 843 ~300 **IMPALA-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/impala/impala.py +.. 
literalinclude:: ../../rllib/agents/impala/impala.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ @@ -92,16 +92,16 @@ Asynchronous Proximal Policy Optimization (APPO) ------------------------------------------------ `[paper] `__ -`[implementation] `__ +`[implementation] `__ We include an asynchronous variant of Proximal Policy Optimization (PPO) based on the IMPALA architecture. This is similar to IMPALA but using a surrogate policy loss with clipping. Compared to synchronous PPO, APPO is more efficient in wall-clock time due to its use of asynchronous sampling. Using a clipped loss also allows for multiple SGD passes, and therefore the potential for better sample efficiency compared to IMPALA. V-trace can also be enabled to correct for off-policy samples. APPO is not always more efficient; it is often better to simply use `PPO `__ or `IMPALA `__. -Tuned examples: `PongNoFrameskip-v4 `__ +Tuned examples: `PongNoFrameskip-v4 `__ **APPO-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/ppo/appo.py +.. literalinclude:: ../../rllib/agents/ppo/appo.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ @@ -111,10 +111,10 @@ Gradient-based Advantage Actor-Critic (A2C, A3C) --------------------------------- -`[paper] `__ `[implementation] `__ +`[paper] `__ `[implementation] `__ RLlib implements A2C and A3C using SyncSamplesOptimizer and AsyncGradientsOptimizer respectively for policy optimization. These algorithms scale to up to 16-32 worker processes depending on the environment. Both a TensorFlow (LSTM), and PyTorch version are available. -Tuned examples: `PongDeterministic-v4 `__, `PyTorch version `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__ +Tuned examples: `PongDeterministic-v4 `__, `PyTorch version `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__ .. tip:: Consider using `IMPALA <#importance-weighted-actor-learner-architecture-impala>`__ for faster training with similar timestep efficiency. @@ -132,31 +132,31 @@ SpaceInvaders 692 ~600 **A3C-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/a3c/a3c.py +.. literalinclude:: ../../rllib/agents/a3c/a3c.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ Deep Deterministic Policy Gradients (DDPG, TD3) ----------------------------------------------- -`[paper] `__ `[implementation] `__ +`[paper] `__ `[implementation] `__ DDPG is implemented similarly to DQN (below). The algorithm can be scaled by increasing the number of workers, switching to AsyncGradientsOptimizer, or using Ape-X. The improvements from `TD3 `__ are available though not enabled by default. -Tuned examples: `Pendulum-v0 `__, `MountainCarContinuous-v0 `__, `HalfCheetah-v2 `__, `TD3 Pendulum-v0 `__, `TD3 InvertedPendulum-v2 `__, `TD3 Mujoco suite (Ant-v2, HalfCheetah-v2, Hopper-v2, Walker2d-v2) `__. +Tuned examples: `Pendulum-v0 `__, `MountainCarContinuous-v0 `__, `HalfCheetah-v2 `__, `TD3 Pendulum-v0 `__, `TD3 InvertedPendulum-v2 `__, `TD3 Mujoco suite (Ant-v2, HalfCheetah-v2, Hopper-v2, Walker2d-v2) `__. **DDPG-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/ddpg/ddpg.py +.. 
literalinclude:: ../../rllib/agents/ddpg/ddpg.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ Deep Q Networks (DQN, Rainbow, Parametric DQN) ---------------------------------------------- -`[paper] `__ `[implementation] `__ +`[paper] `__ `[implementation] `__ RLlib DQN is implemented using the SyncReplayOptimizer. The algorithm can be scaled by increasing the number of workers, using the AsyncGradientsOptimizer for async DQN, or using Ape-X. Memory usage is reduced by compressing samples in the replay buffer with LZ4. All of the DQN improvements evaluated in `Rainbow `__ are available, though not all are enabled by default. See also how to use `parametric-actions in DQN `__. -Tuned examples: `PongDeterministic-v4 `__, `Rainbow configuration `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__, `with Dueling and Double-Q `__, `with Distributional DQN `__. +Tuned examples: `PongDeterministic-v4 `__, `Rainbow configuration `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__, `with Dueling and Double-Q `__, `with Distributional DQN `__. .. tip:: Consider using `Ape-X <#distributed-prioritized-experience-replay-ape-x>`__ for faster training with similar timestep efficiency. @@ -174,30 +174,30 @@ SpaceInvaders 650 1001 1025 **DQN-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/dqn/dqn.py +.. literalinclude:: ../../rllib/agents/dqn/dqn.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ Policy Gradients ---------------- -`[paper] `__ `[implementation] `__ We include a vanilla policy gradients implementation as an example algorithm in both TensorFlow and PyTorch. This is usually outperformed by PPO. +`[paper] `__ `[implementation] `__ We include a vanilla policy gradients implementation as an example algorithm in both TensorFlow and PyTorch. This is usually outperformed by PPO. -Tuned examples: `CartPole-v0 `__ +Tuned examples: `CartPole-v0 `__ **PG-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/pg/pg.py +.. literalinclude:: ../../rllib/agents/pg/pg.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ Proximal Policy Optimization (PPO) ---------------------------------- -`[paper] `__ `[implementation] `__ +`[paper] `__ `[implementation] `__ PPO's clipped objective supports multiple SGD passes over the same batch of experiences. RLlib's multi-GPU optimizer pins that data in GPU memory to avoid unnecessary transfers from host memory, substantially improving performance over a naive implementation. RLlib's PPO scales out using multiple workers for experience collection, and also with multiple GPUs for SGD. -Tuned examples: `Humanoid-v1 `__, `Hopper-v1 `__, `Pendulum-v0 `__, `PongDeterministic-v4 `__, `Walker2d-v1 `__, `HalfCheetah-v2 `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__ +Tuned examples: `Humanoid-v1 `__, `Hopper-v1 `__, `Pendulum-v0 `__, `PongDeterministic-v4 `__, `Walker2d-v1 `__, `HalfCheetah-v2 `__, `{BeamRider,Breakout,Qbert,SpaceInvaders}NoFrameskip-v4 `__ **Atari results**: `more details `__ @@ -227,22 +227,22 @@ HalfCheetah 9664 ~7700 **PPO-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/ppo/ppo.py +.. 
literalinclude:: ../../rllib/agents/ppo/ppo.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ Soft Actor Critic (SAC) ------------------------ -`[paper] `__ `[implementation] `__ +`[paper] `__ `[implementation] `__ RLlib's soft-actor critic implementation is ported from the `official SAC repo `__ to better integrate with RLlib APIs. Note that SAC has two fields to configure for custom models: ``policy_model`` and ``Q_model``, and currently has no support for non-continuous action distributions. It is also currently *experimental*. -Tuned examples: `Pendulum-v0 `__ +Tuned examples: `Pendulum-v0 `__ **SAC-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/sac/sac.py +.. literalinclude:: ../../rllib/agents/sac/sac.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ @@ -252,24 +252,24 @@ Derivative-free Augmented Random Search (ARS) ----------------------------- -`[paper] `__ `[implementation] `__ +`[paper] `__ `[implementation] `__ ARS is a random search method for training linear policies for continuous control problems. Code here is adapted from https://github.com/modestyachts/ARS to integrate with RLlib APIs. -Tuned examples: `CartPole-v0 `__, `Swimmer-v2 `__ +Tuned examples: `CartPole-v0 `__, `Swimmer-v2 `__ **ARS-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/ars/ars.py +.. literalinclude:: ../../rllib/agents/ars/ars.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ Evolution Strategies -------------------- -`[paper] `__ `[implementation] `__ +`[paper] `__ `[implementation] `__ Code here is adapted from https://github.com/openai/evolution-strategies-starter to execute in the distributed setting with Ray. -Tuned examples: `Humanoid-v1 `__ +Tuned examples: `Humanoid-v1 `__ **Scalability:** @@ -280,22 +280,22 @@ Tuned examples: `Humanoid-v1 `__): -.. literalinclude:: ../../python/ray/rllib/agents/es/es.py +.. literalinclude:: ../../rllib/agents/es/es.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ QMIX Monotonic Value Factorisation (QMIX, VDN, IQN) --------------------------------------------------- -`[paper] `__ `[implementation] `__ Q-Mix is a specialized multi-agent algorithm. Code here is adapted from https://github.com/oxwhirl/pymarl_alpha to integrate with RLlib multi-agent APIs. To use Q-Mix, you must specify an agent `grouping `__ in the environment (see the `two-step game example `__). Currently, all agents in the group must be homogeneous. The algorithm can be scaled by increasing the number of workers or using Ape-X. +`[paper] `__ `[implementation] `__ Q-Mix is a specialized multi-agent algorithm. Code here is adapted from https://github.com/oxwhirl/pymarl_alpha to integrate with RLlib multi-agent APIs. To use Q-Mix, you must specify an agent `grouping `__ in the environment (see the `two-step game example `__). Currently, all agents in the group must be homogeneous. The algorithm can be scaled by increasing the number of workers or using Ape-X. -Q-Mix is implemented in `PyTorch `__ and is currently *experimental*. +Q-Mix is implemented in `PyTorch `__ and is currently *experimental*. -Tuned examples: `Two-step game `__ +Tuned examples: `Two-step game `__ **QMIX-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/qmix/qmix.py +.. 
literalinclude:: ../../rllib/agents/qmix/qmix.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ @@ -303,13 +303,13 @@ Tuned examples: `Two-step game `__ `[implementation] `__ MARWIL is a hybrid imitation learning and policy gradient algorithm suitable for training on batched historical data. When the ``beta`` hyperparameter is set to zero, the MARWIL objective reduces to vanilla imitation learning. MARWIL requires the `offline datasets API `__ to be used. +`[paper] `__ `[implementation] `__ MARWIL is a hybrid imitation learning and policy gradient algorithm suitable for training on batched historical data. When the ``beta`` hyperparameter is set to zero, the MARWIL objective reduces to vanilla imitation learning. MARWIL requires the `offline datasets API `__ to be used. -Tuned examples: `CartPole-v0 `__ +Tuned examples: `CartPole-v0 `__ **MARWIL-specific configs** (see also `common configs `__): -.. literalinclude:: ../../python/ray/rllib/agents/marwil/marwil.py +.. literalinclude:: ../../rllib/agents/marwil/marwil.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ diff --git a/doc/source/rllib-concepts.rst b/doc/source/rllib-concepts.rst index 7a37f833b9411..32aed38514821 100644 --- a/doc/source/rllib-concepts.rst +++ b/doc/source/rllib-concepts.rst @@ -6,9 +6,9 @@ This page describes the internal concepts used to implement algorithms in RLlib. Policies -------- -Policy classes encapsulate the core numerical components of RL algorithms. This typically includes the policy model that determines actions to take, a trajectory postprocessor for experiences, and a loss function to improve the policy given postprocessed experiences. For a simple example, see the policy gradients `policy definition `__. +Policy classes encapsulate the core numerical components of RL algorithms. This typically includes the policy model that determines actions to take, a trajectory postprocessor for experiences, and a loss function to improve the policy given postprocessed experiences. For a simple example, see the policy gradients `policy definition `__. -Most interaction with deep learning frameworks is isolated to the `Policy interface `__, allowing RLlib to support multiple frameworks. To simplify the definition of policies, RLlib includes `Tensorflow <#building-policies-in-tensorflow>`__ and `PyTorch-specific <#building-policies-in-pytorch>`__ templates. You can also write your own from scratch. Here is an example: +Most interaction with deep learning frameworks is isolated to the `Policy interface `__, allowing RLlib to support multiple frameworks. To simplify the definition of policies, RLlib includes `Tensorflow <#building-policies-in-tensorflow>`__ and `PyTorch-specific <#building-policies-in-pytorch>`__ templates. You can also write your own from scratch. Here is an example: .. code-block:: python @@ -153,7 +153,7 @@ We can create a `Trainer <#trainers>`__ and try running this policy on a toy env tune.run(MyTrainer, config={"env": "CartPole-v0", "num_workers": 2}) -If you run the above snippet `(runnable file here) `__, you'll probably notice that CartPole doesn't learn so well: +If you run the above snippet `(runnable file here) `__, you'll probably notice that CartPole doesn't learn so well: .. 
code-block:: bash @@ -197,7 +197,7 @@ You might be wondering how RLlib makes the advantages placeholder automatically **Example 1: Proximal Policy Optimization** -In the above section you saw how to compose a simple policy gradient algorithm with RLlib. In this example, we'll dive into how PPO was built with RLlib and how you can modify it. First, check out the `PPO trainer definition `__: +In the above section you saw how to compose a simple policy gradient algorithm with RLlib. In this example, we'll dive into how PPO was built with RLlib and how you can modify it. First, check out the `PPO trainer definition `__: .. code-block:: python @@ -271,7 +271,7 @@ Now let's take a look at the ``update_kl`` function. This is used to adaptively # multi-agent trainer.workers.local_worker().foreach_trainable_policy(update) -The ``update_kl`` method on the policy is defined in `PPOTFPolicy `__ via the ``KLCoeffMixin``, along with several other advanced features. Let's look at each new feature used by the policy: +The ``update_kl`` method on the policy is defined in `PPOTFPolicy `__ via the ``KLCoeffMixin``, along with several other advanced features. Let's look at each new feature used by the policy: .. code-block:: python @@ -347,7 +347,7 @@ In PPO we run ``setup_mixins`` before the loss function is called (i.e., ``befor **Example 2: Deep Q Networks** -Let's look at how to implement a different family of policies, by looking at the `SimpleQ policy definition `__: +Let's look at how to implement a different family of policies, by looking at the `SimpleQ policy definition `__: .. code-block:: python @@ -440,12 +440,12 @@ While RLlib runs all TF operations in graph mode, you can still leverage TensorF return penalty - tf.reduce_mean(policy.action_dist.logp(actions) * rewards) -You can find a runnable file for the above eager execution example `here `__. +You can find a runnable file for the above eager execution example `here `__. Building Policies in PyTorch ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Defining a policy in PyTorch is quite similar to that for TensorFlow (and the process of defining a trainer given a Torch policy is exactly the same). Here's a simple example of a trivial torch policy `(runnable file here) `__: +Defining a policy in PyTorch is quite similar to that for TensorFlow (and the process of defining a trainer given a Torch policy is exactly the same). Here's a simple example of a trivial torch policy `(runnable file here) `__: .. code-block:: python @@ -465,7 +465,7 @@ Defining a policy in PyTorch is quite similar to that for TensorFlow (and the pr name="MyTorchPolicy", loss_fn=policy_gradient_loss) -Now, building on the TF examples above, let's look at how the `A3C torch policy `__ is defined: +Now, building on the TF examples above, let's look at how the `A3C torch policy `__ is defined: .. code-block:: python @@ -535,7 +535,7 @@ Now, building on the TF examples above, let's look at how the `A3C torch policy _, _, vf, _ = self.model({"obs": obs}, []) return vf.detach().cpu().numpy().squeeze() -You can find the full policy definition in `a3c_torch_policy.py `__. +You can find the full policy definition in `a3c_torch_policy.py `__. In summary, the main differences between the PyTorch and TensorFlow policy builder functions is that the TF loss and stats functions are built symbolically when the policy is initialized, whereas for PyTorch these functions are called imperatively each time they are used. 
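To make the symbolic-versus-imperative contrast above concrete, here is a small, self-contained PyTorch sketch of a policy-gradient loss evaluated eagerly on each train batch. It is a generic illustration under assumed names (``logits_fn`` and the batch keys are stand-ins for the example, not RLlib's ``build_torch_policy`` signature):

.. code-block:: python

    import torch
    import torch.nn.functional as F

    def policy_gradient_loss(logits_fn, batch):
        # Runs imperatively every time it is called, unlike a TF policy
        # whose loss graph is constructed once when the policy is built.
        logits = logits_fn(batch["obs"])  # [batch, num_actions]
        log_probs = F.log_softmax(logits, dim=-1)
        taken = log_probs.gather(
            1, batch["actions"].long().unsqueeze(1)).squeeze(1)
        # Vanilla policy gradient: reward-weighted negative log-likelihood.
        return -(taken * batch["rewards"]).mean()

    # Toy usage with a linear layer standing in for the policy model.
    model = torch.nn.Linear(4, 2)
    batch = {
        "obs": torch.randn(8, 4),
        "actions": torch.randint(0, 2, (8,)),
        "rewards": torch.randn(8),
    }
    policy_gradient_loss(model, batch).backward()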
@@ -559,9 +559,9 @@ You can use the ``with_updates`` method on Trainers and Policy objects built wit Policy Evaluation ----------------- -Given an environment and policy, policy evaluation produces `batches `__ of experiences. This is your classic "environment interaction loop". Efficient policy evaluation can be burdensome to get right, especially when leveraging vectorization, RNNs, or when operating in a multi-agent environment. RLlib provides a `RolloutWorker `__ class that manages all of this, and this class is used in most RLlib algorithms. +Given an environment and policy, policy evaluation produces `batches `__ of experiences. This is your classic "environment interaction loop". Efficient policy evaluation can be burdensome to get right, especially when leveraging vectorization, RNNs, or when operating in a multi-agent environment. RLlib provides a `RolloutWorker `__ class that manages all of this, and this class is used in most RLlib algorithms. -You can use rollout workers standalone to produce batches of experiences. This can be done by calling ``worker.sample()`` on a worker instance, or ``worker.sample.remote()`` in parallel on worker instances created as Ray actors (see `WorkerSet `__). +You can use rollout workers standalone to produce batches of experiences. This can be done by calling ``worker.sample()`` on a worker instance, or ``worker.sample.remote()`` in parallel on worker instances created as Ray actors (see `WorkerSet `__). Here is an example of creating a set of rollout workers and using them gather experiences in parallel. The trajectories are concatenated, the policy learns on the trajectory batch, and then we broadcast the policy weights to the workers for the next round of rollouts: @@ -591,9 +591,9 @@ Here is an example of creating a set of rollout workers and using them gather ex Policy Optimization ------------------- -Similar to how a `gradient-descent optimizer `__ can be used to improve a model, RLlib's `policy optimizers `__ implement different strategies for improving a policy. +Similar to how a `gradient-descent optimizer `__ can be used to improve a model, RLlib's `policy optimizers `__ implement different strategies for improving a policy. -For example, in A3C you'd want to compute gradients asynchronously on different workers, and apply them to a central policy replica. This strategy is implemented by the `AsyncGradientsOptimizer `__. Another alternative is to gather experiences synchronously in parallel and optimize the model centrally, as in `SyncSamplesOptimizer `__. Policy optimizers abstract these strategies away into reusable modules. +For example, in A3C you'd want to compute gradients asynchronously on different workers, and apply them to a central policy replica. This strategy is implemented by the `AsyncGradientsOptimizer `__. Another alternative is to gather experiences synchronously in parallel and optimize the model centrally, as in `SyncSamplesOptimizer `__. Policy optimizers abstract these strategies away into reusable modules. 
This is how the example in the previous section looks when written using a policy optimizer: diff --git a/doc/source/rllib-dev.rst b/doc/source/rllib-dev.rst index 5425879fdeea7..d219df644c03c 100644 --- a/doc/source/rllib-dev.rst +++ b/doc/source/rllib-dev.rst @@ -23,20 +23,20 @@ Feature development and upcoming priorities are tracked on the `RLlib project bo Benchmarks ---------- -A number of training run results are available in the `rl-experiments repo `__, and there is also a list of working hyperparameter configurations in `tuned_examples `__. Benchmark results are extremely valuable to the community, so if you happen to have results that may be of interest, consider making a pull request to either repo. +A number of training run results are available in the `rl-experiments repo `__, and there is also a list of working hyperparameter configurations in `tuned_examples `__. Benchmark results are extremely valuable to the community, so if you happen to have results that may be of interest, consider making a pull request to either repo. Contributing Algorithms ----------------------- These are the guidelines for merging new algorithms into RLlib: -* Contributed algorithms (`rllib/contrib `__): +* Contributed algorithms (`rllib/contrib `__): - must subclass Trainer and implement the ``_train()`` method - must include a lightweight test (`example `__) to ensure the algorithm runs - should include tuned hyperparameter examples and documentation - should offer functionality not present in existing algorithms -* Fully integrated algorithms (`rllib/agents `__) have the following additional requirements: +* Fully integrated algorithms (`rllib/agents `__) have the following additional requirements: - must fully implement the Trainer API - must offer substantial new functionality not possible to add to other algorithms - should support custom models and preprocessors @@ -46,14 +46,14 @@ Both integrated and contributed algorithms ship with the ``ray`` PyPI package, a How to add an algorithm to ``contrib`` ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -It takes just two changes to add an algorithm to `contrib `__. A minimal example can be found `here `__. First, subclass `Trainer `__ and implement the ``_init`` and ``_train`` methods: +It takes just two changes to add an algorithm to `contrib `__. A minimal example can be found `here `__. First, subclass `Trainer `__ and implement the ``_init`` and ``_train`` methods: -.. literalinclude:: ../../python/ray/rllib/contrib/random_agent/random_agent.py +.. literalinclude:: ../../rllib/contrib/random_agent/random_agent.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ -Second, register the trainer with a name in `contrib/registry.py `__. +Second, register the trainer with a name in `contrib/registry.py `__. .. code-block:: python diff --git a/doc/source/rllib-env.rst b/doc/source/rllib-env.rst index b95821a62f7d6..618e7a2217b3e 100644 --- a/doc/source/rllib-env.rst +++ b/doc/source/rllib-env.rst @@ -63,7 +63,7 @@ You can also register a custom env creator function with a string name. This fun register_env("my_env", env_creator) trainer = ppo.PPOTrainer(env="my_env") -For a full runnable code example using the custom environment API, see `custom_env.py `__. +For a full runnable code example using the custom environment API, see `custom_env.py `__. .. 
warning:: @@ -93,7 +93,7 @@ In the above example, note that the ``env_creator`` function takes in an ``env_c OpenAI Gym ---------- -RLlib uses Gym as its environment interface for single-agent training. For more information on how to implement a custom Gym environment, see the `gym.Env class definition `__. You may find the `SimpleCorridor `__ example useful as a reference. +RLlib uses Gym as its environment interface for single-agent training. For more information on how to implement a custom Gym environment, see the `gym.Env class definition `__. You may find the `SimpleCorridor `__ example useful as a reference. Performance ~~~~~~~~~~~ @@ -102,7 +102,7 @@ There are two ways to scale experience collection with Gym environments: 1. **Vectorization within a single process:** Though many envs can achieve high frame rates per core, their throughput is limited in practice by policy evaluation between steps. For example, even small TensorFlow models incur a couple milliseconds of latency to evaluate. This can be worked around by creating multiple envs per process and batching policy evaluations across these envs. - You can configure ``{"num_envs_per_worker": M}`` to have RLlib create ``M`` concurrent environments per worker. RLlib auto-vectorizes Gym environments via `VectorEnv.wrap() `__. + You can configure ``{"num_envs_per_worker": M}`` to have RLlib create ``M`` concurrent environments per worker. RLlib auto-vectorizes Gym environments via `VectorEnv.wrap() `__. 2. **Distribute across multiple processes:** You can also have RLlib create multiple processes (Ray actors) for experience collection. In most algorithms this can be controlled by setting the ``{"num_workers": N}`` config. @@ -118,7 +118,7 @@ Some environments may be very resource-intensive to create. RLlib will create `` Vectorized ---------- -RLlib will auto-vectorize Gym envs for batch evaluation if the ``num_envs_per_worker`` config is set, or you can define a custom environment class that subclasses `VectorEnv `__ to implement ``vector_step()`` and ``vector_reset()``. +RLlib will auto-vectorize Gym envs for batch evaluation if the ``num_envs_per_worker`` config is set, or you can define a custom environment class that subclasses `VectorEnv `__ to implement ``vector_step()`` and ``vector_reset()``. Note that auto-vectorization only applies to policy inference by default. This means that policy inference will be batched, but your envs will still be stepped one at a time. If you would like your envs to be stepped in parallel, you can set ``"remote_worker_envs": True``. This will create env instances in Ray actors and step them in parallel. These remote processes introduce communication overheads, so this only helps if your env is very expensive to step / reset. @@ -135,7 +135,7 @@ A multi-agent environment is one which has multiple acting entities per step, e. .. image:: multi-agent.svg -The environment itself must subclass the `MultiAgentEnv `__ interface, which can returns observations and rewards from multiple ready agents per step: +The environment itself must subclass the `MultiAgentEnv `__ interface, which can return observations and rewards from multiple ready agents per step: .. code-block:: python @@ -187,7 +187,7 @@ If all the agents will be using the same algorithm class to train, then you can RLlib will create three distinct policies and route agent decisions to its bound policy. When an agent first appears in the env, ``policy_mapping_fn`` will be called to determine which policy it is bound to.
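To make the mapping concrete, a configuration along these lines would produce the three policies mentioned above. This is a minimal sketch: the observation/action spaces and agent id scheme are made up, and it assumes ``my_multiagent_env`` has already been registered via ``register_env``:

.. code-block:: python

    import random

    from gym.spaces import Box, Discrete
    from ray.rllib.agents.ppo import PPOTrainer

    # Placeholder observation/action spaces for this sketch; a real env
    # defines these itself.
    car_obs, car_act = Box(-1.0, 1.0, shape=(4,)), Discrete(2)
    light_obs, light_act = Box(-1.0, 1.0, shape=(2,)), Discrete(3)

    trainer = PPOTrainer(env="my_multiagent_env", config={
        "multiagent": {
            # policy id -> (policy class or None for the trainer default,
            #               observation space, action space, extra config).
            "policies": {
                "car1": (None, car_obs, car_act, {"gamma": 0.85}),
                "car2": (None, car_obs, car_act, {"gamma": 0.99}),
                "traffic_light": (None, light_obs, light_act, {}),
            },
            # Called when an agent first appears in the env to bind it to
            # one of the policies above.
            "policy_mapping_fn": lambda agent_id:
                "traffic_light" if agent_id.startswith("traffic_light")
                else random.choice(["car1", "car2"]),
        },
    })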
RLlib reports separate training statistics for each policy in the return from ``train()``, along with the combined reward. -Here is a simple `example training script `__ in which you can vary the number of agents and policies in the environment. For how to use multiple training methods at once (here DQN and PPO), see the `two-trainer example `__. Metrics are reported for each policy separately, for example: +Here is a simple `example training script `__ in which you can vary the number of agents and policies in the environment. For how to use multiple training methods at once (here DQN and PPO), see the `two-trainer example `__. Metrics are reported for each policy separately, for example: .. code-block:: bash :emphasize-lines: 6,14,22 @@ -222,7 +222,7 @@ To scale to hundreds of agents, MultiAgentEnv batches policy evaluations across Rock Paper Scissors Example ~~~~~~~~~~~~~~~~~~~~~~~~~~~ -The `rock_paper_scissors_multiagent.py `__ example demonstrates several types of policies competing against each other: heuristic policies of repeating the same move, beating the last opponent move, and learned LSTM and feedforward policies. +The `rock_paper_scissors_multiagent.py `__ example demonstrates several types of policies competing against each other: heuristic policies of repeating the same move, beating the last opponent move, and learned LSTM and feedforward policies. .. figure:: rock-paper-scissors.png @@ -259,7 +259,7 @@ This can be implemented as a multi-agent environment with three types of agents. In this setup, the appropriate rewards for training lower-level agents must be provided by the multi-agent env implementation. The environment class is also responsible for routing between the agents, e.g., conveying `goals `__ from higher-level agents to lower-level agents as part of the lower-level agent observation. -See this file for a runnable example: `hierarchical_training.py `__. +See this file for a runnable example: `hierarchical_training.py `__. Variable-Sharing Between Policies ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -278,7 +278,7 @@ RLlib will create each policy's model in a separate ``tf.variable_scope``. Howev auxiliary_name_scope=False): -There is a full example of this in the `example training script `__. +There is a full example of this in the `example training script `__. Implementing a Centralized Critic ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -300,14 +300,14 @@ Implementing a centralized critic that takes as input the observations and actio self.critic_network, feed_dict={"obs": global_obs_batch}) return sample_batch -2. Updating the critic: the centralized critic loss can be added to the loss of the custom policy, the same as with any other value function. For an example of defining loss inputs, see the `PGPolicy example `__. +2. Updating the critic: the centralized critic loss can be added to the loss of the custom policy, the same as with any other value function. For an example of defining loss inputs, see the `PGPolicy example `__. Grouping Agents ~~~~~~~~~~~~~~~ It is common to have groups of agents in multi-agent RL. RLlib treats agent groups like a single agent with a Tuple action and observation space. The group agent can then be assigned to a single policy for centralized execution, or to specialized multi-agent policies such as `Q-Mix `__ that implement centralized training but decentralized execution. You can use the ``MultiAgentEnv.with_agent_groups()`` method to define these groups: -.. literalinclude:: ../../python/ray/rllib/env/multi_agent_env.py +.. 
literalinclude:: ../../rllib/env/multi_agent_env.py :language: python :start-after: __grouping_doc_begin__ :end-before: __grouping_doc_end__ @@ -319,9 +319,9 @@ Interfacing with External Agents In many situations, it does not make sense for an environment to be "stepped" by RLlib. For example, if a policy is to be used in a web serving system, then it is more natural for an agent to query a service that serves policy decisions, and for that service to learn from experience over time. This case also naturally arises with **external simulators** that run independently outside the control of RLlib, but may still want to leverage RLlib for training. -RLlib provides the `ExternalEnv `__ class for this purpose. Unlike other envs, ExternalEnv has its own thread of control. At any point, agents on that thread can query the current policy for decisions via ``self.get_action()`` and reports rewards via ``self.log_returns()``. This can be done for multiple concurrent episodes as well. +RLlib provides the `ExternalEnv `__ class for this purpose. Unlike other envs, ExternalEnv has its own thread of control. At any point, agents on that thread can query the current policy for decisions via ``self.get_action()`` and report rewards via ``self.log_returns()``. This can be done for multiple concurrent episodes as well. -ExternalEnv can be used to implement a simple REST policy `server `__ that learns over time using RLlib. In this example RLlib runs with ``num_workers=0`` to avoid port allocation issues, but in principle this could be scaled by increasing ``num_workers``. +ExternalEnv can be used to implement a simple REST policy `server `__ that learns over time using RLlib. In this example RLlib runs with ``num_workers=0`` to avoid port allocation issues, but in principle this could be scaled by increasing ``num_workers``. Logging off-policy actions ~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -333,7 +333,7 @@ Data ingest The ``log_action`` API of ExternalEnv can be used to ingest data from offline logs. The pattern would be as follows: First, some policy is followed to produce experience data which is stored in some offline storage system. Then, RLlib creates a number of workers that use an ExternalEnv to read the logs in parallel and ingest the experiences. After a round of training completes, the new policy can be deployed to collect more experiences. -Note that envs can read from different partitions of the logs based on the ``worker_index`` attribute of the `env context `__ passed into the environment constructor. +Note that envs can read from different partitions of the logs based on the ``worker_index`` attribute of the `env context `__ passed into the environment constructor. .. seealso:: @@ -342,4 +342,4 @@ Note that envs can read from different partitions of the logs based on the ``wor Advanced Integrations --------------------- -For more complex / high-performance environment integrations, you can instead extend the low-level `BaseEnv `__ class. This low-level API models multiple agents executing asynchronously in multiple environments. A call to ``BaseEnv:poll()`` returns observations from ready agents keyed by their environment and agent ids, and actions for those agents are sent back via ``BaseEnv:send_actions()``. BaseEnv is used to implement all the other env types in RLlib, so it offers a superset of their functionality. For example, ``BaseEnv`` is used to implement dynamic batching of observations for inference over `multiple simulator actors `__.
+For more complex / high-performance environment integrations, you can instead extend the low-level `BaseEnv `__ class. This low-level API models multiple agents executing asynchronously in multiple environments. A call to ``BaseEnv:poll()`` returns observations from ready agents keyed by their environment and agent ids, and actions for those agents are sent back via ``BaseEnv:send_actions()``. BaseEnv is used to implement all the other env types in RLlib, so it offers a superset of their functionality. For example, ``BaseEnv`` is used to implement dynamic batching of observations for inference over `multiple simulator actors `__. diff --git a/doc/source/rllib-examples.rst b/doc/source/rllib-examples.rst index 9e47cc18f8eca..8c6c61cf78b22 100644 --- a/doc/source/rllib-examples.rst +++ b/doc/source/rllib-examples.rst @@ -8,7 +8,7 @@ If any example is broken, or if you'd like to add an example to this page, feel Tuned Examples -------------- -- `Tuned examples `__: +- `Tuned examples `__: Collection of tuned algorithm hyperparameters. - `Atari benchmarks `__: Collection of reasonably optimized Atari results. @@ -16,56 +16,56 @@ Tuned Examples Training Workflows ------------------ -- `Custom training workflows `__: +- `Custom training workflows `__: Example of how to use Tune's support for custom training functions to implement custom training workflows. - `Curriculum learning `__: Example of how to adjust the configuration of an environment over time. -- `Custom metrics `__: +- `Custom metrics `__: Example of how to output custom training metrics to TensorBoard. -- `Using rollout workers directly for control over the whole training workflow `__: +- `Using rollout workers directly for control over the whole training workflow `__: Example of how to use RLlib's lower-level building blocks to implement a fully customized training workflow. Custom Envs and Models ---------------------- -- `Registering a custom env and model `__: +- `Registering a custom env and model `__: Example of defining and registering a gym env and model for use with RLlib. -- `Custom Keras model `__: +- `Custom Keras model `__: Example of using a custom Keras model. -- `Custom Keras RNN model `__: +- `Custom Keras RNN model `__: Example of using a custom Keras RNN model. -- `Registering a custom model with supervised loss `__: +- `Registering a custom model with supervised loss `__: Example of defining and registering a custom model with a supervised loss. -- `Subprocess environment `__: +- `Subprocess environment `__: Example of how to ensure subprocesses spawned by envs are killed when RLlib exits. -- `Batch normalization `__: +- `Batch normalization `__: Example of adding batch norm layers to a custom model. -- `Parametric actions `__: +- `Parametric actions `__: Example of how to handle variable-length or parametric action spaces. -- `Eager execution `__: +- `Eager execution `__: Example of how to leverage TensorFlow eager to simplify debugging and design of custom models and policies. Serving and Offline ------------------- -- `CartPole server `__: +- `CartPole server `__: Example of online serving of predictions for a simple CartPole policy. -- `Saving experiences `__: +- `Saving experiences `__: Example of how to externally generate experience batches in RLlib-compatible format. Multi-Agent and Hierarchical ---------------------------- -- `Rock-paper-scissors `__: +- `Rock-paper-scissors `__: Example of different heuristic and learned policies competing against each other in rock-paper-scissors. 
-- `Two-step game `__: +- `Two-step game `__: Example of the two-step game from the `QMIX paper `__. -- `Hand-coded policy `__: +- `Hand-coded policy `__: Example of running a custom hand-coded policy alongside trainable policies. -- `Weight sharing between policies `__: +- `Weight sharing between policies `__: Example of how to define weight-sharing layers between two different policies. -- `Multiple trainers `__: +- `Multiple trainers `__: Example of alternating training between two DQN and PPO trainers. -- `Hierarchical training `__: +- `Hierarchical training `__: Example of hierarchical training using the multi-agent API. - `PPO with centralized value function `__: Example of customizing PPO to include a centralized value function, including a runnable script that demonstrates cooperative CartPole. diff --git a/doc/source/rllib-models.rst b/doc/source/rllib-models.rst index 295136842177b..a392dba5c3587 100644 --- a/doc/source/rllib-models.rst +++ b/doc/source/rllib-models.rst @@ -14,18 +14,18 @@ Default Behaviours Built-in Models and Preprocessors ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -RLlib picks default models based on a simple heuristic: a `vision network `__ for image observations, and a `fully connected network `__ for everything else. These models can be configured via the ``model`` config key, documented in the model `catalog `__. Note that you'll probably have to configure ``conv_filters`` if your environment observations have custom sizes, e.g., ``"model": {"dim": 42, "conv_filters": [[16, [4, 4], 2], [32, [4, 4], 2], [512, [11, 11], 1]]}`` for 42x42 observations. +RLlib picks default models based on a simple heuristic: a `vision network `__ for image observations, and a `fully connected network `__ for everything else. These models can be configured via the ``model`` config key, documented in the model `catalog `__. Note that you'll probably have to configure ``conv_filters`` if your environment observations have custom sizes, e.g., ``"model": {"dim": 42, "conv_filters": [[16, [4, 4], 2], [32, [4, 4], 2], [512, [11, 11], 1]]}`` for 42x42 observations. -In addition, if you set ``"model": {"use_lstm": true}``, then the model output will be further processed by a `LSTM cell `__. More generally, RLlib supports the use of recurrent models for its policy gradient algorithms (A3C, PPO, PG, IMPALA), and RNN support is built into its policy evaluation utilities. +In addition, if you set ``"model": {"use_lstm": true}``, then the model output will be further processed by a `LSTM cell `__. More generally, RLlib supports the use of recurrent models for its policy gradient algorithms (A3C, PPO, PG, IMPALA), and RNN support is built into its policy evaluation utilities. -For preprocessors, RLlib tries to pick one of its built-in preprocessor based on the environment's observation space. Discrete observations are one-hot encoded, Atari observations downscaled, and Tuple and Dict observations flattened (these are unflattened and accessible via the ``input_dict`` parameter in custom models). Note that for Atari, RLlib defaults to using the `DeepMind preprocessors `__, which are also used by the OpenAI baselines library. +For preprocessors, RLlib tries to pick one of its built-in preprocessor based on the environment's observation space. Discrete observations are one-hot encoded, Atari observations downscaled, and Tuple and Dict observations flattened (these are unflattened and accessible via the ``input_dict`` parameter in custom models). 
Note that for Atari, RLlib defaults to using the `DeepMind preprocessors `__, which are also used by the OpenAI baselines library. +For preprocessors, RLlib tries to pick one of its built-in preprocessors based on the environment's observation space. Discrete observations are one-hot encoded, Atari observations downscaled, and Tuple and Dict observations flattened (these are unflattened and accessible via the ``input_dict`` parameter in custom models). Note that for Atari, RLlib defaults to using the `DeepMind preprocessors `__, which are also used by the OpenAI baselines library. Built-in Model Parameters ~~~~~~~~~~~~~~~~~~~~~~~~~ The following is a list of the built-in model hyperparameters: -.. literalinclude:: ../../python/ray/rllib/models/catalog.py +.. literalinclude:: ../../rllib/models/catalog.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ @@ -37,7 +37,7 @@ TensorFlow Models TFModelV2 replaces the previous ``rllib.models.Model`` class, which did not support Keras-style reuse of variables. The ``rllib.models.Model`` class is deprecated and should not be used. -Custom TF models should subclass `TFModelV2 `__ to implement the ``__init__()`` and ``forward()`` methods. Forward takes in a dict of tensor inputs (the observation ``obs``, ``prev_action``, and ``prev_reward``, ``is_training``), optional RNN state, and returns the model output of size ``num_outputs`` and the new state. You can also override extra methods of the model such as ``value_function`` to implement a custom value branch. Additional supervised / self-supervised losses can be added via the ``custom_loss`` method: +Custom TF models should subclass `TFModelV2 `__ to implement the ``__init__()`` and ``forward()`` methods. Forward takes in a dict of tensor inputs (the observation ``obs``, ``prev_action``, ``prev_reward``, and ``is_training``), optional RNN state, and returns the model output of size ``num_outputs`` and the new state. You can also override extra methods of the model such as ``value_function`` to implement a custom value branch. Additional supervised / self-supervised losses can be added via the ``custom_loss`` method: .. autoclass:: ray.rllib.models.tf.tf_modelv2.TFModelV2 @@ -75,12 +75,12 @@ Once implemented, the model can then be registered and used in place of a built- }, }) -For a full example of a custom model in code, see the `keras model example `__. You can also reference the `unit tests `__ for Tuple and Dict spaces, which show how to access nested observation fields. +For a full example of a custom model in code, see the `keras model example `__. You can also reference the `unit tests `__ for Tuple and Dict spaces, which show how to access nested observation fields. Recurrent Models ~~~~~~~~~~~~~~~~ -Instead of using the ``use_lstm: True`` option, it can be preferable use a custom recurrent model. This provides more control over postprocessing of the LSTM output and can also allow the use of multiple LSTM cells to process different portions of the input. For a RNN model it is preferred to subclass ``RecurrentTFModelV2`` to implement ``__init__()``, ``get_initial_state()``, and ``forward_rnn()``. You can check out the `custom_keras_rnn_model.py `__ model as an example to implement your own model: +Instead of using the ``use_lstm: True`` option, it can be preferable to use a custom recurrent model. This provides more control over postprocessing of the LSTM output and can also allow the use of multiple LSTM cells to process different portions of the input. For an RNN model it is preferred to subclass ``RecurrentTFModelV2`` to implement ``__init__()``, ``get_initial_state()``, and ``forward_rnn()``. You can check out the `custom_keras_rnn_model.py `__ model as an example to implement your own model: ..
autoclass:: ray.rllib.models.tf.recurrent_tf_modelv2.RecurrentTFModelV2 @@ -91,14 +91,14 @@ Instead of using the ``use_lstm: True`` option, it can be preferable use a custo Batch Normalization ~~~~~~~~~~~~~~~~~~~ -You can use ``tf.layers.batch_normalization(x, training=input_dict["is_training"])`` to add batch norm layers to your custom model: `code example `__. RLlib will automatically run the update ops for the batch norm layers during optimization (see `tf_policy.py `__ and `multi_gpu_impl.py `__ for the exact handling of these updates). +You can use ``tf.layers.batch_normalization(x, training=input_dict["is_training"])`` to add batch norm layers to your custom model: `code example `__. RLlib will automatically run the update ops for the batch norm layers during optimization (see `tf_policy.py `__ and `multi_gpu_impl.py `__ for the exact handling of these updates). In case RLlib does not properly detect the update ops for your custom model, you can override the ``update_ops()`` method to return the list of ops to run for updates. PyTorch Models -------------- -Similarly, you can create and register custom PyTorch models for use with PyTorch-based algorithms (e.g., A2C, PG, QMIX). See these examples of `fully connected `__, `convolutional `__, and `recurrent `__ torch models. +Similarly, you can create and register custom PyTorch models for use with PyTorch-based algorithms (e.g., A2C, PG, QMIX). See these examples of `fully connected `__, `convolutional `__, and `recurrent `__ torch models. .. autoclass:: ray.rllib.models.torch.torch_modelv2.TorchModelV2 @@ -139,7 +139,7 @@ Once implemented, the model can then be registered and used in place of a built- Custom Preprocessors -------------------- -Custom preprocessors should subclass the RLlib `preprocessor class `__ and be registered in the model catalog. Note that you can alternatively use `gym wrapper classes `__ around your environment instead of preprocessors. +Custom preprocessors should subclass the RLlib `preprocessor class `__ and be registered in the model catalog. Note that you can alternatively use `gym wrapper classes `__ around your environment instead of preprocessors. .. code-block:: python @@ -169,7 +169,7 @@ Supervised Model Losses You can mix supervised losses into any RLlib algorithm through custom models. For example, you can add an imitation learning loss on expert experiences, or a self-supervised autoencoder loss within the model. These losses can be defined over either policy evaluation inputs, or data read from `offline storage `__. -**TensorFlow**: To add a supervised loss to a custom TF model, you need to override the ``custom_loss()`` method. This method takes in the existing policy loss for the algorithm, which you can add your own supervised loss to before returning. For debugging, you can also return a dictionary of scalar tensors in the ``metrics()`` method. Here is a `runnable example `__ of adding an imitation loss to CartPole training that is defined over a `offline dataset `__. +**TensorFlow**: To add a supervised loss to a custom TF model, you need to override the ``custom_loss()`` method. This method takes in the existing policy loss for the algorithm, which you can add your own supervised loss to before returning. For debugging, you can also return a dictionary of scalar tensors in the ``metrics()`` method. Here is a `runnable example `__ of adding an imitation loss to CartPole training that is defined over an `offline dataset `__.
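The override has roughly the following shape. This is a schematic fragment rather than a runnable model: ``__init__()``, ``forward()``, and the offline-data plumbing are omitted, and ``self.logits``, ``self.expert_actions``, and the ``0.1`` weighting are placeholder names and values for this sketch, not RLlib conventions:

.. code-block:: python

    import tensorflow as tf

    from ray.rllib.models.tf.tf_modelv2 import TFModelV2

    class ImitationLossModel(TFModelV2):
        # __init__() and forward() omitted: they would build the policy network
        # and stash its action logits on self.logits for use below.

        def custom_loss(self, policy_loss, loss_inputs):
            # Hypothetical supervised term: cross-entropy between the model's
            # action logits and expert actions read from offline data.
            self.imitation_loss = tf.reduce_mean(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=self.logits, labels=self.expert_actions))
            # Return the combined objective that will be minimized.
            return policy_loss + 0.1 * self.imitation_loss

        def metrics(self):
            # Extra scalars reported alongside training results for debugging.
            return {"imitation_loss": self.imitation_loss}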
**PyTorch**: There is no explicit API for adding losses to custom torch models. However, you can modify the loss in the policy definition directly. Like for TF models, offline datasets can be incorporated by creating an input reader and calling ``reader.next()`` in the loss forward pass. @@ -231,7 +231,7 @@ Custom models can be used to work with environments where (1) the set of valid a return action_logits + inf_mask, state -Depending on your use case it may make sense to use just the masking, just action embeddings, or both. For a runnable example of this in code, check out `parametric_action_cartpole.py `__. Note that since masking introduces ``tf.float32.min`` values into the model output, this technique might not work with all algorithm options. For example, algorithms might crash if they incorrectly process the ``tf.float32.min`` values. The cartpole example has working configurations for DQN (must set ``hiddens=[]``), PPO (must disable running mean and set ``vf_share_layers=True``), and several other algorithms. +Depending on your use case it may make sense to use just the masking, just action embeddings, or both. For a runnable example of this in code, check out `parametric_action_cartpole.py `__. Note that since masking introduces ``tf.float32.min`` values into the model output, this technique might not work with all algorithm options. For example, algorithms might crash if they incorrectly process the ``tf.float32.min`` values. The cartpole example has working configurations for DQN (must set ``hiddens=[]``), PPO (must disable running mean and set ``vf_share_layers=True``), and several other algorithms. Model-Based Rollouts ~~~~~~~~~~~~~~~~~~~~ @@ -253,4 +253,4 @@ With a custom policy, you can also perform model-based rollouts and optionally i return action_batch -If you want take this rollouts data and append it to the sample batch, use the ``add_extra_batch()`` method of the `episode objects `__ passed in. For an example of this, see the ``testReturningModelBasedRolloutsData`` `unit test `__. +If you want to take this rollout data and append it to the sample batch, use the ``add_extra_batch()`` method of the `episode objects `__ passed in. For an example of this, see the ``testReturningModelBasedRolloutsData`` `unit test `__. diff --git a/doc/source/rllib-offline.rst b/doc/source/rllib-offline.rst index 825038af3d539..fc6c63fe40aca 100644 --- a/doc/source/rllib-offline.rst +++ b/doc/source/rllib-offline.rst @@ -6,7 +6,7 @@ Working with Offline Datasets RLlib's offline dataset APIs enable working with experiences read from offline storage (e.g., disk, cloud storage, streaming systems, HDFS). For example, you might want to read experiences saved from previous training runs, or gathered from policies deployed in `web applications `__. You can also log new agent experiences produced during online training for future use. -RLlib represents trajectory sequences (i.e., ``(s, a, r, s', ...)`` tuples) with `SampleBatch `__ objects. Using a batch format enables efficient encoding and compression of experiences.
During online training, RLlib uses `policy evaluation `__ actors to generate batches of experiences in parallel using the current policy. RLlib also uses this same batch format for reading and writing experiences to offline storage. Example: Training on previously saved experiences ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -88,10 +88,10 @@ This example plot shows the Q-value metric in addition to importance sampling (I Example: Converting external experiences to batch format ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -When the env does not support simulation (e.g., it is a web application), it is necessary to generate the ``*.json`` experience batch files outside of RLlib. This can be done by using the `JsonWriter `__ class to write out batches. -This `runnable example `__ shows how to generate and save experience batches for CartPole-v0 to disk: +When the env does not support simulation (e.g., it is a web application), it is necessary to generate the ``*.json`` experience batch files outside of RLlib. This can be done by using the `JsonWriter `__ class to write out batches. +This `runnable example `__ shows how to generate and save experience batches for CartPole-v0 to disk: -.. literalinclude:: ../../python/ray/rllib/examples/saving_experiences.py +.. literalinclude:: ../../rllib/examples/saving_experiences.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ @@ -99,7 +99,7 @@ This `runnable example `__. This isn't typically critical for off-policy algorithms (e.g., DQN's `post-processing `__ is only needed if ``n_step > 1`` or ``worker_side_prioritization: True``). For off-policy algorithms, you can also safely set the ``postprocess_inputs: True`` config to auto-postprocess data. +RLlib assumes that input batches are of `postprocessed experiences `__. This isn't typically critical for off-policy algorithms (e.g., DQN's `post-processing `__ is only needed if ``n_step > 1`` or ``worker_side_prioritization: True``). For off-policy algorithms, you can also safely set the ``postprocess_inputs: True`` config to auto-postprocess data. However, for on-policy algorithms like PPO, you'll need to pass in the extra values added during policy evaluation and postprocessing to ``batch_builder.add_values()``, e.g., ``logits``, ``vf_preds``, ``value_target``, and ``advantages`` for PPO. This is needed since the calculation of these values depends on the parameters of the *behaviour* policy, which RLlib does not have access to in the offline setting (in online training, these values are automatically added during policy evaluation). @@ -147,7 +147,7 @@ You can also define supervised model losses over offline data. This requires def supervised_loss = some_function_of(input_ops) return policy_loss + supervised_loss -See `custom_loss.py `__ for a runnable example of using these TF input ops in a custom loss. +See `custom_loss.py `__ for a runnable example of using these TF input ops in a custom loss. Input API @@ -155,7 +155,7 @@ Input API You can configure experience input for an agent using the following options: -.. literalinclude:: ../../python/ray/rllib/agents/trainer.py +.. literalinclude:: ../../rllib/agents/trainer.py :language: python :start-after: === Offline Datasets === :end-before: Specify where experiences should be saved @@ -170,7 +170,7 @@ Output API You can configure experience output for an agent using the following options: -.. literalinclude:: ../../python/ray/rllib/agents/trainer.py +.. 
literalinclude:: ../../rllib/agents/trainer.py :language: python :start-after: shuffle_buffer_size :end-before: === Multiagent === diff --git a/doc/source/rllib-training.rst b/doc/source/rllib-training.rst index b4c45d58b8889..9052220a8abeb 100644 --- a/doc/source/rllib-training.rst +++ b/doc/source/rllib-training.rst @@ -32,7 +32,7 @@ The ``rllib train`` command (same as the ``train.py`` script in the repo) has a rllib train --help -or- - python ray/python/ray/rllib/train.py --help + python ray/rllib/train.py --help The most important options are for choosing the environment with ``--env`` (any OpenAI gym environment including ones registered by the user @@ -65,7 +65,7 @@ Configuration Specifying Parameters ~~~~~~~~~~~~~~~~~~~~~ -Each algorithm has specific hyperparameters that can be set with ``--config``, in addition to a number of `common hyperparameters `__. See the +Each algorithm has specific hyperparameters that can be set with ``--config``, in addition to a number of `common hyperparameters `__. See the `algorithms documentation `__ for more information. In an example below, we train A2C by specifying 8 workers through the config flag. @@ -86,7 +86,7 @@ Common Parameters The following is a list of the common algorithm hyperparameters: -.. literalinclude:: ../../python/ray/rllib/agents/trainer.py +.. literalinclude:: ../../rllib/agents/trainer.py :language: python :start-after: __sphinx_doc_begin__ :end-before: __sphinx_doc_end__ @@ -95,7 +95,7 @@ Tuned Examples ~~~~~~~~~~~~~~ Some good hyperparameters and settings are available in -`the repository `__ +`the repository `__ (some of them are tuned to run on GPUs). If you find better settings or tune an algorithm on a different domain, consider submitting a Pull Request! @@ -110,7 +110,7 @@ Python API The Python API provides the needed flexibility for applying RLlib to new problems. You will need to use this API if you wish to use `custom environments, preprocessors, or models `__ with RLlib. -Here is an example of the basic usage (for a more complete example, see `custom_env.py `__): +Here is an example of the basic usage (for a more complete example, see `custom_env.py `__): .. code-block:: python @@ -176,9 +176,9 @@ Tune will schedule the trials to run in parallel on your Ray cluster: Custom Training Workflows ~~~~~~~~~~~~~~~~~~~~~~~~~ -In the `basic training example `__, Tune will call ``train()`` on your trainer once per iteration and report the new training results. Sometimes, it is desirable to have full control over training, but still run inside Tune. Tune supports `custom trainable functions `__ that can be used to implement `custom training workflows (example) `__. +In the `basic training example `__, Tune will call ``train()`` on your trainer once per iteration and report the new training results. Sometimes, it is desirable to have full control over training, but still run inside Tune. Tune supports `custom trainable functions `__ that can be used to implement `custom training workflows (example) `__. -For even finer-grained control over training, you can use RLlib's lower-level `building blocks `__ directly to implement `fully customized training workflows `__. +For even finer-grained control over training, you can use RLlib's lower-level `building blocks `__ directly to implement `fully customized training workflows `__. 
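As a rough illustration of the Tune-based custom training workflow mentioned above (a minimal sketch, not the linked example; the trainable-function signature with ``reporter``, the phase bookkeeping, and the specific config values are assumptions here):

.. code-block:: python

    import ray
    from ray import tune
    from ray.rllib.agents.ppo import PPOTrainer

    def my_train_fn(config, reporter):
        # Build the trainer inside the Tune trial and drive training manually.
        trainer = PPOTrainer(env="CartPole-v0", config=config)
        for i in range(10):
            result = trainer.train()
            # Arbitrary extra fields can be attached to the reported results.
            result["phase"] = 1 if i < 5 else 2
            reporter(**result)
        trainer.stop()

    if __name__ == "__main__":
        ray.init()
        tune.run(my_train_fn, config={"num_workers": 2})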
Accessing Policy State ~~~~~~~~~~~~~~~~~~~~~~ @@ -231,7 +231,7 @@ Ray actors provide high levels of performance, so in more complex cases they can Callbacks and Custom Metrics ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -You can provide callback functions to be called at points during policy evaluation. These functions have access to an info dict containing state for the current `episode `__. Custom state can be stored for the `episode `__ in the ``info["episode"].user_data`` dict, and custom scalar metrics reported by saving values to the ``info["episode"].custom_metrics`` dict. These custom metrics will be aggregated and reported as part of training results. The following example (full code `here `__) logs a custom metric from the environment: +You can provide callback functions to be called at points during policy evaluation. These functions have access to an info dict containing state for the current `episode `__. Custom state can be stored for the `episode `__ in the ``info["episode"].user_data`` dict, and custom scalar metrics reported by saving values to the ``info["episode"].custom_metrics`` dict. These custom metrics will be aggregated and reported as part of training results. The following example (full code `here `__) logs a custom metric from the environment: .. code-block:: python @@ -370,7 +370,7 @@ The ``"monitor": true`` config can be used to save Gym episode videos to the res TensorFlow Eager ~~~~~~~~~~~~~~~~ -While RLlib uses TF graph mode for all computations, you can still leverage TF eager to inspect the intermediate state of computations using `tf.py_function `__. Here's an example of using eager mode in `a custom RLlib model and loss `__. +While RLlib uses TF graph mode for all computations, you can still leverage TF eager to inspect the intermediate state of computations using `tf.py_function `__. Here's an example of using eager mode in `a custom RLlib model and loss `__. Episode Traces ~~~~~~~~~~~~~~ @@ -412,4 +412,4 @@ In some cases (i.e., when interacting with an externally hosted simulator or pro .. autoclass:: ray.rllib.utils.policy_server.PolicyServer :members: -For a full client / server example that you can run, see the example `client script `__ and also the corresponding `server script `__, here configured to serve a policy for the toy CartPole-v0 environment. +For a full client / server example that you can run, see the example `client script `__ and also the corresponding `server script `__, here configured to serve a policy for the toy CartPole-v0 environment. diff --git a/doc/source/rllib.rst b/doc/source/rllib.rst index 3a643238c286b..a64da784efc88 100644 --- a/doc/source/rllib.rst +++ b/doc/source/rllib.rst @@ -5,7 +5,7 @@ RLlib is an open-source library for reinforcement learning that offers both high .. image:: rllib-stack.svg -To get started, take a look over the `custom env example `__ and the `API documentation `__. If you're looking to develop custom algorithms with RLlib, also check out `concepts and custom algorithms `__. +To get started, take a look over the `custom env example `__ and the `API documentation `__. If you're looking to develop custom algorithms with RLlib, also check out `concepts and custom algorithms `__. Installation ------------ @@ -22,7 +22,7 @@ You might also want to clone the `Ray repo ` .. 
code-block:: bash git clone https://github.com/ray-project/ray - cd ray/python/ray/rllib + cd ray/rllib Training APIs ------------- diff --git a/python/ray/rllib b/python/ray/rllib new file mode 120000 index 0000000000000..ca645e728291d --- /dev/null +++ b/python/ray/rllib @@ -0,0 +1 @@ +../../rllib \ No newline at end of file diff --git a/python/ray/rllib/README.md b/rllib/README.md similarity index 61% rename from python/ray/rllib/README.md rename to rllib/README.md index d69e8037458d2..667062c63db72 100644 --- a/python/ray/rllib/README.md +++ b/rllib/README.md @@ -23,3 +23,8 @@ If you've found RLlib useful for your research, you can cite the [paper](https:/ Year = {2018} } ``` + +Development Install +------------------- + +You can develop RLlib locally without needing to compile Ray by using the [setup-dev.py](https://github.com/ray-project/ray/blob/master/python/ray/setup-dev.py) script. This sets up links between the ``rllib`` dir in your git repo and the one bundled with the ``ray`` package. When using this script, make sure that your git branch is in sync with the installed Ray binaries (i.e., you are up-to-date on `master `__ and have the latest [wheel](https://ray.readthedocs.io/en/latest/installation.html) installed.) diff --git a/python/ray/rllib/__init__.py b/rllib/__init__.py similarity index 100% rename from python/ray/rllib/__init__.py rename to rllib/__init__.py diff --git a/python/ray/rllib/agents/__init__.py b/rllib/agents/__init__.py similarity index 100% rename from python/ray/rllib/agents/__init__.py rename to rllib/agents/__init__.py diff --git a/python/ray/rllib/agents/a3c/__init__.py b/rllib/agents/a3c/__init__.py similarity index 100% rename from python/ray/rllib/agents/a3c/__init__.py rename to rllib/agents/a3c/__init__.py diff --git a/python/ray/rllib/agents/a3c/a2c.py b/rllib/agents/a3c/a2c.py similarity index 100% rename from python/ray/rllib/agents/a3c/a2c.py rename to rllib/agents/a3c/a2c.py diff --git a/python/ray/rllib/agents/a3c/a3c.py b/rllib/agents/a3c/a3c.py similarity index 100% rename from python/ray/rllib/agents/a3c/a3c.py rename to rllib/agents/a3c/a3c.py diff --git a/python/ray/rllib/agents/a3c/a3c_tf_policy.py b/rllib/agents/a3c/a3c_tf_policy.py similarity index 100% rename from python/ray/rllib/agents/a3c/a3c_tf_policy.py rename to rllib/agents/a3c/a3c_tf_policy.py diff --git a/python/ray/rllib/agents/a3c/a3c_torch_policy.py b/rllib/agents/a3c/a3c_torch_policy.py similarity index 100% rename from python/ray/rllib/agents/a3c/a3c_torch_policy.py rename to rllib/agents/a3c/a3c_torch_policy.py diff --git a/python/ray/rllib/agents/agent.py b/rllib/agents/agent.py similarity index 100% rename from python/ray/rllib/agents/agent.py rename to rllib/agents/agent.py diff --git a/python/ray/rllib/agents/ars/__init__.py b/rllib/agents/ars/__init__.py similarity index 100% rename from python/ray/rllib/agents/ars/__init__.py rename to rllib/agents/ars/__init__.py diff --git a/python/ray/rllib/agents/ars/ars.py b/rllib/agents/ars/ars.py similarity index 100% rename from python/ray/rllib/agents/ars/ars.py rename to rllib/agents/ars/ars.py diff --git a/python/ray/rllib/agents/ars/optimizers.py b/rllib/agents/ars/optimizers.py similarity index 100% rename from python/ray/rllib/agents/ars/optimizers.py rename to rllib/agents/ars/optimizers.py diff --git a/python/ray/rllib/agents/ars/policies.py b/rllib/agents/ars/policies.py similarity index 100% rename from python/ray/rllib/agents/ars/policies.py rename to rllib/agents/ars/policies.py diff --git 
a/python/ray/rllib/agents/ars/utils.py b/rllib/agents/ars/utils.py similarity index 100% rename from python/ray/rllib/agents/ars/utils.py rename to rllib/agents/ars/utils.py diff --git a/python/ray/rllib/agents/ddpg/README.md b/rllib/agents/ddpg/README.md similarity index 100% rename from python/ray/rllib/agents/ddpg/README.md rename to rllib/agents/ddpg/README.md diff --git a/python/ray/rllib/agents/ddpg/__init__.py b/rllib/agents/ddpg/__init__.py similarity index 100% rename from python/ray/rllib/agents/ddpg/__init__.py rename to rllib/agents/ddpg/__init__.py diff --git a/python/ray/rllib/agents/ddpg/apex.py b/rllib/agents/ddpg/apex.py similarity index 100% rename from python/ray/rllib/agents/ddpg/apex.py rename to rllib/agents/ddpg/apex.py diff --git a/python/ray/rllib/agents/ddpg/common/__init__.py b/rllib/agents/ddpg/common/__init__.py similarity index 100% rename from python/ray/rllib/agents/ddpg/common/__init__.py rename to rllib/agents/ddpg/common/__init__.py diff --git a/python/ray/rllib/agents/ddpg/ddpg.py b/rllib/agents/ddpg/ddpg.py similarity index 100% rename from python/ray/rllib/agents/ddpg/ddpg.py rename to rllib/agents/ddpg/ddpg.py diff --git a/python/ray/rllib/agents/ddpg/ddpg_model.py b/rllib/agents/ddpg/ddpg_model.py similarity index 100% rename from python/ray/rllib/agents/ddpg/ddpg_model.py rename to rllib/agents/ddpg/ddpg_model.py diff --git a/python/ray/rllib/agents/ddpg/ddpg_policy.py b/rllib/agents/ddpg/ddpg_policy.py similarity index 100% rename from python/ray/rllib/agents/ddpg/ddpg_policy.py rename to rllib/agents/ddpg/ddpg_policy.py diff --git a/python/ray/rllib/agents/ddpg/noop_model.py b/rllib/agents/ddpg/noop_model.py similarity index 100% rename from python/ray/rllib/agents/ddpg/noop_model.py rename to rllib/agents/ddpg/noop_model.py diff --git a/python/ray/rllib/agents/ddpg/td3.py b/rllib/agents/ddpg/td3.py similarity index 100% rename from python/ray/rllib/agents/ddpg/td3.py rename to rllib/agents/ddpg/td3.py diff --git a/python/ray/rllib/agents/dqn/README.md b/rllib/agents/dqn/README.md similarity index 100% rename from python/ray/rllib/agents/dqn/README.md rename to rllib/agents/dqn/README.md diff --git a/python/ray/rllib/agents/dqn/__init__.py b/rllib/agents/dqn/__init__.py similarity index 100% rename from python/ray/rllib/agents/dqn/__init__.py rename to rllib/agents/dqn/__init__.py diff --git a/python/ray/rllib/agents/dqn/apex.py b/rllib/agents/dqn/apex.py similarity index 100% rename from python/ray/rllib/agents/dqn/apex.py rename to rllib/agents/dqn/apex.py diff --git a/python/ray/rllib/agents/dqn/common/__init__.py b/rllib/agents/dqn/common/__init__.py similarity index 100% rename from python/ray/rllib/agents/dqn/common/__init__.py rename to rllib/agents/dqn/common/__init__.py diff --git a/python/ray/rllib/agents/dqn/distributional_q_model.py b/rllib/agents/dqn/distributional_q_model.py similarity index 100% rename from python/ray/rllib/agents/dqn/distributional_q_model.py rename to rllib/agents/dqn/distributional_q_model.py diff --git a/python/ray/rllib/agents/dqn/dqn.py b/rllib/agents/dqn/dqn.py similarity index 100% rename from python/ray/rllib/agents/dqn/dqn.py rename to rllib/agents/dqn/dqn.py diff --git a/python/ray/rllib/agents/dqn/dqn_policy.py b/rllib/agents/dqn/dqn_policy.py similarity index 100% rename from python/ray/rllib/agents/dqn/dqn_policy.py rename to rllib/agents/dqn/dqn_policy.py diff --git a/python/ray/rllib/agents/dqn/simple_q_model.py b/rllib/agents/dqn/simple_q_model.py similarity index 100% rename from 
python/ray/rllib/agents/dqn/simple_q_model.py rename to rllib/agents/dqn/simple_q_model.py diff --git a/python/ray/rllib/agents/dqn/simple_q_policy.py b/rllib/agents/dqn/simple_q_policy.py similarity index 100% rename from python/ray/rllib/agents/dqn/simple_q_policy.py rename to rllib/agents/dqn/simple_q_policy.py diff --git a/python/ray/rllib/agents/es/__init__.py b/rllib/agents/es/__init__.py similarity index 100% rename from python/ray/rllib/agents/es/__init__.py rename to rllib/agents/es/__init__.py diff --git a/python/ray/rllib/agents/es/es.py b/rllib/agents/es/es.py similarity index 100% rename from python/ray/rllib/agents/es/es.py rename to rllib/agents/es/es.py diff --git a/python/ray/rllib/agents/es/optimizers.py b/rllib/agents/es/optimizers.py similarity index 100% rename from python/ray/rllib/agents/es/optimizers.py rename to rllib/agents/es/optimizers.py diff --git a/python/ray/rllib/agents/es/policies.py b/rllib/agents/es/policies.py similarity index 100% rename from python/ray/rllib/agents/es/policies.py rename to rllib/agents/es/policies.py diff --git a/python/ray/rllib/agents/es/utils.py b/rllib/agents/es/utils.py similarity index 100% rename from python/ray/rllib/agents/es/utils.py rename to rllib/agents/es/utils.py diff --git a/python/ray/rllib/agents/impala/__init__.py b/rllib/agents/impala/__init__.py similarity index 100% rename from python/ray/rllib/agents/impala/__init__.py rename to rllib/agents/impala/__init__.py diff --git a/python/ray/rllib/agents/impala/impala.py b/rllib/agents/impala/impala.py similarity index 100% rename from python/ray/rllib/agents/impala/impala.py rename to rllib/agents/impala/impala.py diff --git a/python/ray/rllib/agents/impala/vtrace.py b/rllib/agents/impala/vtrace.py similarity index 100% rename from python/ray/rllib/agents/impala/vtrace.py rename to rllib/agents/impala/vtrace.py diff --git a/python/ray/rllib/agents/impala/vtrace_policy.py b/rllib/agents/impala/vtrace_policy.py similarity index 100% rename from python/ray/rllib/agents/impala/vtrace_policy.py rename to rllib/agents/impala/vtrace_policy.py diff --git a/python/ray/rllib/agents/impala/vtrace_test.py b/rllib/agents/impala/vtrace_test.py similarity index 100% rename from python/ray/rllib/agents/impala/vtrace_test.py rename to rllib/agents/impala/vtrace_test.py diff --git a/python/ray/rllib/agents/marwil/__init__.py b/rllib/agents/marwil/__init__.py similarity index 100% rename from python/ray/rllib/agents/marwil/__init__.py rename to rllib/agents/marwil/__init__.py diff --git a/python/ray/rllib/agents/marwil/marwil.py b/rllib/agents/marwil/marwil.py similarity index 100% rename from python/ray/rllib/agents/marwil/marwil.py rename to rllib/agents/marwil/marwil.py diff --git a/python/ray/rllib/agents/marwil/marwil_policy.py b/rllib/agents/marwil/marwil_policy.py similarity index 100% rename from python/ray/rllib/agents/marwil/marwil_policy.py rename to rllib/agents/marwil/marwil_policy.py diff --git a/python/ray/rllib/agents/mock.py b/rllib/agents/mock.py similarity index 100% rename from python/ray/rllib/agents/mock.py rename to rllib/agents/mock.py diff --git a/python/ray/rllib/agents/pg/__init__.py b/rllib/agents/pg/__init__.py similarity index 100% rename from python/ray/rllib/agents/pg/__init__.py rename to rllib/agents/pg/__init__.py diff --git a/python/ray/rllib/agents/pg/pg.py b/rllib/agents/pg/pg.py similarity index 100% rename from python/ray/rllib/agents/pg/pg.py rename to rllib/agents/pg/pg.py diff --git a/python/ray/rllib/agents/pg/pg_policy.py 
b/rllib/agents/pg/pg_policy.py similarity index 100% rename from python/ray/rllib/agents/pg/pg_policy.py rename to rllib/agents/pg/pg_policy.py diff --git a/python/ray/rllib/agents/pg/torch_pg_policy.py b/rllib/agents/pg/torch_pg_policy.py similarity index 100% rename from python/ray/rllib/agents/pg/torch_pg_policy.py rename to rllib/agents/pg/torch_pg_policy.py diff --git a/python/ray/rllib/agents/ppo/__init__.py b/rllib/agents/ppo/__init__.py similarity index 100% rename from python/ray/rllib/agents/ppo/__init__.py rename to rllib/agents/ppo/__init__.py diff --git a/python/ray/rllib/agents/ppo/appo.py b/rllib/agents/ppo/appo.py similarity index 100% rename from python/ray/rllib/agents/ppo/appo.py rename to rllib/agents/ppo/appo.py diff --git a/python/ray/rllib/agents/ppo/appo_policy.py b/rllib/agents/ppo/appo_policy.py similarity index 100% rename from python/ray/rllib/agents/ppo/appo_policy.py rename to rllib/agents/ppo/appo_policy.py diff --git a/python/ray/rllib/agents/ppo/ppo.py b/rllib/agents/ppo/ppo.py similarity index 100% rename from python/ray/rllib/agents/ppo/ppo.py rename to rllib/agents/ppo/ppo.py diff --git a/python/ray/rllib/agents/ppo/ppo_policy.py b/rllib/agents/ppo/ppo_policy.py similarity index 100% rename from python/ray/rllib/agents/ppo/ppo_policy.py rename to rllib/agents/ppo/ppo_policy.py diff --git a/python/ray/rllib/agents/ppo/test/test.py b/rllib/agents/ppo/test/test.py similarity index 100% rename from python/ray/rllib/agents/ppo/test/test.py rename to rllib/agents/ppo/test/test.py diff --git a/python/ray/rllib/agents/ppo/utils.py b/rllib/agents/ppo/utils.py similarity index 100% rename from python/ray/rllib/agents/ppo/utils.py rename to rllib/agents/ppo/utils.py diff --git a/python/ray/rllib/agents/qmix/README.md b/rllib/agents/qmix/README.md similarity index 100% rename from python/ray/rllib/agents/qmix/README.md rename to rllib/agents/qmix/README.md diff --git a/python/ray/rllib/agents/qmix/__init__.py b/rllib/agents/qmix/__init__.py similarity index 100% rename from python/ray/rllib/agents/qmix/__init__.py rename to rllib/agents/qmix/__init__.py diff --git a/python/ray/rllib/agents/qmix/apex.py b/rllib/agents/qmix/apex.py similarity index 100% rename from python/ray/rllib/agents/qmix/apex.py rename to rllib/agents/qmix/apex.py diff --git a/python/ray/rllib/agents/qmix/mixers.py b/rllib/agents/qmix/mixers.py similarity index 100% rename from python/ray/rllib/agents/qmix/mixers.py rename to rllib/agents/qmix/mixers.py diff --git a/python/ray/rllib/agents/qmix/model.py b/rllib/agents/qmix/model.py similarity index 100% rename from python/ray/rllib/agents/qmix/model.py rename to rllib/agents/qmix/model.py diff --git a/python/ray/rllib/agents/qmix/qmix.py b/rllib/agents/qmix/qmix.py similarity index 100% rename from python/ray/rllib/agents/qmix/qmix.py rename to rllib/agents/qmix/qmix.py diff --git a/python/ray/rllib/agents/qmix/qmix_policy.py b/rllib/agents/qmix/qmix_policy.py similarity index 100% rename from python/ray/rllib/agents/qmix/qmix_policy.py rename to rllib/agents/qmix/qmix_policy.py diff --git a/python/ray/rllib/agents/registry.py b/rllib/agents/registry.py similarity index 100% rename from python/ray/rllib/agents/registry.py rename to rllib/agents/registry.py diff --git a/python/ray/rllib/agents/sac/README.md b/rllib/agents/sac/README.md similarity index 100% rename from python/ray/rllib/agents/sac/README.md rename to rllib/agents/sac/README.md diff --git a/python/ray/rllib/agents/sac/__init__.py b/rllib/agents/sac/__init__.py similarity index 100% 
rename from python/ray/rllib/agents/sac/__init__.py
rename to rllib/agents/sac/__init__.py
diff --git a/python/ray/rllib/agents/sac/common/__init__.py b/rllib/agents/sac/common/__init__.py
similarity index 100%
rename from python/ray/rllib/agents/sac/common/__init__.py
rename to rllib/agents/sac/common/__init__.py
diff --git a/python/ray/rllib/agents/sac/sac.py b/rllib/agents/sac/sac.py
similarity index 100%
rename from python/ray/rllib/agents/sac/sac.py
rename to rllib/agents/sac/sac.py
diff --git a/python/ray/rllib/agents/sac/sac_model.py b/rllib/agents/sac/sac_model.py
similarity index 100%
rename from python/ray/rllib/agents/sac/sac_model.py
rename to rllib/agents/sac/sac_model.py
diff --git a/python/ray/rllib/agents/sac/sac_policy.py b/rllib/agents/sac/sac_policy.py
similarity index 100%
rename from python/ray/rllib/agents/sac/sac_policy.py
rename to rllib/agents/sac/sac_policy.py
diff --git a/python/ray/rllib/agents/trainer.py b/rllib/agents/trainer.py
similarity index 100%
rename from python/ray/rllib/agents/trainer.py
rename to rllib/agents/trainer.py
diff --git a/python/ray/rllib/agents/trainer_template.py b/rllib/agents/trainer_template.py
similarity index 100%
rename from python/ray/rllib/agents/trainer_template.py
rename to rllib/agents/trainer_template.py
diff --git a/python/ray/rllib/asv.conf.json b/rllib/asv.conf.json
similarity index 100%
rename from python/ray/rllib/asv.conf.json
rename to rllib/asv.conf.json
diff --git a/python/ray/rllib/contrib/README.rst b/rllib/contrib/README.rst
similarity index 100%
rename from python/ray/rllib/contrib/README.rst
rename to rllib/contrib/README.rst
diff --git a/python/ray/rllib/contrib/__init__.py b/rllib/contrib/__init__.py
similarity index 100%
rename from python/ray/rllib/contrib/__init__.py
rename to rllib/contrib/__init__.py
diff --git a/python/ray/rllib/contrib/random_agent/random_agent.py b/rllib/contrib/random_agent/random_agent.py
similarity index 100%
rename from python/ray/rllib/contrib/random_agent/random_agent.py
rename to rllib/contrib/random_agent/random_agent.py
diff --git a/python/ray/rllib/contrib/registry.py b/rllib/contrib/registry.py
similarity index 100%
rename from python/ray/rllib/contrib/registry.py
rename to rllib/contrib/registry.py
diff --git a/python/ray/rllib/env/__init__.py b/rllib/env/__init__.py
similarity index 100%
rename from python/ray/rllib/env/__init__.py
rename to rllib/env/__init__.py
diff --git a/python/ray/rllib/env/atari_wrappers.py b/rllib/env/atari_wrappers.py
similarity index 100%
rename from python/ray/rllib/env/atari_wrappers.py
rename to rllib/env/atari_wrappers.py
diff --git a/python/ray/rllib/env/base_env.py b/rllib/env/base_env.py
similarity index 100%
rename from python/ray/rllib/env/base_env.py
rename to rllib/env/base_env.py
diff --git a/python/ray/rllib/env/constants.py b/rllib/env/constants.py
similarity index 100%
rename from python/ray/rllib/env/constants.py
rename to rllib/env/constants.py
diff --git a/python/ray/rllib/env/env_context.py b/rllib/env/env_context.py
similarity index 100%
rename from python/ray/rllib/env/env_context.py
rename to rllib/env/env_context.py
diff --git a/python/ray/rllib/env/external_env.py b/rllib/env/external_env.py
similarity index 100%
rename from python/ray/rllib/env/external_env.py
rename to rllib/env/external_env.py
diff --git a/python/ray/rllib/env/external_multi_agent_env.py b/rllib/env/external_multi_agent_env.py
similarity index 100%
rename from python/ray/rllib/env/external_multi_agent_env.py
rename to rllib/env/external_multi_agent_env.py
diff --git a/python/ray/rllib/env/group_agents_wrapper.py b/rllib/env/group_agents_wrapper.py
similarity index 100%
rename from python/ray/rllib/env/group_agents_wrapper.py
rename to rllib/env/group_agents_wrapper.py
diff --git a/python/ray/rllib/env/multi_agent_env.py b/rllib/env/multi_agent_env.py
similarity index 100%
rename from python/ray/rllib/env/multi_agent_env.py
rename to rllib/env/multi_agent_env.py
diff --git a/python/ray/rllib/env/remote_vector_env.py b/rllib/env/remote_vector_env.py
similarity index 100%
rename from python/ray/rllib/env/remote_vector_env.py
rename to rllib/env/remote_vector_env.py
diff --git a/python/ray/rllib/env/serving_env.py b/rllib/env/serving_env.py
similarity index 100%
rename from python/ray/rllib/env/serving_env.py
rename to rllib/env/serving_env.py
diff --git a/python/ray/rllib/env/vector_env.py b/rllib/env/vector_env.py
similarity index 100%
rename from python/ray/rllib/env/vector_env.py
rename to rllib/env/vector_env.py
diff --git a/python/ray/rllib/evaluation/__init__.py b/rllib/evaluation/__init__.py
similarity index 100%
rename from python/ray/rllib/evaluation/__init__.py
rename to rllib/evaluation/__init__.py
diff --git a/python/ray/rllib/evaluation/episode.py b/rllib/evaluation/episode.py
similarity index 100%
rename from python/ray/rllib/evaluation/episode.py
rename to rllib/evaluation/episode.py
diff --git a/python/ray/rllib/evaluation/interface.py b/rllib/evaluation/interface.py
similarity index 100%
rename from python/ray/rllib/evaluation/interface.py
rename to rllib/evaluation/interface.py
diff --git a/python/ray/rllib/evaluation/metrics.py b/rllib/evaluation/metrics.py
similarity index 100%
rename from python/ray/rllib/evaluation/metrics.py
rename to rllib/evaluation/metrics.py
diff --git a/python/ray/rllib/evaluation/policy_evaluator.py b/rllib/evaluation/policy_evaluator.py
similarity index 100%
rename from python/ray/rllib/evaluation/policy_evaluator.py
rename to rllib/evaluation/policy_evaluator.py
diff --git a/python/ray/rllib/evaluation/policy_graph.py b/rllib/evaluation/policy_graph.py
similarity index 100%
rename from python/ray/rllib/evaluation/policy_graph.py
rename to rllib/evaluation/policy_graph.py
diff --git a/python/ray/rllib/evaluation/postprocessing.py b/rllib/evaluation/postprocessing.py
similarity index 100%
rename from python/ray/rllib/evaluation/postprocessing.py
rename to rllib/evaluation/postprocessing.py
diff --git a/python/ray/rllib/evaluation/rollout_metrics.py b/rllib/evaluation/rollout_metrics.py
similarity index 100%
rename from python/ray/rllib/evaluation/rollout_metrics.py
rename to rllib/evaluation/rollout_metrics.py
diff --git a/python/ray/rllib/evaluation/rollout_worker.py b/rllib/evaluation/rollout_worker.py
similarity index 100%
rename from python/ray/rllib/evaluation/rollout_worker.py
rename to rllib/evaluation/rollout_worker.py
diff --git a/python/ray/rllib/evaluation/sample_batch.py b/rllib/evaluation/sample_batch.py
similarity index 100%
rename from python/ray/rllib/evaluation/sample_batch.py
rename to rllib/evaluation/sample_batch.py
diff --git a/python/ray/rllib/evaluation/sample_batch_builder.py b/rllib/evaluation/sample_batch_builder.py
similarity index 100%
rename from python/ray/rllib/evaluation/sample_batch_builder.py
rename to rllib/evaluation/sample_batch_builder.py
diff --git a/python/ray/rllib/evaluation/sampler.py b/rllib/evaluation/sampler.py
similarity index 100%
rename from python/ray/rllib/evaluation/sampler.py
rename to rllib/evaluation/sampler.py
diff --git a/python/ray/rllib/evaluation/tf_policy_graph.py b/rllib/evaluation/tf_policy_graph.py
similarity index 100%
rename from python/ray/rllib/evaluation/tf_policy_graph.py
rename to rllib/evaluation/tf_policy_graph.py
diff --git a/python/ray/rllib/evaluation/torch_policy_graph.py b/rllib/evaluation/torch_policy_graph.py
similarity index 100%
rename from python/ray/rllib/evaluation/torch_policy_graph.py
rename to rllib/evaluation/torch_policy_graph.py
diff --git a/python/ray/rllib/evaluation/worker_set.py b/rllib/evaluation/worker_set.py
similarity index 100%
rename from python/ray/rllib/evaluation/worker_set.py
rename to rllib/evaluation/worker_set.py
diff --git a/python/ray/rllib/examples/__init__.py b/rllib/examples/__init__.py
similarity index 100%
rename from python/ray/rllib/examples/__init__.py
rename to rllib/examples/__init__.py
diff --git a/python/ray/rllib/examples/batch_norm_model.py b/rllib/examples/batch_norm_model.py
similarity index 100%
rename from python/ray/rllib/examples/batch_norm_model.py
rename to rllib/examples/batch_norm_model.py
diff --git a/python/ray/rllib/examples/cartpole_lstm.py b/rllib/examples/cartpole_lstm.py
similarity index 100%
rename from python/ray/rllib/examples/cartpole_lstm.py
rename to rllib/examples/cartpole_lstm.py
diff --git a/python/ray/rllib/examples/custom_env.py b/rllib/examples/custom_env.py
similarity index 100%
rename from python/ray/rllib/examples/custom_env.py
rename to rllib/examples/custom_env.py
diff --git a/python/ray/rllib/examples/custom_fast_model.py b/rllib/examples/custom_fast_model.py
similarity index 100%
rename from python/ray/rllib/examples/custom_fast_model.py
rename to rllib/examples/custom_fast_model.py
diff --git a/python/ray/rllib/examples/custom_keras_model.py b/rllib/examples/custom_keras_model.py
similarity index 100%
rename from python/ray/rllib/examples/custom_keras_model.py
rename to rllib/examples/custom_keras_model.py
diff --git a/python/ray/rllib/examples/custom_keras_rnn_model.py b/rllib/examples/custom_keras_rnn_model.py
similarity index 100%
rename from python/ray/rllib/examples/custom_keras_rnn_model.py
rename to rllib/examples/custom_keras_rnn_model.py
diff --git a/python/ray/rllib/examples/custom_loss.py b/rllib/examples/custom_loss.py
similarity index 100%
rename from python/ray/rllib/examples/custom_loss.py
rename to rllib/examples/custom_loss.py
diff --git a/python/ray/rllib/examples/custom_metrics_and_callbacks.py b/rllib/examples/custom_metrics_and_callbacks.py
similarity index 100%
rename from python/ray/rllib/examples/custom_metrics_and_callbacks.py
rename to rllib/examples/custom_metrics_and_callbacks.py
diff --git a/python/ray/rllib/examples/custom_tf_policy.py b/rllib/examples/custom_tf_policy.py
similarity index 100%
rename from python/ray/rllib/examples/custom_tf_policy.py
rename to rllib/examples/custom_tf_policy.py
diff --git a/python/ray/rllib/examples/custom_torch_policy.py b/rllib/examples/custom_torch_policy.py
similarity index 100%
rename from python/ray/rllib/examples/custom_torch_policy.py
rename to rllib/examples/custom_torch_policy.py
diff --git a/python/ray/rllib/examples/custom_train_fn.py b/rllib/examples/custom_train_fn.py
similarity index 100%
rename from python/ray/rllib/examples/custom_train_fn.py
rename to rllib/examples/custom_train_fn.py
diff --git a/python/ray/rllib/examples/eager_execution.py b/rllib/examples/eager_execution.py
similarity index 100%
rename from python/ray/rllib/examples/eager_execution.py
rename to rllib/examples/eager_execution.py
diff --git a/python/ray/rllib/examples/export/cartpole_dqn_export.py b/rllib/examples/export/cartpole_dqn_export.py
similarity index 100%
rename from python/ray/rllib/examples/export/cartpole_dqn_export.py
rename to rllib/examples/export/cartpole_dqn_export.py
diff --git a/python/ray/rllib/examples/hierarchical_training.py b/rllib/examples/hierarchical_training.py
similarity index 100%
rename from python/ray/rllib/examples/hierarchical_training.py
rename to rllib/examples/hierarchical_training.py
diff --git a/python/ray/rllib/examples/multiagent_cartpole.py b/rllib/examples/multiagent_cartpole.py
similarity index 100%
rename from python/ray/rllib/examples/multiagent_cartpole.py
rename to rllib/examples/multiagent_cartpole.py
diff --git a/python/ray/rllib/examples/multiagent_custom_policy.py b/rllib/examples/multiagent_custom_policy.py
similarity index 100%
rename from python/ray/rllib/examples/multiagent_custom_policy.py
rename to rllib/examples/multiagent_custom_policy.py
diff --git a/python/ray/rllib/examples/multiagent_two_trainers.py b/rllib/examples/multiagent_two_trainers.py
similarity index 100%
rename from python/ray/rllib/examples/multiagent_two_trainers.py
rename to rllib/examples/multiagent_two_trainers.py
diff --git a/python/ray/rllib/examples/parametric_action_cartpole.py b/rllib/examples/parametric_action_cartpole.py
similarity index 100%
rename from python/ray/rllib/examples/parametric_action_cartpole.py
rename to rllib/examples/parametric_action_cartpole.py
diff --git a/python/ray/rllib/examples/rock_paper_scissors_multiagent.py b/rllib/examples/rock_paper_scissors_multiagent.py
similarity index 100%
rename from python/ray/rllib/examples/rock_paper_scissors_multiagent.py
rename to rllib/examples/rock_paper_scissors_multiagent.py
diff --git a/python/ray/rllib/examples/rollout_worker_custom_workflow.py b/rllib/examples/rollout_worker_custom_workflow.py
similarity index 100%
rename from python/ray/rllib/examples/rollout_worker_custom_workflow.py
rename to rllib/examples/rollout_worker_custom_workflow.py
diff --git a/python/ray/rllib/examples/saving_experiences.py b/rllib/examples/saving_experiences.py
similarity index 100%
rename from python/ray/rllib/examples/saving_experiences.py
rename to rllib/examples/saving_experiences.py
diff --git a/python/ray/rllib/examples/serving/cartpole_client.py b/rllib/examples/serving/cartpole_client.py
similarity index 100%
rename from python/ray/rllib/examples/serving/cartpole_client.py
rename to rllib/examples/serving/cartpole_client.py
diff --git a/python/ray/rllib/examples/serving/cartpole_server.py b/rllib/examples/serving/cartpole_server.py
similarity index 100%
rename from python/ray/rllib/examples/serving/cartpole_server.py
rename to rllib/examples/serving/cartpole_server.py
diff --git a/python/ray/rllib/examples/serving/test.sh b/rllib/examples/serving/test.sh
similarity index 100%
rename from python/ray/rllib/examples/serving/test.sh
rename to rllib/examples/serving/test.sh
diff --git a/python/ray/rllib/examples/twostep_game.py b/rllib/examples/twostep_game.py
similarity index 100%
rename from python/ray/rllib/examples/twostep_game.py
rename to rllib/examples/twostep_game.py
diff --git a/python/ray/rllib/keras_policy.py b/rllib/keras_policy.py
similarity index 100%
rename from python/ray/rllib/keras_policy.py
rename to rllib/keras_policy.py
diff --git a/python/ray/rllib/models/README.txt b/rllib/models/README.txt
similarity index 100%
rename from python/ray/rllib/models/README.txt
rename to rllib/models/README.txt
diff --git a/python/ray/rllib/models/__init__.py b/rllib/models/__init__.py
similarity index 100%
rename from python/ray/rllib/models/__init__.py
rename to rllib/models/__init__.py
diff --git a/python/ray/rllib/models/action_dist.py b/rllib/models/action_dist.py
similarity index 100%
rename from python/ray/rllib/models/action_dist.py
rename to rllib/models/action_dist.py
diff --git a/python/ray/rllib/models/catalog.py b/rllib/models/catalog.py
similarity index 100%
rename from python/ray/rllib/models/catalog.py
rename to rllib/models/catalog.py
diff --git a/python/ray/rllib/models/extra_spaces.py b/rllib/models/extra_spaces.py
similarity index 100%
rename from python/ray/rllib/models/extra_spaces.py
rename to rllib/models/extra_spaces.py
diff --git a/python/ray/rllib/models/model.py b/rllib/models/model.py
similarity index 100%
rename from python/ray/rllib/models/model.py
rename to rllib/models/model.py
diff --git a/python/ray/rllib/models/modelv2.py b/rllib/models/modelv2.py
similarity index 100%
rename from python/ray/rllib/models/modelv2.py
rename to rllib/models/modelv2.py
diff --git a/python/ray/rllib/models/preprocessors.py b/rllib/models/preprocessors.py
similarity index 100%
rename from python/ray/rllib/models/preprocessors.py
rename to rllib/models/preprocessors.py
diff --git a/python/ray/rllib/models/tf/__init__.py b/rllib/models/tf/__init__.py
similarity index 100%
rename from python/ray/rllib/models/tf/__init__.py
rename to rllib/models/tf/__init__.py
diff --git a/python/ray/rllib/models/tf/fcnet_v1.py b/rllib/models/tf/fcnet_v1.py
similarity index 100%
rename from python/ray/rllib/models/tf/fcnet_v1.py
rename to rllib/models/tf/fcnet_v1.py
diff --git a/python/ray/rllib/models/tf/fcnet_v2.py b/rllib/models/tf/fcnet_v2.py
similarity index 100%
rename from python/ray/rllib/models/tf/fcnet_v2.py
rename to rllib/models/tf/fcnet_v2.py
diff --git a/python/ray/rllib/models/tf/lstm_v1.py b/rllib/models/tf/lstm_v1.py
similarity index 100%
rename from python/ray/rllib/models/tf/lstm_v1.py
rename to rllib/models/tf/lstm_v1.py
diff --git a/python/ray/rllib/models/tf/misc.py b/rllib/models/tf/misc.py
similarity index 100%
rename from python/ray/rllib/models/tf/misc.py
rename to rllib/models/tf/misc.py
diff --git a/python/ray/rllib/models/tf/modelv1_compat.py b/rllib/models/tf/modelv1_compat.py
similarity index 100%
rename from python/ray/rllib/models/tf/modelv1_compat.py
rename to rllib/models/tf/modelv1_compat.py
diff --git a/python/ray/rllib/models/tf/recurrent_tf_modelv2.py b/rllib/models/tf/recurrent_tf_modelv2.py
similarity index 100%
rename from python/ray/rllib/models/tf/recurrent_tf_modelv2.py
rename to rllib/models/tf/recurrent_tf_modelv2.py
diff --git a/python/ray/rllib/models/tf/tf_action_dist.py b/rllib/models/tf/tf_action_dist.py
similarity index 100%
rename from python/ray/rllib/models/tf/tf_action_dist.py
rename to rllib/models/tf/tf_action_dist.py
diff --git a/python/ray/rllib/models/tf/tf_modelv2.py b/rllib/models/tf/tf_modelv2.py
similarity index 100%
rename from python/ray/rllib/models/tf/tf_modelv2.py
rename to rllib/models/tf/tf_modelv2.py
diff --git a/python/ray/rllib/models/tf/visionnet_v1.py b/rllib/models/tf/visionnet_v1.py
similarity index 100%
rename from python/ray/rllib/models/tf/visionnet_v1.py
rename to rllib/models/tf/visionnet_v1.py
diff --git a/python/ray/rllib/models/torch/__init__.py b/rllib/models/torch/__init__.py
similarity index 100%
rename from python/ray/rllib/models/torch/__init__.py
rename to rllib/models/torch/__init__.py
diff --git a/python/ray/rllib/models/torch/fcnet.py b/rllib/models/torch/fcnet.py
similarity index 100%
rename from python/ray/rllib/models/torch/fcnet.py
rename to rllib/models/torch/fcnet.py
diff --git a/python/ray/rllib/models/torch/misc.py b/rllib/models/torch/misc.py
similarity index 100%
rename from python/ray/rllib/models/torch/misc.py
rename to rllib/models/torch/misc.py
diff --git a/python/ray/rllib/models/torch/torch_action_dist.py b/rllib/models/torch/torch_action_dist.py
similarity index 100%
rename from python/ray/rllib/models/torch/torch_action_dist.py
rename to rllib/models/torch/torch_action_dist.py
diff --git a/python/ray/rllib/models/torch/torch_modelv2.py b/rllib/models/torch/torch_modelv2.py
similarity index 100%
rename from python/ray/rllib/models/torch/torch_modelv2.py
rename to rllib/models/torch/torch_modelv2.py
diff --git a/python/ray/rllib/models/torch/visionnet.py b/rllib/models/torch/visionnet.py
similarity index 100%
rename from python/ray/rllib/models/torch/visionnet.py
rename to rllib/models/torch/visionnet.py
diff --git a/python/ray/rllib/offline/__init__.py b/rllib/offline/__init__.py
similarity index 100%
rename from python/ray/rllib/offline/__init__.py
rename to rllib/offline/__init__.py
diff --git a/python/ray/rllib/offline/input_reader.py b/rllib/offline/input_reader.py
similarity index 100%
rename from python/ray/rllib/offline/input_reader.py
rename to rllib/offline/input_reader.py
diff --git a/python/ray/rllib/offline/io_context.py b/rllib/offline/io_context.py
similarity index 100%
rename from python/ray/rllib/offline/io_context.py
rename to rllib/offline/io_context.py
diff --git a/python/ray/rllib/offline/is_estimator.py b/rllib/offline/is_estimator.py
similarity index 100%
rename from python/ray/rllib/offline/is_estimator.py
rename to rllib/offline/is_estimator.py
diff --git a/python/ray/rllib/offline/json_reader.py b/rllib/offline/json_reader.py
similarity index 100%
rename from python/ray/rllib/offline/json_reader.py
rename to rllib/offline/json_reader.py
diff --git a/python/ray/rllib/offline/json_writer.py b/rllib/offline/json_writer.py
similarity index 100%
rename from python/ray/rllib/offline/json_writer.py
rename to rllib/offline/json_writer.py
diff --git a/python/ray/rllib/offline/mixed_input.py b/rllib/offline/mixed_input.py
similarity index 100%
rename from python/ray/rllib/offline/mixed_input.py
rename to rllib/offline/mixed_input.py
diff --git a/python/ray/rllib/offline/off_policy_estimator.py b/rllib/offline/off_policy_estimator.py
similarity index 100%
rename from python/ray/rllib/offline/off_policy_estimator.py
rename to rllib/offline/off_policy_estimator.py
diff --git a/python/ray/rllib/offline/output_writer.py b/rllib/offline/output_writer.py
similarity index 100%
rename from python/ray/rllib/offline/output_writer.py
rename to rllib/offline/output_writer.py
diff --git a/python/ray/rllib/offline/shuffled_input.py b/rllib/offline/shuffled_input.py
similarity index 100%
rename from python/ray/rllib/offline/shuffled_input.py
rename to rllib/offline/shuffled_input.py
diff --git a/python/ray/rllib/offline/wis_estimator.py b/rllib/offline/wis_estimator.py
similarity index 100%
rename from python/ray/rllib/offline/wis_estimator.py
rename to rllib/offline/wis_estimator.py
diff --git a/python/ray/rllib/optimizers/__init__.py b/rllib/optimizers/__init__.py
similarity index 100%
rename from python/ray/rllib/optimizers/__init__.py
rename to rllib/optimizers/__init__.py
diff --git a/python/ray/rllib/optimizers/aso_aggregator.py b/rllib/optimizers/aso_aggregator.py
similarity index 100%
rename from python/ray/rllib/optimizers/aso_aggregator.py
rename to rllib/optimizers/aso_aggregator.py
diff --git a/python/ray/rllib/optimizers/aso_learner.py b/rllib/optimizers/aso_learner.py
similarity index 100%
rename from python/ray/rllib/optimizers/aso_learner.py
rename to rllib/optimizers/aso_learner.py
diff --git a/python/ray/rllib/optimizers/aso_minibatch_buffer.py b/rllib/optimizers/aso_minibatch_buffer.py
similarity index 100%
rename from python/ray/rllib/optimizers/aso_minibatch_buffer.py
rename to rllib/optimizers/aso_minibatch_buffer.py
diff --git a/python/ray/rllib/optimizers/aso_multi_gpu_learner.py b/rllib/optimizers/aso_multi_gpu_learner.py
similarity index 100%
rename from python/ray/rllib/optimizers/aso_multi_gpu_learner.py
rename to rllib/optimizers/aso_multi_gpu_learner.py
diff --git a/python/ray/rllib/optimizers/aso_tree_aggregator.py b/rllib/optimizers/aso_tree_aggregator.py
similarity index 100%
rename from python/ray/rllib/optimizers/aso_tree_aggregator.py
rename to rllib/optimizers/aso_tree_aggregator.py
diff --git a/python/ray/rllib/optimizers/async_gradients_optimizer.py b/rllib/optimizers/async_gradients_optimizer.py
similarity index 100%
rename from python/ray/rllib/optimizers/async_gradients_optimizer.py
rename to rllib/optimizers/async_gradients_optimizer.py
diff --git a/python/ray/rllib/optimizers/async_replay_optimizer.py b/rllib/optimizers/async_replay_optimizer.py
similarity index 100%
rename from python/ray/rllib/optimizers/async_replay_optimizer.py
rename to rllib/optimizers/async_replay_optimizer.py
diff --git a/python/ray/rllib/optimizers/async_samples_optimizer.py b/rllib/optimizers/async_samples_optimizer.py
similarity index 100%
rename from python/ray/rllib/optimizers/async_samples_optimizer.py
rename to rllib/optimizers/async_samples_optimizer.py
diff --git a/python/ray/rllib/optimizers/multi_gpu_impl.py b/rllib/optimizers/multi_gpu_impl.py
similarity index 100%
rename from python/ray/rllib/optimizers/multi_gpu_impl.py
rename to rllib/optimizers/multi_gpu_impl.py
diff --git a/python/ray/rllib/optimizers/multi_gpu_optimizer.py b/rllib/optimizers/multi_gpu_optimizer.py
similarity index 100%
rename from python/ray/rllib/optimizers/multi_gpu_optimizer.py
rename to rllib/optimizers/multi_gpu_optimizer.py
diff --git a/python/ray/rllib/optimizers/policy_optimizer.py b/rllib/optimizers/policy_optimizer.py
similarity index 100%
rename from python/ray/rllib/optimizers/policy_optimizer.py
rename to rllib/optimizers/policy_optimizer.py
diff --git a/python/ray/rllib/optimizers/replay_buffer.py b/rllib/optimizers/replay_buffer.py
similarity index 100%
rename from python/ray/rllib/optimizers/replay_buffer.py
rename to rllib/optimizers/replay_buffer.py
diff --git a/python/ray/rllib/optimizers/rollout.py b/rllib/optimizers/rollout.py
similarity index 100%
rename from python/ray/rllib/optimizers/rollout.py
rename to rllib/optimizers/rollout.py
diff --git a/python/ray/rllib/optimizers/segment_tree.py b/rllib/optimizers/segment_tree.py
similarity index 100%
rename from python/ray/rllib/optimizers/segment_tree.py
rename to rllib/optimizers/segment_tree.py
diff --git a/python/ray/rllib/optimizers/sync_batch_replay_optimizer.py b/rllib/optimizers/sync_batch_replay_optimizer.py
similarity index 100%
rename from python/ray/rllib/optimizers/sync_batch_replay_optimizer.py
rename to rllib/optimizers/sync_batch_replay_optimizer.py
diff --git a/python/ray/rllib/optimizers/sync_replay_optimizer.py b/rllib/optimizers/sync_replay_optimizer.py
similarity index 100%
rename from python/ray/rllib/optimizers/sync_replay_optimizer.py
rename to rllib/optimizers/sync_replay_optimizer.py
diff --git a/python/ray/rllib/optimizers/sync_samples_optimizer.py b/rllib/optimizers/sync_samples_optimizer.py
similarity index 100%
rename from python/ray/rllib/optimizers/sync_samples_optimizer.py
rename to rllib/optimizers/sync_samples_optimizer.py
diff --git a/python/ray/rllib/optimizers/tests/test_segment_tree.py b/rllib/optimizers/tests/test_segment_tree.py
similarity index 100%
rename from python/ray/rllib/optimizers/tests/test_segment_tree.py
rename to rllib/optimizers/tests/test_segment_tree.py
diff --git a/python/ray/rllib/policy/__init__.py b/rllib/policy/__init__.py
similarity index 100%
rename from python/ray/rllib/policy/__init__.py
rename to rllib/policy/__init__.py
diff --git a/python/ray/rllib/policy/dynamic_tf_policy.py b/rllib/policy/dynamic_tf_policy.py
similarity index 100%
rename from python/ray/rllib/policy/dynamic_tf_policy.py
rename to rllib/policy/dynamic_tf_policy.py
diff --git a/python/ray/rllib/policy/policy.py b/rllib/policy/policy.py
similarity index 100%
rename from python/ray/rllib/policy/policy.py
rename to rllib/policy/policy.py
diff --git a/python/ray/rllib/policy/rnn_sequencing.py b/rllib/policy/rnn_sequencing.py
similarity index 100%
rename from python/ray/rllib/policy/rnn_sequencing.py
rename to rllib/policy/rnn_sequencing.py
diff --git a/python/ray/rllib/policy/sample_batch.py b/rllib/policy/sample_batch.py
similarity index 100%
rename from python/ray/rllib/policy/sample_batch.py
rename to rllib/policy/sample_batch.py
diff --git a/python/ray/rllib/policy/tf_policy.py b/rllib/policy/tf_policy.py
similarity index 100%
rename from python/ray/rllib/policy/tf_policy.py
rename to rllib/policy/tf_policy.py
diff --git a/python/ray/rllib/policy/tf_policy_template.py b/rllib/policy/tf_policy_template.py
similarity index 100%
rename from python/ray/rllib/policy/tf_policy_template.py
rename to rllib/policy/tf_policy_template.py
diff --git a/python/ray/rllib/policy/torch_policy.py b/rllib/policy/torch_policy.py
similarity index 100%
rename from python/ray/rllib/policy/torch_policy.py
rename to rllib/policy/torch_policy.py
diff --git a/python/ray/rllib/policy/torch_policy_template.py b/rllib/policy/torch_policy_template.py
similarity index 100%
rename from python/ray/rllib/policy/torch_policy_template.py
rename to rllib/policy/torch_policy_template.py
diff --git a/python/ray/rllib/rollout.py b/rllib/rollout.py
similarity index 100%
rename from python/ray/rllib/rollout.py
rename to rllib/rollout.py
diff --git a/python/ray/rllib/scripts.py b/rllib/scripts.py
similarity index 100%
rename from python/ray/rllib/scripts.py
rename to rllib/scripts.py
diff --git a/python/ray/rllib/tests/__init__.py b/rllib/tests/__init__.py
similarity index 100%
rename from python/ray/rllib/tests/__init__.py
rename to rllib/tests/__init__.py
diff --git a/python/ray/rllib/tests/data/cartpole_small/output-2019-02-03_20-27-20_worker-0_0.json b/rllib/tests/data/cartpole_small/output-2019-02-03_20-27-20_worker-0_0.json
similarity index 100%
rename from python/ray/rllib/tests/data/cartpole_small/output-2019-02-03_20-27-20_worker-0_0.json
rename to rllib/tests/data/cartpole_small/output-2019-02-03_20-27-20_worker-0_0.json
diff --git a/python/ray/rllib/tests/mock_worker.py b/rllib/tests/mock_worker.py
similarity index 100%
rename from python/ray/rllib/tests/mock_worker.py
rename to rllib/tests/mock_worker.py
diff --git a/python/ray/rllib/tests/multiagent_pendulum.py b/rllib/tests/multiagent_pendulum.py
similarity index 100%
rename from python/ray/rllib/tests/multiagent_pendulum.py
rename to rllib/tests/multiagent_pendulum.py
diff --git a/python/ray/rllib/tests/run_regression_tests.py b/rllib/tests/run_regression_tests.py
similarity index 100%
rename from python/ray/rllib/tests/run_regression_tests.py
rename to rllib/tests/run_regression_tests.py
diff --git a/python/ray/rllib/tests/test_avail_actions_qmix.py b/rllib/tests/test_avail_actions_qmix.py
similarity index 100%
rename from python/ray/rllib/tests/test_avail_actions_qmix.py
rename to rllib/tests/test_avail_actions_qmix.py
diff --git a/python/ray/rllib/tests/test_catalog.py b/rllib/tests/test_catalog.py
similarity index 100%
rename from python/ray/rllib/tests/test_catalog.py
rename to rllib/tests/test_catalog.py
diff --git a/python/ray/rllib/tests/test_checkpoint_restore.py b/rllib/tests/test_checkpoint_restore.py
similarity index 100%
rename from python/ray/rllib/tests/test_checkpoint_restore.py
rename to rllib/tests/test_checkpoint_restore.py
diff --git a/python/ray/rllib/tests/test_dependency.py b/rllib/tests/test_dependency.py
similarity index 100%
rename from python/ray/rllib/tests/test_dependency.py
rename to rllib/tests/test_dependency.py
diff --git a/python/ray/rllib/tests/test_env_with_subprocess.py b/rllib/tests/test_env_with_subprocess.py
similarity index 100%
rename from python/ray/rllib/tests/test_env_with_subprocess.py
rename to rllib/tests/test_env_with_subprocess.py
diff --git a/python/ray/rllib/tests/test_evaluators.py b/rllib/tests/test_evaluators.py
similarity index 100%
rename from python/ray/rllib/tests/test_evaluators.py
rename to rllib/tests/test_evaluators.py
diff --git a/python/ray/rllib/tests/test_external_env.py b/rllib/tests/test_external_env.py
similarity index 100%
rename from python/ray/rllib/tests/test_external_env.py
rename to rllib/tests/test_external_env.py
diff --git a/python/ray/rllib/tests/test_external_multi_agent_env.py b/rllib/tests/test_external_multi_agent_env.py
similarity index 100%
rename from python/ray/rllib/tests/test_external_multi_agent_env.py
rename to rllib/tests/test_external_multi_agent_env.py
diff --git a/python/ray/rllib/tests/test_filters.py b/rllib/tests/test_filters.py
similarity index 100%
rename from python/ray/rllib/tests/test_filters.py
rename to rllib/tests/test_filters.py
diff --git a/python/ray/rllib/tests/test_ignore_worker_failure.py b/rllib/tests/test_ignore_worker_failure.py
similarity index 100%
rename from python/ray/rllib/tests/test_ignore_worker_failure.py
rename to rllib/tests/test_ignore_worker_failure.py
diff --git a/python/ray/rllib/tests/test_io.py b/rllib/tests/test_io.py
similarity index 100%
rename from python/ray/rllib/tests/test_io.py
rename to rllib/tests/test_io.py
diff --git a/python/ray/rllib/tests/test_legacy.py b/rllib/tests/test_legacy.py
similarity index 100%
rename from python/ray/rllib/tests/test_legacy.py
rename to rllib/tests/test_legacy.py
diff --git a/python/ray/rllib/tests/test_local.py b/rllib/tests/test_local.py
similarity index 100%
rename from python/ray/rllib/tests/test_local.py
rename to rllib/tests/test_local.py
diff --git a/python/ray/rllib/tests/test_lstm.py b/rllib/tests/test_lstm.py
similarity index 100%
rename from python/ray/rllib/tests/test_lstm.py
rename to rllib/tests/test_lstm.py
diff --git a/python/ray/rllib/tests/test_multi_agent_env.py b/rllib/tests/test_multi_agent_env.py
similarity index 100%
rename from python/ray/rllib/tests/test_multi_agent_env.py
rename to rllib/tests/test_multi_agent_env.py
diff --git a/python/ray/rllib/tests/test_nested_spaces.py b/rllib/tests/test_nested_spaces.py
similarity index 100%
rename from python/ray/rllib/tests/test_nested_spaces.py
rename to rllib/tests/test_nested_spaces.py
diff --git a/python/ray/rllib/tests/test_optimizers.py b/rllib/tests/test_optimizers.py
similarity index 100%
rename from python/ray/rllib/tests/test_optimizers.py
rename to rllib/tests/test_optimizers.py
diff --git a/python/ray/rllib/tests/test_perf.py b/rllib/tests/test_perf.py
similarity index 100%
rename from python/ray/rllib/tests/test_perf.py
rename to rllib/tests/test_perf.py
diff --git a/python/ray/rllib/tests/test_reproducibility.py b/rllib/tests/test_reproducibility.py
similarity index 100%
rename from python/ray/rllib/tests/test_reproducibility.py
rename to rllib/tests/test_reproducibility.py
diff --git a/python/ray/rllib/tests/test_rollout.sh b/rllib/tests/test_rollout.sh
similarity index 88%
rename from python/ray/rllib/tests/test_rollout.sh
rename to rllib/tests/test_rollout.sh
index 04685b2be345d..02a65a7c54e4f 100755
--- a/python/ray/rllib/tests/test_rollout.sh
+++ b/rllib/tests/test_rollout.sh
@@ -1,10 +1,10 @@
 #!/bin/bash -e
 
-TRAIN=/ray/python/ray/rllib/train.py
+TRAIN=/ray/rllib/train.py
 if [ ! -e "$TRAIN" ]; then
     TRAIN=../train.py
 fi
-ROLLOUT=/ray/python/ray/rllib/rollout.py
+ROLLOUT=/ray/rllib/rollout.py
 if [ ! -e "$ROLLOUT" ]; then
     ROLLOUT=../rollout.py
 fi
diff --git a/python/ray/rllib/tests/test_rollout_worker.py b/rllib/tests/test_rollout_worker.py
similarity index 100%
rename from python/ray/rllib/tests/test_rollout_worker.py
rename to rllib/tests/test_rollout_worker.py
diff --git a/python/ray/rllib/tests/test_supported_spaces.py b/rllib/tests/test_supported_spaces.py
similarity index 100%
rename from python/ray/rllib/tests/test_supported_spaces.py
rename to rllib/tests/test_supported_spaces.py
diff --git a/python/ray/rllib/train.py b/rllib/train.py
similarity index 100%
rename from python/ray/rllib/train.py
rename to rllib/train.py
diff --git a/python/ray/rllib/tuned_examples/atari-a2c.yaml b/rllib/tuned_examples/atari-a2c.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/atari-a2c.yaml
rename to rllib/tuned_examples/atari-a2c.yaml
diff --git a/python/ray/rllib/tuned_examples/atari-apex.yaml b/rllib/tuned_examples/atari-apex.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/atari-apex.yaml
rename to rllib/tuned_examples/atari-apex.yaml
diff --git a/python/ray/rllib/tuned_examples/atari-dist-dqn.yaml b/rllib/tuned_examples/atari-dist-dqn.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/atari-dist-dqn.yaml
rename to rllib/tuned_examples/atari-dist-dqn.yaml
diff --git a/python/ray/rllib/tuned_examples/atari-dqn.yaml b/rllib/tuned_examples/atari-dqn.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/atari-dqn.yaml
rename to rllib/tuned_examples/atari-dqn.yaml
diff --git a/python/ray/rllib/tuned_examples/atari-duel-ddqn.yaml b/rllib/tuned_examples/atari-duel-ddqn.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/atari-duel-ddqn.yaml
rename to rllib/tuned_examples/atari-duel-ddqn.yaml
diff --git a/python/ray/rllib/tuned_examples/atari-impala-large.yaml b/rllib/tuned_examples/atari-impala-large.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/atari-impala-large.yaml
rename to rllib/tuned_examples/atari-impala-large.yaml
diff --git a/python/ray/rllib/tuned_examples/atari-impala.yaml b/rllib/tuned_examples/atari-impala.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/atari-impala.yaml
rename to rllib/tuned_examples/atari-impala.yaml
diff --git a/python/ray/rllib/tuned_examples/atari-ppo.yaml b/rllib/tuned_examples/atari-ppo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/atari-ppo.yaml
rename to rllib/tuned_examples/atari-ppo.yaml
diff --git a/python/ray/rllib/tuned_examples/cartpole-grid-search-example.yaml b/rllib/tuned_examples/cartpole-grid-search-example.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/cartpole-grid-search-example.yaml
rename to rllib/tuned_examples/cartpole-grid-search-example.yaml
diff --git a/python/ray/rllib/tuned_examples/cartpole-marwil.yaml b/rllib/tuned_examples/cartpole-marwil.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/cartpole-marwil.yaml
rename to rllib/tuned_examples/cartpole-marwil.yaml
diff --git a/python/ray/rllib/tuned_examples/halfcheetah-appo.yaml b/rllib/tuned_examples/halfcheetah-appo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/halfcheetah-appo.yaml
rename to rllib/tuned_examples/halfcheetah-appo.yaml
diff --git a/python/ray/rllib/tuned_examples/halfcheetah-ddpg.yaml b/rllib/tuned_examples/halfcheetah-ddpg.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/halfcheetah-ddpg.yaml
rename to rllib/tuned_examples/halfcheetah-ddpg.yaml
diff --git a/python/ray/rllib/tuned_examples/halfcheetah-ppo.yaml b/rllib/tuned_examples/halfcheetah-ppo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/halfcheetah-ppo.yaml
rename to rllib/tuned_examples/halfcheetah-ppo.yaml
diff --git a/python/ray/rllib/tuned_examples/hopper-ppo.yaml b/rllib/tuned_examples/hopper-ppo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/hopper-ppo.yaml
rename to rllib/tuned_examples/hopper-ppo.yaml
diff --git a/python/ray/rllib/tuned_examples/humanoid-es.yaml b/rllib/tuned_examples/humanoid-es.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/humanoid-es.yaml
rename to rllib/tuned_examples/humanoid-es.yaml
diff --git a/python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml b/rllib/tuned_examples/humanoid-ppo-gae.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/humanoid-ppo-gae.yaml
rename to rllib/tuned_examples/humanoid-ppo-gae.yaml
diff --git a/python/ray/rllib/tuned_examples/humanoid-ppo.yaml b/rllib/tuned_examples/humanoid-ppo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/humanoid-ppo.yaml
rename to rllib/tuned_examples/humanoid-ppo.yaml
diff --git a/python/ray/rllib/tuned_examples/hyperband-cartpole.yaml b/rllib/tuned_examples/hyperband-cartpole.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/hyperband-cartpole.yaml
rename to rllib/tuned_examples/hyperband-cartpole.yaml
diff --git a/python/ray/rllib/tuned_examples/invertedpendulum-td3.yaml b/rllib/tuned_examples/invertedpendulum-td3.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/invertedpendulum-td3.yaml
rename to rllib/tuned_examples/invertedpendulum-td3.yaml
diff --git a/python/ray/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg.yaml b/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/mountaincarcontinuous-apex-ddpg.yaml
rename to rllib/tuned_examples/mountaincarcontinuous-apex-ddpg.yaml
diff --git a/python/ray/rllib/tuned_examples/mountaincarcontinuous-ddpg.yaml b/rllib/tuned_examples/mountaincarcontinuous-ddpg.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/mountaincarcontinuous-ddpg.yaml
rename to rllib/tuned_examples/mountaincarcontinuous-ddpg.yaml
diff --git a/python/ray/rllib/tuned_examples/mujoco-td3.yaml b/rllib/tuned_examples/mujoco-td3.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/mujoco-td3.yaml
rename to rllib/tuned_examples/mujoco-td3.yaml
diff --git a/python/ray/rllib/tuned_examples/pendulum-apex-ddpg.yaml b/rllib/tuned_examples/pendulum-apex-ddpg.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pendulum-apex-ddpg.yaml
rename to rllib/tuned_examples/pendulum-apex-ddpg.yaml
diff --git a/python/ray/rllib/tuned_examples/pendulum-ddpg.yaml b/rllib/tuned_examples/pendulum-ddpg.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pendulum-ddpg.yaml
rename to rllib/tuned_examples/pendulum-ddpg.yaml
diff --git a/python/ray/rllib/tuned_examples/pendulum-ppo.yaml b/rllib/tuned_examples/pendulum-ppo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pendulum-ppo.yaml
rename to rllib/tuned_examples/pendulum-ppo.yaml
diff --git a/python/ray/rllib/tuned_examples/pendulum-td3.yaml b/rllib/tuned_examples/pendulum-td3.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pendulum-td3.yaml
rename to rllib/tuned_examples/pendulum-td3.yaml
diff --git a/python/ray/rllib/tuned_examples/pong-a3c-pytorch.yaml b/rllib/tuned_examples/pong-a3c-pytorch.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pong-a3c-pytorch.yaml
rename to rllib/tuned_examples/pong-a3c-pytorch.yaml
diff --git a/python/ray/rllib/tuned_examples/pong-a3c.yaml b/rllib/tuned_examples/pong-a3c.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pong-a3c.yaml
rename to rllib/tuned_examples/pong-a3c.yaml
diff --git a/python/ray/rllib/tuned_examples/pong-apex.yaml b/rllib/tuned_examples/pong-apex.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pong-apex.yaml
rename to rllib/tuned_examples/pong-apex.yaml
diff --git a/python/ray/rllib/tuned_examples/pong-appo.yaml b/rllib/tuned_examples/pong-appo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pong-appo.yaml
rename to rllib/tuned_examples/pong-appo.yaml
diff --git a/python/ray/rllib/tuned_examples/pong-dqn.yaml b/rllib/tuned_examples/pong-dqn.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pong-dqn.yaml
rename to rllib/tuned_examples/pong-dqn.yaml
diff --git a/python/ray/rllib/tuned_examples/pong-impala-fast.yaml b/rllib/tuned_examples/pong-impala-fast.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pong-impala-fast.yaml
rename to rllib/tuned_examples/pong-impala-fast.yaml
diff --git a/python/ray/rllib/tuned_examples/pong-impala-vectorized.yaml b/rllib/tuned_examples/pong-impala-vectorized.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pong-impala-vectorized.yaml
rename to rllib/tuned_examples/pong-impala-vectorized.yaml
diff --git a/python/ray/rllib/tuned_examples/pong-impala.yaml b/rllib/tuned_examples/pong-impala.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pong-impala.yaml
rename to rllib/tuned_examples/pong-impala.yaml
diff --git a/python/ray/rllib/tuned_examples/pong-ppo.yaml b/rllib/tuned_examples/pong-ppo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pong-ppo.yaml
rename to rllib/tuned_examples/pong-ppo.yaml
diff --git a/python/ray/rllib/tuned_examples/pong-rainbow.yaml b/rllib/tuned_examples/pong-rainbow.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/pong-rainbow.yaml
rename to rllib/tuned_examples/pong-rainbow.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-a2c-torch.yaml b/rllib/tuned_examples/regression_tests/cartpole-a2c-torch.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/cartpole-a2c-torch.yaml
rename to rllib/tuned_examples/regression_tests/cartpole-a2c-torch.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-a3c.yaml b/rllib/tuned_examples/regression_tests/cartpole-a3c.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/cartpole-a3c.yaml
rename to rllib/tuned_examples/regression_tests/cartpole-a3c.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-appo-vtrace.yaml b/rllib/tuned_examples/regression_tests/cartpole-appo-vtrace.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/cartpole-appo-vtrace.yaml
rename to rllib/tuned_examples/regression_tests/cartpole-appo-vtrace.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-appo.yaml b/rllib/tuned_examples/regression_tests/cartpole-appo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/cartpole-appo.yaml
rename to rllib/tuned_examples/regression_tests/cartpole-appo.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-ars.yaml b/rllib/tuned_examples/regression_tests/cartpole-ars.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/cartpole-ars.yaml
rename to rllib/tuned_examples/regression_tests/cartpole-ars.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-dqn.yaml b/rllib/tuned_examples/regression_tests/cartpole-dqn.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/cartpole-dqn.yaml
rename to rllib/tuned_examples/regression_tests/cartpole-dqn.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-es.yaml b/rllib/tuned_examples/regression_tests/cartpole-es.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/cartpole-es.yaml
rename to rllib/tuned_examples/regression_tests/cartpole-es.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-pg.yaml b/rllib/tuned_examples/regression_tests/cartpole-pg.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/cartpole-pg.yaml
rename to rllib/tuned_examples/regression_tests/cartpole-pg.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml b/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/cartpole-ppo.yaml
rename to rllib/tuned_examples/regression_tests/cartpole-ppo.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-appo-vtrace.yaml b/rllib/tuned_examples/regression_tests/pendulum-appo-vtrace.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/pendulum-appo-vtrace.yaml
rename to rllib/tuned_examples/regression_tests/pendulum-appo-vtrace.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml b/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml
rename to rllib/tuned_examples/regression_tests/pendulum-ddpg.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml b/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/pendulum-ppo.yaml
rename to rllib/tuned_examples/regression_tests/pendulum-ppo.yaml
diff --git a/python/ray/rllib/tuned_examples/regression_tests/pendulum-sac.yaml b/rllib/tuned_examples/regression_tests/pendulum-sac.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/regression_tests/pendulum-sac.yaml
rename to rllib/tuned_examples/regression_tests/pendulum-sac.yaml
diff --git a/python/ray/rllib/tuned_examples/swimmer-ars.yaml b/rllib/tuned_examples/swimmer-ars.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/swimmer-ars.yaml
rename to rllib/tuned_examples/swimmer-ars.yaml
diff --git a/python/ray/rllib/tuned_examples/walker2d-ppo.yaml b/rllib/tuned_examples/walker2d-ppo.yaml
similarity index 100%
rename from python/ray/rllib/tuned_examples/walker2d-ppo.yaml
rename to rllib/tuned_examples/walker2d-ppo.yaml
diff --git a/python/ray/rllib/utils/__init__.py b/rllib/utils/__init__.py
similarity index 100%
rename from python/ray/rllib/utils/__init__.py
rename to rllib/utils/__init__.py
diff --git a/python/ray/rllib/utils/actors.py b/rllib/utils/actors.py
similarity index 100%
rename from python/ray/rllib/utils/actors.py
rename to rllib/utils/actors.py
diff --git a/python/ray/rllib/utils/annotations.py b/rllib/utils/annotations.py
similarity index 100%
rename from python/ray/rllib/utils/annotations.py
rename to rllib/utils/annotations.py
diff --git a/python/ray/rllib/utils/compression.py b/rllib/utils/compression.py
similarity index 100%
rename from python/ray/rllib/utils/compression.py
rename to rllib/utils/compression.py
diff --git a/python/ray/rllib/utils/debug.py b/rllib/utils/debug.py
similarity index 100%
rename from python/ray/rllib/utils/debug.py
rename to rllib/utils/debug.py
diff --git a/python/ray/rllib/utils/error.py b/rllib/utils/error.py
similarity index 100%
rename from python/ray/rllib/utils/error.py
rename to rllib/utils/error.py
diff --git a/python/ray/rllib/utils/explained_variance.py b/rllib/utils/explained_variance.py
similarity index 100%
rename from python/ray/rllib/utils/explained_variance.py
rename to rllib/utils/explained_variance.py
diff --git a/python/ray/rllib/utils/filter.py b/rllib/utils/filter.py
similarity index 100%
rename from python/ray/rllib/utils/filter.py
rename to rllib/utils/filter.py
diff --git a/python/ray/rllib/utils/filter_manager.py b/rllib/utils/filter_manager.py
similarity index 100%
rename from python/ray/rllib/utils/filter_manager.py
rename to rllib/utils/filter_manager.py
diff --git a/python/ray/rllib/utils/memory.py b/rllib/utils/memory.py
similarity index 100%
rename from python/ray/rllib/utils/memory.py
rename to rllib/utils/memory.py
diff --git a/python/ray/rllib/utils/policy_client.py b/rllib/utils/policy_client.py
similarity index 100%
rename from python/ray/rllib/utils/policy_client.py
rename to rllib/utils/policy_client.py
diff --git a/python/ray/rllib/utils/policy_server.py b/rllib/utils/policy_server.py
similarity index 100%
rename from python/ray/rllib/utils/policy_server.py
rename to rllib/utils/policy_server.py
diff --git a/python/ray/rllib/utils/schedules.py b/rllib/utils/schedules.py
similarity index 100%
rename from python/ray/rllib/utils/schedules.py
rename to rllib/utils/schedules.py
diff --git a/python/ray/rllib/utils/seed.py b/rllib/utils/seed.py
similarity index 100%
rename from python/ray/rllib/utils/seed.py
rename to rllib/utils/seed.py
diff --git a/python/ray/rllib/utils/tf_ops.py b/rllib/utils/tf_ops.py
similarity index 100%
rename from python/ray/rllib/utils/tf_ops.py
rename to rllib/utils/tf_ops.py
diff --git a/python/ray/rllib/utils/tf_run_builder.py b/rllib/utils/tf_run_builder.py
similarity index 100%
rename from python/ray/rllib/utils/tf_run_builder.py
rename to rllib/utils/tf_run_builder.py
diff --git a/python/ray/rllib/utils/timer.py b/rllib/utils/timer.py
similarity index 100%
rename from python/ray/rllib/utils/timer.py
rename to rllib/utils/timer.py
diff --git a/python/ray/rllib/utils/tracking_dict.py b/rllib/utils/tracking_dict.py
similarity index 100%
rename from python/ray/rllib/utils/tracking_dict.py
rename to rllib/utils/tracking_dict.py
diff --git a/python/ray/rllib/utils/window_stat.py b/rllib/utils/window_stat.py
similarity index 100%
rename from python/ray/rllib/utils/window_stat.py
rename to rllib/utils/window_stat.py