Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RLlib] Change default framework from tf to torch #33604

Merged
Previous commit
Next commit
changed framework in yaml files from tf to torch or tf2
Signed-off-by: Kourosh Hakhamaneshi <[email protected]>
  • Loading branch information
Kourosh Hakhamaneshi authored and Kourosh Hakhamaneshi committed Mar 22, 2023
commit 7e2888bf5b2f47fd733dcc10bb3ddc5ccc0f5e28
2 changes: 1 addition & 1 deletion rllib/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ py_test(
data = [
"tuned_examples/appo/cartpole-appo-vtrace-separate-losses.yaml"
],
args = ["--dir=tuned_examples/appo"]
args = ["--dir=tuned_examples/appo", "--framework=tf"]
)

py_test(
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/a2c/atari-a2c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ atari-a2c:
run: A2C
config:
# Works for both torch and tf.
framework: tf
framework: torch
env_config:
frameskip: 1 # no frameskip
train_batch_size: 500
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/a2c/cartpole-a2c-fake-gpus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-a2c-fake-gpus:
training_iteration: 200
config:
# Works for both torch and tf.
framework: tf
framework: torch
train_batch_size: 20
rollout_fragment_length: auto
num_workers: 0
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/a2c/cartpole-a2c-microbatch.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-a2c-microbatch:
timesteps_total: 1000000
config:
# Works for both torch and tf.
framework: tf
framework: torch
num_workers: 2
gamma: 0.95
rollout_fragment_length: 20
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/a2c/cartpole-a2c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-a2c:
timesteps_total: 500000
config:
# Works for both torch and tf.
framework: tf
framework: torch
train_batch_size: 40
rollout_fragment_length: auto
num_workers: 0
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/a3c/cartpole-a3c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,6 @@ cartpole-a3c:
timesteps_total: 200000
config:
# Works for both torch and tf.
framework: tf
framework: torch
num_workers: 1
gamma: 0.95
2 changes: 1 addition & 1 deletion rllib/tuned_examples/a3c/memory-leak-test-a3c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ memory-leak-test-a3c:
run: A3C
config:
# Works for both torch and tf.
framework: tf
framework: torch
# Switch off np.random, which is known to have memory leaks.
env_config:
config:
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/a3c/pong-a3c.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ pong-a3c:
run: A3C
config:
# Works for both torch and tf.
framework: tf
framework: torch
env_config:
nondeterministic: False # deterministic
num_workers: 16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ multi-agent-cartpole-alpha-star:
timesteps_total: 200000
config:
# Works for both torch and tf.
framework: tf
framework: torch

# 4-agent MA cartpole.
env_config:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ mountaincarcontinuous-apex-ddpg:
episode_reward_mean: 90
config:
# Works for both torch and tf.
framework: tf
framework: torch
clip_rewards: False
num_workers: 16
exploration_config:
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/apex_ddpg/pendulum-apex-ddpg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pendulum-apex-ddpg:
episode_reward_mean: -160
config:
# Works for both torch and tf.
framework: tf
framework: torch
use_huber: True
clip_rewards: False
num_workers: 16
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ cartpole-apex-dqn:
timesteps_total: 250000
config:
# Works for both torch and tf.
framework: tf
framework: torch
num_workers: 3
optimizer:
num_replay_buffer_shards: 1
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/apex_dqn/cartpole-apex-dqn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ cartpole-apex-dqn-training-itr:
timesteps_total: 250000
config:
# Works for both torch and tf.
framework: tf
framework: torch
# Make this work with only 5 CPUs and 0 GPUs:
num_workers: 3
optimizer:
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/apex_dqn/pong-apex-dqn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ pong-apex:
timesteps_total: 4000000
config:
# Works for both torch and tf.
framework: tf
framework: torch
env_config:
frameskip: 1 # no frameskip
target_network_update_freq: 20000
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-appo-vtrace-fake-gpus:
training_iteration: 400
config:
# Works for both torch and tf.
framework: tf
framework: torch
num_envs_per_worker: 5
num_workers: 1
observation_filter: MeanStdFilter
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-appo-vtrace-separate-losses:
timesteps_total: 200000
config:
# Only works for tf|tf2 so far.
framework: tf
framework: tf2
# Switch on >1 loss/optimizer API for TFPolicy and EagerTFPolicy.
_tf_policy_handles_more_than_one_loss: true
# APPO will produce two separate loss terms:
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/appo/cartpole-appo-vtrace.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-appo-vtrace:
timesteps_total: 200000
config:
# Works for both torch and tf.
framework: tf
framework: torch
num_envs_per_worker: 5
num_workers: 4
num_gpus: 0
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/appo/cartpole-appo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-appo:
timesteps_total: 200000
config:
# Works for both torch and tf.
framework: tf
framework: torch
num_envs_per_worker: 5
num_workers: 1
num_gpus: 0
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/appo/frozenlake-appo-vtrace.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ frozenlake-appo-vtrace:
timesteps_total: 1000000
config:
# Works for both torch and tf.
framework: tf
framework: torch

# Sparse reward environment (short horizon).
env_config:
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/appo/halfcheetah-appo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ halfcheetah-appo:
time_total_s: 10800
config:
# Works for both torch and tf.
framework: tf
framework: torch
vtrace: True
gamma: 0.99
lambda: 0.95
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/appo/memory-leak-test-appo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ memory-leak-test-appo:
run: APPO
config:
# Works for both torch and tf.
framework: tf
framework: torch
# Switch off np.random, which is known to have memory leaks.
env_config:
config:
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/appo/multi-agent-cartpole-appo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ multi-agent-cartpole-appo:
timesteps_total: 200000
config:
# Works for both torch and tf.
framework: tf
framework: torch

# 4-agent MA cartpole.
env_config:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ multi-agent-cartpole-crashing-appo:
evaluation/episode_reward_mean: 300.0
config:
# Works for both torch and tf.
framework: tf
framework: torch

env_config:
config:
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/appo/pendulum-appo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ pendulum-appo-vtrace:
timesteps_total: 500000
config:
# Works for both torch and tf.
framework: tf
framework: torch
vtrace: true
num_gpus: 0
num_workers: 1
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/appo/pong-appo.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ pong-appo:
timesteps_total: 5000000
config:
# Works for both torch and tf.
framework: tf
framework: torch
env_config:
frameskip: 1 # no frameskip
vtrace: True
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/ars/cartpole-ars.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-ars:
timesteps_total: 1000000
config:
# Works for both torch and tf.
framework: tf
framework: torch
noise_stdev: 0.02
num_rollouts: 50
rollouts_used: 25
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/ars/swimmer-ars.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ swimmer-ars:
run: ARS
config:
# Works for both torch and tf.
framework: tf
framework: torch
noise_stdev: 0.01
num_rollouts: 1
rollouts_used: 1
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/bc/cartpole-bc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ cartpole-bc:
timesteps_total: 500000
config:
# Works for both torch and tf.
framework: tf
framework: torch
# In order to evaluate on an actual environment, use these following
# settings:
evaluation_num_workers: 1
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/cql/halfcheetah-cql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ halfcheetah_cql:
#input: d4rl.halfcheetah-medium-replay-v0

# Works for both torch and tf.
framework: tf
framework: torch
q_model_config:
fcnet_activation: relu
fcnet_hiddens: [256, 256, 256]
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/cql/pendulum-cql.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pendulum-cql:
timesteps_total: 800000
config:
# Works for both torch and tf.
framework: tf
framework: torch

# Use one or more offline files or "input: sampler" for online learning.
input: 'dataset'
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/ddpg/halfcheetah-ddpg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ halfcheetah-ddpg:
time_total_s: 5400 # 90 minutes
config:
# Works for both torch and tf.
framework: tf
framework: torch
# === Model ===
actor_hiddens: [64, 64]
critic_hiddens: [64, 64]
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/ddpg/memory-leak-test-ddpg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ memory-leak-test-ddpg:
run: DDPG
config:
# Works for both torch and tf.
framework: tf
framework: torch
# Switch off np.random, which is known to have memory leaks.
env_config:
config:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ mountaincarcontinuous-ddpg:
time_total_s: 600 # 10 minutes
config:
# Works for both torch and tf.
framework: tf
framework: torch
# === Model ===
actor_hiddens: [32, 64]
critic_hiddens: [64, 64]
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/ddpg/pendulum-ddpg-fake-gpus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pendulum-ddpg-fake-gpus:
config:
# Works for both torch and tf.
seed: 42
framework: tf
framework: torch
actor_hiddens: [64, 64]
critic_hiddens: [64, 64]
n_step: 1
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/dqn/atari-dqn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ atari-basic-dqn:
run: DQN
config:
# Works for both torch and tf.
framework: tf
framework: torch
env_config:
frameskip: 1 # no frameskip
double_q: false
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/dqn/atari-duel-ddqn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ dueling-ddqn:
run: DQN
config:
# Works for both torch and tf.
framework: tf
framework: torch
env_config:
frameskip: 1 # no frameskip
double_q: true
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/dqn/cartpole-dqn-fake-gpus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-dqn-fake-gpus:
training_iteration: 400
config:
# Works for both torch and tf.
framework: tf
framework: torch
model:
fcnet_hiddens: [64]
fcnet_activation: linear
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/dqn/cartpole-dqn-param-noise.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-dqn-w-param-noise:
timesteps_total: 300000
config:
# Works for both torch and tf.
framework: tf
framework: torch
exploration_config:
type: ParameterNoise
random_timesteps: 10000
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/dqn/cartpole-dqn-softq.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-dqn:
timesteps_total: 100000
config:
# Works for both torch and tf.
framework: tf
framework: torch
model:
fcnet_hiddens: [64]
fcnet_activation: linear
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/dqn/cartpole-dqn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-dqn:
timesteps_total: 100000
config:
# Works for both torch and tf.
framework: tf
framework: torch
model:
fcnet_hiddens: [64]
fcnet_activation: linear
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/dqn/memory-leak-test-dqn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ memory-leak-test-dqn:
run: DQN
config:
# Works for both torch and tf.
framework: tf
framework: torch
# Switch off np.random, which is known to have memory leaks.
env_config:
config:
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/dqn/pong-dqn.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ pong-deterministic-dqn:
time_total_s: 7200
config:
# Works for both torch and tf.
framework: tf
framework: torch
env_config:
nondeterministic: False # deterministic
num_gpus: 1
Expand Down
2 changes: 1 addition & 1 deletion rllib/tuned_examples/es/cartpole-es.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ cartpole-es:
timesteps_total: 500000
config:
# Works for both torch and tf.
framework: tf
framework: torch
num_workers: 2
noise_size: 25000000
episodes_per_batch: 50
2 changes: 1 addition & 1 deletion rllib/tuned_examples/es/humanoid-es.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,5 @@ humanoid-v2-es:
episode_reward_mean: 6000
config:
# Works for both torch and tf.
framework: tf
framework: torch
num_workers: 100
Loading