Changed the framework in YAML files from tf to torch or tf2.

Signed-off-by: Kourosh Hakhamaneshi <[email protected]>
ray-project · gjoliver · Mar 24, 2023 · Mar 22, 2023 · Mar 22, 2023 · Mar 22, 2023
commit 7e2888bf5b2f47fd733dcc10bb3ddc5ccc0f5e28
@@ -188,7 +188,7 @@ py_test(
  data = [
  "tuned_examples/appo/cartpole-appo-vtrace-separate-losses.yaml"
  ],
- args = ["--dir=tuned_examples/appo"]
+ args = ["--dir=tuned_examples/appo", "--framework=tf"]
 )
 
 py_test(

@@ -10,7 +10,7 @@ atari-a2c:
  run: A2C
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  env_config:
  frameskip: 1 # no frameskip
  train_batch_size: 500

@@ -6,7 +6,7 @@ cartpole-a2c-fake-gpus:
  training_iteration: 200
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  train_batch_size: 20
  rollout_fragment_length: auto
  num_workers: 0

@@ -6,7 +6,7 @@ cartpole-a2c-microbatch:
  timesteps_total: 1000000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  num_workers: 2
  gamma: 0.95
  rollout_fragment_length: 20

@@ -6,7 +6,7 @@ cartpole-a2c:
  timesteps_total: 500000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  train_batch_size: 40
  rollout_fragment_length: auto
  num_workers: 0

@@ -6,6 +6,6 @@ cartpole-a3c:
  timesteps_total: 200000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  num_workers: 1
  gamma: 0.95
@@ -6,7 +6,7 @@ memory-leak-test-a3c:
  run: A3C
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  # Switch off np.random, which is known to have memory leaks.
  env_config:
  config:

@@ -5,7 +5,7 @@ pong-a3c:
  run: A3C
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  env_config:
  nondeterministic: False # deterministic
  num_workers: 16

@@ -6,7 +6,7 @@ multi-agent-cartpole-alpha-star:
  timesteps_total: 200000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
 
  # 4-agent MA cartpole.
  env_config:

@@ -6,7 +6,7 @@ mountaincarcontinuous-apex-ddpg:
  episode_reward_mean: 90
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  clip_rewards: False
  num_workers: 16
  exploration_config:

@@ -6,7 +6,7 @@ pendulum-apex-ddpg:
  episode_reward_mean: -160
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  use_huber: True
  clip_rewards: False
  num_workers: 16

@@ -13,7 +13,7 @@ cartpole-apex-dqn:
  timesteps_total: 250000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  num_workers: 3
  optimizer:
  num_replay_buffer_shards: 1

@@ -13,7 +13,7 @@ cartpole-apex-dqn-training-itr:
  timesteps_total: 250000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  # Make this work with only 5 CPUs and 0 GPUs:
  num_workers: 3
  optimizer:

@@ -9,7 +9,7 @@ pong-apex:
  timesteps_total: 4000000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  env_config:
  frameskip: 1 # no frameskip
  target_network_update_freq: 20000

@@ -6,7 +6,7 @@ cartpole-appo-vtrace-fake-gpus:
  training_iteration: 400
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  num_envs_per_worker: 5
  num_workers: 1
  observation_filter: MeanStdFilter

@@ -6,7 +6,7 @@ cartpole-appo-vtrace-separate-losses:
  timesteps_total: 200000
  config:
  # Only works for tf|tf2 so far.
- framework: tf
+ framework: tf2
  # Switch on >1 loss/optimizer API for TFPolicy and EagerTFPolicy.
  _tf_policy_handles_more_than_one_loss: true
  # APPO will produce two separate loss terms:

@@ -6,7 +6,7 @@ cartpole-appo-vtrace:
  timesteps_total: 200000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  num_envs_per_worker: 5
  num_workers: 4
  num_gpus: 0

@@ -6,7 +6,7 @@ cartpole-appo:
  timesteps_total: 200000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  num_envs_per_worker: 5
  num_workers: 1
  num_gpus: 0

@@ -6,7 +6,7 @@ frozenlake-appo-vtrace:
  timesteps_total: 1000000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
 
  # Sparse reward environment (short horizon).
  env_config:

@@ -7,7 +7,7 @@ halfcheetah-appo:
  time_total_s: 10800
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  vtrace: True
  gamma: 0.99
  lambda: 0.95

@@ -4,7 +4,7 @@ memory-leak-test-appo:
  run: APPO
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  # Switch off np.random, which is known to have memory leaks.
  env_config:
  config:

@@ -6,7 +6,7 @@ multi-agent-cartpole-appo:
  timesteps_total: 200000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
 
  # 4-agent MA cartpole.
  env_config:

@@ -5,7 +5,7 @@ multi-agent-cartpole-crashing-appo:
  evaluation/episode_reward_mean: 300.0
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
 
  env_config:
  config:

@@ -6,7 +6,7 @@ pendulum-appo-vtrace:
  timesteps_total: 500000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  vtrace: true
  num_gpus: 0
  num_workers: 1

@@ -11,7 +11,7 @@ pong-appo:
  timesteps_total: 5000000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  env_config:
  frameskip: 1 # no frameskip
  vtrace: True

@@ -6,7 +6,7 @@ cartpole-ars:
  timesteps_total: 1000000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  noise_stdev: 0.02
  num_rollouts: 50
  rollouts_used: 25

@@ -4,7 +4,7 @@ swimmer-ars:
  run: ARS
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  noise_stdev: 0.01
  num_rollouts: 1
  rollouts_used: 1

@@ -9,7 +9,7 @@ cartpole-bc:
  timesteps_total: 500000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  # In order to evaluate on an actual environment, use these following
  # settings:
  evaluation_num_workers: 1

@@ -14,7 +14,7 @@ halfcheetah_cql:
  #input: d4rl.halfcheetah-medium-replay-v0
 
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  q_model_config:
  fcnet_activation: relu
  fcnet_hiddens: [256, 256, 256]

@@ -10,7 +10,7 @@ pendulum-cql:
  timesteps_total: 800000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
 
  # Use one or more offline files or "input: sampler" for online learning.
  input: 'dataset'

@@ -7,7 +7,7 @@ halfcheetah-ddpg:
  time_total_s: 5400 # 90 minutes
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  # === Model ===
  actor_hiddens: [64, 64]
  critic_hiddens: [64, 64]

@@ -4,7 +4,7 @@ memory-leak-test-ddpg:
  run: DDPG
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  # Switch off np.random, which is known to have memory leaks.
  env_config:
  config:

@@ -7,7 +7,7 @@ mountaincarcontinuous-ddpg:
  time_total_s: 600 # 10 minutes
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  # === Model ===
  actor_hiddens: [32, 64]
  critic_hiddens: [64, 64]

@@ -7,7 +7,7 @@ pendulum-ddpg-fake-gpus:
  config:
  # Works for both torch and tf.
  seed: 42
- framework: tf
+ framework: torch
  actor_hiddens: [64, 64]
  critic_hiddens: [64, 64]
  n_step: 1

@@ -10,7 +10,7 @@ atari-basic-dqn:
  run: DQN
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  env_config:
  frameskip: 1 # no frameskip
  double_q: false

@@ -10,7 +10,7 @@ dueling-ddqn:
  run: DQN
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  env_config:
  frameskip: 1 # no frameskip
  double_q: true

@@ -6,7 +6,7 @@ cartpole-dqn-fake-gpus:
  training_iteration: 400
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  model:
  fcnet_hiddens: [64]
  fcnet_activation: linear

@@ -6,7 +6,7 @@ cartpole-dqn-w-param-noise:
  timesteps_total: 300000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  exploration_config:
  type: ParameterNoise
  random_timesteps: 10000

@@ -6,7 +6,7 @@ cartpole-dqn:
  timesteps_total: 100000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  model:
  fcnet_hiddens: [64]
  fcnet_activation: linear

@@ -6,7 +6,7 @@ cartpole-dqn:
  timesteps_total: 100000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  model:
  fcnet_hiddens: [64]
  fcnet_activation: linear

@@ -4,7 +4,7 @@ memory-leak-test-dqn:
  run: DQN
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  # Switch off np.random, which is known to have memory leaks.
  env_config:
  config:

@@ -7,7 +7,7 @@ pong-deterministic-dqn:
  time_total_s: 7200
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  env_config:
  nondeterministic: False # deterministic
  num_gpus: 1

@@ -6,7 +6,7 @@ cartpole-es:
  timesteps_total: 500000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  num_workers: 2
  noise_size: 25000000
  episodes_per_batch: 50
@@ -5,5 +5,5 @@ humanoid-v2-es:
  episode_reward_mean: 6000
  config:
  # Works for both torch and tf.
- framework: tf
+ framework: torch
  num_workers: 100