-
Notifications
You must be signed in to change notification settings - Fork 4
/
trainer.py
58 lines (51 loc) · 2.25 KB
/
trainer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import gym
import numpy as np
import matplotlib.pyplot as plt
from Discrete_SAC_Agent import SACAgent
# Every TRAINING_EVALUATION_RATIO-th episode (episode_number % ratio == 0) is an
# evaluation episode; the remaining episodes are used for training.
TRAINING_EVALUATION_RATIO = 4
# Number of independent runs; each run starts with a newly constructed SACAgent.
RUNS = 5
# Total episodes per run (training and evaluation episodes combined).
EPISODES_PER_RUN = 400
# Upper bound on environment steps taken in a single episode.
STEPS_PER_EPISODE = 200
if __name__ == "__main__":
    # Train a fresh SACAgent on CartPole for RUNS independent runs. Within a
    # run, every TRAINING_EVALUATION_RATIO-th episode is an evaluation episode
    # (no learning, reward is accumulated); all other episodes feed
    # transitions to the agent. Afterwards the evaluation scores are
    # aggregated across runs and plotted as mean +/- one standard deviation.
    #
    # NOTE(review): this uses the classic Gym API (reset() returns only the
    # observation, step() returns a 4-tuple) - confirm against the installed
    # gym version.
    env = gym.make("CartPole-v1")
    agent_results = []
    try:
        for run in range(RUNS):
            agent = SACAgent(env)
            run_results = []
            for episode_number in range(EPISODES_PER_RUN):
                # flush=True so the carriage-return progress line is shown
                # immediately even when stdout is block-buffered.
                print('\r', f'Run: {run + 1}/{RUNS} | Episode: {episode_number + 1}/{EPISODES_PER_RUN}', end=' ',
                      flush=True)
                evaluation_episode = episode_number % TRAINING_EVALUATION_RATIO == 0
                episode_reward = 0
                state = env.reset()
                # Cap the episode at STEPS_PER_EPISODE steps; stop early when
                # the environment reports the episode is done.
                for _ in range(STEPS_PER_EPISODE):
                    action = agent.get_next_action(state, evaluation_episode=evaluation_episode)
                    next_state, reward, done, _ = env.step(action)
                    if evaluation_episode:
                        episode_reward += reward
                    else:
                        agent.train_on_transition(state, action, next_state, reward, done)
                    state = next_state
                    if done:
                        break
                if evaluation_episode:
                    run_results.append(episode_reward)
            agent_results.append(run_results)
    finally:
        # Release the environment even if training fails part-way through.
        env.close()

    # Rows are runs, columns are evaluation episodes. Every run records the
    # same number of evaluation episodes, so the array is rectangular. Taking
    # the count from the data (instead of EPISODES_PER_RUN // ratio) keeps the
    # last evaluation episode when the episode count is not a multiple of the
    # ratio.
    scores = np.asarray(agent_results, dtype=float)
    results_mean = scores.mean(axis=0)
    results_std = scores.std(axis=0)
    mean_plus_std = results_mean + results_std
    mean_minus_std = results_mean - results_std

    # Evaluation episode n is preceded by n * (ratio - 1) training episodes,
    # which is the quantity shown on the x-axis.
    x_vals = np.arange(results_mean.shape[0]) * (TRAINING_EVALUATION_RATIO - 1)

    ax = plt.gca()
    # Assumes CartPole's +1 reward per step, so the step cap is also the
    # maximum achievable episode score.
    ax.set_ylim([0, STEPS_PER_EPISODE])
    ax.set_ylabel('Episode Score')
    ax.set_xlabel('Training Episode')
    ax.plot(x_vals, results_mean, label='Average Result', color='blue')
    ax.plot(x_vals, mean_plus_std, color='blue', alpha=0.1)
    ax.plot(x_vals, mean_minus_std, color='blue', alpha=0.1)
    ax.fill_between(x_vals, y1=mean_minus_std, y2=mean_plus_std, alpha=0.1, color='blue')
    plt.legend(loc='best')
    plt.show()