OpenAI Gym defines CartPole-v0 as solved "when the average reward is greater than or equal to 195.0 over 100 consecutive trials."
; NOTE(review): the keys below look like PPO-style training hyperparameters;
; meanings are inferred from the names only. Confirm against the code that reads them.
; NOTE(review): these ';' comment lines assume an INI-style reader; confirm the consumer accepts them.
; Presumably the reward discount factor. TODO confirm.
gamma = 0.99
; Presumably the GAE (advantage estimation) lambda. TODO confirm.
lambda = 0.95
; Presumably how often a policy update runs; the unit (episodes or steps) is not shown here. TODO confirm.
update_freq = 1
; Presumably the number of optimisation epochs per update. TODO confirm.
k_epoch = 3
; Presumably the optimiser's starting learning rate; the name suggests it may be decayed later. TODO confirm.
initial_learning_rate = 0.02
; Presumably the clipping range applied to the policy probability ratio. TODO confirm.
eps_clip = 0.2
; Presumably the weight of the value-function loss term. TODO confirm.
v_coef = 1
; Presumably the weight of the entropy bonus term. TODO confirm.
entropy_coef = 0.01