Pad state tensor with zeros and use masked softmax for batch training
ChanganVR committed Sep 21, 2018
1 parent 176a817 commit 6c20378
Showing 8 changed files with 29 additions and 24 deletions.
4 changes: 2 additions & 2 deletions dynav/configs/orca_env.config
@@ -14,8 +14,8 @@ discomfort_penalty_factor = 0.5
 
 
 [sim]
-train_val_sim = circle_crossing
-test_sim = circle_crossing
+train_val_sim = mixed
+test_sim = mixed
 square_width = 10
 circle_radius = 4
 ped_num = 5
2 changes: 1 addition & 1 deletion dynav/configs/train.config
@@ -16,7 +16,7 @@ rl_learning_rate = 0.001
 # number of batches to train at the end of training episode
 train_batches = 100
 # training episodes in outer loop
-train_episodes = 11000
+train_episodes = 20000
 # number of episodes sampled in one training episode
 sample_episodes = 1
 target_update_interval = 50
2 changes: 1 addition & 1 deletion dynav/policy/cadrl.py
@@ -143,7 +143,7 @@ def predict(self, state):
 
         if self.reach_destination(state):
             return ActionXY(0, 0) if self.kinematics == 'holonomic' else ActionRot(0, 0)
-        if self.action_space is not None:
+        if self.action_space is None:
             self.build_action_space(state.self_state.v_pref)
 
         probability = np.random.random()
7 changes: 5 additions & 2 deletions dynav/policy/sarl.py
@@ -1,4 +1,3 @@
-import numpy as np
 import torch
 import torch.nn as nn
 from torch.nn.functional import softmax
@@ -47,7 +46,11 @@ def forward(self, state):
         else:
             attention_input = mlp1_output
         scores = self.attention(attention_input).view(size[0], size[1], 1).squeeze(dim=2)
-        weights = softmax(scores, dim=1).unsqueeze(2)
+
+        # masked softmax
+        # weights = softmax(scores, dim=1).unsqueeze(2)
+        scores_exp = torch.exp(scores) * (scores != 0).float()
+        weights = (scores_exp / torch.sum(scores_exp, dim=1, keepdim=True)).unsqueeze(2)
         self.attention_weights = weights[0, :, 0].data.cpu().numpy()
 
         # output feature is a linear combination of input features
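The masked softmax above relies on the zero-padding added in explorer.py further down: padded pedestrian rows are assumed to produce attention scores of exactly zero, so multiplying the exponentiated scores by a (scores != 0) mask drops them from the normalization. A minimal standalone sketch of that computation, with illustrative names that are not from the repository:

import torch

def masked_softmax(scores):
    # scores: (batch, max_peds); entries belonging to padded pedestrians are exactly 0
    mask = (scores != 0).float()
    scores_exp = torch.exp(scores) * mask                           # zero out padded entries
    return scores_exp / torch.sum(scores_exp, dim=1, keepdim=True)

scores = torch.tensor([[1.2, -0.3, 0.4, 0.0, 0.0]])  # 3 real pedestrians, 2 padded
weights = masked_softmax(scores)                     # padded weights are 0, the rest sum to 1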
6 changes: 1 addition & 5 deletions dynav/test.py
@@ -104,11 +104,7 @@ def main():
         ped_times = env.get_ped_times()
         logging.info('Average time for peds to reach goal: {:.2f}'.format(sum(ped_times) / len(ped_times)))
     else:
-        nav_times, rewards = explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
-        if args.model_dir is not None:
-            with open(os.path.join(args.model_dir, 'results.txt'), mode='w') as fo:
-                fo.write(' '.join([str(time) for time in nav_times]))
-                fo.write('\n' + ' '.join([str(reward) for reward in rewards]))
+        explorer.run_k_episodes(env.case_size[args.phase], args.phase, print_failure=True)
 
 
 if __name__ == '__main__':
27 changes: 16 additions & 11 deletions dynav/utils/explorer.py
@@ -21,8 +21,9 @@ def update_target_model(self, target_model):
     def run_k_episodes(self, k, phase, update_memory=False, imitation_learning=False, episode=None,
                        print_failure=False):
         self.navigator.policy.set_phase(phase)
-        navigator_times = []
-        all_nav_times = []
+        success_times = []
+        collision_times = []
+        timeout_times = []
         success = 0
         collision = 0
         timeout = 0
@@ -50,16 +51,15 @@ def run_k_episodes(self, k, phase, update_memory=False, imitation_learning=False
 
             if isinstance(info, ReachGoal):
                 success += 1
-                navigator_times.append(self.env.global_time)
-                all_nav_times.append(self.env.global_time)
+                success_times.append(self.env.global_time)
             elif isinstance(info, Collision):
                 collision += 1
                 collision_cases.append(i)
-                all_nav_times.append(self.env.time_limit)
+                collision_times.append(self.env.global_time)
            elif isinstance(info, Timeout):
                 timeout += 1
                 timeout_cases.append(i)
-                all_nav_times.append(self.env.time_limit)
+                timeout_times.append(self.env.time_limit)
             else:
                 raise ValueError('Invalid end signal from environment')
 

Expand All @@ -68,27 +68,27 @@ def run_k_episodes(self, k, phase, update_memory=False, imitation_learning=False
# only add positive(success) or negative(collision) experience in experience set
self.update_memory(states, actions, rewards, imitation_learning)

cumulative_rewards.append(sum([pow(self.gamma, t * self.navigator.time_step * self.navigator.v_pref) * reward
for t, reward in enumerate(rewards)]))
cumulative_rewards.append(sum([pow(self.gamma, t * self.navigator.time_step * self.navigator.v_pref)
* reward for t, reward in enumerate(rewards)]))

success_rate = success / k
collision_rate = collision / k
assert success + collision + timeout == k
avg_nav_time = sum(navigator_times) / len(navigator_times) if len(navigator_times) != 0 else self.env.time_limit
avg_nav_time = sum(success_times) / len(success_times) if len(success_times) != 0 else self.env.time_limit

extra_info = '' if episode is None else 'in episode {} '.format(episode)
logging.info('{:<5} {}has success rate: {:.2f}, collision rate: {:.2f}, nav time: {:.2f}, total reward: {:.4f}'.
format(phase.upper(), extra_info, success_rate, collision_rate, avg_nav_time,
average(cumulative_rewards)))
if phase in ['val', 'test']:
total_time = sum(success_times + collision_times + timeout_times) * self.navigator.time_step
logging.info('Frequency of being in danger: {:.2f} and average min separate distance in danger: {:.2f}'.
format(too_close/sum(navigator_times)*self.navigator.time_step, average(min_dist)))
format(too_close / total_time, average(min_dist)))

if print_failure:
logging.info('Collision cases: ' + ' '.join([str(x) for x in collision_cases]))
logging.info('Timeout cases: ' + ' '.join([str(x) for x in timeout_cases]))

return all_nav_times, cumulative_rewards

def update_memory(self, states, actions, rewards, imitation_learning=False):
if self.memory is None or self.gamma is None:
@@ -115,6 +115,11 @@ def update_memory(self, states, actions, rewards, imitation_learning=False):
                 value = reward + gamma_bar * self.target_model(next_state.unsqueeze(0)).data.item()
             value = torch.Tensor([value]).to(self.device)
 
+            # transform state of different ped_num into fixed-size tensor
+            ped_num, feature_size = state.size()
+            if ped_num != 5:
+                padding = torch.zeros((5 - ped_num, feature_size))
+                state = torch.cat([state, padding])
             self.memory.push((state, value))
 
 
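The padding above is what enables batch training: with the mixed simulation a scene contains anywhere from one to five pedestrians (see crowd_sim.py below), so per-step state tensors have different first dimensions, and appending all-zero rows gives every stored tensor the same shape so a batch can be stacked. A small self-contained sketch of the idea (the function name and the feature size of 13 are purely illustrative):

import torch

def pad_state(state, max_peds=5):
    # state: (ped_num, feature_size) joint state for one time step
    ped_num, feature_size = state.size()
    if ped_num != max_peds:
        padding = torch.zeros((max_peds - ped_num, feature_size))
        state = torch.cat([state, padding])
    return state

# states from scenes with 2 and 5 pedestrians now stack into a single (2, 5, 13) batch
batch = torch.stack([pad_state(torch.rand(2, 13)), pad_state(torch.rand(5, 13))])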
2 changes: 0 additions & 2 deletions gym_crowd/envs/crowd_sim.py
@@ -107,7 +107,6 @@ def generate_random_ped_position(self, ped_num, rule):
             dynamic_ped_num = {1: 0.2, 2: 0.2, 3: 0.2, 4: 0.2, 5: 0.2}
             static = True if np.random.random() < 0.2 else False
             prob = np.random.random()
-            print(prob)
             for key, value in sorted(static_ped_num.items() if static else dynamic_ped_num.items()):
                 if prob - value <= 0:
                     ped_num = key
@@ -116,7 +115,6 @@ def generate_random_ped_position(self, ped_num, rule):
                     prob -= value
             self.peds = []
             if static:
-                print('static')
                 # randomly initialize static objects in a square of (width, height)
                 width = 4
                 height = 8
3 changes: 3 additions & 0 deletions gym_crowd/envs/policy/orca.py
@@ -92,6 +92,9 @@ def predict(self, state):
         """
         self_state = state.self_state
         params = self.neighbor_dist, self.max_neighbors, self.time_horizon, self.time_horizon_obst
+        if self.sim is not None and self.sim.getNumAgents() != len(state.ped_states) + 1:
+            del self.sim
+            self.sim = None
         if self.sim is None:
             self.sim = rvo2.PyRVOSimulator(self.time_step, *params, self.radius, self.max_speed)
             self.sim.addAgent(self_state.position, *params, self_state.radius + 0.01 + self.safety_space,
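The guard added above is how ORCA copes with the variable pedestrian count: the rvo2 simulator persists across calls to predict(), so when its agent count no longer matches the current scene (pedestrians plus the navigator itself) it is discarded and rebuilt on the next check. A rough, simplified sketch of the resulting control flow, assuming the python-rvo2 bindings are installed (the class name and agent setup are illustrative, not the repository's):

import rvo2

class OrcaSketch:
    def __init__(self, time_step, neighbor_dist, max_neighbors, time_horizon, time_horizon_obst, radius, max_speed):
        self.sim = None
        self.time_step = time_step
        self.params = neighbor_dist, max_neighbors, time_horizon, time_horizon_obst
        self.radius = radius
        self.max_speed = max_speed

    def predict(self, state):
        # drop a stale simulator whose agent count no longer matches the scene ("+ 1" is the navigator)
        if self.sim is not None and self.sim.getNumAgents() != len(state.ped_states) + 1:
            del self.sim
            self.sim = None
        if self.sim is None:
            # rebuild for the current number of agents; agents are then re-added via self.sim.addAgent(...)
            self.sim = rvo2.PyRVOSimulator(self.time_step, *self.params, self.radius, self.max_speed)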
