Fix bug in simulate_mdp

`simulate_mdp` fails if the simulation starts in a terminal state. From #2:

```
If you aren't careful you leave the starting point at 0,0 when you move to cliff world, and then simulate_mdp blows up because you immediately terminate and there are no available actions.
```
alessiodm · May 7, 2024 · 52d37a3 · 52d37a3
1 parent 9f0f7c2
commit 52d37a3
Showing 1 changed file with 4 additions and 4 deletions.
diff --git a/util/gridworld.py b/util/gridworld.py
@@ -193,7 +193,9 @@ def simulate_mdp(mdp, policy, max_iterations=20) -> list[Step]:
  steps = []
  state = mdp.start
  current_iteration = 0
- while True:
+ while current_iteration != max_iterations and \
+ not mdp.is_terminal(state) and \
+ mdp.is_reachable(state):
  current_iteration += 1
  action = policy(state)
  state_probs = [(s, p) for s, p in mdp.transition(state, action).items()]
@@ -203,9 +205,7 @@ def simulate_mdp(mdp, policy, max_iterations=20) -> list[Step]:
  reward = mdp.reward(state, action, next_state)
  steps.append(Step(state, action, reward))
  state = next_state
- if current_iteration == max_iterations or mdp.is_terminal(state):
- steps.append(Step(next_state, None, 0.0))
- break
+ steps.append(Step(state, None, 0.0))
  return steps
 
 #--------------------------