-
Notifications
You must be signed in to change notification settings - Fork 5
/
bellman.py
30 lines (25 loc) · 1.13 KB
/
bellman.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
"""
Another approach to dynamic programming.
The example from `https://www.stanford.edu/class/ee365/lectures/det_sys`
>>> inf = float('inf')
>>> nodes = list('abcd')
>>> edges = {'ab': 2, 'ac': 6, 'bc': 3, 'bd': 8, 'cb': 1, 'cd': 4, 'dd': 0}
>>> step = lambda t, x, u, T: u
>>> cost = lambda t, x, u, T: edges.get(x + u, inf) if t < T - 1 else (0 if x == 'd' else inf)
>>> p = dict(policy(T=4, X=lambda t: nodes, U=lambda t, x: nodes, f=step, g=cost))
>>> P = {0: {'a': (9, 'b'), 'b': (7, 'c'), 'c': (4, 'd'), 'd': (0, 'd')}, \
1: {'a': (10, 'b'), 'b': (7, 'c'), 'c': (4, 'd'), 'd': (0, 'd')}, \
2: {'a': (inf, 'a'), 'b': (8, 'd'), 'c': (4, 'd'), 'd': (0, 'd')}, \
3: {'a': (inf, 'a'), 'b': (inf, 'a'), 'c': (inf, 'a'), 'd': (0, 'a')}}
>>> p == P
True
"""
def step(t, x, u, T=None):
"""Return the state at time t + 1."""
def cost(t, x, u, T=None):
"""Return a real or infinite value."""
def policy(T, X, U, f=step, g=cost, p={}):
V = lambda x: p.get(x, (0, None))[0]
for t in reversed(xrange(T)):
p = dict((x, min((g(t, x, u, T) + V(f(t, x, u, T)), u) for u in U(t, x))) for x in X(t))
yield t, p