-
Notifications
You must be signed in to change notification settings - Fork 0
/
utils.py
207 lines (167 loc) · 7.08 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
import os
import json
import numpy as np
class Seeder(object):
    """Hands out integer random seeds drawn from NumPy's global generator.

    Creating a Seeder reseeds ``np.random`` with ``init_seed``; every value
    produced afterwards comes from ``np.random.randint`` with ``self.limit``
    as the exclusive upper bound.
    """

    def __init__(self, init_seed=0):
        # Exclusive upper bound for generated seeds (2**31 - 1 as an int32).
        self.limit = np.int32(2**31 - 1)
        # Reseeds the process-wide NumPy generator, not a private one.
        np.random.seed(init_seed)

    def next_seed(self):
        """Return a single freshly drawn seed."""
        return np.random.randint(self.limit)

    def next_batch(self, batch_size):
        """Return ``batch_size`` freshly drawn seeds as a plain Python list."""
        drawn = np.random.randint(self.limit, size=batch_size)
        return drawn.tolist()
class Logger(object):
    """Persists run artifacts as JSON files and prints progress lines.

    Four files are derived from ``<game_name>-<es_name>-<pop_size>`` inside
    the log directory: ``.json`` (parameters), ``.eval.json``,
    ``.hist.json`` and ``.best.json``.
    """

    def __init__(self, args):
        """Prepare the log directory, compute file paths and print task info.

        Args:
            args: object exposing ``log_dir`` (str or None; ``./log/`` is used
                when None), ``game_name`` and ``es_name`` (str), and
                ``num_workers`` and ``num_trails`` (int).

        Raises:
            OSError: if the log directory cannot be created.
        """
        log_dir = './log/' if args.log_dir is None else args.log_dir
        # Create unconditionally and tolerate "already exists": an
        # exists()-then-makedirs() check can race when several processes
        # start at the same time.
        try:
            os.makedirs(log_dir)
        except OSError:
            if not os.path.isdir(log_dir):
                raise

        pop_size = args.num_workers * args.num_trails
        # os.path.join keeps the files inside log_dir even when the caller
        # passes a directory without a trailing separator.
        filebase = os.path.join(
            log_dir,
            args.game_name + '-' + args.es_name + '-' + str(pop_size))
        self.file = filebase + '.json'
        self.file_eval = filebase + '.eval.json'
        self.file_hist = filebase + '.hist.json'
        self.file_best = filebase + '.best.json'

        # print task info
        print('------------------------------')
        print('%-20s: %s' % ('Game', args.game_name))
        print('%-20s: %d' % ('Num_worker', args.num_workers))
        print('%-20s: %d' % ('Num_worker_trial', args.num_trails))
        print('%-20s: %d' % ('Population', pop_size))
        print('------------------------------')

    def write_params(self, params):
        """Overwrite the parameter file with ``params`` rounded to 4 decimals.

        The values are stored wrapped in a one-element outer list.
        """
        with open(self.file, 'wt') as out:
            json.dump(
                [np.array(params).round(4).tolist()], out,
                sort_keys=True, indent=2, separators=(',', ': '))

    def write_history(self, history):
        """Overwrite the history file with ``history`` (JSON-serializable)."""
        with open(self.file_hist, 'wt') as out:
            json.dump(
                history, out,
                sort_keys=False, indent=0, separators=(',', ': '))

    def write_eval(self, eval_info):
        """Overwrite the evaluation file with ``eval_info``."""
        with open(self.file_eval, 'wt') as out:
            json.dump(eval_info, out)

    def write_best(self, best):
        """Overwrite the best-result file with ``best`` (keys sorted)."""
        with open(self.file_best, 'wt') as out:
            json.dump(
                best, out,
                sort_keys=True, indent=0, separators=(',', ': '))

    def log_gen(self, info):
        """Print one generation summary line.

        Args:
            info: 8-tuple matching the format string: gen, time, avg, min,
                max, std, avg_steps, avg_sigma.
        """
        print('gen:%-4d time:%-4d avg|min|max|std: %-8.2f %-6.2f %-8.2f %-6.2f avg_steps: %-6.1f avg_sigma: %-6.4f' % info)

    def log_eval(self, info):
        """Print one evaluation summary line.

        Args:
            info: 4-tuple matching the format string: gen, curr_rew,
                improvement, best_rew.
        """
        print('EVAL gen:%-4d curr_rew:%-8.2f improvement: %-8.2f best_rew: %-8.2f' % info)
class Communicator(object):
    """Common state for the master and worker ends of the message channel.

    Holds the communicator handle, the rank layout (master is rank 0,
    workers are ranks 1..num_workers) and the sizes of the integer packets
    exchanged in each direction.
    """

    def __init__(self, comm, args, num_params):
        """Record the rank layout and derive the packet sizes.

        Args:
            comm: communicator handle; it is only stored here.
            args: object exposing integer ``num_workers`` and ``num_trails``.
            num_params: number of parameters in one solution.
        """
        workers = args.num_workers
        trials = args.num_trails

        self._comm = comm
        self.master_rank = 0
        self.num_workers = workers
        self.worker_ranks = range(1, workers + 1)
        self.num_trails = trials
        self.pop_size = workers * trials
        # Scale factor applied to floats so they can travel in an int array.
        self.precision = 10000
        # Per worker and per trial: 5 header slots plus the parameters.
        self.solution_packet_size = trials * (5 + num_params)
        # Per worker and per trial: 4 result slots.
        self.result_packet_size = trials * 4
class MasterComm(Communicator):
    """Master-side endpoint: ships solutions to workers and collects results.

    ``comm`` is presumably an mpi4py communicator (only its ``Send`` and
    ``Recv`` methods are used here) -- TODO confirm against the caller.
    """

    def __init__(self, comm, args, num_params):
        Communicator.__init__(self, comm, args, num_params)

    def _encode_solution(self, seeds, solutions, is_train, max_len=-1):
        """Pack all solutions into one int32 packet per worker.

        Each solution becomes a 5-slot header
        ``[worker_num, job_index, seed, train_flag, max_len]`` followed by
        its parameters scaled by ``self.precision`` and rounded.

        Args:
            seeds: sequence of integer seeds, one per solution.
            solutions: sequence of parameter vectors, indexed like ``seeds``.
            is_train: truthy to set the train flag to 1, otherwise 0.
            max_len: integer forwarded unchanged in every header.

        Returns:
            List of ``self.num_workers`` equally sized int32 arrays.

        Raises:
            ValueError: if the packed data cannot be split evenly across
                ``self.num_workers`` (raised by ``np.split``).
        """
        train_mode = 1 if is_train else 0
        chunks = []
        for i, seed in enumerate(seeds):
            # Consecutive groups of num_trails jobs go to the same worker;
            # worker numbering starts at 1 because rank 0 is the master.
            worker_num = i // self.num_trails + 1
            chunks.append([worker_num, i, seed, train_mode, max_len])
            chunks.append(np.round(np.array(solutions[i]) * self.precision, 0))
        # NOTE(review): scaled values outside the int32 range would not
        # survive this cast -- confirm parameter magnitudes stay small.
        packed = np.concatenate(chunks).astype(np.int32)
        return np.split(packed, self.num_workers)

    def distribute_solutions(self, seeds, solutions, is_train=True, max_len=-1):
        """Encode the population and send each worker its packet."""
        solution_list = self._encode_solution(
            seeds, solutions, is_train, max_len=max_len)
        assert len(solution_list) == self.num_workers
        for rank in self.worker_ranks:
            packet = solution_list[rank - 1]
            assert len(packet) == self.solution_packet_size
            self._comm.Send(packet, dest=rank)

    def gather_results(self):
        """Receive one result packet from every worker, in rank order.

        Returns:
            Float array of shape ``(pop_size, 2)``; row ``j`` holds the two
            decoded values reported for job ``j`` (named ``fits`` and
            ``times`` by the decoder).

        Raises:
            AssertionError: if a packet carries a worker id different from
                the rank it was received from, or if some job index never
                received a result.
        """
        result_packet = np.empty(self.result_packet_size, dtype=np.int32)
        reward_list_total = np.zeros((self.pop_size, 2))
        # One flag per job, cleared when its result arrives. The builtin int
        # replaces the np.int alias, which was removed in NumPy 1.24.
        pending = np.ones(self.pop_size, dtype=int)
        for rank in self.worker_ranks:
            self._comm.Recv(result_packet, source=rank)
            for worker_id, idx, fit, elapsed in self._decode_result(result_packet):
                worker_id = int(worker_id)
                assert worker_id == rank, 'work_id=%d source=%d' % (worker_id, rank)
                idx = int(idx)
                reward_list_total[idx, 0] = fit
                reward_list_total[idx, 1] = elapsed
                pending[idx] = 0
        check_sum = pending.sum()
        assert check_sum == 0, check_sum
        return reward_list_total

    def _decode_result(self, packet):
        """Unpack one worker's int32 result packet.

        Args:
            packet: int32 array holding ``self.num_trails * 4`` values.

        Returns:
            List of ``[worker_id, job_index, fit, time]`` lists, where the
            last two entries are divided by ``self.precision``.
        """
        r = packet.reshape(self.num_trails, 4)
        workers = r[:, 0].tolist()
        jobs = r[:, 1].tolist()
        # np.float64 replaces the np.float alias, removed in NumPy 1.24.
        fits = (r[:, 2].astype(np.float64) / self.precision).tolist()
        times = (r[:, 3].astype(np.float64) / self.precision).tolist()
        return [list(row) for row in zip(workers, jobs, fits, times)]
class WorkerComm(Communicator):
    """Worker-side endpoint: receives solutions and reports results.

    ``comm`` is presumably an mpi4py communicator (only its ``Send`` and
    ``Recv`` methods are used here) -- TODO confirm against the caller.
    """

    def __init__(self, comm, args, num_params):
        Communicator.__init__(self, comm, args, num_params)

    def receive_solution(self):
        """Receive this worker's packet from the master and decode it.

        Returns:
            The list produced by ``_decode_solution``.
        """
        solution_packet = np.empty(self.solution_packet_size, dtype=np.int32)
        self._comm.Recv(solution_packet, source=self.master_rank)
        assert len(solution_packet) == self.solution_packet_size
        return self._decode_solution(solution_packet)

    def _decode_solution(self, packet):
        """Split a packet into ``self.num_trails`` per-trial entries.

        Args:
            packet: int32 array whose length is a multiple of
                ``self.num_trails``.

        Returns:
            List with one ``[slot0, slot1, slot2, flag, slot4, params]``
            entry per trial: the first three header slots as Python ints,
            ``flag`` true when header slot 3 equals 1, header slot 4
            unchanged, and ``params`` as a float array divided by
            ``self.precision``.
        """
        return [
            # np.float64 replaces the np.float alias, removed in NumPy 1.24.
            [int(p[0]), int(p[1]), int(p[2]), p[3] == 1,
             p[4], p[5:].astype(np.float64) / self.precision]
            for p in np.split(packet, self.num_trails)
        ]

    def send_results(self, results):
        """Encode ``results`` and send them to the master."""
        result_packet = self._encode_result(results)
        assert len(result_packet) == self.result_packet_size
        self._comm.Send(result_packet, dest=self.master_rank)

    def _encode_result(self, results):
        """Pack result rows into a flat int32 array.

        Args:
            results: sequence of 4-element rows; columns 2 and 3 are scaled
                by ``self.precision`` before the integer cast.

        Returns:
            Flat int32 array with four values per row.
        """
        # Work on a float copy so the scaling never depends on the dtype
        # NumPy would infer from the input.
        r = np.array(results, dtype=np.float64)
        r[:, 2:4] *= self.precision
        # Round before casting: a bare astype truncates toward zero, which
        # turns e.g. 2899.9999999 into 2899. The solution encoder on the
        # master side rounds in the same way.
        return np.round(r).flatten().astype(np.int32)
class ActivationFunc(object):
    """Collection of activation functions and a sampling helper.

    Every member is a static method, so the class is used as a namespace
    and never needs to be instantiated.
    """

    @staticmethod
    def sigmoid(x):
        """Return the logistic function ``1 / (1 + exp(-x))`` of ``x``."""
        denominator = 1 + np.exp(-x)
        return 1 / denominator

    @staticmethod
    def relu(x):
        """Return ``x`` with every value below zero replaced by zero."""
        return np.maximum(x, 0)

    @staticmethod
    def passthru(x):
        """Return ``x`` unchanged."""
        return x

    @staticmethod
    def softmax(x):
        """Return exponentials of ``x`` normalized by their sum along axis 0.

        The maximum over all of ``x`` is subtracted before exponentiating.
        """
        shifted = x - np.max(x)
        exponentials = np.exp(shifted)
        return exponentials / exponentials.sum(axis=0)

    @staticmethod
    def sample(p):
        """Draw one index using ``p`` as the outcome probabilities.

        A single multinomial trial yields a one-hot vector; the position of
        its 1 is returned.
        """
        one_hot = np.random.multinomial(1, p)
        return np.argmax(one_hot)

    @staticmethod
    def tanh(x):
        """Return the hyperbolic tangent of ``x``."""
        return np.tanh(x)
# Lookup table from activation name to its implementation.
# 'relu' previously pointed at ActivationFunc.sigmoid, so requesting a ReLU
# silently applied a sigmoid; it now resolves to ActivationFunc.relu.
af = {
    'relu': ActivationFunc.relu,
    'sigmoid': ActivationFunc.sigmoid,
    'passthru': ActivationFunc.passthru,
    'softmax': ActivationFunc.softmax,
    'tanh': ActivationFunc.tanh,
    'sample': ActivationFunc.sample,
}