Skip to content

Commit

Permalink
first refine
Browse files (browse the repository at this point in the history)
  • Loading branch information
orbxball committed Mar 6, 2017
1 parent ae06495 commit 3d55065
Showing 1 changed file with 32 additions and 26 deletions.
58 changes: 32 additions & 26 deletions hw1/linreg.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,16 @@ def ensure_dir(file_path):
if not os.path.exists(directory):
os.makedirs(directory)

def extract_feature(M, features, squares=None):
    """Build sliding-window samples from the data cube ``M``.

    For every index along the first axis ("month" in this file) and every
    start offset ``i``, one sample is made from the 9 consecutive columns
    ``i .. i+8`` of the selected rows, flattened row by row.  The target
    is row 9 at column ``i+9``, i.e. the column right after the window.

    Args:
        M: 3-D array indexed as ``M[month, row, column]``.  Selected cells
            must be numeric or numeric strings; they are cast to float.
        features: sequence of row indices whose window values form the
            first part of each sample.
        squares: sequence of extra row indices whose window values are
            appended after ``features``.  ``None`` (the default) means no
            extra rows, so a two-argument call keeps working.
            NOTE(review): despite the name, the values are not squared
            here -- presumably reserved for second-order terms; confirm.

    Returns:
        Tuple ``(x_data, y_data)`` of float arrays with shapes
        ``(n_samples, 9 * (len(features) + len(squares)))`` and
        ``(n_samples,)``.  ``n_samples`` is
        ``M.shape[0] * (M.shape[2] - 9)``, or 0 when there are fewer than
        10 columns.
    """
    window = 9       # number of consecutive columns fed to the model
    target_row = 9   # row whose next-column value is predicted

    M = np.asarray(M)
    extra = [] if squares is None else list(squares)
    # One integer index array for both groups: selecting rows in the order
    # features + squares and flattening is the same as flattening each
    # group separately and concatenating.  The explicit int dtype keeps an
    # empty selection a valid index.
    rows = np.asarray(list(features) + extra, dtype=int)

    x_data = []
    y_data = []
    for month in range(M.shape[0]):
        # Last usable start offset leaves one column after the window.
        for i in range(M.shape[2] - window):
            sample = M[month, rows, i:i + window].flatten()
            x_data.append(sample.astype(float))
            y_data.append(float(M[month, target_row, i + window]))
    return np.array(x_data), np.array(y_data)

# Start Program
Expand All @@ -26,27 +29,28 @@ def extract_feature(M, features):
# Reshape the raw training table into a (12, 18, n) cube and make it numeric.
# NOTE(review): this hunk is a diff rendered without +/- markers or
# indentation; the per-month 'NR' -> '-1' loop and the global
# 'NR' -> '0.0' replacement look like the old and new form of the same
# step -- confirm against the repository before relying on either.
# Drop the first three columns of the table.
M = M[:, 3:] #shape: (4320, 24)
# Split the rows into 12 groups, each made of blocks of 18 rows x 24 columns.
M = np.reshape(M, (12, -1, 18, 24)) #shape: (12, 20, 18, 24)
# Move the 18-row axis in front of the block axis, then merge the block
# and column axes into a single last axis.
M = M.swapaxes(1, 2).reshape(12, 18, -1) #shape: (12, 18, 480)
## map 'NR' -> '-1'
for month in range(12):
M[month, 10, :] = np.array(list(map(lambda i: i if i != 'NR' else '-1', M[month, 10, :])))
# Replace every 'NR' entry anywhere in the cube with the string '0.0';
# 'NR' itself cannot be converted by the float cast below.
M[M == 'NR'] = '0.0'
# Convert the whole cube to floats.
M = M.astype(float)


# Select input rows, build the training set and initialise the optimiser state.
# NOTE(review): this hunk is a diff rendered without +/- markers or
# indentation, so several statements appear twice (a form sized by
# len(feature_sieve) and a form sized by `length`); presumably the
# `length` form is the post-commit one -- confirm.
# extract feature into x_data <shape:(5652, 9*18)>, y_data <shape:(5652,)>
feature_sieve = [i for i in range(0, 18)]
x_data, y_data = extract_feature(M, feature_sieve)
# Row indices 0..17 are used as features.
feature_sieve = [i for i in range(18)]
# Extra row indices passed as the `squares` argument; empty here.
square_sieve = []
# Total number of selected rows; each contributes 9 values per sample.
length = len(feature_sieve) + len(square_sieve)
x_data, y_data = extract_feature(M, feature_sieve, square_sieve)

# ydata = b + w * xdata
# Bias and weights start at zero; there is one weight per input value.
b = 0.0
w = np.zeros(len(feature_sieve)*9)
lr = 5e-3
w = np.zeros(length*9)
# Base learning rate and maximum number of passes of the training loop.
lr = 0.5
epoch = 200000
# Per-parameter accumulators; w_lr divides the step in `lr/np.sqrt(w_lr)`.
# NOTE(review): how they are accumulated is not visible in this hunk --
# presumably a running sum of squared gradients; confirm.
b_lr = 0.0
w_lr = np.zeros(len(feature_sieve)*9)
w_lr = np.zeros(length*9)

# Large initial value for the previous-loss comparison.
prev_loss = 1e10
for e in range(epoch):
# Gradients and loss are reset at the start of every epoch.
b_grad = 0.0
w_grad = np.zeros(len(feature_sieve)*9)
w_grad = np.zeros(length*9)
loss = 0.0

# Calculate the value of the loss function
Expand All @@ -64,10 +68,10 @@ def extract_feature(M, features):
# Weight update and progress logging.
# NOTE(review): this hunk is a diff rendered without +/- markers or
# indentation; the two logging variants below look like the old and new
# form of the same step -- confirm.
# Step each weight by its gradient, with the base rate lr divided
# element-wise by sqrt(w_lr).
w = w - lr/np.sqrt(w_lr) * w_grad

# Print loss
# Variant 1: log the raw loss every 100 epochs (starting at epoch 0) and
# stop once the loss improves by less than 1e-8 between log points.
if e % 100 == 0:
print('epoch:{}\n Loss:{}'.format(e, loss))
if prev_loss - loss < 1e-8: break
prev_loss = loss
# Variant 2: log the square root of the loss every 1000 epochs, using a
# 1-based epoch number; the early-stop check is commented out here, so
# the loop runs for all `epoch` iterations.
if (e+1) % 1000 == 0:
print('epoch:{}\n Loss:{}'.format(e+1, np.sqrt(loss)))
# if prev_loss - loss < 1e-8: break
# prev_loss = loss


# Test
Expand All @@ -82,16 +86,18 @@ def extract_feature(M, features):
# Dump the weights, then write one prediction per test sample to `outfile`.
# NOTE(review): this hunk is a diff rendered without +/- markers or
# indentation, so two prediction loops are interleaved below -- confirm
# which lines belong to the committed version.
# Write the weight vector as a single comma-separated line.
f.write('{}\n'.format(','.join(list(map(lambda x: str(x), w.flatten())))))

with open(outfile, 'w+') as f:
f.write('id,value\n')
# Load the test table without a header row, decoded as big5.
# NOTE(review): DataFrame.as_matrix() was removed in pandas 1.0; on current
# pandas this call needs .to_numpy() (or .values) instead.
M = pd.read_csv(infile2, header=None, encoding='big5').as_matrix()
# Drop the first two columns of the table.
M = M[:, 2:] #shape: (4320, 9)
# Group the rows into samples of 18 rows x 9 columns.
M = M.reshape(-1, 18, 9) #shape: (240, 18, 9)
# Replace 'NR' with '0.0', then convert everything to floats.
M[M == 'NR'] = '0.0'
M = M.astype(float)

# Loop variant that steps through M 18 rows at a time and indexes it as a
# 2-D table (M[i+10, :]); that only matches M before the reshape above.
i = 0
while i < M.shape[0]:
## map 'NR' -> '-1'
M[i+10, :] = np.array(list(map(lambda x: x if x != 'NR' else '-1', M[i+10, :])))
# Copies of the row-index lists used to pick rows from each test sample.
selected = [i for i in feature_sieve]
square_selected = [i for i in square_sieve]

# Shift the feature row indices to the current 18-row group, skip the
# first two columns, and use column 0 of row i as the output id.
modified_sieve = [n+i for n in feature_sieve]
X = M[modified_sieve, 2:].flatten().astype("float")
y = M[i, 0]
f.write('{},{}\n'.format(y, b + np.dot(w, X)))
i += 18
# Loop variant over the reshaped 3-D array: header line, then one
# 'id_<index>,<prediction>' line per sample, where the prediction is
# b + w . Z and Z concatenates the selected rows' values in the same
# order extract_feature uses (features first, then squares).
f.write('id,value\n')
for i in range(M.shape[0]):
X = M[i, selected, :].flatten()
Y = M[i, square_selected, :].flatten()
Z = np.concatenate((X, Y), axis=0)
f.write('id_{},{}\n'.format(i, b + np.dot(w, Z)))

0 comments on commit 3d55065

Please sign in to comment.