Skip to content

Commit

Permalink
added inference feature calculation
Browse files Browse the repository at this point in the history
  • Loading branch information
berylgithub committed Jan 14, 2020
1 parent 6a2deb6 commit 5c1bee1
Showing 1 changed file with 63 additions and 17 deletions.
80 changes: 63 additions & 17 deletions driver.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,36 +9,85 @@
import pdb_processor as pdbp
import feature_calculator as fc
import time
import multiprocessing
from multiprocessing import Pool
import pickle





if __name__=="__main__":
import json

with open('config.json') as json_data_file:
conf = json.load(json_data_file)

x_path = conf['root']['PP']
y_path = conf['index']['PP']

complex_files = pdbp.list_files(x_path)
# id_name = complex_files[1]
# test_file = x_path+"/"+id_name
# chains = pdbp.loader_pdbbind(test_file)

atom_types = ['C','N','O','S']
cutoff = 12

# print(chains, len(chains))
start_time = time.time()
pool = multiprocessing.Pool()
pool = Pool(6)

# x_vec = fc.x_processor_mp([chains, id_name, atom_types, cutoff, pool])
# print(x_vec)

filename = "dataset_mini_HPC.pkl"
#y_data loader
df_y = fc.y_processor(conf['index']['PP'])

#check if id is already existed within file, if yes, skip it

# '''
# dataset generator
# '''
# filename = "dataset_beta.pkl"
# #y_data loader
# df_y = fc.y_processor(conf['index']['PP'])
#
# #check if id is already existed within file, if yes, skip it
# data = []
# try:
# with open(filename, 'rb') as fr:
# print(filename, 'is found')
# try:
# while True:
# data.append(pickle.load(fr))
# except EOFError:
# pass
# except FileNotFoundError:
# print('File is not found')
# saved_ids = [d['id'] for d in data]
#
# #process and save the data
# try:
# i=0
# for id_name in complex_files:
# if id_name in saved_ids:
# continue
# else:
# print("start of process for ID :",id_name)
# pathfile = x_path+"/"+id_name
# chains = pdbp.loader_pdbbind(pathfile)
# vector = fc.x_processor_mp([chains, id_name, atom_types, cutoff, pool])
# y = df_y.loc[df_y['id']==id_name.split('.')[0]]['log_y'].values[0]
# vector["y"]=y
# print("ID : ", id_name)
# print('value of x vector (R^N) = ', vector)
# with open(filename, 'ab') as f:
# pickle.dump(vector, f)
# i+=1
# except KeyboardInterrupt:
# print('interrupted !!')
#
# end_time = time.time()
# print("the number of protein processed in current run = ",i)
# print('time elapsed =',end_time-start_time,'seconds')

'''
inference data
'''
x_path = conf['root']['zdock']["4AZU"]
complex_files = pdbp.list_files(x_path)
print(complex_files)
filename = "Data/data_4AZU.pkl"
data = []
try:
with open(filename, 'rb') as fr:
Expand All @@ -63,8 +112,6 @@
pathfile = x_path+"/"+id_name
chains = pdbp.loader_pdbbind(pathfile)
vector = fc.x_processor_mp([chains, id_name, atom_types, cutoff, pool])
y = df_y.loc[df_y['id']==id_name.split('.')[0]]['log_y'].values[0]
vector["y"]=y
print("ID : ", id_name)
print('value of x vector (R^N) = ', vector)
with open(filename, 'ab') as f:
Expand All @@ -75,5 +122,4 @@

end_time = time.time()
print("the number of protein processed in current run = ",i)
print('time elapsed =',end_time-start_time,'seconds')

print('time elapsed =',end_time-start_time,'seconds')

0 comments on commit 5c1bee1

Please sign in to comment.