Add inference feature calculation

berylgithub · Jan 14, 2020 · 5c1bee1 · 5c1bee1
1 parent 6a2deb6
commit 5c1bee1
Showing 1 changed file with 63 additions and 17 deletions.
diff --git a/driver.py b/driver.py
@@ -9,36 +9,85 @@
 import pdb_processor as pdbp
 import feature_calculator as fc
 import time
-import multiprocessing
+from multiprocessing import Pool
 import pickle
 
+
+
+
+
 if __name__=="__main__":
  import json
 
  with open('config.json') as json_data_file:
  conf = json.load(json_data_file)
+
  x_path = conf['root']['PP']
  y_path = conf['index']['PP'] 
-
  complex_files = pdbp.list_files(x_path)
-# id_name = complex_files[1]
-# test_file = x_path+"/"+id_name
-# chains = pdbp.loader_pdbbind(test_file) 
+
  atom_types = ['C','N','O','S']
  cutoff = 12
 
-# print(chains, len(chains))
  start_time = time.time()
- pool = multiprocessing.Pool() 
+ pool = Pool(6) 
 
-# x_vec = fc.x_processor_mp([chains, id_name, atom_types, cutoff, pool])
-# print(x_vec)
 
- filename = "dataset_mini_HPC.pkl"
- #y_data loader
- df_y = fc.y_processor(conf['index']['PP'])
 
- #check if id is already existed within file, if yes, skip it
+
+# '''
+# dataset generator
+# '''
+# filename = "dataset_beta.pkl"
+# #y_data loader
+# df_y = fc.y_processor(conf['index']['PP'])
+#
+# #check if the id already exists in the file; if so, skip it
+# data = []
+# try:
+# with open(filename, 'rb') as fr:
+# print(filename, 'is found')
+# try:
+# while True:
+# data.append(pickle.load(fr))
+# except EOFError:
+# pass 
+# except FileNotFoundError:
+# print('File is not found')
+# saved_ids = [d['id'] for d in data]
+#
+# #process and save the data
+# try:
+# i=0
+# for id_name in complex_files:
+# if id_name in saved_ids:
+# continue
+# else:
+# print("start of process for ID :",id_name)
+# pathfile = x_path+"/"+id_name
+# chains = pdbp.loader_pdbbind(pathfile) 
+# vector = fc.x_processor_mp([chains, id_name, atom_types, cutoff, pool])
+# y = df_y.loc[df_y['id']==id_name.split('.')[0]]['log_y'].values[0]
+# vector["y"]=y
+# print("ID : ", id_name)
+# print('value of x vector (R^N) = ', vector)
+# with open(filename, 'ab') as f:
+# pickle.dump(vector, f)
+# i+=1
+# except KeyboardInterrupt:
+# print('interrupted !!')
+# 
+# end_time = time.time()
+# print("the number of protein processed in current run = ",i)
+# print('time elapsed =',end_time-start_time,'seconds')
+
+ '''
+ inference data
+ '''
+ x_path = conf['root']['zdock']["4AZU"]
+ complex_files = pdbp.list_files(x_path)
+ print(complex_files)
+ filename = "Data/data_4AZU.pkl"
  data = []
  try:
  with open(filename, 'rb') as fr:
@@ -63,8 +112,6 @@
  pathfile = x_path+"/"+id_name
  chains = pdbp.loader_pdbbind(pathfile) 
  vector = fc.x_processor_mp([chains, id_name, atom_types, cutoff, pool])
- y = df_y.loc[df_y['id']==id_name.split('.')[0]]['log_y'].values[0]
- vector["y"]=y
  print("ID : ", id_name)
  print('value of x vector (R^N) = ', vector)
  with open(filename, 'ab') as f:
@@ -75,5 +122,4 @@
 
  end_time = time.time()
  print("the number of protein processed in current run = ",i)
- print('time elapsed =',end_time-start_time,'seconds')
-
+ print('time elapsed =',end_time-start_time,'seconds')