multi-gpu support in call_mods module and other fixes

PengNi · May 21, 2022 · 789c524 · 789c524
1 parent 1e3dc9c
commit 789c524
Show file tree

Hide file tree

Showing 8 changed files with 70 additions and 23 deletions.
diff --git a/README.rst b/README.rst
@@ -10,6 +10,8 @@ bug fixes
 
 enable .gz output in extract/call_mods/call_freq modules
 
+multi-gpu support in call_mods module
+
 
 0.1.5
 -----

diff --git a/deepsignal_plant/call_modifications.py b/deepsignal_plant/call_modifications.py
@@ -125,7 +125,7 @@ def _read_features_file(features_file, features_batch_q, f5_batch_size=10):
  f5_batch_size))
 
 
-def _call_mods(features_batch, model, batch_size):
+def _call_mods(features_batch, model, batch_size, device=0):
  """
  call modification from a batch of features
  :param features_batch: a batch of features, contains lists
@@ -154,8 +154,10 @@ def _call_mods(features_batch, model, batch_size):
 
  # call mods of each batch
  if len(b_sampleinfo) > 0:
- voutputs, vlogits = model(FloatTensor(b_kmers), FloatTensor(b_base_means), FloatTensor(b_base_stds),
- FloatTensor(b_base_signal_lens), FloatTensor(b_k_signals))
+ voutputs, vlogits = model(FloatTensor(b_kmers, device), FloatTensor(b_base_means, device),
+ FloatTensor(b_base_stds, device),
+ FloatTensor(b_base_signal_lens, device),
+ FloatTensor(b_k_signals, device))
  _, vpredicted = torch.max(vlogits.data, 1)
  if use_cuda:
  vlogits = vlogits.cpu()
@@ -189,7 +191,7 @@ def _call_mods(features_batch, model, batch_size):
  return pred_str, accuracy, batch_num
 
 
-def _call_mods_q(model_path, features_batch_q, pred_str_q, success_file, args):
+def _call_mods_q(model_path, features_batch_q, pred_str_q, success_file, args, device=0):
  """
  subprocess for calling modifications
  :param model_path:
@@ -203,17 +205,16 @@ def _call_mods_q(model_path, features_batch_q, pred_str_q, success_file, args):
  model = ModelBiLSTM(args.seq_len, args.signal_len, args.layernum1, args.layernum2, args.class_num,
  args.dropout_rate, args.hid_rnn,
  args.n_vocab, args.n_embed, str2bool(args.is_base), str2bool(args.is_signallen),
- args.model_type)
- if use_cuda:
- model = model.cuda()
- para_dict = torch.load(model_path)
- else:
- para_dict = torch.load(model_path, map_location=torch.device('cpu'))
+ module=args.model_type, device=device)
 
+ para_dict = torch.load(model_path, map_location=torch.device('cpu'))
+ # para_dict = torch.load(model_path, map_location=torch.device(device))
  model_dict = model.state_dict()
  model_dict.update(para_dict)
  model.load_state_dict(model_dict)
 
+ if use_cuda:
+ model = model.cuda(device)
  model.eval()
 
  accuracy_list = []
@@ -233,7 +234,7 @@ def _call_mods_q(model_path, features_batch_q, pred_str_q, success_file, args):
  # open(success_file, 'w').close()
  break
 
- pred_str, accuracy, batch_num = _call_mods(features_batch, model, args.batch_size)
+ pred_str, accuracy, batch_num = _call_mods(features_batch, model, args.batch_size, device)
 
  pred_str_q.put(pred_str)
  while pred_str_q.qsize() > queue_size_border:
@@ -391,9 +392,12 @@ def _call_mods_from_fast5s_gpu(motif_seqs, chrom2len, fast5s_q, len_fast5s, posi
 
  # queues of features->mods_call
  call_mods_gpu_procs = []
+ gpulist = _get_gpus()
+ gpuindex = 0
  for _ in range(nproc_gpu):
  p_call_mods_gpu = mp.Process(target=_call_mods_q, args=(model_path, features_batch_q, pred_str_q,
- success_file, args))
+ success_file, args, gpulist[gpuindex]))
+ gpuindex += 1
  p_call_mods_gpu.daemon = True
  p_call_mods_gpu.start()
  call_mods_gpu_procs.append(p_call_mods_gpu)
@@ -505,6 +509,15 @@ def _call_mods_from_fast5s_cpu2(motif_seqs, chrom2len, fast5s_q, len_fast5s, pos
  print("%d of %d fast5 files failed.." % (errornum_sum, len_fast5s))
 
 
+def _get_gpus():
+    """
+    build the list of gpu device indices handed out to the call_mods processes
+    :return: indices of all CUDA devices reported by torch ([0] when none is found),
+    repeated 1000 times so callers can index the list with an increasing counter
+    """
+    n_devices = torch.cuda.device_count()
+    # fall back to index 0 when torch reports no CUDA device
+    device_ids = list(range(n_devices)) if n_devices > 0 else [0]
+    # the list is finite: indexing past 1000 * len(device_ids) raises IndexError
+    return device_ids * 1000
+
+
 def call_mods(args):
  """
  main function of calling modification
@@ -586,9 +599,12 @@ def call_mods(args):
  if nproc_dp > nproc_to_call_mods_in_cpu_mode:
  nproc_dp = nproc_to_call_mods_in_cpu_mode
 
+ gpulist = _get_gpus()
+ gpuindex = 0
  for _ in range(nproc_dp):
  p = mp.Process(target=_call_mods_q, args=(model_path, features_batch_q, pred_str_q,
- success_file, args))
+ success_file, args, gpulist[gpuindex]))
+ gpuindex += 1
  p.daemon = True
  p.start()
  predstr_procs.append(p)

diff --git a/deepsignal_plant/denoise.py b/deepsignal_plant/denoise.py
@@ -152,7 +152,7 @@ def train_1time(train_file, valid_file, valid_lidxs, args):
  vprecs.append(i_precision)
  vrecas.append(i_recall)
 
- if (vi + 1) % args.step_interval == 0:
+ if (vi + 1) % args.step_interval == 0 or (vi + 1) == total_step:
  endtime = time.time()
  print('===Test, Step [{}/{}], ValidLoss: {:.4f}, '
  'Accuracy: {:.4f}, Precision: {:.4f}, Recall: {:.4f}, '

diff --git a/deepsignal_plant/models.py b/deepsignal_plant/models.py
@@ -103,10 +103,11 @@ class ModelBiLSTM(nn.Module):
  def __init__(self, seq_len=13, signal_len=16, num_layers1=3, num_layers2=1, num_classes=2,
  dropout_rate=0.5, hidden_size=256,
  vocab_size=16, embedding_size=4, is_base=True, is_signallen=True,
- module="both_bilstm"):
+ module="both_bilstm", device=0):
  super(ModelBiLSTM, self).__init__()
  self.model_type = 'BiLSTM'
  self.module = module
+ self.device = device
 
  self.seq_len = seq_len
  self.signal_len = signal_len
@@ -170,8 +171,8 @@ def init_hidden(self, batch_size, num_layers, hidden_size):
  h0 = autograd.Variable(torch.randn(num_layers * 2, batch_size, hidden_size))
  c0 = autograd.Variable(torch.randn(num_layers * 2, batch_size, hidden_size))
  if use_cuda:
- h0 = h0.cuda()
- c0 = c0.cuda()
+ h0 = h0.cuda(self.device)
+ c0 = c0.cuda(self.device)
  return h0, c0
 
  def forward(self, kmer, base_means, base_stds, base_signal_lens, signals):

diff --git a/deepsignal_plant/train.py b/deepsignal_plant/train.py
@@ -127,7 +127,7 @@ def train(args):
  torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5)
  optimizer.step()
 
- if (i + 1) % args.step_interval == 0 or i == total_step - 1:
+ if (i + 1) % args.step_interval == 0 or (i + 1) == total_step:
  model.eval()
  with torch.no_grad():
  vlosses, vlabels_total, vpredicted_total = [], [], []

diff --git a/deepsignal_plant/utils/constants_torch.py b/deepsignal_plant/utils/constants_torch.py
@@ -4,5 +4,17 @@
 # Device configuration
 # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 use_cuda = torch.cuda.is_available()
-FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
-LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
+
+
+# FloatTensor = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
+def FloatTensor(tensor, device=0):
+    """
+    build a torch.FloatTensor from the input, moved to a gpu when cuda is available
+    :param tensor: data accepted by the torch.FloatTensor constructor
+    :param device: cuda device index to move the tensor to; unused when use_cuda is False
+    :return: the tensor on cuda device `device` if use_cuda, else the cpu tensor
+    """
+    cpu_tensor = torch.FloatTensor(tensor)
+    return cpu_tensor.cuda(device) if use_cuda else cpu_tensor
+
+
+# LongTensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor
+def LongTensor(tensor, device=0):
+    """
+    build a torch.LongTensor from the input, moved to a gpu when cuda is available
+    :param tensor: data accepted by the torch.LongTensor constructor
+    :param device: cuda device index to move the tensor to; unused when use_cuda is False
+    :return: the tensor on cuda device `device` if use_cuda, else the cpu tensor
+    """
+    cpu_tensor = torch.LongTensor(tensor)
+    return cpu_tensor.cuda(device) if use_cuda else cpu_tensor
diff --git a/deepsignal_plant/utils/process_utils.py b/deepsignal_plant/utils/process_utils.py
@@ -451,6 +451,7 @@ def _rand_select_by_kmer_ratio(kmer2lines, kmer2ratios, totalline):
  len(line_kmer_diff),
  len(ratio_kmer_diff)))
  selected_lines = []
+ unselected_lines = []
  unratioed_kmers = line_kmer_diff
  cnts = 0
  for kmer in inter_kmers:
@@ -460,7 +461,9 @@ def _rand_select_by_kmer_ratio(kmer2lines, kmer2ratios, totalline):
  selected_lines += lines
  cnts += (linenum - len(lines))
  else:
- selected_lines += random.sample(lines, linenum)
+ seledtmp = random.sample(lines, linenum)
+ selected_lines += seledtmp
+ unselected_lines += list(set(lines).difference(seledtmp))
  print("for {} common kmers, fill {} samples, "
  "{} samples that can't be filled".format(len(inter_kmers),
  len(selected_lines),
@@ -480,9 +483,22 @@ def _rand_select_by_kmer_ratio(kmer2lines, kmer2ratios, totalline):
  selected_lines += lines
  cnts += len(lines)
  else:
- selected_lines += random.sample(lines, minlinenum)
+ seledtmp = random.sample(lines, minlinenum)
+ selected_lines += seledtmp
  cnts += minlinenum
+ unselected_lines += list(set(lines).difference(seledtmp))
  print("extract {} samples from {} line_diff kmers".format(cnts, len(unratioed_kmers)))
+ unfilled_cnt = totalline - len(selected_lines)
+ if unfilled_cnt > 0:
+ print("totalline: {}, still need to fill: {}".format(totalline, unfilled_cnt))
+ random.shuffle(unselected_lines)
+ triplefill_cnt = unfilled_cnt
+ if len(unselected_lines) <= unfilled_cnt:
+ selected_lines += unselected_lines
+ triplefill_cnt = len(unselected_lines)
+ else:
+ selected_lines += unselected_lines[:unfilled_cnt]
+ print("extract {} samples from {} samples not used above".format(triplefill_cnt, len(unselected_lines)))
  selected_lines = sorted(selected_lines)
  selected_lines = [-1] + selected_lines
  return selected_lines

diff --git a/scripts/combine_two_strands_frequency.py b/scripts/combine_two_strands_frequency.py
@@ -115,7 +115,7 @@ def combine_fb_of_bed(report_fp, cgposes):
  chrom, fpos = cgpos[0], cgpos[1]
  mposinfo.append([chrom, fpos, fpos+1, ".", pos2info[cgpos][0], "+",
  fpos, fpos+1, "0,0,0", pos2info[cgpos][0],
- int(round(pos2info[cgpos][2], 2) * 100)])
+ int(round(pos2info[cgpos][2] + 0.000001, 2) * 100)])
  mposinfo = sorted(mposinfo, key=lambda x: (x[0], x[1]))
  return mposinfo