Fix multi-gpu problem (#55)
adolphk-yk authored and ljshou committed May 27, 2019
1 parent 7ca4ac7 commit dee9dfd
Showing 3 changed files with 13 additions and 9 deletions.
6 changes: 6 additions & 0 deletions LearningMachine.py
@@ -34,6 +34,12 @@ def __init__(self, phase, conf, problem, vocab_info=None, initialize=True, use_g
if use_gpu is True:
self.model = nn.DataParallel(self.model)
self.model = transfer_to_gpu(self.model)
# judge the embedding matrix weight's device
emb_weight_device = list(self.model.module.layers.embedding.embeddings.values())[0].weight.device.type if isinstance(self.model, nn.DataParallel) \
else list(self.model.layers.embedding.embeddings.values())[0].weight.device.type
device = 'GPU' if 'cuda' in emb_weight_device else 'CPU'
logging.info(
"The embedding matrix is on %s now, you can modify the weight_on_gpu parameter to change embeddings weight device." % device)
logging.info(self.model)
#logging.info("Total parameters: %d; trainable parameters: %d" % (get_param_num(self.model), get_trainable_param_num(self.model)))
logging.info("Total trainable parameters: %d" % (get_trainable_param_num(self.model)))
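Below is a minimal, self-contained sketch (toy module and hypothetical names, not the repo's classes) of why the device check above has to unwrap nn.DataParallel: the wrapped network is exposed through .module, so the embedding weight must be reached via that attribute when data parallelism is enabled.

import torch
import torch.nn as nn

class ToyModel(nn.Module):
    """Stand-in for the real model; 'embedding' is just an illustrative name."""
    def __init__(self):
        super(ToyModel, self).__init__()
        self.embedding = nn.Embedding(10, 4)

model = ToyModel()
if torch.cuda.is_available():
    # DataParallel hides the original network behind .module
    model = nn.DataParallel(model)

# Mirror the isinstance(...) check added above: unwrap before reading devices
core = model.module if isinstance(model, nn.DataParallel) else model
device = 'GPU' if 'cuda' in core.embedding.weight.device.type else 'CPU'
print("The embedding matrix is on %s" % device)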
7 changes: 6 additions & 1 deletion Model.py
@@ -44,6 +44,11 @@ def get_conf(layer_id, layer_name, input_layer_ids, all_layer_configs, model_inp
try:
conf_dict['use_gpu'] = use_gpu

# for Embedding layer, add weight_on_gpu parameters
if layer_id == EMBED_LAYER_ID:
conf_dict['weight_on_gpu'] = conf_dict['conf']['weight_on_gpu']
del conf_dict['conf']['weight_on_gpu']

# for classification tasks, we usually add a Linear layer to project the output to dimension of number of classes. If we don't know the #classes, we can use '-1' instead and we would calculate the number of classes from the corpus.
if layer_name == 'Linear':
if isinstance(conf_dict['hidden_dim'], list) and conf_dict['hidden_dim'][-1] == -1:
@@ -201,7 +206,7 @@ def __init__(self, conf, problem, vocab_info, use_gpu):
for input_cluster in emb_conf:
emb_conf[input_cluster]['dim'] = layer_arch['conf'][input_cluster]['dim']
emb_conf[input_cluster]['fix_weight'] = layer_arch['conf'][input_cluster].get('fix_weight', False)
emb_conf[input_cluster]['weight_on_gpu'] = layer_arch['conf'][input_cluster].get('weight_on_gpu', True)
emb_conf['weight_on_gpu'] = layer_arch.get('weight_on_gpu', True)

all_layer_configs[EMBED_LAYER_ID] = get_conf(EMBED_LAYER_ID, layer_arch['layer'],
None, all_layer_configs, inputs, self.use_gpu, conf_dict={'conf': emb_conf},
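A small sketch of the configuration hoisting these hunks perform, using assumed dictionary shapes rather than the exact NeuronBlocks config schema: weight_on_gpu is now read once at the Embedding layer level and popped out of the per-cluster conf before the layer config object is built.

# Assumed shapes for illustration only; keys besides weight_on_gpu are hypothetical.
layer_arch = {
    "layer": "Embedding",
    "weight_on_gpu": False,   # layer-level switch: keep embedding weights on CPU
    "conf": {
        "word": {"dim": 300, "fix_weight": True},
        "char": {"dim": 30},
    },
}

emb_conf = {cluster: dict(cfg) for cluster, cfg in layer_arch["conf"].items()}
emb_conf["weight_on_gpu"] = layer_arch.get("weight_on_gpu", True)   # default: on GPU

conf_dict = {"conf": emb_conf, "use_gpu": True}
# Same hoisting as get_conf() does for EMBED_LAYER_ID:
conf_dict["weight_on_gpu"] = conf_dict["conf"].pop("weight_on_gpu")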
9 changes: 1 addition & 8 deletions block_zoo/Embedding.py
@@ -112,7 +112,7 @@ def __init__(self, layer_conf):
super(Embedding, self).__init__(layer_conf)
self.layer_conf = layer_conf

self.embeddings = dict()
self.embeddings = nn.ModuleDict() if layer_conf.weight_on_gpu else dict()
for input_cluster in layer_conf.conf:
if 'type' in layer_conf.conf[input_cluster]:
# char embedding
@@ -129,13 +129,6 @@ def __init__(self, layer_conf):
if 'init_weights' in layer_conf.conf[input_cluster] and layer_conf.conf[input_cluster]['init_weights'] is not None:
self.embeddings[input_cluster].weight = nn.Parameter(torch.from_numpy(layer_conf.conf[input_cluster]['init_weights']))

# judge the embedding matrix weight's device
if layer_conf.conf[input_cluster]['weight_on_gpu']:
self.embeddings[input_cluster].to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
logging.info("The embeddings[%s]'s weight is on GPU now, you can modify the weight_on_gpu parameter to change embeddings weight device" % input_cluster)
else:
logging.info(
"The embeddings[%s]'s weight is on cpu now, you can modify the weight_on_gpu parameter to change embeddings weight device" % input_cluster)
# judge if fix the embedding weight
if layer_conf.conf[input_cluster]['fix_weight']:
self.embeddings[input_cluster].weight.requires_grad = False
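The behavioural core of this change, shown as a self-contained sketch (a hypothetical Wrapper class rather than the repo's Embedding block): only modules registered through nn.ModuleDict follow .cuda() / nn.DataParallel, so keeping a plain dict is what leaves the embedding weights on the CPU when weight_on_gpu is false.

import torch
import torch.nn as nn

class Wrapper(nn.Module):
    def __init__(self, weight_on_gpu=True):
        super(Wrapper, self).__init__()
        # nn.ModuleDict registers the embeddings as submodules, so .cuda() (or
        # DataParallel) moves their weights along with the rest of the model;
        # a plain dict keeps them unregistered and therefore on the CPU.
        self.embeddings = nn.ModuleDict() if weight_on_gpu else dict()
        self.embeddings['word'] = nn.Embedding(1000, 50)

m = Wrapper(weight_on_gpu=False)
if torch.cuda.is_available():
    m = m.cuda()
print(m.embeddings['word'].weight.device)   # stays on cpu when weight_on_gpu=False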
