Add confidence score output to rec_infer

PaddlePaddle · Jun 2, 2020 · ade18e1 · ade18e1
1 parent 78d9051
commit ade18e1
Show file tree

Hide file tree

Showing 4 changed files with 38 additions and 18 deletions.
diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py
@@ -48,6 +48,7 @@ def __init__(self, params):
  elif params['mode'] == "test":
  self.batch_size = 1
  self.infer_img = params["infer_img"]
+
  def load_hierarchical_lmdb_dataset(self):
  lmdb_sets = {}
  dataset_idx = 0

diff --git a/ppocr/modeling/architectures/rec_model.py b/ppocr/modeling/architectures/rec_model.py
@@ -110,7 +110,11 @@ def __call__(self, mode):
  return loader, outputs
  elif mode == "export":
  predict = predicts['predict']
- predict = fluid.layers.softmax(predict)
+ if self.loss_type == "ctc":
+ predict = fluid.layers.softmax(predict)
  return [image, {'decoded_out': decoded_out, 'predicts': predict}]
  else:
- return loader, {'decoded_out': decoded_out}
+ predict = predicts['predict']
+ if self.loss_type == "ctc":
+ predict = fluid.layers.softmax(predict)
+ return loader, {'decoded_out': decoded_out, 'predicts': predict}
diff --git a/ppocr/modeling/heads/rec_attention_head.py b/ppocr/modeling/heads/rec_attention_head.py
@@ -123,6 +123,8 @@ def gru_attention_infer(self, decoder_boot, max_length, char_num,
 
  full_ids = fluid.layers.fill_constant_batch_size_like(
  input=init_state, shape=[-1, 1], dtype='int64', value=1)
+ full_scores = fluid.layers.fill_constant_batch_size_like(
+ input=init_state, shape=[-1, 1], dtype='float32', value=1)
 
  cond = layers.less_than(x=counter, y=array_len)
  while_op = layers.While(cond=cond)
@@ -171,6 +173,9 @@ def gru_attention_infer(self, decoder_boot, max_length, char_num,
  new_ids = fluid.layers.concat([full_ids, topk_indices], axis=1)
  fluid.layers.assign(new_ids, full_ids)
 
+ new_scores = fluid.layers.concat([full_scores, topk_scores], axis=1)
+ fluid.layers.assign(new_scores, full_scores)
+
  layers.increment(x=counter, value=1, in_place=True)
 
  # update the memories
@@ -184,7 +189,7 @@ def gru_attention_infer(self, decoder_boot, max_length, char_num,
  length_cond = layers.less_than(x=counter, y=array_len)
  finish_cond = layers.logical_not(layers.is_empty(x=topk_indices))
  layers.logical_and(x=length_cond, y=finish_cond, out=cond)
- return full_ids
+ return full_ids, full_scores
 
  def __call__(self, inputs, labels=None, mode=None):
  encoder_features = self.encoder(inputs)
@@ -223,10 +228,10 @@ def __call__(self, inputs, labels=None, mode=None):
  decoder_size, char_num)
  _, decoded_out = layers.topk(input=predict, k=1)
  decoded_out = layers.lod_reset(decoded_out, y=label_out)
- predicts = {'predict': predict, 'decoded_out': decoded_out}
+ predicts = {'predict':predict, 'decoded_out':decoded_out}
  else:
- ids = self.gru_attention_infer(
+ ids, predict = self.gru_attention_infer(
  decoder_boot, self.max_length, char_num, word_vector_dim,
  encoded_vector, encoded_proj, decoder_size)
- predicts = {'decoded_out': ids}
+ predicts = {'predict':predict, 'decoded_out':ids}
  return predicts
diff --git a/tools/infer_rec.py b/tools/infer_rec.py
@@ -79,34 +79,44 @@ def main():
 
  blobs = reader_main(config, 'test')()
  infer_img = config['TestReader']['infer_img']
+ loss_type = config['Global']['loss_type']
  infer_list = get_image_file_list(infer_img)
  max_img_num = len(infer_list)
  if len(infer_list) == 0:
  logger.info("Can not find img in infer_img dir.")
  for i in range(max_img_num):
- print("infer_img:",infer_list[i])
+ logger.info("infer_img:%s" % infer_list[i])
  img = next(blobs)
  predict = exe.run(program=eval_prog,
  feed={"image": img},
  fetch_list=fetch_varname_list,
  return_numpy=False)
-
- preds = np.array(predict[0])
- if preds.shape[1] == 1:
+ if loss_type == "ctc":
+ preds = np.array(predict[0])
  preds = preds.reshape(-1)
  preds_lod = predict[0].lod()[0]
  preds_text = char_ops.decode(preds)
- else:
+ probs = np.array(predict[1])
+ ind = np.argmax(probs, axis=1)
+ blank = probs.shape[1]
+ valid_ind = np.where(ind != (blank - 1))[0]
+ score = np.mean(probs[valid_ind, ind[valid_ind]])
+ elif loss_type == "attention":
+ preds = np.array(predict[0])
+ probs = np.array(predict[1])
  end_pos = np.where(preds[0, :] == 1)[0]
  if len(end_pos) <= 1:
- preds_text = preds[0, 1:]
+ preds = preds[0, 1:]
+ score = np.mean(probs[0, 1:])
  else:
- preds_text = preds[0, 1:end_pos[1]]
- preds_text = preds_text.reshape(-1)
- preds_text = char_ops.decode(preds_text)
-
- print("\t index:",preds)
- print("\t word :",preds_text)
+ preds = preds[0, 1:end_pos[1]]
+ score = np.mean(probs[0, 1:end_pos[1]])
+ preds = preds.reshape(-1)
+ preds_text = char_ops.decode(preds)
+
+ print("\t index:", preds)
+ print("\t word :", preds_text)
+ print("\t score :", score)
 
  # save for inference model
  target_var = []