Merge branch 'develop' of https://github.com/PaddlePaddle/PaddleOCR into fixocr
PaddlePaddle · May 25, 2020 · afecc49 · afecc49
2 parents 75d2c47 + c63624b
commit afecc49
Show file tree

Hide file tree

Showing 5 changed files with 20 additions and 7 deletions.
diff --git a/ppocr/data/det/dataset_traversal.py b/ppocr/data/det/dataset_traversal.py
@@ -13,6 +13,7 @@
 #limitations under the License.
 
 import os
+import sys
 import math
 import random
 import functools
@@ -42,6 +43,10 @@ def sample_iter_reader():
  img_num = len(label_infor_list)
  img_id_list = list(range(img_num))
  random.shuffle(img_id_list)
+ if sys.platform == "win32":
+ print("multiprocess is not fully compatible with Windows."
+ "num_workers will be 1.")
+ self.num_workers = 1
  for img_id in range(process_id, img_num, self.num_workers):
  label_infor = label_infor_list[img_id_list[img_id]]
  outs = self.process(label_infor)

diff --git a/ppocr/data/reader_main.py b/ppocr/data/reader_main.py
@@ -66,6 +66,8 @@ def reader_main(config=None, mode=None):
  reader_function = params['reader_function']
  function = create_module(reader_function)(params)
  if mode == "train":
+ if sys.platform == "win32":
+ return function(0)
  readers = []
  num_workers = params['num_workers']
  for process_id in range(num_workers):

diff --git a/ppocr/data/rec/dataset_traversal.py b/ppocr/data/rec/dataset_traversal.py
@@ -13,6 +13,7 @@
 #limitations under the License.
 
 import os
+import sys
 import math
 import random
 import numpy as np
@@ -191,16 +192,21 @@ def sample_iter_reader():
  img_num = len(label_infor_list)
  img_id_list = list(range(img_num))
  random.shuffle(img_id_list)
+ if sys.platform=="win32":
+ print("multiprocess is not fully compatible with Windows."
+ "num_workers will be 1.")
+ self.num_workers = 1
  for img_id in range(process_id, img_num, self.num_workers):
  label_infor = label_infor_list[img_id_list[img_id]]
  substr = label_infor.decode('utf-8').strip("\n").split("\t")
  img_path = self.img_set_dir + "/" + substr[0]
  img = cv2.imread(img_path)
- if img.shape[-1]==1 or len(list(img.shape))==2:
- img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
  if img is None:
  logger.info("{} does not exist!".format(img_path))
  continue
+ if img.shape[-1]==1 or len(list(img.shape))==2:
+ img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
+
  label = substr[1]
  outs = process_image(img, self.image_shape, label,
  self.char_ops, self.loss_type,

diff --git a/ppocr/utils/character.py b/ppocr/utils/character.py
@@ -34,7 +34,7 @@ def __init__(self, config):
  with open(character_dict_path, "rb") as fin:
  lines = fin.readlines()
  for line in lines:
- line = line.decode('utf-8').strip("\n")
+ line = line.decode('utf-8').strip("\n").strip("\r\n")
  self.character_str += line
  dict_character = list(self.character_str)
  elif self.character_type == "en_sensitive":

diff --git a/tools/eval_utils/eval_rec_utils.py b/tools/eval_utils/eval_rec_utils.py
@@ -48,7 +48,7 @@ def eval_rec_run(exe, config, eval_info_dict, mode):
  total_sample_num = 0
  total_acc_num = 0
  total_batch_num = 0
- if mode == "eval":
+ if mode == "test":
  is_remove_duplicate = False
  else:
  is_remove_duplicate = True
@@ -91,11 +91,11 @@ def test_rec_benchmark(exe, config, eval_info_dict):
  total_correct_number = 0
  eval_data_acc_info = {}
  for eval_data in eval_data_list:
- config['TestReader']['lmdb_sets_dir'] = \
+ config['EvalReader']['lmdb_sets_dir'] = \
  eval_data_dir + "/" + eval_data
- eval_reader = reader_main(config=config, mode="test")
+ eval_reader = reader_main(config=config, mode="eval")
  eval_info_dict['reader'] = eval_reader
- metrics = eval_rec_run(exe, config, eval_info_dict, "test")
+ metrics = eval_rec_run(exe, config, eval_info_dict, "eval")
  total_evaluation_data_number += metrics['total_sample_num']
  total_correct_number += metrics['total_acc_num']
  eval_data_acc_info[eval_data] = metrics