Toward Devkit Consistency (PaddlePaddle#10150)
* Accommodate UAPI

* Fix signal handler

* Save model.pdopt

* Change variable name

* Update vdl dir
Bobholamovic committed Jun 14, 2023
1 parent 15abbcc commit 2d44a71
Showing 12 changed files with 60 additions and 25 deletions.
deploy/slim/prune/export_prune_model.py (3 changes: 2 additions & 1 deletion)
@@ -25,7 +25,7 @@
 sys.path.append(os.path.join(__dir__, '..', '..', '..', 'tools'))

 import paddle
-from ppocr.data import build_dataloader
+from ppocr.data import build_dataloader, set_signal_handlers
 from ppocr.modeling.architectures import build_model

 from ppocr.postprocess import build_post_process

@@ -39,6 +39,7 @@ def main(config, device, logger, vdl_writer):
     global_config = config['Global']

     # build dataloader
+    set_signal_handlers()
     valid_dataloader = build_dataloader(config, 'Eval', device, logger)

     # build post process
deploy/slim/prune/sensitivity_anal.py (3 changes: 2 additions & 1 deletion)
@@ -26,7 +26,7 @@

 import paddle
 import paddle.distributed as dist
-from ppocr.data import build_dataloader
+from ppocr.data import build_dataloader, set_signal_handlers
 from ppocr.modeling.architectures import build_model
 from ppocr.losses import build_loss
 from ppocr.optimizer import build_optimizer

@@ -57,6 +57,7 @@ def main(config, device, logger, vdl_writer):
     global_config = config['Global']

     # build dataloader
+    set_signal_handlers()
     train_dataloader = build_dataloader(config, 'Train', device, logger)
     if config['Eval']:
         valid_dataloader = build_dataloader(config, 'Eval', device, logger)
deploy/slim/quantization/export_model.py (3 changes: 2 additions & 1 deletion)
@@ -34,7 +34,7 @@
 from ppocr.metrics import build_metric
 import tools.program as program
 from paddleslim.dygraph.quant import QAT
-from ppocr.data import build_dataloader
+from ppocr.data import build_dataloader, set_signal_handlers
 from tools.export_model import export_single_model


@@ -134,6 +134,7 @@ def main():
     eval_class = build_metric(config['Metric'])

     # build dataloader
+    set_signal_handlers()
     valid_dataloader = build_dataloader(config, 'Eval', device, logger)

     use_srn = config['Architecture']['algorithm'] == "SRN"
deploy/slim/quantization/quant.py (3 changes: 2 additions & 1 deletion)
@@ -31,7 +31,7 @@

 paddle.seed(2)

-from ppocr.data import build_dataloader
+from ppocr.data import build_dataloader, set_signal_handlers
 from ppocr.modeling.architectures import build_model
 from ppocr.losses import build_loss
 from ppocr.optimizer import build_optimizer

@@ -95,6 +95,7 @@ def main(config, device, logger, vdl_writer):
     global_config = config['Global']

     # build dataloader
+    set_signal_handlers()
     train_dataloader = build_dataloader(config, 'Train', device, logger)
     if config['Eval']:
         valid_dataloader = build_dataloader(config, 'Eval', device, logger)
deploy/slim/quantization/quant_kl.py (3 changes: 2 additions & 1 deletion)
@@ -31,7 +31,7 @@

 paddle.seed(2)

-from ppocr.data import build_dataloader
+from ppocr.data import build_dataloader, set_signal_handlers
 from ppocr.modeling.architectures import build_model
 from ppocr.losses import build_loss
 from ppocr.optimizer import build_optimizer

@@ -117,6 +117,7 @@ def main(config, device, logger, vdl_writer):
     global_config = config['Global']

     # build dataloader
+    set_signal_handlers()
     config['Train']['loader']['num_workers'] = 0
     is_layoutxlm_ser = config['Architecture']['model_type'] =='kie' and config['Architecture']['Backbone']['name'] == 'LayoutXLMForSer'
     train_dataloader = build_dataloader(config, 'Train', device, logger)
ppocr/data/__init__.py (21 changes: 16 additions & 5 deletions)
@@ -39,7 +39,7 @@
 from ppocr.data.pubtab_dataset import PubTabDataSet
 from ppocr.data.multi_scale_sampler import MultiScaleSampler

-__all__ = ['build_dataloader', 'transform', 'create_operators']
+__all__ = ['build_dataloader', 'transform', 'create_operators', 'set_signal_handlers']


 def term_mp(sig_num, frame):

@@ -51,6 +51,21 @@ def term_mp(sig_num, frame):
     os.killpg(pgid, signal.SIGKILL)


+def set_signal_handlers():
+    pid = os.getpid()
+    pgid = os.getpgid(os.getpid())
+    # XXX: `term_mp` kills all processes in the process group, which in
+    # some cases includes the parent process of current process and may
+    # cause unexpected results. To solve this problem, we set signal
+    # handlers only when current process is the group leader. In the
+    # future, it would be better to consider killing only descendants of
+    # the current process.
+    if pid == pgid:
+        # support exit using ctrl+c
+        signal.signal(signal.SIGINT, term_mp)
+        signal.signal(signal.SIGTERM, term_mp)
+
+
 def build_dataloader(config, mode, device, logger, seed=None):
     config = copy.deepcopy(config)

@@ -109,8 +124,4 @@ def build_dataloader(config, mode, device, logger, seed=None):
         use_shared_memory=use_shared_memory,
         collate_fn=collate_fn)

-    # support exit using ctrl+c
-    signal.signal(signal.SIGINT, term_mp)
-    signal.signal(signal.SIGTERM, term_mp)
-
     return data_loader
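
Taken together, these changes move the SIGINT/SIGTERM handling out of build_dataloader: entry points now opt in by calling set_signal_handlers() once, and the handlers are only installed when the calling process is the leader of its process group. A minimal sketch of the calling pattern the updated entry points follow (config, device and logger are assumed to be produced by tools/program.py, as in the real scripts):

from ppocr.data import build_dataloader, set_signal_handlers

def main(config, device, logger, vdl_writer):
    # Install the SIGINT/SIGTERM handlers once, before any dataloader is built;
    # set_signal_handlers() is a no-op unless this process leads its own group.
    set_signal_handlers()
    train_dataloader = build_dataloader(config, 'Train', device, logger)
    if config['Eval']:
        valid_dataloader = build_dataloader(config, 'Eval', device, logger)
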
ppocr/utils/save_load.py (17 changes: 17 additions & 0 deletions)
@@ -197,13 +197,26 @@ def save_model(model,
     """
     _mkdir_if_not_exist(model_path, logger)
     model_prefix = os.path.join(model_path, prefix)
+
+    if prefix == 'best_accuracy':
+        best_model_path = os.path.join(model_path, 'best_model')
+        _mkdir_if_not_exist(best_model_path, logger)
+
     paddle.save(optimizer.state_dict(), model_prefix + '.pdopt')
+    if prefix == 'best_accuracy':
+        paddle.save(optimizer.state_dict(),
+                    os.path.join(best_model_path, 'model.pdopt'))

     is_nlp_model = config['Architecture']["model_type"] == 'kie' and config[
         "Architecture"]["algorithm"] not in ["SDMGR"]
     if is_nlp_model is not True:
         paddle.save(model.state_dict(), model_prefix + '.pdparams')
         metric_prefix = model_prefix
+
+        if prefix == 'best_accuracy':
+            paddle.save(model.state_dict(),
+                        os.path.join(best_model_path, 'model.pdparams'))
+
     else:  # for kie system, we follow the save/load rules in NLP
         if config['Global']['distributed']:
             arch = model._layers

@@ -213,6 +226,10 @@ def save_model(model,
             arch = arch.Student
         arch.backbone.model.save_pretrained(model_prefix)
         metric_prefix = os.path.join(model_prefix, 'metric')
+
+        if prefix == 'best_accuracy':
+            arch.backbone.model.save_pretrained(best_model_path)
+
     # save metric and config
     with open(metric_prefix + '.states', 'wb') as f:
         pickle.dump(kwargs, f, protocol=2)
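
With these additions, a new best checkpoint is written twice: under the usual <prefix>.* files and as a fixed-name copy in a best_model/ subdirectory (KIE/NLP-style models are exported via save_pretrained() into the same locations instead). A rough sketch of the paths produced for a non-KIE model when prefix == 'best_accuracy'; the helper name below is illustrative and not part of the repository:

import os

def expected_best_checkpoint_paths(save_model_dir):
    # Files written by save_model() for a new best checkpoint (non-KIE case);
    # the .states file comes from the metric/config pickle dumped at the end.
    return [
        os.path.join(save_model_dir, 'best_accuracy.pdopt'),
        os.path.join(save_model_dir, 'best_accuracy.pdparams'),
        os.path.join(save_model_dir, 'best_accuracy.states'),
        os.path.join(save_model_dir, 'best_model', 'model.pdopt'),
        os.path.join(save_model_dir, 'best_model', 'model.pdparams'),
    ]
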
tools/eval.py (3 changes: 2 additions & 1 deletion)
@@ -24,7 +24,7 @@
 sys.path.insert(0, os.path.abspath(os.path.join(__dir__, '..')))

 import paddle
-from ppocr.data import build_dataloader
+from ppocr.data import build_dataloader, set_signal_handlers
 from ppocr.modeling.architectures import build_model
 from ppocr.postprocess import build_post_process
 from ppocr.metrics import build_metric

@@ -35,6 +35,7 @@
 def main():
     global_config = config['Global']
     # build dataloader
+    set_signal_handlers()
     valid_dataloader = build_dataloader(config, 'Eval', device, logger)

     # build post process
tools/export_center.py (3 changes: 2 additions & 1 deletion)
@@ -24,7 +24,7 @@
 sys.path.append(__dir__)
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))

-from ppocr.data import build_dataloader
+from ppocr.data import build_dataloader, set_signal_handlers
 from ppocr.modeling.architectures import build_model
 from ppocr.postprocess import build_post_process
 from ppocr.utils.save_load import load_model

@@ -40,6 +40,7 @@ def main():
         'data_dir']
     config['Eval']['dataset']['label_file_list'] = config['Train']['dataset'][
         'label_file_list']
+    set_signal_handlers()
     eval_dataloader = build_dataloader(config, 'Eval', device, logger)

     # build post process
tools/infer_det.py (21 changes: 10 additions & 11 deletions)
@@ -40,17 +40,16 @@


 def draw_det_res(dt_boxes, config, img, img_name, save_path):
-    if len(dt_boxes) > 0:
-        import cv2
-        src_im = img
-        for box in dt_boxes:
-            box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
-            cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
-        if not os.path.exists(save_path):
-            os.makedirs(save_path)
-        save_path = os.path.join(save_path, os.path.basename(img_name))
-        cv2.imwrite(save_path, src_im)
-        logger.info("The detected Image saved in {}".format(save_path))
+    import cv2
+    src_im = img
+    for box in dt_boxes:
+        box = np.array(box).astype(np.int32).reshape((-1, 1, 2))
+        cv2.polylines(src_im, [box], True, color=(255, 255, 0), thickness=2)
+    if not os.path.exists(save_path):
+        os.makedirs(save_path)
+    save_path = os.path.join(save_path, os.path.basename(img_name))
+    cv2.imwrite(save_path, src_im)
+    logger.info("The detected Image saved in {}".format(save_path))


 @paddle.no_grad()
tools/program.py (2 changes: 1 addition & 1 deletion)
@@ -683,7 +683,7 @@ def preprocess(is_train=False):

     if 'use_visualdl' in config['Global'] and config['Global']['use_visualdl']:
         save_model_dir = config['Global']['save_model_dir']
-        vdl_writer_path = '{}/vdl/'.format(save_model_dir)
+        vdl_writer_path = save_model_dir
         log_writer = VDLLogger(vdl_writer_path)
         loggers.append(log_writer)
     if ('use_wandb' in config['Global'] and
tools/train.py (3 changes: 2 additions & 1 deletion)
@@ -27,7 +27,7 @@
 import paddle
 import paddle.distributed as dist

-from ppocr.data import build_dataloader
+from ppocr.data import build_dataloader, set_signal_handlers
 from ppocr.modeling.architectures import build_model
 from ppocr.losses import build_loss
 from ppocr.optimizer import build_optimizer

@@ -49,6 +49,7 @@ def main(config, device, logger, vdl_writer):
     global_config = config['Global']

     # build dataloader
+    set_signal_handlers()
     train_dataloader = build_dataloader(config, 'Train', device, logger)
     if len(train_dataloader) == 0:
         logger.error(
