Skip to content

Commit

Permalink
Added test for restore checkpoint.
Browse files Browse the repository at this point in the history
Reviewed By: rbgirshick

Differential Revision: D7015424

fbshipit-source-id: b8e4a998ae55e717f0b7981f15349d93e464b734
  • Loading branch information
agrimgupta92 authored and facebook-github-bot committed Mar 28, 2018
1 parent 7d8da09 commit 609dc51
Showing 1 changed file with 132 additions and 0 deletions.
132 changes: 132 additions & 0 deletions tests/test_restore_checkpoint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
# Copyright (c) 2017-present, Facebook, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
##############################################################################

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import logging
import numpy as np
import os
import shutil
import tempfile

from caffe2.python import workspace

from core.config import assert_and_infer_cfg
from core.config import cfg
from core.config import get_output_dir
from datasets.roidb import combined_roidb_for_training
from modeling import model_builder
import utils.c2 as c2_utils
import utils.logging
import utils.net as nu

# Register the custom Detectron operators with Caffe2 before any model is
# built; uses the same `c2_utils` alias as the rest of this file.
c2_utils.import_detectron_ops()


def get_params(model):
    """Snapshot the model's parameter and momentum blobs from the workspace.

    The blobs captured are every entry of ``model.params`` plus, for every
    entry of ``model.TrainableParams()``, the blob named
    ``'<param>_momentum'``.

    Args:
        model: model object exposing ``params`` and ``TrainableParams()``.

    Returns:
        A ``(blobs, all_blobs)`` tuple of dicts:
          * ``blobs`` holds only blobs whose scoped name contains ``'gpu_0'``,
            keyed by ``c2_utils.UnscopeName(scoped_name)``.
          * ``all_blobs`` holds every captured blob keyed by its scoped name.
        Values are whatever ``workspace.FetchBlob`` returns. A gpu_0 blob is
        fetched once, so both dicts reference the same fetched object.
    """
    # Gather every blob name first so one loop handles params and momentum.
    scoped_names = [str(param) for param in model.params]
    scoped_names.extend(
        str(param) + '_momentum' for param in model.TrainableParams()
    )

    blobs = {}  # gpu_0 blobs with unscoped_name as key
    all_blobs = {}  # all blobs with scoped name as key
    for scoped_name in scoped_names:
        # Fetch once per blob instead of once per destination dict.
        value = workspace.FetchBlob(scoped_name)
        all_blobs[scoped_name] = value
        if 'gpu_0' in scoped_name:
            blobs[c2_utils.UnscopeName(scoped_name)] = value
    return blobs, all_blobs


def add_momentum_init_ops(model):
    """Add a GaussianFill op for each gpu 0 momentum blob to the init net.

    For every blob returned by ``model.TrainableParams(gpu_id=0)`` an op is
    appended to ``model.param_init_net`` that takes ``'<param>_momentum'`` as
    both input and output, with ``mean=0.0`` and ``std=1.0``.
    """
    init_net = model.param_init_net
    for weight in model.TrainableParams(gpu_id=0):
        momentum = weight + '_momentum'
        init_net.GaussianFill([momentum], momentum, mean=0.0, std=1.0)


def init_weights(model):
    """Run the model's parameter init net once, then broadcast parameters.

    Weights are initialised on gpu_id = 0 and then propagated with
    ``nu.broadcast_parameters``.
    """
    init_net = model.param_init_net
    workspace.RunNetOnce(init_net)
    nu.broadcast_parameters(model)


def test_restore_checkpoint():
    """Check that weights saved to a checkpoint file are restored intact.

    Steps:
      1. Build a training model from the global ``cfg``, add the momentum
         init ops, and initialise its weights.
      2. Save the weights to a checkpoint file and snapshot all blobs.
      3. Re-run initialisation to change the weights.
      4. Reload the checkpoint into gpu 0 and broadcast the parameters.
      5. Assert that every blob equals its pre-save snapshot, and that every
         restored blob equals the saved gpu_0 blob of the same unscoped name.

    ``cfg.OUTPUT_DIR`` is removed once the save/restore phase finishes,
    whether it succeeded or raised.

    Raises:
        AssertionError: if any restored blob differs from the saved value.
    """
    # Create Model
    model = model_builder.create(cfg.MODEL.TYPE, train=True)
    add_momentum_init_ops(model)
    init_weights(model)
    # Fill input blobs
    roidb = combined_roidb_for_training(
        cfg.TRAIN.DATASETS, cfg.TRAIN.PROPOSAL_FILES
    )
    model_builder.add_training_inputs(model, roidb=roidb)
    workspace.CreateNet(model.net)
    # Bookkeeping for checkpoint creation
    iter_num = 0
    output_dir = get_output_dir(cfg.TRAIN.DATASETS, training=True)
    chk_file_path = os.path.join(output_dir, 'model_iter{}.pkl'.format(iter_num))
    try:
        # Save model weights
        nu.save_model_to_weights_file(chk_file_path, model)
        orig_gpu_0_params, orig_all_params = get_params(model)
        # Change the model weights
        init_weights(model)
        # Reload the weights in the model
        nu.initialize_gpu_from_weights_file(model, chk_file_path, gpu_id=0)
        nu.broadcast_parameters(model)
    finally:
        # Clean up even when save/restore raises, so a failed run does not
        # leave the output directory behind. Best effort: a cleanup error
        # must not mask the real failure.
        shutil.rmtree(cfg.OUTPUT_DIR, ignore_errors=True)
    _, restored_all_params = get_params(model)
    # Check if all params are loaded correctly
    for scoped_name, blob in orig_all_params.items():
        np.testing.assert_array_equal(
            blob,
            restored_all_params[scoped_name],
            err_msg='Restored blob {} differs from the saved value'.format(
                scoped_name
            ),
        )
    # Check if broadcast_parameters works
    for scoped_name, blob in restored_all_params.items():
        unscoped_name = c2_utils.UnscopeName(scoped_name)
        np.testing.assert_array_equal(
            blob,
            orig_gpu_0_params[unscoped_name],
            err_msg='Blob {} does not match the saved gpu_0 blob {}'.format(
                scoped_name, unscoped_name
            ),
        )


if __name__ == '__main__':
    # Script entry point: configure a generalized R-CNN model with an FPN
    # ResNet50 body, then run the restore-checkpoint test once for every GPU
    # count from 1 up to the number of CUDA devices reported by Caffe2.
    workspace.GlobalInit(['caffe2', '--caffe2_log_level=0'])
    logger = utils.logging.setup_logging(__name__)
    logger.setLevel(logging.DEBUG)
    logging.getLogger('roi_data.loader').setLevel(logging.INFO)
    np.random.seed(cfg.RNG_SEED)
    output_dir = tempfile.mkdtemp()
    try:
        # Generate config for test
        cfg.MODEL.TYPE = 'generalized_rcnn'
        cfg.MODEL.CONV_BODY = 'FPN.add_fpn_ResNet50_conv5_body'
        cfg.MODEL.NUM_CLASSES = 81
        cfg.MODEL.FASTER_RCNN = True
        cfg.FPN.FPN_ON = True
        cfg.FPN.MULTILEVEL_ROIS = True
        cfg.FPN.MULTILEVEL_RPN = True
        cfg.FAST_RCNN.ROI_BOX_HEAD = 'fast_rcnn_heads.add_roi_2mlp_head'
        cfg.FAST_RCNN.ROI_XFORM_METHOD = 'RoIAlign'
        cfg.OUTPUT_DIR = output_dir
        cfg.TRAIN.DATASETS = ('coco_2014_minival',)
        cfg.TRAIN.WEIGHTS = b''
        num_devices = workspace.NumCudaDevices()
        if not num_devices:
            # Without this the script would exit silently having tested nothing.
            logger.warning(
                'No CUDA devices found; test_restore_checkpoint was not run.'
            )
        for num_gpu in range(num_devices):
            # Unlock the config so NUM_GPUS can be changed for this run.
            cfg.immutable(False)
            cfg.NUM_GPUS = num_gpu + 1
            assert_and_infer_cfg()
            test_restore_checkpoint()
    finally:
        # The test removes cfg.OUTPUT_DIR itself on the paths it reaches; this
        # covers runs where it was never called or failed before cleaning up.
        shutil.rmtree(output_dir, ignore_errors=True)

0 comments on commit 609dc51

Please sign in to comment.