This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Add more models to benchmark_score #12780

Merged: 17 commits, Oct 21, 2018
Changes from 1 commit
Add a new parameter 'global batchsize' for the batch size multiplication for multi-gpu case
Shufan committed Oct 14, 2018
commit 129802536f25ea8ec6839e646e1e6c742d9e2394
23 changes: 19 additions & 4 deletions benchmark/python/gluon/benchmark_gluon.py
@@ -41,11 +41,14 @@
 parser.add_argument('--num-batches', type=int, default=10)
 parser.add_argument('--gpus', type=str, default='',
                     help='ordinates of gpus to use, can be "0,1,2" or empty for cpu only.')
Contributor:
GPU IDs to use for this benchmark task. Example: --gpus=0,1,2,3 to use 4 GPUs. By default, use CPU only.
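
For context, the script converts this string into MXNet contexts elsewhere in the file; the usual pattern looks like the sketch below (the actual parsing code is outside this hunk, so treat this as an assumption):

import mxnet as mx

# '--gpus=0,1,2' -> [mx.gpu(0), mx.gpu(1), mx.gpu(2)]; an empty string falls back to CPU.
devs = [mx.gpu(int(i)) for i in opt.gpus.split(',')] if opt.gpus else [mx.cpu()]
num_gpus = len(opt.gpus.split(',')) if opt.gpus else 0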

+parser.add_argument('--global-batchsize', type=bool, default=True,
Member:
Rather than adding an option, I think you can just pick one convention.

+                    help='for multi-gpu case, the batchsize will not be multiplied with GPU number if set true.')
Contributor:
Optional. Set this to True if batch-size should be used as-is.
Example: with --batch-size=64 and --gpus=0,1: if --global-batchsize=True, each of the 2 GPUs gets 64/2 = 32 samples per batch; if --global-batchsize=False, each of the 2 GPUs gets 64 samples per batch (128 total).
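
To make the two conventions concrete, a minimal sketch (the helper name is hypothetical, not part of this diff):

def per_gpu_batch(batch_size, num_gpus, global_bs):
    # Hypothetical helper: number of samples each GPU sees per batch.
    total = batch_size if global_bs else batch_size * max(1, num_gpus)
    return total // max(1, num_gpus)

assert per_gpu_batch(64, 2, global_bs=True) == 32   # 64 split across 2 GPUs
assert per_gpu_batch(64, 2, global_bs=False) == 64  # 64 per GPU, 128 total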

 parser.add_argument('--type', type=str, default='inference', choices=['all', 'training', 'inference'])

 opt = parser.parse_args()

 num_batches = opt.num_batches
+global_bs = opt.global_batchsize
 dry_run = 10  # use 10 iterations to warm up
 batch_inf = [1, 16, 32, 64, 128, 256]
 batch_train = [1, 2, 4, 8, 16, 32, 64, 126, 256]
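
One caveat with the flag as committed: argparse's type=bool does not behave the way the help text suggests, because bool() is applied to the raw command-line string and any non-empty string is truthy. A minimal standalone demonstration (not part of the diff):

import argparse

p = argparse.ArgumentParser()
p.add_argument('--global-batchsize', type=bool, default=True)

# bool('False') is True, so the flag cannot actually be turned off this way.
print(p.parse_args(['--global-batchsize=False']).global_batchsize)  # True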
@@ -143,25 +146,37 @@ def train(network, batch_size, ctx):
 logging.info('device: %s', devs)
 if runtype == 'inference' or runtype == 'all':
     if bs != 0:
-        batch_sizes = bs * max(1, num_gpus)
+        if not global_bs:
+            batch_sizes = bs * max(1, num_gpus)
+        else:
+            batch_sizes = bs
         fwd_time = score(network, batch_sizes, devs)
         fps = (batch_sizes * num_batches)/fwd_time
         logging.info(network + ' inference perf for BS %d is %f img/s', bs, fps)
     else:
         for batch_size in batch_inf:
-            batch_sizes = batch_size * max(1, num_gpus)
+            if not global_bs:
+                batch_sizes = batch_size * max(1, num_gpus)
+            else:
+                batch_sizes = batch_size
             fwd_time = score(network, batch_sizes, devs)
             fps = (batch_sizes * num_batches) / fwd_time
             logging.info(network + ' inference perf for BS %d is %f img/s', batch_size, fps)
 if runtype == 'training' or runtype == 'all':
     if bs != 0:
-        batch_sizes = bs * max(1, num_gpus)
+        if not global_bs:
+            batch_sizes = bs * max(1, num_gpus)
+        else:
+            batch_sizes = bs
         bwd_time = train(network, batch_sizes, devs)
         fps = (batch_sizes * num_batches) / bwd_time
         logging.info(network + ' training perf for BS %d is %f img/s', bs, fps)
     else:
         for batch_size in batch_train:
-            batch_sizes = batch_size * max(1, num_gpus)
+            if not global_bs:
+                batch_sizes = batch_size * max(1, num_gpus)
+            else:
+                batch_sizes = batch_size
             bwd_time = train(network, batch_sizes, devs)
             fps = (batch_sizes * num_batches) / bwd_time
             logging.info(network + ' training perf for BS %d is %f img/s', batch_size, fps)
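
The same four-line branch now appears four times in this hunk. A reviewer-style consolidation could look like the sketch below (hypothetical helper, not part of this PR):

def effective_batch_size(bs, num_gpus, global_bs):
    # Total batch size handed to score()/train(): used as-is when
    # global_bs is set, otherwise scaled by the number of devices.
    return bs if global_bs else bs * max(1, num_gpus)

Each call site would then reduce to batch_sizes = effective_batch_size(bs, num_gpus, global_bs), leaving the fps = (batch_sizes * num_batches) / elapsed computation unchanged.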