This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Dual stream cudnn Convolution backward() with MXNET_GPU_WORKER_NSTREAMS=2. #14006

Merged
11 commits merged on Feb 24, 2019
Improve test function and commenting.
DickJC123 committed Jan 30, 2019
commit d16e85fc13f1254529cda875e3150686be79fedf
17 changes: 10 additions & 7 deletions tests/python/gpu/test_operator_gpu.py
@@ -540,23 +540,26 @@ def _test_in_separate_process(func, *args):
 def _conv_with_num_streams(seed, num_streams):
     os.environ['MXNET_GPU_WORKER_NSTREAMS'] = str(num_streams)
     with random_seed(seed):
-        num_trials = 10
+        # Try to expose timing-dependent improper workspace sharing by parallel dgrad and wgrad
+        num_trials = 20
         for _ in range(num_trials):
-            size = np.random.randint(32, 512)
-            print('size = {}'.format(size))
+            size = np.random.randint(32, 128)
+            # The cudnn conv operator runs dgrad and wgrad in separate streams if enabled, with possible
+            # kernel overlap.  The non-cudnn conv op doesn't do this so is used as the 'golden copy'.
             ctx = {'ctx': mx.gpu(0), 'conv_data': (2, 2, size, size),
                    'type_dict': {'conv_data': np.float32}}
+            # Adding 'flip' here isolates the model from the input node (which can't use inplace store)
             flipped = mx.sym.flip(axis=0, name='conv')
             sym = mx.sym.Convolution(data=flipped, num_filter=3, kernel=(3,3), pad=(1,1), name='conv')
             flipped_no_cudnn = mx.sym.flip(axis=0, name='conv')
-            sym_no_cudnn = mx.sym.Convolution(data = flipped_no_cudnn, num_filter=3, kernel=(3,3), pad=(1,1),
+            sym_no_cudnn = mx.sym.Convolution(data=flipped_no_cudnn, num_filter=3, kernel=(3,3), pad=(1,1),
                                               cudnn_off=True, name='conv')
-            check_consistency([sym, sym_no_cudnn], [ctx, ctx])
+            try:
+                # tol can be pretty high- we're looking for a large diff due to garbaged workspace
+                check_consistency([sym, sym_no_cudnn], [ctx, ctx], tol=1e-2)
+            except:
+                print('Failing conv size = {}'.format(size))
+                raise
 
 @with_seed()
 def test_convolution_multiple_streams():
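For context, the feature under test appears to read MXNET_GPU_WORKER_NSTREAMS when the engine's GPU worker starts, which is presumably why the test sets the variable in a freshly spawned process (via _test_in_separate_process). Below is a minimal sketch, not from the PR, of exercising the dual-stream cudnn Convolution backward path directly in MXNet 1.x; the input shape and symbol names are illustrative.

# Sketch (illustrative, assumes an MXNet 1.x build with cuDNN and a GPU):
# set the env var before the engine's GPU worker is created, i.e. before
# any GPU work is issued.
import os
os.environ['MXNET_GPU_WORKER_NSTREAMS'] = '2'

import mxnet as mx
import numpy as np

data = mx.sym.Variable('data')
sym = mx.sym.Convolution(data=data, num_filter=3, kernel=(3, 3), pad=(1, 1), name='conv')
# Illustrative NCHW input shape.
exe = sym.simple_bind(ctx=mx.gpu(0), data=(2, 2, 64, 64), type_dict={'data': np.float32})
exe.forward(is_train=True)
# backward() computes both dgrad and wgrad; with MXNET_GPU_WORKER_NSTREAMS=2
# the cudnn path may overlap the two kernels on separate CUDA streams.
exe.backward(mx.nd.ones_like(exe.outputs[0]))
mx.nd.waitall()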