Skip to content
This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

Commit

Permalink
[BUGFIX] Fix threadsafety and shutdown issues with threaded_engine_perdevice (#21110)
Browse files Browse the repository at this point in the history

* Fix threadsafety and shutdown issues with threaded_engine_perdevice

* Fix lint

* Add MXNET_USE_CUDA compile guards

* Remove unneeded include
Loading branch information
DickJC123 committed Aug 2, 2022
1 parent dedb8c9 commit 0b4ecdb
Showing 1 changed file with 21 additions and 11 deletions.
32 changes: 21 additions & 11 deletions src/engine/threaded_engine_perdevice.cc
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
#include <dmlc/concurrency.h>
#include <dmlc/thread_group.h>

#include <mutex>
#include <memory>
#include "../initialize.h"
#include "./threaded_engine.h"
Expand Down Expand Up @@ -74,6 +75,10 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
gpu_copy_workers_.Clear();
cpu_normal_workers_.Clear();
cpu_priority_worker_.reset(nullptr);
#if MXNET_USE_CUDA
streams_.clear();
cuda_event_pool_per_worker_.clear();
#endif
}

void Stop() override {
Expand Down Expand Up @@ -278,6 +283,7 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
CHECK(block != nullptr);
mshadow::Stream<gpu>* stream = nullptr;
GPUAuxStream* aux_stream = nullptr;
CUDAEventPool* event_pool = nullptr;
do {
ThreadPool::SetReadyOnDestroy setReady(ready_event);
// allocate stream
Expand All @@ -288,18 +294,22 @@ class ThreadedEnginePerDevice : public ThreadedEngine {
stream = mshadow::NewStream<gpu>(true, MXNET_USE_CUDNN != 0, ctx.dev_id);
aux_stream = new GPUAuxStream(stream);
}
// With thread safety...
{
static std::mutex m;
std::lock_guard<std::mutex> lock(m);
// register stream
streams_.push_back(stream);
auto event_pool_it = cuda_event_pool_per_worker_.find(ctx.dev_id);
if (event_pool_it != cuda_event_pool_per_worker_.end()) {
event_pool = event_pool_it->second.get();
} else {
auto res =
cuda_event_pool_per_worker_.emplace(ctx.dev_id, std::make_unique<CUDAEventPool>(ctx));
event_pool = res.first->second.get();
}
}
} while (false);
// register stream
streams_.push_back(stream);
CUDAEventPool* event_pool;
auto event_pool_it = cuda_event_pool_per_worker_.find(ctx.dev_id);
if (event_pool_it != cuda_event_pool_per_worker_.end()) {
event_pool = event_pool_it->second.get();
} else {
auto res =
cuda_event_pool_per_worker_.emplace(ctx.dev_id, std::make_unique<CUDAEventPool>(ctx));
event_pool = res.first->second.get();
}
// execute task
OprBlock* opr_block;
RunContext run_ctx{ctx, stream, aux_stream};
Expand Down

0 comments on commit 0b4ecdb

Please sign in to comment.