ResNet50, ResNet101, ResNet152
patflick committed Jul 20, 2017
1 parent ef33e3b commit a2a39f7
Showing 9 changed files with 173 additions and 87 deletions.
2 changes: 1 addition & 1 deletion Makefile
@@ -5,7 +5,7 @@ INCLUDE_DIRS=-I$(HIP_PATH)/include -I$(ROCM_PATH)/include
LD_FLAGS=-L$(ROCM_PATH)/lib -L$(ROCM_PATH)/opencl/lib/x86_64 -lMIOpen -lOpenCL -lmiopengemm -lhipblas-hcc -lrocblas-hcc
TARGET=--amdgpu-target=gfx900

-HIPCC_FLAGS=-g $(CXXFLAGS) $(TARGET) $(INCLUDE_DIRS)
+HIPCC_FLAGS=-g -Wall $(CXXFLAGS) $(TARGET) $(INCLUDE_DIRS)

all: alexnet resnet benchmark_wino layerwise gputop

6 changes: 3 additions & 3 deletions function.hpp
@@ -5,9 +5,9 @@
struct Function {
// every layer has to implement forward and backward
virtual void forward(const Tensor& input, Tensor& output) = 0;
-virtual void init_forward(const Tensor& input, Tensor& output) {};
+virtual void init_forward(const Tensor&, Tensor&) {};
virtual void backward(const Tensor& doutput, Tensor& dinput) = 0;
-virtual void init_backward(const Tensor& doutput, Tensor& dinput) {};
+virtual void init_backward(const Tensor&, Tensor&) {};

// return the input dimensions
virtual const TensorDesc& getInputDesc() const = 0;
@@ -25,7 +25,7 @@ struct Function {
}

virtual std::ostream& write(std::ostream& os) const {
-return this->write_dims(this->write_name(os) << ":");
+return this->write_dims(this->write_name(os) << " ");
}
};

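Note: dropping the parameter names from the empty default implementations is the usual way to silence unused-parameter warnings under the stricter flags added in the Makefile above. A minimal sketch of the pattern (the subclass body is hypothetical):

    // Base class: unnamed parameters, deliberate no-op default.
    virtual void init_forward(const Tensor&, Tensor&) {};

    // A subclass that actually uses the tensors names them as before.
    void init_forward(const Tensor& input, Tensor& output) override {
        // ... allocate output / workspace from input's dimensions ...
    }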
55 changes: 24 additions & 31 deletions layers.hpp
@@ -54,16 +54,15 @@ struct ConvLayer : public ConvDesc, public ConvLayerDesc, public Layer {
Tensor dweights;
const Tensor* input_ref;

-DevBuffer buffer; // TODO: joined buffer for fwd/bwd
-
// algorithm selection:
miopenConvFwdAlgorithm_t fwd_algo;
miopenConvBwdWeightsAlgorithm_t bwd_weights_algo;
miopenConvBwdDataAlgorithm_t bwd_data_algo;


virtual std::ostream& write_name(std::ostream& os) const {
return os << "Conv(" << kernel_size << "x" << kernel_size << ")";
//return os << "Conv(" << kernel_size << "x" << kernel_size << ")";
return os << "Conv(" << kernel_size << "x" << kernel_size << ",pad=" << padding << ",s=" << stride << ")";
}

ConvLayer(const TensorDesc& input_dims, int channels_out, int kernel_size, int padding, int stride)
@@ -93,19 +92,16 @@ struct ConvLayer : public ConvDesc, public ConvLayerDesc, public Layer {
}

void init_forward(const Tensor& input, Tensor& output) override {
DEBUG("init conv " << *this);
size_t workspace_size;
CHECK_MIO(miopenConvolutionForwardGetWorkSpaceSize(mio::handle(), weights.desc, input.desc, this->desc, output.desc, &workspace_size));
size_t fwd_workspace_size;
CHECK_MIO(miopenConvolutionForwardGetWorkSpaceSize(mio::handle(), weights.desc, input.desc, this->desc, output.desc, &fwd_workspace_size));
DEBUG("Init fwd " << *this << " req workspace: " << fwd_workspace_size);

-//std::cout << "\tWorkspace size required for fwd: " << workspace_size << std::endl;
-if (workspace_size > buffer.size) {
-buffer = DevBuffer(workspace_size);
-}
+DevBuffer& buffer = WorkSpace::get(fwd_workspace_size);

// find best algo, and benchmark!
miopenConvAlgoPerf_t perfs[4];
int returned_algos;
-CHECK_MIO(miopenFindConvolutionForwardAlgorithm(mio::handle(), input.desc, input.data, weights.desc, weights.data, this->desc, output.desc, output.data, 4, &returned_algos, perfs, buffer.data, buffer.size, false));
+CHECK_MIO(miopenFindConvolutionForwardAlgorithm(mio::handle(), input.desc, input.data, weights.desc, weights.data, this->desc, output.desc, output.data, 4, &returned_algos, perfs, buffer.data, fwd_workspace_size, false));

INFO("\tMIOpen Found " << returned_algos << " fwd algorithms, choosing " << perfs[0].fwd_algo << ": ");
for (int i = 0; i < returned_algos; ++i) {
@@ -116,18 +112,16 @@ struct ConvLayer : public ConvDesc, public ConvLayerDesc, public Layer {
}

void find_bwd_data_algo(const Tensor& doutput, Tensor& dinput) {
-size_t workspace_size;
-CHECK_MIO(miopenConvolutionBackwardDataGetWorkSpaceSize(mio::handle(), doutput.desc, weights.desc, this->desc, dinput.desc, &workspace_size));
+size_t bwd_data_workspace_size;
+CHECK_MIO(miopenConvolutionBackwardDataGetWorkSpaceSize(mio::handle(), doutput.desc, weights.desc, this->desc, dinput.desc, &bwd_data_workspace_size));
+DEBUG("Init bwd_data " << *this << " req workspace: " << bwd_data_workspace_size);

-//std::cout << "\tWorkspace size required for bwd_data: " << workspace_size << std::endl;
-if (workspace_size > buffer.size) {
-buffer = DevBuffer(workspace_size);
-}
+DevBuffer& buffer = WorkSpace::get(bwd_data_workspace_size);

// find best algo, and benchmark!
miopenConvAlgoPerf_t perfs[5];
int returned_algos;
-CHECK_MIO(miopenFindConvolutionBackwardDataAlgorithm(mio::handle(), doutput.desc, doutput.data, weights.desc, weights.data, this->desc, dinput.desc, dinput.data, 5, &returned_algos, perfs, buffer.data, buffer.size, false));
+CHECK_MIO(miopenFindConvolutionBackwardDataAlgorithm(mio::handle(), doutput.desc, doutput.data, weights.desc, weights.data, this->desc, dinput.desc, dinput.data, 5, &returned_algos, perfs, buffer.data, bwd_data_workspace_size, false));

INFO("\tMIOpen Found " << returned_algos << " bwd_data algorithms, choosing " << perfs[0].fwd_algo << ": ");
for (int i = 0; i < returned_algos; ++i) {
@@ -138,18 +132,16 @@ struct ConvLayer : public ConvDesc, public ConvLayerDesc, public Layer {
}

void find_bwd_weights_algo(const Tensor& doutput, Tensor& input) {
-size_t workspace_size;
-CHECK_MIO(miopenConvolutionBackwardWeightsGetWorkSpaceSize(mio::handle(), doutput.desc, input.desc, this->desc, weights.desc, &workspace_size));
+size_t bwd_weights_workspace_size;
+CHECK_MIO(miopenConvolutionBackwardWeightsGetWorkSpaceSize(mio::handle(), doutput.desc, input.desc, this->desc, weights.desc, &bwd_weights_workspace_size));
+DEBUG("Init bwd_weights " << *this << " req workspace: " << bwd_weights_workspace_size);

-//std::cout << "\tWorkspace size required for bwd_weights: " << workspace_size << std::endl;
-if (workspace_size > buffer.size) {
-buffer = DevBuffer(workspace_size);
-}
+DevBuffer& buffer = WorkSpace::get(bwd_weights_workspace_size);

// find best algo, and benchmark!
miopenConvAlgoPerf_t perfs[5];
int returned_algos;
-CHECK_MIO(miopenFindConvolutionBackwardWeightsAlgorithm(mio::handle(), doutput.desc, doutput.data, input.desc, input.data, this->desc, dweights.desc, dweights.data, 5, &returned_algos, perfs, buffer.data, buffer.size, false));
+CHECK_MIO(miopenFindConvolutionBackwardWeightsAlgorithm(mio::handle(), doutput.desc, doutput.data, input.desc, input.data, this->desc, dweights.desc, dweights.data, 5, &returned_algos, perfs, buffer.data, bwd_weights_workspace_size, false));

INFO("\tMIOpen Found " << returned_algos << " bwd_weights algorithms, choosing " << perfs[0].fwd_algo << ": ");
for (int i = 0; i < returned_algos; ++i) {
@@ -167,6 +159,7 @@ struct ConvLayer : public ConvDesc, public ConvLayerDesc, public Layer {
void forward(const Tensor& input, Tensor& output) override {
float alpha = 1.f;
float beta = 0.f;
+DevBuffer& buffer = WorkSpace::get();
CHECK_MIO(miopenConvolutionForward(mio::handle(), &alpha, input.desc, input.data, weights.desc, weights.data, this->desc, fwd_algo, &beta, output.desc, output.data, buffer.data, buffer.size));
// save for backward
input_ref = &input;
@@ -175,6 +168,7 @@ struct ConvLayer : public ConvDesc, public ConvLayerDesc, public Layer {
void backward(const Tensor& doutput, Tensor& dinput) override {
float alpha = 1.f;
float beta = 0.f;
+DevBuffer& buffer = WorkSpace::get();
CHECK_MIO(miopenConvolutionBackwardData(mio::handle(), &alpha, doutput.desc, doutput.data, weights.desc, weights.data, this->desc, bwd_data_algo, &beta, dinput.desc, dinput.data, buffer.data, buffer.size));
CHECK_MIO(miopenConvolutionBackwardWeights(mio::handle(), &alpha, doutput.desc, doutput.data, input_ref->desc, input_ref->data, this->desc, bwd_weights_algo, &beta, dweights.desc, dweights.data, buffer.data, buffer.size));
}
@@ -224,7 +218,7 @@ struct PoolingLayer : public Layer {
CHECK_MIO(miopenDestroyPoolingDescriptor(desc));
}

-virtual void init_forward(const Tensor& input, Tensor& output) override {
+virtual void init_forward(const Tensor&, Tensor&) override {
size_t size;
CHECK_MIO(miopenPoolingGetWorkSpaceSize(output_desc.desc, &size));
indeces_buf = DevBuffer(size);
@@ -462,21 +456,20 @@ struct Reshape : public Layer {
assert(input_dim.c * input_dim.h * input_dim.w == c*h*w);
}

-
void init_forward(const Tensor& input, Tensor& output) override {
-output = std::move(input.viewAs(getOutputDesc()));
+output = input.viewAs(getOutputDesc());
}

void forward(const Tensor& input, Tensor& output) override {
-output = std::move(input.viewAs(getOutputDesc()));
+output = input.viewAs(getOutputDesc());
}

void init_backward(const Tensor& doutput, Tensor& dinput) override {
-dinput = std::move(doutput.viewAs(getInputDesc()));
+dinput = doutput.viewAs(getInputDesc());
}

void backward(const Tensor& doutput, Tensor& dinput) override {
-dinput = std::move(doutput.viewAs(getInputDesc()));
+dinput = doutput.viewAs(getInputDesc());
}
};
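Note: assuming viewAs returns a Tensor by value, its result is already an rvalue, so the std::move wrappers removed above were redundant; compilers flag such moves of temporaries (e.g. clang's redundant-/pessimizing-move warnings) once warnings are turned up:

    output = std::move(input.viewAs(getOutputDesc())); // redundant move of a temporary
    output = input.viewAs(getOutputDesc());            // same effect: move-assigns the rvalue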

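Note: the ConvLayer hunks in this file replace the per-layer workspace (the removed DevBuffer member with its "joined buffer" TODO) with a single workspace shared across layers via WorkSpace::get. WorkSpace itself is defined in a file not shown in this view; a minimal sketch of the interface these call sites rely on, assuming DevBuffer is a movable RAII device allocation exposing data and size:

    // Hypothetical sketch, not the repository's actual implementation.
    struct WorkSpace {
        // Return the process-wide scratch buffer, growing it on demand.
        // get() with no argument hands back the current buffer, as used in
        // forward()/backward() after the find_*_algo calls have sized it.
        static DevBuffer& get(size_t required_size = 0) {
            static DevBuffer buffer;               // shared by all layers
            if (required_size > buffer.size) {
                buffer = DevBuffer(required_size); // grow; old allocation is released
            }
            return buffer;
        }
    };

Since the layers execute sequentially on one stream, one buffer sized for the largest request suffices, avoiding a separate allocation in every ConvLayer of the deeper ResNets this commit adds.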
2 changes: 1 addition & 1 deletion layerwise.cpp
@@ -34,7 +34,7 @@ void benchmark_convlayers() {
Model m(input_dim, ss.str());
m.emplace<ConvLayer>(l.channels_out, l.kernel_size, l.padding, l.stride);

-BenchmarkLogger::benchmark(m, 10);
+BenchmarkLogger::benchmark(m, reps);

--layer;
}
2 changes: 1 addition & 1 deletion miopen.hpp
@@ -183,7 +183,7 @@ struct Device {
std::string pciids = split(line, '=')[1];
std::vector<std::string> ids = split(pciids, ':');
//std::string busid = "0x" + ids[1];
-unsigned int pci_busid = std::stoul("0x" + ids[1], nullptr, 16);
+int pci_busid = std::stoul("0x" + ids[1], nullptr, 16);
if (pci_busid == hip_props.pciBusID) {
drm_path = carddir;
// find hwmon path
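Note: hipDeviceProp_t::pciBusID is a plain int, so with -Wall now enabled the old unsigned local drew a signed/unsigned comparison warning in the check below it; hence the switch to int. A minimal illustration (the bus id value is made up):

    unsigned int u = 0x1d;   // old: unsigned local
    int pci_bus_id = 0x1d;   // hipDeviceProp_t::pciBusID is an int
    if (u == pci_bus_id) { } // -Wall (C++) warns: comparison between signed and unsigned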
35 changes: 25 additions & 10 deletions multi_layers.hpp
@@ -9,14 +9,24 @@
#include <memory>

struct Sequential : public Function {
+std::string name;
TensorDesc input_desc;
std::vector<std::shared_ptr<Function>> layers;
std::vector<std::shared_ptr<Tensor>> out_tensors; // the inner buffers

-Sequential(const TensorDesc& input_dim) : input_desc(input_dim) {}
+Sequential(const TensorDesc& input_dim, const std::string& name) : name(name), input_desc(input_dim) {}
+Sequential(const TensorDesc& input_dim) : Sequential(input_dim, "Sequential") {}
Sequential(const Sequential&) = default;
Sequential(Sequential&&) = default;
+
+virtual std::ostream& write_name(std::ostream& os) const {
+return os << name;
+}
+
+std::string get_name() const {
+return this->name;
+}

const TensorDesc& last_output_dim() const {
if (layers.empty()) {
return input_desc;
@@ -153,9 +163,7 @@ struct Model : public Sequential {
bool is_init_fwd;
bool is_init_bwd;

-std::string name;
-
-Model(const TensorDesc& input_dim, const std::string& name) : Sequential(input_dim), input(input_dim), is_init_fwd(false), is_init_bwd(false), name(name) {}
+Model(const TensorDesc& input_dim, const std::string& name) : Sequential(input_dim, name), input(input_dim), is_init_fwd(false), is_init_bwd(false) {}
Model(const TensorDesc& input_dim) : Model(input_dim, "Model") {}
Model(const Model&) = default;
Model(Model&&) = default;
@@ -165,10 +173,6 @@ struct Model : public Sequential {
using Sequential::forward;
using Sequential::backward;

-std::string get_name() const {
-return name;
-}
-
void init_forward() {
if (output.data_size == 0)
output = Tensor(this->getOutputDesc());
@@ -200,8 +204,7 @@
};


-
-// impleents x += y
+// implements x += y
void add_inplace(Tensor& x, const Tensor& y) {
float alpha1 = 1.f, alpha2 = 1.f, beta = 0.f;
miopenOpTensor(mio::handle(), miopenTensorOpAdd, &alpha1, x.desc, x.data, &alpha2, y.desc, y.data, &beta, x.desc, x.data);
@@ -230,6 +233,10 @@ struct ShortCutAdd : public Function {
ShortCutAdd(const ShortCutAdd&) = default;
ShortCutAdd(ShortCutAdd&&) = default;

+virtual std::ostream& write_name(std::ostream& os) const {
+return os << "ShortCut";
+}
+
template <typename Func>
void setF(Func f) {
F = std::shared_ptr<Function>(new typename std::remove_reference<Func>::type(std::forward<Func>(f)));
@@ -253,12 +260,20 @@

virtual void forward(const Tensor& in, Tensor& out) override {
assert(F.get() != nullptr);
+BenchmarkLogger::instance().tic();
F->forward(in, out);
BenchmarkLogger::instance().toc("ShortcutF", false);
if (G.get() != nullptr) {
+BenchmarkLogger::instance().tic();
G->forward(in, gout);
BenchmarkLogger::instance().toc("ShortcutG", false);
BenchmarkLogger::instance().tic();
add_inplace(out, gout);
BenchmarkLogger::instance().toc("AddInplace", false);
} else {
+BenchmarkLogger::instance().tic();
add_inplace(out, in);
BenchmarkLogger::instance().toc("AddInplace", false);
}
}

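Note: the tic()/toc() pairs added to ShortCutAdd::forward time the F branch, the G branch, and the element-wise add separately. BenchmarkLogger is defined in a file not shown in this view; a minimal sketch of the pattern the call sites imply, assuming a singleton that brackets a host-side clock with device synchronization (the second toc argument presumably selects logging without printing, and the static benchmark() entry point used in layerwise.cpp is omitted):

    // Hypothetical sketch, not the repository's actual implementation.
    #include <chrono>
    #include <iostream>
    #include <string>
    #include <hip/hip_runtime.h>

    struct BenchmarkLogger {
        static BenchmarkLogger& instance() {
            static BenchmarkLogger logger;   // process-wide singleton
            return logger;
        }
        void tic() {
            hipDeviceSynchronize();          // drain already-queued kernels
            start = std::chrono::high_resolution_clock::now();
        }
        void toc(const std::string& label, bool print = true) {
            hipDeviceSynchronize();          // wait for the timed kernels to finish
            double ms = std::chrono::duration<double, std::milli>(
                std::chrono::high_resolution_clock::now() - start).count();
            if (print) std::cout << label << ": " << ms << " ms\n";
            // the real logger presumably also records (label, ms) for its report
        }
        std::chrono::high_resolution_clock::time_point start;
    };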
(Diffs for the remaining 3 of the 9 changed files did not load in this view.)
