Skip to content

Commit

Permalink
splitting up executables
Browse files Browse the repository at this point in the history
  • Loading branch information
patflick committed Jul 18, 2017
1 parent 7600781 commit 5ab53b2
Show file tree
Hide file tree
Showing 12 changed files with 359 additions and 431 deletions.
20 changes: 20 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,21 @@
benchmark

## Autogenerated gitignore: ##

# Created by https://www.gitignore.io/api/vim

### Vim ###
# swap
[._]*.s[a-v][a-z]
[._]*.sw[a-p]
[._]s[a-v][a-z]
[._]sw[a-p]
# session
Session.vim
# temporary
.netrwhist
*~
# auto-generated tag files
tags

# End of https://www.gitignore.io/api/vim
26 changes: 20 additions & 6 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,27 @@ TARGET=--amdgpu-target=gfx900

HIPCC_FLAGS=-g $(CXXFLAGS) $(TARGET) $(INCLUDE_DIRS)

all: benchmark segfault
all: alexnet resnet benchmark_wino layerwise

benchmark: main.cpp
$(HIPCC) $(HIPCC_FLAGS) main.cpp $(LD_FLAGS) -o $@
HEADERS=function.hpp layers.hpp miopen.hpp multi_layers.hpp tensor.hpp utils.hpp

segfault: conv_segfault.cpp
$(HIPCC) $(HIPCC_FLAGS) conv_segfault.cpp $(LD_FLAGS) -o $@
benchmark: all
./benchmark_wino && ./layerwise && ./alexnet

alexnet: alexnet.cpp $(HEADERS)
$(HIPCC) $(HIPCC_FLAGS) alexnet.cpp $(LD_FLAGS) -o $@

resnet: resnet.cpp $(HEADERS)
$(HIPCC) $(HIPCC_FLAGS) resnet.cpp $(LD_FLAGS) -o $@

benchmark_wino: benchmark_wino.cpp $(HEADERS)
$(HIPCC) $(HIPCC_FLAGS) benchmark_wino.cpp $(LD_FLAGS) -o $@

layerwise: layerwise.cpp $(HEADERS)
$(HIPCC) $(HIPCC_FLAGS) layerwise.cpp $(LD_FLAGS) -o $@

#segfault: conv_segfault.cpp
# $(HIPCC) $(HIPCC_FLAGS) conv_segfault.cpp $(LD_FLAGS) -o $@

clean:
rm -f *.o *.out benchmark segfault
rm -f *.o *.out benchmark segfault alexnet resnet benchmark_wino layerwise
96 changes: 96 additions & 0 deletions alexnet.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@


#include <assert.h>

#include <iostream>
#include <chrono>
#include <vector>
#include <memory>
#include <string>
#include <iostream>
#include <sstream>
#include <fstream>
#include <algorithm>


#include "miopen.hpp"
#include "tensor.hpp"
#include "utils.hpp"
#include "layers.hpp"
#include "multi_layers.hpp"


/* TODO:
* - [ ] create AlexNet class
* - [ ] uniform random tensors (via host->device copy), and CPU initialized tensors
* - [x] Make `Model` take input and output tensors in forward(), backward()
* - [ ] Collect total and average times per layer
* - [ ] implement and benchmark ResNet
*/


void alexNet() {
TensorDesc input_dim(128, 3, 224, 224);

Sequential features(input_dim);
/* features */
features.addConv(64, 11, 2, 4);
features.addReLU();
features.addMaxPool(3, 0, 2);
features.addConv(192, 5, 2, 1);
features.addReLU();
features.addMaxPool(3, 0, 2);
features.addConv(384, 3, 1, 1);
features.addReLU();
features.addConv(256, 3, 1, 1);
features.addReLU();
features.addConv(256, 3, 1, 1);
features.addReLU();
features.addMaxPool(3, 0, 2);

DEBUG("Dims after Features: " << features.getOutputDesc());

/* classifier */
Sequential classifier(features.getOutputDesc());
// TODO Dropout
classifier.reshape(128, 256 * 6 * 6, 1, 1);
classifier.addLinear(4096);
classifier.addReLU();
// TODO: Dropout
classifier.addLinear(4096);
classifier.addReLU();
classifier.addLinear(1000);

Model m(input_dim);
m.add(features);
m.add(classifier);

BenchmarkLogger::new_session("alex_net");
BenchmarkLogger::benchmark(m);
}

void check_add() {
Tensor x(2, 2, 1, 1);
x.fromHost({3, 4, 2, 1});
x.print_data();

Tensor y(2, 2, 1, 1);
y.fromHost({-3, .15, 2, 5});
y.print_data();

add_inplace(x, y);
x.print_data();
}

// Entry point: initializes the device, enables MIOpen kernel profiling,
// runs the AlexNet benchmark, and tears down the MIOpen handle.
int main(int argc, char *argv[])
{
    device_init();

    // enable profiling so per-layer kernel times can be collected
    CHECK_MIO(miopenEnableProfiling(mio::handle(), true));

    alexNet();

    // FIX: check the destroy status too, consistent with the other
    // CHECK_MIO-wrapped MIOpen calls in this file.
    CHECK_MIO(miopenDestroy(mio::handle()));
    return 0;
}
24 changes: 24 additions & 0 deletions benchmark_wino.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@

#include "miopen.hpp"
#include "tensor.hpp"
#include "utils.hpp"
#include "layers.hpp"
#include "multi_layers.hpp"

int main(int argc, char *argv[])
{
device_init();
CHECK_MIO(miopenEnableProfiling(mio::handle(), true));

// batch_size, w, h, channels_in, channels_out, kernel_size, padding, stride
ConvLayerDesc l({128, 64, 64, 64, 128, 3, 1, 1});
TensorDesc input_dim(l.batch_size, l.channels_in, l.height, l.width);
Model m(input_dim);
m.emplace<ConvLayer>(l.channels_out, l.kernel_size, l.padding, l.stride);

// benchmark fwd
BenchmarkLogger::new_session("wino_conv");
BenchmarkLogger::fwd_layer_benchmark(m, 1000);

return 0;
}
6 changes: 3 additions & 3 deletions function.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ struct Function {

// Prints the input and output dimensions to the given stream
std::ostream& write_dims(std::ostream& os) const {
return os << getInputDesc() << " -> " << getOutputDesc();
return os << getInputDesc() << "->" << getOutputDesc();
}

virtual std::ostream& write_name(std::ostream& os) const {
return os << "Function (unknown)";
return os << "Function";
}

virtual std::ostream& write(std::ostream& os) const {
Expand Down Expand Up @@ -51,7 +51,7 @@ struct Layer : public Function {
}

virtual std::ostream& write_name(std::ostream& os) const override {
return os << "Layer (unknown)";
return os << "Layer";
}
};

Expand Down
49 changes: 49 additions & 0 deletions layerwise.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@


#include "miopen.hpp"
#include "tensor.hpp"
#include "utils.hpp"
#include "layers.hpp"
#include "multi_layers.hpp"


// Benchmarks five standalone convolution layers (L5 down to L1) from the
// soumith/convnet-benchmarks layerwise suite, logging one session per run.
void benchmark_convlayers() {
    // batch_size, w, h, channels_in, channels_out, kernel_size, padding, stride
    // Layerwise benchmark L1-L5: https://github.com/soumith/convnet-benchmarks
    std::vector<ConvLayerDesc> runs = {{128, 13, 13, 384, 384, 3, 0, 1},
                                       {128, 16, 16, 128, 128, 7, 0, 1},
                                       {128, 32, 32, 128, 128, 9, 0, 1},
                                       {128, 64, 64, 64, 128, 9, 0, 1},
                                       {128, 128, 128, 3, 96, 11, 0, 1}};


    /*
    std::vector<ConvLayerDesc> runs = {{128, 64, 64, 64, 128, 3, 1, 1}};
                                       {128, 64, 64, 64, 128, 3, 0, 1},
                                       {128, 28, 28, 64, 64, 5, 1, 2}};
    */


    int layer = 5;        // runs are ordered L5 -> L1, so count down
    const int reps = 10;  // FIX: was declared but unused; literal 10 was passed below
    BenchmarkLogger::new_session("conv_layers");
    for (ConvLayerDesc& l : runs) {
        // Label each model "Layer LN" for the benchmark log.
        std::stringstream ss;
        ss << "Layer L" << layer;
        TensorDesc input_dim(l.batch_size, l.channels_in, l.height, l.width);
        Model m(input_dim, ss.str());
        m.emplace<ConvLayer>(l.channels_out, l.kernel_size, l.padding, l.stride);

        BenchmarkLogger::benchmark(m, reps);

        --layer;
    }
}

int main(int argc, char *argv[])
{
device_init();
CHECK_MIO(miopenEnableProfiling(mio::handle(), true));

benchmark_convlayers();
}
Loading

0 comments on commit 5ab53b2

Please sign in to comment.