Skip to content

Commit

Permalink
catkin friendly and "hacked" for using cpu version of region layer wi…
Browse files Browse the repository at this point in the history
…th tiny yolov2.

The reason for the "hack" is explained in ganyc717#4. In summary, the time spent in that layer was increasing drastically for some reason. The current version is stable and is actually faster than the GPU implementation on an MSI laptop with an i7.
  • Loading branch information
Bardo91 committed Jun 16, 2018
1 parent 7b05219 commit 0a0a770
Show file tree
Hide file tree
Showing 4 changed files with 83 additions and 14 deletions.
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
cmake_minimum_required(VERSION 2.8)
project(darknet)
set(CMAKE_CXX_FLAGS "-std=c++11 -O2 -DNDEBUG " )

set(OPENCV ON)
Expand Down Expand Up @@ -28,4 +29,6 @@ include_directories(./darknet_cl/clBLAS)

link_libraries(libpthread.so)

add_executable(darknet ${DIR_LIB_SRC} ${DIR_EXAMPLE_SRC})
catkin_package( INCLUDE_DIRS ./darknet_cl/include ./darknet_cl/clBLAS)

add_library(darknet ${DIR_LIB_SRC} ${DIR_EXAMPLE_SRC})
41 changes: 31 additions & 10 deletions darknet_cl/src/network.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -770,23 +770,44 @@ void forward_network_gpu(network *netp)
if(net.truth){
cl_push_array(net.truth_gpu, net.truth, net.truths*net.batch);
}

system("clear");
int i;
for(i = 0; i < net.n; ++i){
auto t0 = std::chrono::system_clock::now();
net.index = i;
layer l = net.layers[i];
if(l.delta_gpu.buffer && l.delta_gpu.size > 0){
fill_gpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
if (i == 15) {
if (l.delta) {
fill_cpu(l.outputs * l.batch, 0, l.delta, 1);
}
//copy net.input_gpu into net.input
cl_pull_array(net.input_gpu, net.input, l.inputs*l.batch);
l.forward(l, net);

net.input = l.output;
if (l.truth) {
net.truth = l.output;
}
}
l.forward_gpu(l, net);
net.input_gpu = l.output_gpu;
net.input = l.output;
if(l.truth) {
net.truth_gpu = l.output_gpu;
net.truth = l.output;
else{
if (l.delta_gpu.buffer && l.delta_gpu.size > 0) {
fill_gpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
}
l.forward_gpu(l, net);

net.input_gpu = l.output_gpu;
net.input = l.output;
if (l.truth) {
net.truth_gpu = l.output_gpu;
net.truth = l.output;
}
}


auto t1 = std::chrono::system_clock::now();
std::cout << "Layer: " << i << " time: " << std::chrono::duration_cast<std::chrono::microseconds>(t1-t0).count() << std::endl;
}
pull_network_output(netp);
//pull_network_output(netp);
calc_network_cost(netp);
}

Expand Down
6 changes: 3 additions & 3 deletions darknet_cl/src/region_layer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -159,9 +159,9 @@ int entry_index(layer l, int batch, int location, int entry)
void forward_region_layer(const layer l, network net)
{
int i,j,b,t,n;
memcpy(l.output, net.input, l.outputs*l.batch*sizeof(float));
memcpy(l.output, net.input, l.inputs*l.batch*sizeof(float));

#ifndef GPU
//#ifndef GPU
for (b = 0; b < l.batch; ++b){
for(n = 0; n < l.n; ++n){
int index = entry_index(l, b, n*l.w*l.h, 0);
Expand All @@ -184,7 +184,7 @@ void forward_region_layer(const layer l, network net)
int index = entry_index(l, 0, 0, l.coords + !l.background);
softmax_cpu(net.input + index, l.classes + l.background, l.batch*l.n, l.inputs/l.n, l.w*l.h, 1, l.w*l.h, 1, l.output + index);
}
#endif
//#endif

memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
if(!net.train) return;
Expand Down
45 changes: 45 additions & 0 deletions package.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
<?xml version="1.0"?>
<!-- catkin package manifest for the "darknet" package. -->
<!-- The <package> element carries no format attribute and <run_depend> is used below. -->
<package>
<name>darknet</name>
<version>0.0.1</version>
<!-- NOTE(review): this description mentions generic 3d/2d data processing and may have been copied from another package; confirm it describes darknet. -->
<description>Cool lib with tools for 3d and 2d data processing</description>

<!-- One maintainer tag required, multiple allowed, one person per tag -->
<!-- Example: -->
<!-- <maintainer email="[email protected]">Jane Doe</maintainer> -->
<maintainer email="[email protected]">Pablo Ramon Soria</maintainer>


<!-- One license tag required, multiple allowed, one license per tag -->
<!-- Commonly used license strings: -->
<!-- BSD, MIT, Boost Software License, GPLv2, GPLv3, LGPLv2.1, LGPLv3 -->
<license>MIT</license>


<!-- Url tags are optional, but multiple are allowed, one per tag -->
<!-- Optional attribute type can be: website, bugtracker, or repository -->
<!-- Example: -->
<!-- <url type="website">http://wiki.ros.org/uav_abstraction_layer</url> -->


<!-- The *_depend tags are used to specify dependencies -->
<!-- Dependencies can be catkin packages or system dependencies -->
<!-- Examples: -->
<!-- Use build_depend for packages you need at compile time: -->
<!-- <build_depend>message_generation</build_depend> -->
<!-- Use buildtool_depend for build tool packages: -->
<!-- <buildtool_depend>catkin</buildtool_depend> -->
<!-- Use run_depend for packages you need at runtime: -->
<!-- <run_depend>message_runtime</run_depend> -->
<!-- Use test_depend for packages you need only for testing: -->
<!-- <test_depend>gtest</test_depend> -->
<!-- Built with catkin; roscpp is declared as both a build-time and a run-time dependency. -->
<buildtool_depend>catkin</buildtool_depend>
<build_depend>roscpp</build_depend>
<run_depend>roscpp</run_depend>

<!-- The export tag contains other, unspecified, tags -->
<export>
<!-- Other tools can request additional information be placed here -->

</export>
</package>

0 comments on commit 0a0a770

Please sign in to comment.