[DO NOT MERGE] Parallel execution #259

Open · wants to merge 10 commits into base: master
Changes from 1 commit
Parallelize code execution
SudeshnaBora committed Nov 20, 2021
commit 97a8116ed6f9caf61acfbd770d855ab855b695b2
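
This commit moves each code object's kernel launches and host/device copies onto a named CUDA stream so that independent code objects can run concurrently on the GPU. The sketch below (not part of the diff) illustrates the launch pattern applied throughout the hunks; the stream variables such as neurongroup_stream and spikemonitor_stream1 are assumed to be created once during initialisation, which is not visible in these hunks.

// Minimal sketch, assuming the streams are created once at start-up.
#include <cuda_runtime.h>

__global__ void _example_kernel(int _N) { /* device work */ }

void run_step()
{
    static cudaStream_t neurongroup_stream, spikemonitor_stream1;
    static bool first_run = true;
    if (first_run)
    {
        cudaStreamCreate(&neurongroup_stream);
        cudaStreamCreate(&spikemonitor_stream1);
        first_run = false;
    }

    const int num_blocks = 32, num_threads = 128, _N = 4096;

    // Launch syntax: <<<grid, block, dynamic shared memory, stream>>>.
    // Kernels enqueued on different streams may overlap on the device.
    _example_kernel<<<num_blocks, num_threads, 0, neurongroup_stream>>>(_N);
    _example_kernel<<<num_blocks, num_threads, 0, spikemonitor_stream1>>>(_N);

    // Device-wide barrier, as added at the end of the simulation step in
    // this commit: orders all streams before dependent host code continues.
    cudaDeviceSynchronize();
}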
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -437,7 +437,7 @@ void _run_neurongroup_1_stateupdater_codeobject()
}


_run_kernel_neurongroup_1_stateupdater_codeobject<<<num_blocks, num_threads>>>(
_run_kernel_neurongroup_1_stateupdater_codeobject<<<num_blocks, num_threads,0, neurongroup_stream1>>>(
_N,
num_threads,
///// HOST_PARAMETERS /////
Binary file not shown.
@@ -384,13 +384,13 @@ void _run_neurongroup_1_thresholder_codeobject()
first_run = false;
}

_reset_neurongroup_1_thresholder_codeobject<<<num_blocks, num_threads>>>(
_reset_neurongroup_1_thresholder_codeobject<<<num_blocks, num_threads,0, neurongroup_stream1>>>(
dev_array_neurongroup_1__spikespace[current_idx_array_neurongroup_1__spikespace]
);

CUDA_CHECK_ERROR("_reset_neurongroup_1_thresholder_codeobject");

_run_kernel_neurongroup_1_thresholder_codeobject<<<num_blocks, num_threads>>>(
_run_kernel_neurongroup_1_thresholder_codeobject<<<num_blocks, num_threads, 0, neurongroup_stream1>>>(
_N,
num_threads,
///// HOST_PARAMETERS /////
@@ -403,7 +403,6 @@ void _run_neurongroup_1_thresholder_codeobject()

CUDA_CHECK_ERROR("_run_kernel_neurongroup_1_thresholder_codeobject");


}


Binary file not shown.
@@ -428,7 +428,7 @@ void _run_neurongroup_stateupdater_codeobject()
}


_run_kernel_neurongroup_stateupdater_codeobject<<<num_blocks, num_threads>>>(
_run_kernel_neurongroup_stateupdater_codeobject<<<num_blocks, num_threads,0,neurongroup_stream>>>(
_N,
num_threads,
///// HOST_PARAMETERS /////
Binary file not shown.
@@ -384,13 +384,13 @@ void _run_neurongroup_thresholder_codeobject()
first_run = false;
}

_reset_neurongroup_thresholder_codeobject<<<num_blocks, num_threads>>>(
_reset_neurongroup_thresholder_codeobject<<<num_blocks, num_threads,0, neurongroup_stream>>>(
dev_array_neurongroup__spikespace[current_idx_array_neurongroup__spikespace]
);

CUDA_CHECK_ERROR("_reset_neurongroup_thresholder_codeobject");

_run_kernel_neurongroup_thresholder_codeobject<<<num_blocks, num_threads>>>(
_run_kernel_neurongroup_thresholder_codeobject<<<num_blocks, num_threads, 0, neurongroup_stream>>>(
_N,
num_threads,
///// HOST_PARAMETERS /////
Binary file not shown.
@@ -439,7 +439,7 @@ void _run_spikegeneratorgroup_codeobject()

// Note: If we have no delays, there is only one spikespace and
// current_idx equals previous_idx.
_reset_spikegeneratorgroup_codeobject<<<num_blocks, num_threads>>>(
_reset_spikegeneratorgroup_codeobject<<<num_blocks, num_threads, 0, spikegenerator_stream>>>(
dev_array_spikegeneratorgroup__spikespace[previous_idx_array_spikegeneratorgroup__spikespace],
///// HOST_PARAMETERS /////
dev_array_spikegeneratorgroup__lastindex,
@@ -456,7 +456,7 @@

CUDA_CHECK_ERROR("_reset_spikegeneratorgroup_codeobject");

_run_kernel_spikegeneratorgroup_codeobject<<<num_blocks, num_threads>>>(
_run_kernel_spikegeneratorgroup_codeobject<<<num_blocks, num_threads,0, spikegenerator_stream>>>(
_N,
num_threads,
///// HOST_PARAMETERS /////
@@ -473,6 +473,7 @@ void _run_spikegeneratorgroup_codeobject()
);

CUDA_CHECK_ERROR("_run_kernel_spikegeneratorgroup_codeobject");
CUDA_SAFE_CALL(cudaDeviceSynchronize());


}
Binary file not shown.
@@ -304,7 +304,7 @@ void _run_spikemonitor_1_codeobject()
static bool first_run = true;
if (first_run)
{
_init_kernel_spikemonitor_1_codeobject<<<1,1>>>();
_init_kernel_spikemonitor_1_codeobject<<<1,1,0,spikemonitor_stream1>>>();

CUDA_CHECK_ERROR("_init_kernel_spikemonitor_1_codeobject");
num_blocks = 1;
@@ -374,7 +374,7 @@ num_threads = 1;
}


_run_kernel_spikemonitor_1_codeobject<<<num_blocks, num_threads>>>(
_run_kernel_spikemonitor_1_codeobject<<<num_blocks, num_threads,0,spikemonitor_stream1>>>(
_num_spikespace-1,
dev_array_spikemonitor_1_count,
// HOST_PARAMETERS
@@ -519,7 +519,7 @@ void _copyToHost_spikemonitor_1_codeobject()
double* const dev_array_spikemonitor_1_t = thrust::raw_pointer_cast(&dev_dynamic_array_spikemonitor_1_t[0]);
const int _numt = dev_dynamic_array_spikemonitor_1_t.size();

_count_kernel_spikemonitor_1_codeobject<<<1,1>>>(
_count_kernel_spikemonitor_1_codeobject<<<1,1,0,spikemonitor_stream1>>>(
dev_num_events,
// HOST_PARAMETERS
dev_array_spikemonitor_1_N,
@@ -537,7 +537,7 @@ void _copyToHost_spikemonitor_1_codeobject()
CUDA_CHECK_ERROR("_count_kernel_spikemonitor_1_codeobject");

CUDA_SAFE_CALL(
cudaMemcpy(&host_num_events, dev_num_events, sizeof(int), cudaMemcpyDeviceToHost)
cudaMemcpyAsync(&host_num_events, dev_num_events, sizeof(int), cudaMemcpyDeviceToHost,spikemonitor_stream1)
);

// resize monitor device vectors
@@ -548,7 +548,7 @@ void _copyToHost_spikemonitor_1_codeobject()
dev_dynamic_array_spikemonitor_1_i.resize(host_num_events)
);

_copy_kernel_spikemonitor_1_codeobject<<<1,1>>>(
_copy_kernel_spikemonitor_1_codeobject<<<1,1,0,spikemonitor_stream1>>>(
thrust::raw_pointer_cast(&dev_dynamic_array_spikemonitor_1_t[0]),
thrust::raw_pointer_cast(&dev_dynamic_array_spikemonitor_1_i[0]),
0 );
@@ -574,7 +574,7 @@ void _debugmsg_spikemonitor_1_codeobject()
// TODO: can't we access the correct _array_eventmonitor_N[0]
// value here without any kernel call?
// Yes: use _array_spikemonitor_1_N
_debugmsg_kernel_spikemonitor_1_codeobject<<<1,1>>>(
_debugmsg_kernel_spikemonitor_1_codeobject<<<1,1,0,spikemonitor_stream1>>>(
// HOST_PARAMETERS
dev_array_spikemonitor_1_N,
dev_array_neurongroup_i,
Binary file not shown.
@@ -304,7 +304,7 @@ void _run_spikemonitor_2_codeobject()
static bool first_run = true;
if (first_run)
{
_init_kernel_spikemonitor_2_codeobject<<<1,1>>>();
_init_kernel_spikemonitor_2_codeobject<<<1,1,0,spikemonitor_stream2>>>();

CUDA_CHECK_ERROR("_init_kernel_spikemonitor_2_codeobject");
num_blocks = 1;
@@ -374,7 +374,7 @@ num_threads = 1;
}


_run_kernel_spikemonitor_2_codeobject<<<num_blocks, num_threads>>>(
_run_kernel_spikemonitor_2_codeobject<<<num_blocks, num_threads,0,spikemonitor_stream2>>>(
_num_spikespace-1,
dev_array_spikemonitor_2_count,
// HOST_PARAMETERS
@@ -519,7 +519,7 @@ void _copyToHost_spikemonitor_2_codeobject()
double* const dev_array_spikemonitor_2_t = thrust::raw_pointer_cast(&dev_dynamic_array_spikemonitor_2_t[0]);
const int _numt = dev_dynamic_array_spikemonitor_2_t.size();

_count_kernel_spikemonitor_2_codeobject<<<1,1>>>(
_count_kernel_spikemonitor_2_codeobject<<<1,1,0,spikemonitor_stream2>>>(
dev_num_events,
// HOST_PARAMETERS
dev_array_spikemonitor_2_N,
@@ -537,7 +537,7 @@ void _copyToHost_spikemonitor_2_codeobject()
CUDA_CHECK_ERROR("_count_kernel_spikemonitor_2_codeobject");

CUDA_SAFE_CALL(
cudaMemcpy(&host_num_events, dev_num_events, sizeof(int), cudaMemcpyDeviceToHost)
cudaMemcpyAsync(&host_num_events, dev_num_events, sizeof(int), cudaMemcpyDeviceToHost,spikemonitor_stream2)
);

// resize monitor device vectors
Expand All @@ -548,7 +548,7 @@ void _copyToHost_spikemonitor_2_codeobject()
dev_dynamic_array_spikemonitor_2_i.resize(host_num_events)
);

_copy_kernel_spikemonitor_2_codeobject<<<1,1>>>(
_copy_kernel_spikemonitor_2_codeobject<<<1,1,0,spikemonitor_stream2>>>(
thrust::raw_pointer_cast(&dev_dynamic_array_spikemonitor_2_t[0]),
thrust::raw_pointer_cast(&dev_dynamic_array_spikemonitor_2_i[0]),
0 );
@@ -574,7 +574,7 @@ void _debugmsg_spikemonitor_2_codeobject()
// TODO: can't we access the correct _array_eventmonitor_N[0]
// value here without any kernel call?
// Yes: use _array_spikemonitor_2_N
_debugmsg_kernel_spikemonitor_2_codeobject<<<1,1>>>(
_debugmsg_kernel_spikemonitor_2_codeobject<<<1,1,0,spikemonitor_stream2>>>(
// HOST_PARAMETERS
dev_array_spikemonitor_2_N,
dev_array_neurongroup_1_i,
@@ -589,5 +589,7 @@ void _debugmsg_spikemonitor_2_codeobject()
);

CUDA_CHECK_ERROR("_debugmsg_kernel_spikemonitor_2_codeobject");
CUDA_SAFE_CALL(cudaDeviceSynchronize());
}


Binary file not shown.
@@ -304,7 +304,7 @@ void _run_spikemonitor_codeobject()
static bool first_run = true;
if (first_run)
{
_init_kernel_spikemonitor_codeobject<<<1,1>>>();
_init_kernel_spikemonitor_codeobject<<<1,1,0,spikemonitor_stream>>>();

CUDA_CHECK_ERROR("_init_kernel_spikemonitor_codeobject");
num_blocks = 1;
@@ -374,7 +374,7 @@ num_threads = 1;
}


_run_kernel_spikemonitor_codeobject<<<num_blocks, num_threads>>>(
_run_kernel_spikemonitor_codeobject<<<num_blocks, num_threads,0, spikemonitor_stream>>>(
_num_spikespace-1,
dev_array_spikemonitor_count,
// HOST_PARAMETERS
@@ -519,7 +519,7 @@ void _copyToHost_spikemonitor_codeobject()
double* const dev_array_spikemonitor_t = thrust::raw_pointer_cast(&dev_dynamic_array_spikemonitor_t[0]);
const int _numt = dev_dynamic_array_spikemonitor_t.size();

_count_kernel_spikemonitor_codeobject<<<1,1>>>(
_count_kernel_spikemonitor_codeobject<<<1,1,0,spikemonitor_stream>>>(
dev_num_events,
// HOST_PARAMETERS
dev_array_spikemonitor_N,
@@ -537,7 +537,7 @@ void _copyToHost_spikemonitor_codeobject()
CUDA_CHECK_ERROR("_count_kernel_spikemonitor_codeobject");

CUDA_SAFE_CALL(
cudaMemcpy(&host_num_events, dev_num_events, sizeof(int), cudaMemcpyDeviceToHost)
cudaMemcpyAsync(&host_num_events, dev_num_events, sizeof(int), cudaMemcpyDeviceToHost,spikemonitor_stream)
);

// resize monitor device vectors
Expand All @@ -548,7 +548,7 @@ void _copyToHost_spikemonitor_codeobject()
dev_dynamic_array_spikemonitor_i.resize(host_num_events)
);

_copy_kernel_spikemonitor_codeobject<<<1,1>>>(
_copy_kernel_spikemonitor_codeobject<<<1,1,0,spikemonitor_stream>>>(
thrust::raw_pointer_cast(&dev_dynamic_array_spikemonitor_t[0]),
thrust::raw_pointer_cast(&dev_dynamic_array_spikemonitor_i[0]),
0 );
@@ -574,7 +574,7 @@ void _debugmsg_spikemonitor_codeobject()
// TODO: can't we access the correct _array_eventmonitor_N[0]
// value here without any kernel call?
// Yes: use _array_spikemonitor_N
_debugmsg_kernel_spikemonitor_codeobject<<<1,1>>>(
_debugmsg_kernel_spikemonitor_codeobject<<<1,1,0,spikemonitor_stream>>>(
// HOST_PARAMETERS
dev_array_spikemonitor_N,
dev_array_spikegeneratorgroup_i,
Binary file not shown.
@@ -376,7 +376,7 @@ void _run_synapses_1_group_variable_set_conditional_codeobject()
}


_run_kernel_synapses_1_group_variable_set_conditional_codeobject<<<num_blocks, num_threads>>>(
_run_kernel_synapses_1_group_variable_set_conditional_codeobject<<<num_blocks, num_threads,0,stream1>>>(
_N,
num_threads,
///// HOST_PARAMETERS /////
Binary file not shown.
@@ -401,7 +401,7 @@ void _run_synapses_1_group_variable_set_conditional_codeobject_1()
}


_run_kernel_synapses_1_group_variable_set_conditional_codeobject_1<<<num_blocks, num_threads>>>(
_run_kernel_synapses_1_group_variable_set_conditional_codeobject_1<<<num_blocks, num_threads,0,stream1>>>(
_N,
num_threads,
///// HOST_PARAMETERS /////
Binary file not shown.
@@ -509,7 +509,7 @@ if (synapses_1_post_max_size > 0)
{
for(int bid_offset = 0; bid_offset < num_loops; bid_offset++)
{
_run_kernel_synapses_1_post_codeobject<<<num_blocks, num_threads>>>(
_run_kernel_synapses_1_post_codeobject<<<num_blocks, num_threads,0,stream1>>>(
_N,
bid_offset,
defaultclock.timestep[0],
@@ -534,6 +534,7 @@ if (synapses_1_post_max_size > 0)
}

CUDA_CHECK_ERROR("_run_kernel_synapses_1_post_codeobject");
CUDA_SAFE_CALL(cudaDeviceSynchronize());
}


Binary file not shown.
@@ -265,13 +265,13 @@ void _run_synapses_1_post_push_spikes()
// get the number of spiking neurons
int32_t num_spiking_neurons;
CUDA_SAFE_CALL(
cudaMemcpy(&num_spiking_neurons,
cudaMemcpyAsync(&num_spiking_neurons,
dev_array_neurongroup_1__spikespace[current_idx_array_neurongroup_1__spikespace] + _num_spikespace - 1,
sizeof(int32_t), cudaMemcpyDeviceToHost)
sizeof(int32_t), cudaMemcpyDeviceToHost,stream1)
);

// advance spike queues
_advance_kernel_synapses_1_post_push_spikes<<<1, num_parallel_blocks>>>();
_advance_kernel_synapses_1_post_push_spikes<<<1, num_parallel_blocks,0,stream1>>>();

CUDA_CHECK_ERROR("_advance_kernel_synapses_1_post_push_spikes");

@@ -359,7 +359,7 @@ void _run_synapses_1_post_push_spikes()
{
num_blocks = num_parallel_blocks * num_spiking_neurons;

_run_kernel_synapses_1_post_push_spikes<<<num_blocks, num_threads, needed_shared_memory>>>(
_run_kernel_synapses_1_post_push_spikes<<<num_blocks, num_threads, needed_shared_memory,stream1>>>(
num_parallel_blocks,
num_blocks,
num_threads,
Binary file not shown.
@@ -534,9 +534,9 @@ if (synapses_1_pre_max_size > 0)
{
if (defaultclock.timestep[0] >= synapses_1_pre_delay)
{
cudaMemcpy(&num_spiking_neurons,
cudaMemcpyAsync(&num_spiking_neurons,
&dev_array_neurongroup__spikespace[synapses_1_pre_eventspace_idx][_num__array_neurongroup__spikespace - 1],
sizeof(int32_t), cudaMemcpyDeviceToHost);
sizeof(int32_t), cudaMemcpyDeviceToHost,stream1);
num_blocks = num_parallel_blocks * num_spiking_neurons;
//TODO collect info abt mean, std of num spiking neurons per time
//step and print INFO at end of simulation
Binary file not shown.
@@ -265,13 +265,13 @@ void _run_synapses_1_pre_push_spikes()
// get the number of spiking neurons
int32_t num_spiking_neurons;
CUDA_SAFE_CALL(
cudaMemcpy(&num_spiking_neurons,
cudaMemcpyAsync(&num_spiking_neurons,
dev_array_neurongroup__spikespace[current_idx_array_neurongroup__spikespace] + _num_spikespace - 1,
sizeof(int32_t), cudaMemcpyDeviceToHost)
sizeof(int32_t), cudaMemcpyDeviceToHost,stream1)
);

// advance spike queues
_advance_kernel_synapses_1_pre_push_spikes<<<1, num_parallel_blocks>>>();
_advance_kernel_synapses_1_pre_push_spikes<<<1, num_parallel_blocks,0,stream1>>>();

CUDA_CHECK_ERROR("_advance_kernel_synapses_1_pre_push_spikes");

Binary file not shown.
@@ -480,10 +480,10 @@ size_t used_device_memory_start = used_device_memory;
dev_dynamic_array_synapses_1__synaptic_pre = _dynamic_array_synapses_1__synaptic_pre;
dev_dynamic_array_synapses_1__synaptic_post = _dynamic_array_synapses_1__synaptic_post;
CUDA_SAFE_CALL(
cudaMemcpy(dev_array_synapses_1_N,
cudaMemcpyAsync(dev_array_synapses_1_N,
_array_synapses_1_N,
sizeof(int32_t),
cudaMemcpyHostToDevice)
cudaMemcpyHostToDevice,stream1)
);


Binary file not shown.
@@ -443,9 +443,9 @@ if (synapses_2_pre_max_size > 0)
{
if (defaultclock.timestep[0] >= synapses_2_pre_delay)
{
cudaMemcpy(&num_spiking_neurons,
cudaMemcpyAsync(&num_spiking_neurons,
&dev_array_neurongroup_1__spikespace[synapses_2_pre_eventspace_idx][_num__array_neurongroup_1__spikespace - 1],
sizeof(int32_t), cudaMemcpyDeviceToHost);
sizeof(int32_t), cudaMemcpyDeviceToHost,stream2);
num_blocks = num_parallel_blocks * num_spiking_neurons;
//TODO collect info abt mean, std of num spiking neurons per time
//step and print INFO at end of simulation
Binary file not shown.
@@ -265,13 +265,13 @@ void _run_synapses_2_pre_push_spikes()
// get the number of spiking neurons
int32_t num_spiking_neurons;
CUDA_SAFE_CALL(
cudaMemcpy(&num_spiking_neurons,
cudaMemcpyAsync(&num_spiking_neurons,
dev_array_neurongroup_1__spikespace[current_idx_array_neurongroup_1__spikespace] + _num_spikespace - 1,
sizeof(int32_t), cudaMemcpyDeviceToHost)
sizeof(int32_t), cudaMemcpyDeviceToHost, stream2)
);

// advance spike queues
_advance_kernel_synapses_2_pre_push_spikes<<<1, num_parallel_blocks>>>();
_advance_kernel_synapses_2_pre_push_spikes<<<1, num_parallel_blocks,0,stream2>>>();

CUDA_CHECK_ERROR("_advance_kernel_synapses_2_pre_push_spikes");

Binary file not shown.
@@ -460,10 +460,10 @@ size_t used_device_memory_start = used_device_memory;
dev_dynamic_array_synapses_2__synaptic_pre = _dynamic_array_synapses_2__synaptic_pre;
dev_dynamic_array_synapses_2__synaptic_post = _dynamic_array_synapses_2__synaptic_post;
CUDA_SAFE_CALL(
cudaMemcpy(dev_array_synapses_2_N,
cudaMemcpyAsync(dev_array_synapses_2_N,
_array_synapses_2_N,
sizeof(int32_t),
cudaMemcpyHostToDevice)
cudaMemcpyHostToDevice, stream2)
);


Binary file not shown.
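
For reference, a minimal sketch of the cudaMemcpy to cudaMemcpyAsync substitution made in the hunks above; the variable names are illustrative, and stream1 stands in for the per-pathway streams used in the diff. An asynchronous device-to-host copy into pageable host memory is only guaranteed to be visible on the host after the stream has been synchronised, so the sketch reads the value only after cudaStreamSynchronize.

// Illustrative only: async device-to-host copy of a single counter,
// mirroring the _copyToHost_* and push_spikes changes above.
#include <cuda_runtime.h>
#include <cstdint>
#include <cstdio>

int main()
{
    cudaStream_t stream1;
    cudaStreamCreate(&stream1);

    int32_t *dev_num_events;
    cudaMalloc((void**)&dev_num_events, sizeof(int32_t));
    cudaMemsetAsync(dev_num_events, 0, sizeof(int32_t), stream1);

    int32_t host_num_events = -1;
    cudaMemcpyAsync(&host_num_events, dev_num_events, sizeof(int32_t),
                    cudaMemcpyDeviceToHost, stream1);

    // Without this synchronisation the host value may not be ready yet.
    cudaStreamSynchronize(stream1);
    printf("num events: %d\n", host_num_events);

    cudaFree(dev_num_events);
    cudaStreamDestroy(stream1);
    return 0;
}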