[DO NOT MERGE] Parallel execution #259

Open · wants to merge 10 commits into base: master

Changes from 1 commit:
Add cuda_standalone generated files
SudeshnaBora committed Oct 30, 2021
commit 14164f6e04b71f7b8d93c00e3cafa259443d99e9
Binary file not shown.
@@ -0,0 +1,51 @@
#ifndef _BRIAN_CLOCKS_H
#define _BRIAN_CLOCKS_H
#include <stdlib.h>
#include <iostream>
#include <brianlib/stdint_compat.h>
#include <math.h>

namespace {
    // Round to the nearest integer (half-up); used to convert times to step counts.
    inline int fround(double x)
    {
        return (int)(x + 0.5);
    }
}

class Clock
{
public:
    double epsilon;
    double *dt;         // pointer to the timestep value (storage owned elsewhere)
    int64_t *timestep;  // pointer to the current step counter
    double *t;          // pointer to the current simulation time
    int64_t i_end;
    Clock(double _epsilon=1e-14) : epsilon(_epsilon) { i_end = 0; }
    inline void tick()
    {
        timestep[0] += 1;
        t[0] = timestep[0] * dt[0];
    }
    inline bool running() { return timestep[0] < i_end; }
    void set_interval(double start, double end)
    {
        int i_start = fround(start / dt[0]);
        double t_start = i_start * dt[0];
        // Accept the rounded step if it matches `start` up to a relative epsilon,
        // otherwise round up so the clock never starts before `start`.
        if (t_start == start || fabs(t_start - start) <= epsilon * fabs(t_start))
        {
            timestep[0] = i_start;
        }
        else
        {
            timestep[0] = (int)ceil(start / dt[0]);
        }
        i_end = fround(end / dt[0]);
        double t_end = i_end * dt[0];
        if (!(t_end == end || fabs(t_end - end) <= epsilon * fabs(t_end)))
        {
            i_end = (int)ceil(end / dt[0]);
        }
    }
};

#endif
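For orientation, here is a minimal host-side sketch of how generated code might drive this Clock. The Clock only stores pointers, so the actual values live in the generated network objects; the wiring, variable names, interval, and include path below are illustrative assumptions, not part of the commit.

// Hypothetical driver, for illustration only. Assumes this header is saved
// as "clocks.h" (name inferred from the header guard) and that brianlib/
// is on the include path.
#include "clocks.h"
#include <cstdio>

int main()
{
    double dt_value = 0.0001;         // 0.1 ms timestep (illustrative)
    int64_t timestep_value = 0;
    double t_value = 0.0;

    Clock clock;                      // epsilon defaults to 1e-14
    clock.dt = &dt_value;             // the Clock only views these values
    clock.timestep = &timestep_value;
    clock.t = &t_value;
    clock.set_interval(0.0, 0.01);    // simulate 10 ms -> 100 steps

    while (clock.running())
    {
        // ... launch the per-step kernels here ...
        clock.tick();
    }
    printf("final t = %f after %lld steps\n", t_value, (long long)timestep_value);
    return 0;
}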

@@ -0,0 +1,14 @@
#ifndef _BRIAN_COMMON_MATH_H
#define _BRIAN_COMMON_MATH_H

#include <limits>
#include <stdlib.h>

#define inf (std::numeric_limits<double>::infinity())
#ifdef _MSC_VER
// MSVC does not reliably provide these C99/POSIX macros, so define them here.
#define INFINITY (std::numeric_limits<double>::infinity())
#define NAN (std::numeric_limits<double>::quiet_NaN())
#define M_PI 3.14159265358979323846
#endif

#endif
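As a quick illustration of the lowercase `inf` macro above, generated model code can use it as a sentinel time. The snippet below is a hedged sketch; the filename is inferred from the header guard and the variable names are assumptions, none of it shown in the diff.

#include <cstdio>
#include "common_math.h"   // assumed filename, inferred from _BRIAN_COMMON_MATH_H

int main()
{
    // A neuron that has never fired can store -inf as its last spike time,
    // so any refractoriness window test is trivially passed:
    double lastspike = -inf;   // expands to -std::numeric_limits<double>::infinity()
    double t = 0.001;
    bool not_refractory = (t - lastspike) >= 0.005;  // +inf >= 0.005 -> true
    printf("not_refractory = %d\n", (int)not_refractory);
    return 0;
}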
@@ -0,0 +1,160 @@
#ifndef _CUDA_VECTOR_H_
#define _CUDA_VECTOR_H_

#include <cstdio>
#include <assert.h>

/*
 * current memory allocation strategy:
 * only grow larger (new_size = old_size*2 + 1) ~= 2^n
 */

#define INITIAL_SIZE 1

typedef int size_type;

template <class scalar>
class cudaVector
{
private:
    // TODO: consider using data of type char*, since it does not have a constructor
    scalar* volatile m_data;        // pointer to allocated memory
    volatile size_type m_capacity;  // how much memory is allocated, ALWAYS >= size
    volatile size_type m_size;      // how many elements are stored in this vector

public:
    __device__ cudaVector()
    {
        m_size = 0;
        if (INITIAL_SIZE > 0)
        {
            m_data = (scalar*)malloc(sizeof(scalar) * INITIAL_SIZE);
            if (m_data != NULL)
            {
                m_capacity = INITIAL_SIZE;
            }
            else
            {
                printf("ERROR while creating cudaVector with size %lu in cudaVector.h (constructor)\n",
                       (unsigned long)(sizeof(scalar) * INITIAL_SIZE));
                assert(m_data != NULL);
            }
        }
    }

    __device__ ~cudaVector()
    {
        free(m_data);
    }

    __device__ scalar* getDataPointer()
    {
        return m_data;
    }

    __device__ scalar& at(size_type index)
    {
        if (index < 0 || index >= m_size)
        {
            // TODO: check for proper exception throwing in cuda kernels
            printf("ERROR returning a reference to index %d in cudaVector::at() (size = %d)\n",
                   index, m_size);
            assert(index < m_size);
        }
        return m_data[index];
    }

    __device__ void push(scalar elem)
    {
        assert(m_size <= m_capacity);
        if (m_capacity == m_size)
        {
            // vector is full, grow the allocation (roughly doubling)
            reserve(m_capacity * 2 + 1);
        }
        if (m_size < m_capacity)
        {
            m_data[m_size] = elem;
            m_size++;
        }
    }

    __device__ void update(size_type pos, scalar elem)
    {
        // only positions < m_size hold stored elements (use push() to append)
        if (pos < m_size)
        {
            m_data[pos] = elem;
        }
        else
        {
            printf("ERROR invalid index %d, must be in range 0 - %d\n", pos, m_size - 1);
            assert(pos < m_size);
        }
    }

    __device__ void resize(size_type new_size)
    {
        if (new_size > m_capacity)
            reserve(new_size * 2);
        m_size = new_size;
    }

    __device__ size_type increaseSizeBy(size_type add_size)
    {
        size_type old_size = m_size;
        size_type new_size = old_size + add_size;
        if (new_size > m_capacity)
            reserve(new_size * 2);
        m_size = new_size;
        return old_size;
    }

    __device__ void reserve(size_type new_capacity)
    {
        if (new_capacity > m_capacity)
        {
            // realloc larger memory (device-side realloc doesn't exist, so we write our own)
            scalar* new_data = (scalar*)malloc(sizeof(scalar) * new_capacity);
            // TODO: use C++ version, is there a way to copy data in parallel here?
            //       since only num_unique_delays threads resize, the other threads could help copy?
            //scalar* new_data = new scalar[new_capacity];
            //if (new_data)
            //{
            //    for (size_type i = 0; i < m_size; i++)
            //        new_data[i] = m_data[i];
            //
            //    delete [] m_data;
            //    m_data = new_data;
            //    m_capacity = new_capacity;
            //}
            if (new_data != NULL)
            {
                memcpy(new_data, m_data, sizeof(scalar) * size());
                free(m_data);
                m_data = new_data;
                m_capacity = new_capacity;
            }
            else
            {
                printf("ERROR while allocating %lu bytes in cudaVector.h/reserve()\n",
                       (unsigned long)(sizeof(scalar) * new_capacity));
                assert(new_data != NULL);
            }
        }
        else
        {
            // shrink the allocation? (not implemented: we only record the smaller
            // capacity here, the memory itself is never reallocated)
            m_capacity = new_capacity;
        }
    }

    // does not overwrite old data, just resets the number of stored elements to 0
    __device__ void reset()
    {
        m_size = 0;
    }

    __device__ size_type size()
    {
        return m_size;
    }
};

#endif
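To show the growth strategy above in action, here is a hedged test sketch; the kernel name and launch configuration are illustrative, not part of the commit. Since push() does no atomic bookkeeping, concurrent pushes into one vector would race, so each thread builds its own vector here.

// Hypothetical test harness, for illustration only.
#include <cstdio>
#include "cudaVector.h"

__global__ void fill_and_sum()
{
    cudaVector<int> vec;              // per-thread vector on the device heap
    for (int i = 0; i < 10; i++)
        vec.push(i);                  // capacity grows 1 -> 3 -> 7 -> 15
    int sum = 0;
    for (int i = 0; i < vec.size(); i++)
        sum += vec.at(i);
    printf("thread %d: size=%d sum=%d\n", threadIdx.x, vec.size(), sum);
}

int main()
{
    // The device-side malloc heap defaults to 8 MB; enlarge it before launch
    // if many vectors grow large (standard CUDA runtime call).
    cudaDeviceSetLimit(cudaLimitMallocHeapSize, 32 * 1024 * 1024);
    fill_and_sum<<<1, 4>>>();
    cudaDeviceSynchronize();          // expected output: sum=45 from each thread
    return 0;
}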