[DO NOT MERGE] Parallel execution #259

Open · wants to merge 10 commits into base: master

Changes from 1 commit:
Add cuda_standalone generated files
SudeshnaBora committed Oct 30, 2021
commit 14164f6e04b71f7b8d93c00e3cafa259443d99e9
Binary file not shown.
@@ -0,0 +1,51 @@
#ifndef _BRIAN_CLOCKS_H
#define _BRIAN_CLOCKS_H
#include <stdlib.h>
#include <iostream>
#include <brianlib/stdint_compat.h>
#include <math.h>

namespace {
    // Round to the nearest integer (half-up); used to convert times to step counts.
    inline int fround(double x)
    {
        return (int)(x + 0.5);
    }
}

class Clock
{
public:
    double epsilon;
    double *dt;         // pointer to the timestep value (storage owned elsewhere)
    int64_t *timestep;  // pointer to the current step counter
    double *t;          // pointer to the current simulation time
    int64_t i_end;
    Clock(double _epsilon=1e-14) : epsilon(_epsilon) { i_end = 0; }
    inline void tick()
    {
        timestep[0] += 1;
        t[0] = timestep[0] * dt[0];
    }
    inline bool running() { return timestep[0] < i_end; }
    void set_interval(double start, double end)
    {
        int i_start = fround(start / dt[0]);
        double t_start = i_start * dt[0];
        // Accept the rounded step if it matches `start` up to a relative epsilon,
        // otherwise round up so the clock never starts before `start`.
        if (t_start == start || fabs(t_start - start) <= epsilon * fabs(t_start))
        {
            timestep[0] = i_start;
        }
        else
        {
            timestep[0] = (int)ceil(start / dt[0]);
        }
        i_end = fround(end / dt[0]);
        double t_end = i_end * dt[0];
        if (!(t_end == end || fabs(t_end - end) <= epsilon * fabs(t_end)))
        {
            i_end = (int)ceil(end / dt[0]);
        }
    }
};

#endif
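For orientation, here is a minimal host-side sketch of how generated code might drive this Clock. The Clock only stores pointers, so the actual values live in the generated network objects; the wiring, variable names, interval, and include path below are illustrative assumptions, not part of the commit.

// Hypothetical driver, for illustration only. Assumes this header is saved
// as "clocks.h" (name inferred from the header guard) and that brianlib/
// is on the include path.
#include "clocks.h"
#include <cstdio>

int main()
{
    double dt_value = 0.0001;         // 0.1 ms timestep (illustrative)
    int64_t timestep_value = 0;
    double t_value = 0.0;

    Clock clock;                      // epsilon defaults to 1e-14
    clock.dt = &dt_value;             // the Clock only views these values
    clock.timestep = &timestep_value;
    clock.t = &t_value;
    clock.set_interval(0.0, 0.01);    // simulate 10 ms -> 100 steps

    while (clock.running())
    {
        // ... launch the per-step kernels here ...
        clock.tick();
    }
    printf("final t = %f after %lld steps\n", t_value, (long long)timestep_value);
    return 0;
}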

@@ -0,0 +1,14 @@
#ifndef _BRIAN_COMMON_MATH_H
#define _BRIAN_COMMON_MATH_H

#include <limits>
#include <stdlib.h>

#define inf (std::numeric_limits<double>::infinity())
#ifdef _MSC_VER
// MSVC does not reliably provide these C99/POSIX macros, so define them here.
#define INFINITY (std::numeric_limits<double>::infinity())
#define NAN (std::numeric_limits<double>::quiet_NaN())
#define M_PI 3.14159265358979323846
#endif

#endif
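As a quick illustration of the lowercase `inf` macro above, generated model code can use it as a sentinel time. The snippet below is a hedged sketch; the filename is inferred from the header guard and the variable names are assumptions, none of it shown in the diff.

#include <cstdio>
#include "common_math.h"   // assumed filename, inferred from _BRIAN_COMMON_MATH_H

int main()
{
    // A neuron that has never fired can store -inf as its last spike time,
    // so any refractoriness window test is trivially passed:
    double lastspike = -inf;   // expands to -std::numeric_limits<double>::infinity()
    double t = 0.001;
    bool not_refractory = (t - lastspike) >= 0.005;  // +inf >= 0.005 -> true
    printf("not_refractory = %d\n", (int)not_refractory);
    return 0;
}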
@@ -0,0 +1,160 @@
#ifndef _CUDA_VECTOR_H_
#define _CUDA_VECTOR_H_

#include <cstdio>
#include <assert.h>

/*
 * current memory allocation strategy:
 * only grow larger (new_size = old_size*2 + 1) ~= 2^n
 */

#define INITIAL_SIZE 1

typedef int size_type;

template <class scalar>
class cudaVector
{
private:
    // TODO: consider using data of type char*, since it does not have a constructor
    scalar* volatile m_data;        // pointer to allocated memory
    volatile size_type m_capacity;  // how much memory is allocated, ALWAYS >= size
    volatile size_type m_size;      // how many elements are stored in this vector

public:
    __device__ cudaVector()
    {
        m_size = 0;
        if (INITIAL_SIZE > 0)
        {
            m_data = (scalar*)malloc(sizeof(scalar) * INITIAL_SIZE);
            if (m_data != NULL)
            {
                m_capacity = INITIAL_SIZE;
            }
            else
            {
                printf("ERROR while creating cudaVector with size %lu in cudaVector.h (constructor)\n",
                       (unsigned long)(sizeof(scalar) * INITIAL_SIZE));
                assert(m_data != NULL);
            }
        }
    }

    __device__ ~cudaVector()
    {
        free(m_data);
    }

    __device__ scalar* getDataPointer()
    {
        return m_data;
    }

    __device__ scalar& at(size_type index)
    {
        if (index < 0 || index >= m_size)
        {
            // TODO: check for proper exception throwing in cuda kernels
            printf("ERROR returning a reference to index %d in cudaVector::at() (size = %d)\n",
                   index, m_size);
            assert(index < m_size);
        }
        return m_data[index];
    }

    __device__ void push(scalar elem)
    {
        assert(m_size <= m_capacity);
        if (m_capacity == m_size)
        {
            // vector is full, grow the allocation (roughly doubling)
            reserve(m_capacity * 2 + 1);
        }
        if (m_size < m_capacity)
        {
            m_data[m_size] = elem;
            m_size++;
        }
    }

    __device__ void update(size_type pos, scalar elem)
    {
        // only positions < m_size hold stored elements (use push() to append)
        if (pos < m_size)
        {
            m_data[pos] = elem;
        }
        else
        {
            printf("ERROR invalid index %d, must be in range 0 - %d\n", pos, m_size - 1);
            assert(pos < m_size);
        }
    }

    __device__ void resize(size_type new_size)
    {
        if (new_size > m_capacity)
            reserve(new_size * 2);
        m_size = new_size;
    }

    __device__ size_type increaseSizeBy(size_type add_size)
    {
        size_type old_size = m_size;
        size_type new_size = old_size + add_size;
        if (new_size > m_capacity)
            reserve(new_size * 2);
        m_size = new_size;
        return old_size;
    }

    __device__ void reserve(size_type new_capacity)
    {
        if (new_capacity > m_capacity)
        {
            // realloc larger memory (device-side realloc doesn't exist, so we write our own)
            scalar* new_data = (scalar*)malloc(sizeof(scalar) * new_capacity);
            // TODO: use C++ version, is there a way to copy data in parallel here?
            //       since only num_unique_delays threads resize, the other threads could help copy?
            //scalar* new_data = new scalar[new_capacity];
            //if (new_data)
            //{
            //    for (size_type i = 0; i < m_size; i++)
            //        new_data[i] = m_data[i];
            //
            //    delete [] m_data;
            //    m_data = new_data;
            //    m_capacity = new_capacity;
            //}
            if (new_data != NULL)
            {
                memcpy(new_data, m_data, sizeof(scalar) * size());
                free(m_data);
                m_data = new_data;
                m_capacity = new_capacity;
            }
            else
            {
                printf("ERROR while allocating %lu bytes in cudaVector.h/reserve()\n",
                       (unsigned long)(sizeof(scalar) * new_capacity));
                assert(new_data != NULL);
            }
        }
        else
        {
            // shrink the allocation? (not implemented: we only record the smaller
            // capacity here, the memory itself is never reallocated)
            m_capacity = new_capacity;
        }
    }

    // does not overwrite old data, just resets the number of stored elements to 0
    __device__ void reset()
    {
        m_size = 0;
    }

    __device__ size_type size()
    {
        return m_size;
    }
};

#endif
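To show the growth strategy above in action, here is a hedged test sketch; the kernel name and launch configuration are illustrative, not part of the commit. Since push() does no atomic bookkeeping, concurrent pushes into one vector would race, so each thread builds its own vector here.

// Hypothetical test harness, for illustration only.
#include <cstdio>
#include "cudaVector.h"

__global__ void fill_and_sum()
{
    cudaVector<int> vec;              // per-thread vector on the device heap
    for (int i = 0; i < 10; i++)
        vec.push(i);                  // capacity grows 1 -> 3 -> 7 -> 15
    int sum = 0;
    for (int i = 0; i < vec.size(); i++)
        sum += vec.at(i);
    printf("thread %d: size=%d sum=%d\n", threadIdx.x, vec.size(), sum);
}

int main()
{
    // The device-side malloc heap defaults to 8 MB; enlarge it before launch
    // if many vectors grow large (standard CUDA runtime call).
    cudaDeviceSetLimit(cudaLimitMallocHeapSize, 32 * 1024 * 1024);
    fill_and_sum<<<1, 4>>>();
    cudaDeviceSynchronize();          // expected output: sum=45 from each thread
    return 0;
}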