Skip to content

Commit

Permalink
vector: Add support for custom execution policies
Browse files Browse the repository at this point in the history
  • Loading branch information
stotko committed Jun 20, 2023
1 parent a6a6d10 commit e1b3f40
Show file tree
Hide file tree
Showing 4 changed files with 187 additions and 18 deletions.
4 changes: 2 additions & 2 deletions src/stdgpu/impl/unordered_base_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1069,7 +1069,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::clear()

_occupied_count.store(0);

detail::vector_clear_iota<index_t, index_allocator_type>(_excess_list_positions, bucket_count());
detail::vector_clear_iota(execution::device, _excess_list_positions, bucket_count());
}

template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
Expand Down Expand Up @@ -1119,7 +1119,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::createDevic
result._hash = hasher();
result._key_equal = key_equal();

detail::vector_clear_iota<index_t, index_allocator_type>(result._excess_list_positions, bucket_count);
detail::vector_clear_iota(execution::device, result._excess_list_positions, bucket_count);

STDGPU_ENSURES(result._excess_list_positions.full());

Expand Down
63 changes: 51 additions & 12 deletions src/stdgpu/impl/vector_detail.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -338,12 +338,12 @@ private:
index_t _begin;
};

template <typename T, typename Allocator>
template <typename ExecutionPolicy, typename T, typename Allocator>
void
vector_clear_iota(vector<T, Allocator>& v, const T& value)
vector_clear_iota(ExecutionPolicy&& policy, vector<T, Allocator>& v, const T& value)
{
iota(execution::device, device_begin(v.data()), device_end(v.data()), value);
v._occupied.set();
iota(std::forward<ExecutionPolicy>(policy), device_begin(v.data()), device_end(v.data()), value);
v._occupied.set(std::forward<ExecutionPolicy>(policy));
v._size.store(v.capacity());
}

Expand All @@ -353,6 +353,19 @@ template <typename T, typename Allocator>
template <typename ValueIterator, STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(detail::is_iterator_v<ValueIterator>)>
inline void
vector<T, Allocator>::insert(device_ptr<const T> position, ValueIterator begin, ValueIterator end)
{
insert(execution::device, position, begin, end);
}

template <typename T, typename Allocator>
template <typename ExecutionPolicy,
typename ValueIterator,
STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(detail::is_iterator_v<ValueIterator>)>
inline void
vector<T, Allocator>::insert(ExecutionPolicy&& policy,
device_ptr<const T> position,
ValueIterator begin,
ValueIterator end)
{
if (position != device_end())
{
Expand All @@ -372,7 +385,7 @@ vector<T, Allocator>::insert(device_ptr<const T> position, ValueIterator begin,
return;
}

for_each_index(execution::device,
for_each_index(std::forward<ExecutionPolicy>(policy),
N,
detail::vector_insert<T, Allocator, ValueIterator, true>(*this, size(), begin));

Expand All @@ -382,6 +395,14 @@ vector<T, Allocator>::insert(device_ptr<const T> position, ValueIterator begin,
/**
 * Deletes the range [begin, end) by delegating to the execution-policy overload
 * with execution::device as the policy.
 */
template <typename T, typename Allocator>
inline void
vector<T, Allocator>::erase(device_ptr<const T> begin, device_ptr<const T> end)
{
    this->erase(execution::device, begin, end);
}

template <typename T, typename Allocator>
template <typename ExecutionPolicy>
inline void
vector<T, Allocator>::erase(ExecutionPolicy&& policy, device_ptr<const T> begin, device_ptr<const T> end)
{
if (end != device_end())
{
Expand All @@ -399,7 +420,7 @@ vector<T, Allocator>::erase(device_ptr<const T> begin, device_ptr<const T> end)
return;
}

for_each_index(execution::device, N, detail::vector_erase<T, Allocator, true>(*this, new_size));
for_each_index(std::forward<ExecutionPolicy>(policy), N, detail::vector_erase<T, Allocator, true>(*this, new_size));

_size.store(new_size);
}
Expand Down Expand Up @@ -485,6 +506,14 @@ vector<T, Allocator>::data() noexcept
/**
 * Clears the complete object by delegating to the execution-policy overload
 * with execution::device as the policy.
 */
template <typename T, typename Allocator>
inline void
vector<T, Allocator>::clear()
{
    this->clear(execution::device);
}

template <typename T, typename Allocator>
template <typename ExecutionPolicy>
inline void
vector<T, Allocator>::clear(ExecutionPolicy&& policy)
{
if (empty())
{
Expand All @@ -495,30 +524,39 @@ vector<T, Allocator>::clear()
{
const index_t current_size = size();

detail::unoptimized_destroy(execution::device,
detail::unoptimized_destroy(std::forward<ExecutionPolicy>(policy),
stdgpu::device_begin(_data),
stdgpu::device_begin(_data) + current_size);
}

_occupied.reset();
_occupied.reset(std::forward<ExecutionPolicy>(policy));

_size.store(0);

STDGPU_ENSURES(empty());
STDGPU_ENSURES(valid());
STDGPU_ENSURES(valid(std::forward<ExecutionPolicy>(policy)));
}

/**
 * Checks the state of the object by delegating to the execution-policy overload
 * with execution::device as the policy.
 */
template <typename T, typename Allocator>
inline bool
vector<T, Allocator>::valid() const
{
    return this->valid(execution::device);
}

/**
 * \brief Checks if the object is in a valid state
 * \tparam ExecutionPolicy The type of the execution policy
 * \param[in] policy The execution policy passed on to the occupancy and lock checks
 * \return True if the capacity is zero, or if the size check, the occupancy count check and the lock check all
 *         succeed; false otherwise
 */
template <typename T, typename Allocator>
template <typename ExecutionPolicy>
inline bool
vector<T, Allocator>::valid(ExecutionPolicy&& policy) const
{
    // Special case : Zero capacity is valid
    if (capacity() == 0)
    {
        return true;
    }

    // policy feeds two checks: pass it as an lvalue to the first one and forward it only at its last use,
    // so that an rvalue policy is never read after it may have been moved from
    return (size_valid() && occupied_count_valid(policy) && _locks.valid(std::forward<ExecutionPolicy>(policy)));
}

template <typename T, typename Allocator>
Expand Down Expand Up @@ -588,11 +626,12 @@ vector<T, Allocator>::occupied(const index_t n) const
}

/**
 * \brief Checks whether the stored size matches the number of set occupancy flags
 * \tparam ExecutionPolicy The type of the execution policy
 * \param[in] policy The execution policy used for counting the occupancy flags
 * \return True if size() equals the count reported by the occupancy flags, false otherwise
 */
template <typename T, typename Allocator>
template <typename ExecutionPolicy>
bool
vector<T, Allocator>::occupied_count_valid(ExecutionPolicy&& policy) const
{
    const index_t size_count = size();
    const index_t size_sum = _occupied.count(std::forward<ExecutionPolicy>(policy));

    return (size_count == size_sum);
}
Expand Down
56 changes: 52 additions & 4 deletions src/stdgpu/vector.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,9 @@ class vector_insert;
template <typename T, typename Allocator, bool>
class vector_erase;

template <typename T, typename Allocator>
template <typename ExecutionPolicy, typename T, typename Allocator>
void
vector_clear_iota(vector<T, Allocator>& v, const T& value);
vector_clear_iota(ExecutionPolicy&& policy, vector<T, Allocator>& v, const T& value);

} // namespace detail

Expand Down Expand Up @@ -242,6 +242,21 @@ public:
void
insert(device_ptr<const T> position, ValueIterator begin, ValueIterator end);

/**
* \brief Inserts the given range of elements into the container
* \tparam ExecutionPolicy The type of the execution policy
* \tparam ValueIterator The type of the iterators delimiting the range to insert
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \param[in] position The position after which to insert the range
* \param[in] begin The begin of the range
* \param[in] end The end of the range
* \note position must be equal to device_end()
* \note This overload is only enabled if ValueIterator satisfies detail::is_iterator_v
*/
template <typename ExecutionPolicy,
typename ValueIterator,
STDGPU_DETAIL_OVERLOAD_IF(detail::is_iterator_v<ValueIterator>)>
void
insert(ExecutionPolicy&& policy, device_ptr<const T> position, ValueIterator begin, ValueIterator end);

/**
* \brief Deletes the given range from the container
* \param[in] begin The begin of the range
Expand All @@ -251,6 +266,18 @@ public:
void
erase(device_ptr<const T> begin, device_ptr<const T> end);

/**
* \brief Deletes the given range from the container
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \param[in] begin The begin of the range
* \param[in] end The end of the range
* \note end must be equal to device_end()
*/
template <typename ExecutionPolicy>
void
erase(ExecutionPolicy&& policy, device_ptr<const T> begin, device_ptr<const T> end);

/**
* \brief Checks if the object is empty
* \return True if the object is empty, false otherwise
Expand Down Expand Up @@ -313,13 +340,32 @@ public:
void
clear();

/**
* \brief Clears the complete object
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
*/
template <typename ExecutionPolicy>
void
clear(ExecutionPolicy&& policy);

/**
* \brief Checks if the object is in a valid state
* \return True if the state is valid, false otherwise
*/
bool
valid() const;

/**
* \brief Checks if the object is in a valid state
* \tparam ExecutionPolicy The type of the execution policy
* \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
* \return True if the state is valid, false otherwise
*/
template <typename ExecutionPolicy>
bool
valid(ExecutionPolicy&& policy) const;

/**
* \brief Creates a pointer to the begin of the device container
* \return A pointer to the begin of the object
Expand Down Expand Up @@ -383,14 +429,16 @@ private:
template <typename T2, typename Allocator2, bool>
friend class detail::vector_erase;

template <typename ExecutionPolicy, typename T2, typename Allocator2>
friend void
detail::vector_clear_iota<T, Allocator>(vector<T, Allocator>& v, const T& value);
detail::vector_clear_iota(ExecutionPolicy&& policy, vector<T2, Allocator2>& v, const T2& value);

STDGPU_DEVICE_ONLY bool
occupied(const index_t n) const;

template <typename ExecutionPolicy>
bool
occupied_count_valid() const;
occupied_count_valid(ExecutionPolicy&& policy) const;

bool
size_valid() const;
Expand Down
82 changes: 82 additions & 0 deletions test/stdgpu/vector.inc
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,39 @@ TEST_F(stdgpu_vector, insert)
destroyDeviceArray<int>(values);
}

// Appends a range at the end of a half-filled vector through the insert() overload
// that takes an explicit execution policy.
TEST_F(stdgpu_vector, insert_custom_execution_policy)
{
    const stdgpu::index_t capacity = 10000;
    const stdgpu::index_t initial_count = capacity / 2;
    const stdgpu::index_t inserted_count = capacity / 4;

    stdgpu::execution::device_policy policy;

    stdgpu::vector<int> pool = stdgpu::vector<int>::createDeviceObject(capacity);
    fill_vector(pool, initial_count);

    // The inserted values continue the sequence right after the initial elements
    int* values = createDeviceArray<int>(inserted_count);
    stdgpu::iota(policy, stdgpu::device_begin(values), stdgpu::device_end(values), initial_count + 1);

    // Insertion is only supported at device_end()
    pool.insert(policy, pool.device_end(), stdgpu::device_begin(values), stdgpu::device_end(values));

    ASSERT_EQ(pool.size(), initial_count + inserted_count);
    ASSERT_FALSE(pool.empty());
    ASSERT_FALSE(pool.full());
    ASSERT_TRUE(pool.valid());

    // The vector must now hold the consecutive values 1, 2, 3, ...
    const stdgpu::index_t final_count = pool.size();
    int* host_copy = copyCreateDevice2HostArray(pool.data(), final_count);
    for (stdgpu::index_t i = 0; i < final_count; ++i)
    {
        EXPECT_EQ(host_copy[i], i + 1);
    }

    stdgpu::vector<int>::destroyDeviceObject(pool);
    destroyHostArray<int>(host_copy);
    destroyDeviceArray<int>(values);
}

TEST_F(stdgpu_vector, insert_non_end)
{
const stdgpu::index_t N = 10000;
Expand Down Expand Up @@ -805,6 +838,35 @@ TEST_F(stdgpu_vector, erase)
destroyHostArray<int>(host_numbers);
}

// Removes a trailing range from a half-filled vector through the erase() overload
// that takes an explicit execution policy.
TEST_F(stdgpu_vector, erase_custom_execution_policy)
{
    const stdgpu::index_t capacity = 10000;
    const stdgpu::index_t initial_count = capacity / 2;
    const stdgpu::index_t erased_count = capacity / 4;

    stdgpu::execution::device_policy policy;

    stdgpu::vector<int> pool = stdgpu::vector<int>::createDeviceObject(capacity);
    fill_vector(pool, initial_count);

    // The erased range has to end at device_end()
    pool.erase(policy, pool.device_end() - erased_count, pool.device_end());

    ASSERT_EQ(pool.size(), initial_count - erased_count);
    ASSERT_FALSE(pool.empty());
    ASSERT_FALSE(pool.full());
    ASSERT_TRUE(pool.valid());

    // The remaining elements must still be the consecutive values 1, 2, 3, ...
    const stdgpu::index_t remaining_count = pool.size();
    int* host_copy = copyCreateDevice2HostArray(pool.data(), remaining_count);
    for (stdgpu::index_t i = 0; i < remaining_count; ++i)
    {
        EXPECT_EQ(host_copy[i], i + 1);
    }

    stdgpu::vector<int>::destroyDeviceObject(pool);
    destroyHostArray<int>(host_copy);
}

TEST_F(stdgpu_vector, erase_non_end)
{
const stdgpu::index_t N = 10000;
Expand Down Expand Up @@ -863,6 +925,26 @@ TEST_F(stdgpu_vector, clear)
stdgpu::vector<int>::destroyDeviceObject(pool);
}

// Empties a filled vector through the clear() overload that takes an explicit execution policy.
TEST_F(stdgpu_vector, clear_custom_execution_policy)
{
    const stdgpu::index_t capacity = 10000;

    stdgpu::execution::device_policy policy;

    stdgpu::vector<int> pool = stdgpu::vector<int>::createDeviceObject(capacity);
    fill_vector(pool);

    pool.clear(policy);

    // After clearing, the vector must report an empty but still valid state
    ASSERT_EQ(pool.size(), 0);
    ASSERT_TRUE(pool.empty());
    ASSERT_FALSE(pool.full());
    ASSERT_TRUE(pool.valid());

    stdgpu::vector<int>::destroyDeviceObject(pool);
}

TEST_F(stdgpu_vector, clear_nondefault_type)
{
const stdgpu::index_t N = 10000;
Expand Down

0 comments on commit e1b3f40

Please sign in to comment.