vector: Add support for custom execution policies

tanzby · Jun 20, 2023
commit e1b3f40 · 1 parent a6a6d10

Showing 4 changed files with 187 additions and 18 deletions.
diff --git a/src/stdgpu/impl/unordered_base_detail.cuh b/src/stdgpu/impl/unordered_base_detail.cuh
@@ -1069,7 +1069,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::clear()
 
     _occupied_count.store(0);
 
-    detail::vector_clear_iota<index_t, index_allocator_type>(_excess_list_positions, bucket_count());
+    detail::vector_clear_iota(execution::device, _excess_list_positions, bucket_count());
 }
 
 template <typename Key, typename Value, typename KeyFromValue, typename Hash, typename KeyEqual, typename Allocator>
@@ -1119,7 +1119,7 @@ unordered_base<Key, Value, KeyFromValue, Hash, KeyEqual, Allocator>::createDevic
     result._hash = hasher();
     result._key_equal = key_equal();
 
-    detail::vector_clear_iota<index_t, index_allocator_type>(result._excess_list_positions, bucket_count);
+    detail::vector_clear_iota(execution::device, result._excess_list_positions, bucket_count);
 
     STDGPU_ENSURES(result._excess_list_positions.full());
 

diff --git a/src/stdgpu/impl/vector_detail.cuh b/src/stdgpu/impl/vector_detail.cuh
@@ -338,12 +338,12 @@ private:
     index_t _begin;
 };
 
-template <typename T, typename Allocator>
+template <typename ExecutionPolicy, typename T, typename Allocator>
 void
-vector_clear_iota(vector<T, Allocator>& v, const T& value)
+vector_clear_iota(ExecutionPolicy&& policy, vector<T, Allocator>& v, const T& value)
 {
-    iota(execution::device, device_begin(v.data()), device_end(v.data()), value);
-    v._occupied.set();
+    iota(policy, device_begin(v.data()), device_end(v.data()), value); // lvalue: policy is reused below
+    v._occupied.set(std::forward<ExecutionPolicy>(policy));            // last use: forward exactly once
     v._size.store(v.capacity());
 }
 
@@ -353,6 +353,19 @@ template <typename T, typename Allocator>
 template <typename ValueIterator, STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(detail::is_iterator_v<ValueIterator>)>
 inline void
 vector<T, Allocator>::insert(device_ptr<const T> position, ValueIterator begin, ValueIterator end)
+{
+    insert(execution::device, position, begin, end);
+}
+
+template <typename T, typename Allocator>
+template <typename ExecutionPolicy,
+          typename ValueIterator,
+          STDGPU_DETAIL_OVERLOAD_DEFINITION_IF(detail::is_iterator_v<ValueIterator>)>
+inline void
+vector<T, Allocator>::insert(ExecutionPolicy&& policy,
+                             device_ptr<const T> position,
+                             ValueIterator begin,
+                             ValueIterator end)
 {
     if (position != device_end())
     {
@@ -372,7 +385,7 @@ vector<T, Allocator>::insert(device_ptr<const T> position, ValueIterator begin,
         return;
     }
 
-    for_each_index(execution::device,
+    for_each_index(std::forward<ExecutionPolicy>(policy),
                    N,
                    detail::vector_insert<T, Allocator, ValueIterator, true>(*this, size(), begin));
 
@@ -382,6 +395,14 @@ vector<T, Allocator>::insert(device_ptr<const T> position, ValueIterator begin,
 template <typename T, typename Allocator>
 inline void
 vector<T, Allocator>::erase(device_ptr<const T> begin, device_ptr<const T> end)
+{
+    erase(execution::device, begin, end); // delegate to the policy overload with execution::device
+}
+
+template <typename T, typename Allocator>
+template <typename ExecutionPolicy>
+inline void
+vector<T, Allocator>::erase(ExecutionPolicy&& policy, device_ptr<const T> begin, device_ptr<const T> end)
 {
     if (end != device_end())
     {
@@ -399,7 +420,7 @@ vector<T, Allocator>::erase(device_ptr<const T> begin, device_ptr<const T> end)
         return;
     }
 
-    for_each_index(execution::device, N, detail::vector_erase<T, Allocator, true>(*this, new_size));
+    for_each_index(std::forward<ExecutionPolicy>(policy), N, detail::vector_erase<T, Allocator, true>(*this, new_size));
 
     _size.store(new_size);
 }
@@ -485,6 +506,14 @@ vector<T, Allocator>::data() noexcept
 template <typename T, typename Allocator>
 inline void
 vector<T, Allocator>::clear()
+{
+    clear(execution::device); // delegate to the policy overload with execution::device
+}
+
+template <typename T, typename Allocator>
+template <typename ExecutionPolicy>
+inline void
+vector<T, Allocator>::clear(ExecutionPolicy&& policy)
 {
     if (empty())
     {
@@ -495,30 +524,39 @@ vector<T, Allocator>::clear()
     {
         const index_t current_size = size();
 
-        detail::unoptimized_destroy(execution::device,
+        detail::unoptimized_destroy(policy, // lvalue: policy is reused below
                                     stdgpu::device_begin(_data),
                                     stdgpu::device_begin(_data) + current_size);
     }
 
-    _occupied.reset();
+    _occupied.reset(policy); // lvalue: policy is reused below
 
     _size.store(0);
 
     STDGPU_ENSURES(empty());
-    STDGPU_ENSURES(valid());
+    STDGPU_ENSURES(valid(std::forward<ExecutionPolicy>(policy))); // last use: forward exactly once
 }
 
 template <typename T, typename Allocator>
 inline bool
 vector<T, Allocator>::valid() const
+{
+    return valid(execution::device); // delegate to the policy overload with execution::device
+}
+
+template <typename T, typename Allocator>
+template <typename ExecutionPolicy>
+inline bool
+vector<T, Allocator>::valid(ExecutionPolicy&& policy) const
 {
     // Special case : Zero capacity is valid
     if (capacity() == 0)
     {
         return true;
     }
 
-    return (size_valid() && occupied_count_valid() && _locks.valid());
+    // policy is consumed twice: hand it over as an lvalue first and forward it only at the final use
+    return (size_valid() && occupied_count_valid(policy) && _locks.valid(std::forward<ExecutionPolicy>(policy)));
 }
 
 template <typename T, typename Allocator>
@@ -588,11 +626,12 @@ vector<T, Allocator>::occupied(const index_t n) const
 }
 
 template <typename T, typename Allocator>
+template <typename ExecutionPolicy>
 bool
-vector<T, Allocator>::occupied_count_valid() const
+vector<T, Allocator>::occupied_count_valid(ExecutionPolicy&& policy) const
 {
     index_t size_count = size();
-    index_t size_sum = _occupied.count();
+    index_t size_sum = _occupied.count(std::forward<ExecutionPolicy>(policy)); // sole use of policy: forward it
 
     return (size_count == size_sum);
 }

diff --git a/src/stdgpu/vector.cuh b/src/stdgpu/vector.cuh
@@ -53,9 +53,9 @@ class vector_insert;
 template <typename T, typename Allocator, bool>
 class vector_erase;
 
-template <typename T, typename Allocator>
+template <typename ExecutionPolicy, typename T, typename Allocator>
 void
-vector_clear_iota(vector<T, Allocator>& v, const T& value);
+vector_clear_iota(ExecutionPolicy&& policy, vector<T, Allocator>& v, const T& value);
 
 } // namespace detail
 
@@ -242,6 +242,21 @@ public:
     void
     insert(device_ptr<const T> position, ValueIterator begin, ValueIterator end);
 
+    /**
+     * \brief Inserts the given range of elements into the container
+     * \tparam ExecutionPolicy The type of the execution policy
+     * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
+     * \param[in] position The position after which to insert the range
+     * \param[in] begin The begin of the range
+     * \param[in] end The end of the range
+     * \note position must be equal to device_end()
+     */
+    template <typename ExecutionPolicy,
+              typename ValueIterator,
+              STDGPU_DETAIL_OVERLOAD_IF(detail::is_iterator_v<ValueIterator>)>
+    void
+    insert(ExecutionPolicy&& policy, device_ptr<const T> position, ValueIterator begin, ValueIterator end);
+
     /**
      * \brief Deletes the given range from the container
      * \param[in] begin The begin of the range
@@ -251,6 +266,18 @@ public:
     void
     erase(device_ptr<const T> begin, device_ptr<const T> end);
 
+    /**
+     * \brief Deletes the given range from the container
+     * \tparam ExecutionPolicy The type of the execution policy
+     * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
+     * \param[in] begin The begin of the range
+     * \param[in] end The end of the range
+     * \note end must be equal to device_end()
+     */
+    template <typename ExecutionPolicy>
+    void
+    erase(ExecutionPolicy&& policy, device_ptr<const T> begin, device_ptr<const T> end);
+
     /**
      * \brief Checks if the object is empty
      * \return True if the object is empty, false otherwise
@@ -313,13 +340,32 @@ public:
     void
     clear();
 
+    /**
+     * \brief Clears the complete object
+     * \tparam ExecutionPolicy The type of the execution policy
+     * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
+     */
+    template <typename ExecutionPolicy>
+    void
+    clear(ExecutionPolicy&& policy);
+
     /**
      * \brief Checks if the object is in a valid state
      * \return True if the state is valid, false otherwise
      */
     bool
     valid() const;
 
+    /**
+     * \brief Checks if the object is in a valid state
+     * \tparam ExecutionPolicy The type of the execution policy
+     * \param[in] policy The execution policy, e.g. host or device, corresponding to the allocator
+     * \return True if the state is valid, false otherwise
+     */
+    template <typename ExecutionPolicy>
+    bool
+    valid(ExecutionPolicy&& policy) const;
+
     /**
      * \brief Creates a pointer to the begin of the device container
      * \return A pointer to the begin of the object
@@ -383,14 +429,16 @@ private:
     template <typename T2, typename Allocator2, bool>
     friend class detail::vector_erase;
 
+    template <typename ExecutionPolicy, typename T2, typename Allocator2>
     friend void
-    detail::vector_clear_iota<T, Allocator>(vector<T, Allocator>& v, const T& value);
+    detail::vector_clear_iota(ExecutionPolicy&& policy, vector<T2, Allocator2>& v, const T2& value);
 
     STDGPU_DEVICE_ONLY bool
     occupied(const index_t n) const;
 
+    template <typename ExecutionPolicy>
     bool
-    occupied_count_valid() const;
+    occupied_count_valid(ExecutionPolicy&& policy) const;
 
     bool
     size_valid() const;

diff --git a/test/stdgpu/vector.inc b/test/stdgpu/vector.inc
@@ -730,6 +730,39 @@ TEST_F(stdgpu_vector, insert)
     destroyDeviceArray<int>(values);
 }
 
+TEST_F(stdgpu_vector, insert_custom_execution_policy)
+{
+    stdgpu::execution::device_policy policy; // explicit policy handed to every policy-aware call below
+
+    const stdgpu::index_t N = 10000;
+    const stdgpu::index_t N_init = N / 2;
+    const stdgpu::index_t N_insert = N / 4;
+
+    stdgpu::vector<int> pool = stdgpu::vector<int>::createDeviceObject(N);
+
+    fill_vector(pool, N_init);
+
+    int* values = createDeviceArray<int>(N_insert);
+    stdgpu::iota(policy, stdgpu::device_begin(values), stdgpu::device_end(values), N_init + 1);
+
+    pool.insert(policy, pool.device_end(), stdgpu::device_begin(values), stdgpu::device_end(values));
+
+    ASSERT_EQ(pool.size(), N_init + N_insert);
+    ASSERT_FALSE(pool.empty());
+    ASSERT_FALSE(pool.full());
+    ASSERT_TRUE(pool.valid(policy)); // also exercise the policy overload of valid()
+
+    int* host_numbers = copyCreateDevice2HostArray(pool.data(), pool.size());
+    for (stdgpu::index_t i = 0; i < pool.size(); ++i)
+    {
+        EXPECT_EQ(host_numbers[i], i + 1);
+    }
+
+    stdgpu::vector<int>::destroyDeviceObject(pool);
+    destroyHostArray<int>(host_numbers);
+    destroyDeviceArray<int>(values);
+}
+
 TEST_F(stdgpu_vector, insert_non_end)
 {
     const stdgpu::index_t N = 10000;
@@ -805,6 +838,35 @@ TEST_F(stdgpu_vector, erase)
     destroyHostArray<int>(host_numbers);
 }
 
+TEST_F(stdgpu_vector, erase_custom_execution_policy)
+{
+    stdgpu::execution::device_policy policy; // explicit policy handed to every policy-aware call below
+
+    const stdgpu::index_t N = 10000;
+    const stdgpu::index_t N_init = N / 2;
+    const stdgpu::index_t N_erase = N / 4;
+
+    stdgpu::vector<int> pool = stdgpu::vector<int>::createDeviceObject(N);
+
+    fill_vector(pool, N_init);
+
+    pool.erase(policy, pool.device_end() - N_erase, pool.device_end());
+
+    ASSERT_EQ(pool.size(), N_init - N_erase);
+    ASSERT_FALSE(pool.empty());
+    ASSERT_FALSE(pool.full());
+    ASSERT_TRUE(pool.valid(policy)); // also exercise the policy overload of valid()
+
+    int* host_numbers = copyCreateDevice2HostArray(pool.data(), pool.size());
+    for (stdgpu::index_t i = 0; i < pool.size(); ++i)
+    {
+        EXPECT_EQ(host_numbers[i], i + 1);
+    }
+
+    stdgpu::vector<int>::destroyDeviceObject(pool);
+    destroyHostArray<int>(host_numbers);
+}
+
 TEST_F(stdgpu_vector, erase_non_end)
 {
     const stdgpu::index_t N = 10000;
@@ -863,6 +925,26 @@ TEST_F(stdgpu_vector, clear)
     stdgpu::vector<int>::destroyDeviceObject(pool);
 }
 
+TEST_F(stdgpu_vector, clear_custom_execution_policy)
+{
+    stdgpu::execution::device_policy policy; // explicit policy handed to every policy-aware call below
+
+    const stdgpu::index_t N = 10000;
+
+    stdgpu::vector<int> pool = stdgpu::vector<int>::createDeviceObject(N);
+
+    fill_vector(pool);
+
+    pool.clear(policy);
+
+    ASSERT_EQ(pool.size(), 0);
+    ASSERT_TRUE(pool.empty());
+    ASSERT_FALSE(pool.full());
+    ASSERT_TRUE(pool.valid(policy)); // also exercise the policy overload of valid()
+
+    stdgpu::vector<int>::destroyDeviceObject(pool);
+}
+
 TEST_F(stdgpu_vector, clear_nondefault_type)
 {
     const stdgpu::index_t N = 10000;