From 223e8599b0f982d9ace32e620db8457500ecd4c4 Mon Sep 17 00:00:00 2001 From: Fahri Ali Rahman Date: Thu, 26 Oct 2023 23:03:00 +0700 Subject: [PATCH] Support outer simd on ndarray (#253) * fix outer simd padding evaluation * update outer simd indexing to support ndarray * testing logging using newline after label instead of tab * add outer simd ndarray tests for sse and avx * add outer simd ndarray tests for simde avx512 * add outer simd ndarray tests for gcc vector extensions --- .../array/eval/simd/evaluator/ufunc.hpp | 3 +- .../nmtools/array/eval/simd/index/ufunc.hpp | 119 ++- include/nmtools/testing/testing.hpp | 4 +- tests/simd/CMakeLists.txt | 4 + tests/simd/index/simd_outer.cpp | 336 +++++++- tests/simd/x86/outer_2d_avx.cpp | 806 ++++++++++++++++++ tests/simd/x86/outer_2d_sse.cpp | 806 ++++++++++++++++++ tests/simd/x86/outer_nd_avx.cpp | 235 +++++ tests/simd/x86/outer_nd_sse.cpp | 235 +++++ tests/simde/CMakeLists.txt | 3 + tests/simde/avx512/outer_2d.cpp | 806 ++++++++++++++++++ tests/simde/avx512/outer_nd.cpp | 235 +++++ tests/vector/CMakeLists.txt | 6 + tests/vector/outer_2d_vector128.cpp | 806 ++++++++++++++++++ tests/vector/outer_2d_vector256.cpp | 806 ++++++++++++++++++ tests/vector/outer_2d_vector512.cpp | 806 ++++++++++++++++++ tests/vector/outer_nd_vector128.cpp | 235 +++++ tests/vector/outer_nd_vector256.cpp | 235 +++++ tests/vector/outer_nd_vector512.cpp | 235 +++++ 19 files changed, 6698 insertions(+), 23 deletions(-) create mode 100644 tests/simd/x86/outer_2d_avx.cpp create mode 100644 tests/simd/x86/outer_2d_sse.cpp create mode 100644 tests/simd/x86/outer_nd_avx.cpp create mode 100644 tests/simd/x86/outer_nd_sse.cpp create mode 100644 tests/simde/avx512/outer_2d.cpp create mode 100644 tests/simde/avx512/outer_nd.cpp create mode 100644 tests/vector/outer_2d_vector128.cpp create mode 100644 tests/vector/outer_2d_vector256.cpp create mode 100644 tests/vector/outer_2d_vector512.cpp create mode 100644 tests/vector/outer_nd_vector128.cpp create mode 
100644 tests/vector/outer_nd_vector256.cpp create mode 100644 tests/vector/outer_nd_vector512.cpp diff --git a/include/nmtools/array/eval/simd/evaluator/ufunc.hpp b/include/nmtools/array/eval/simd/evaluator/ufunc.hpp index 648d7c324..66da3686c 100644 --- a/include/nmtools/array/eval/simd/evaluator/ufunc.hpp +++ b/include/nmtools/array/eval/simd/evaluator/ufunc.hpp @@ -156,7 +156,8 @@ namespace nmtools::array if (static_cast(out_tag) == n_pad) { auto n_pad = static_cast(out_tag); for (size_t i=0; i<(n_simd_pack - n_pad); i++) { - auto lhs = lhs_data_ptr[lhs_offset+i]; + // lhs is always broadcasted + auto lhs = lhs_data_ptr[lhs_offset]; auto rhs = rhs_data_ptr[rhs_offset+i]; out_data_ptr[out_offset+i] = view.op(lhs,rhs); } diff --git a/include/nmtools/array/eval/simd/index/ufunc.hpp b/include/nmtools/array/eval/simd/index/ufunc.hpp index 063c82c48..4761a2cdf 100644 --- a/include/nmtools/array/eval/simd/index/ufunc.hpp +++ b/include/nmtools/array/eval/simd/index/ufunc.hpp @@ -5,6 +5,9 @@ #include "nmtools/array/index/product.hpp" #include "nmtools/array/shape.hpp" #include "nmtools/array/eval/simd/index/common.hpp" +#include "nmtools/array/index/compute_offset.hpp" +#include "nmtools/array/index/compute_strides.hpp" +#include "nmtools/array/index/compute_indices.hpp" namespace nmtools::index { @@ -266,8 +269,10 @@ namespace nmtools::index } template - constexpr auto outer_simd_shape(meta::as_type, const out_shape_t& out_shape, const lhs_shape_t& lhs_shape, const rhs_shape_t&) + constexpr auto outer_simd_shape(meta::as_type, const out_shape_t& out_shape, const lhs_shape_t& lhs_shape, const rhs_shape_t& rhs_shape) { + // TODO: compile-time inference + // same dim as out using result_t = out_shape_t; auto result = result_t {}; @@ -279,33 +284,89 @@ namespace nmtools::index const auto n_ops = at(out_shape,meta::ct_v<-1>); const auto n_packed_ops = n_ops / N_ELEM_PACK; - // assume lhs is 1D, rhs is 1D, and out is 2D - at(result,meta::ct_v<0>) = 
at(lhs_shape,meta::ct_v<0>); - at(result,meta::ct_v<1>) = n_packed_ops + (n_ops % N_ELEM_PACK ? 1 : 0); + auto lhs_dim = len(lhs_shape); + for (size_t i=0; i) = n_packed_ops + (n_ops % N_ELEM_PACK ? 1 : 0); return result; } // outer_simd_shape - template - constexpr auto outer_simd(meta::as_type, const simd_index_t& simd_index, const simd_shape_t&, const out_shape_t& out_shape, const lhs_shape_t&, const rhs_shape_t&) + template + constexpr auto outer_simd(meta::as_type, const simd_index_t& simd_index, const simd_shape_t&, const out_strides_t& out_strides, const out_shape_t& out_shape, const lhs_shape_t& lhs_shape, const rhs_shape_t& rhs_shape, const lhs_strides_t& lhs_strides, const rhs_strides_t& rhs_strides) { using tagged_index_t = nmtools_tuple; using result_t = nmtools_array; + const auto compute_outer_simd_offset = [](const auto& indices, const auto& strides){ + index_t offset = 0; + auto m = len(indices)-1; + for (index_t i=0; i(at(strides,i)) * static_cast(at(indices,i)); + } + return offset; + }; + const auto n_ops = at(out_shape,meta::ct_v<-1>); const auto n_packed_ops = n_ops / N_ELEM_PACK; // assume simd_index is 2D - const auto s_i = at(simd_index,meta::ct_v<0>); - const auto s_j = at(simd_index,meta::ct_v<1>); + const auto s_j = at(simd_index,meta::ct_v<-1>); const auto out_tag = s_j * N_ELEM_PACK + N_ELEM_PACK > n_ops ? static_cast(N_ELEM_PACK - (n_ops - (n_packed_ops * N_ELEM_PACK))) : SIMD::PACKED; const auto lhs_tag = SIMD::BROADCAST; const auto rhs_tag = out_tag; - const auto out_offset = (out_tag == SIMD::PACKED ? (s_i * n_ops) + (s_j * N_ELEM_PACK) : (s_i * n_ops) + (s_j) * N_ELEM_PACK); - const auto lhs_offset = s_i; - const auto rhs_offset = (rhs_tag == SIMD::PACKED ? 
(s_j * N_ELEM_PACK) : (s_j) * N_ELEM_PACK); + const auto outer_offset = compute_outer_simd_offset(simd_index,out_strides); + + const auto compute_offset = [](const auto& indices, const auto& strides, index_t start_dim, index_t N){ + index_t offset = 0; + for (index_t i=0; iindex_t{ + switch (lhs_dim) { + case 1: { + return at(simd_index,meta::ct_v<0>); + } break; + case 2: { + // only works for 2-dim + return at(simd_index,meta::ct_v<0>) * at(out_shape,meta::ct_v<1>) + at(simd_index,meta::ct_v<1>); + } break; + default: { + return compute_offset(simd_index,lhs_strides,0,lhs_dim); + } + } + }(); + const auto rhs_offset = [&]()->index_t{ + switch (rhs_dim) { + case 1: { + return s_j * N_ELEM_PACK; + } break; + case 2: { + // only works for 2-dim + return at(simd_index,meta::ct_v<-2>) * at(out_shape,meta::ct_v<-1>) + (s_j * N_ELEM_PACK); + } break; + default: { + auto rhs_offset = compute_offset(simd_index,rhs_strides,lhs_dim,rhs_dim-1); + return rhs_offset + (s_j * N_ELEM_PACK); + } + } + // (rhs_tag == SIMD::PACKED ? 
(s_j * N_ELEM_PACK) : (s_j) * N_ELEM_PACK); + }(); auto result = result_t {}; at(result,0) = tagged_index_t{out_tag,out_offset}; @@ -315,6 +376,15 @@ namespace nmtools::index return result; } // outer_simd + template + constexpr auto outer_simd(meta::as_type n_elem_pack, const simd_index_t& simd_index, const simd_shape_t& simd_shape, const out_shape_t& out_shape, const lhs_shape_t& lhs_shape, const rhs_shape_t& rhs_shape) + { + const auto out_strides = compute_strides(out_shape); + const auto lhs_strides = compute_strides(lhs_shape); + const auto rhs_strides = compute_strides(rhs_shape); + return outer_simd(n_elem_pack,simd_index,simd_shape,out_strides,out_shape,lhs_shape,rhs_shape,lhs_strides,rhs_strides); + } // outer_simd + template struct outer_simd_enumerator_t { @@ -325,14 +395,22 @@ namespace nmtools::index using simd_shape_type = out_shape_t; using index_type = index_t; using size_type = index_t; - using simd_index_type = nmtools_array; + using simd_index_type = simd_shape_type; + + using out_strides_type = meta::remove_cvref_t()))>; + using lhs_strides_type = meta::remove_cvref_t()))>; + using rhs_strides_type = meta::remove_cvref_t()))>; + using simd_strides_type = meta::remove_cvref_t()))>; meta::as_type n_elem_pack; - out_shape_type out_shape; - lhs_shape_type lhs_shape; - rhs_shape_type rhs_shape -; + out_shape_type out_shape; + lhs_shape_type lhs_shape; + rhs_shape_type rhs_shape; simd_shape_type simd_shape; + out_strides_type out_strides; + lhs_strides_type lhs_strides; + rhs_strides_type rhs_strides; + simd_strides_type simd_strides; outer_simd_enumerator_t(meta::as_type, const out_shape_t& out_shape_, const lhs_shape_t& lhs_shape_, const rhs_shape_t& rhs_shape_) : n_elem_pack{} @@ -340,6 +418,10 @@ namespace nmtools::index , lhs_shape(lhs_shape_) , rhs_shape(rhs_shape_) , simd_shape(outer_simd_shape(n_elem_pack,out_shape_,lhs_shape_,rhs_shape_)) + , out_strides(compute_strides(out_shape)) + , lhs_strides(compute_strides(lhs_shape)) + , 
rhs_strides(compute_strides(rhs_shape)) + , simd_strides(compute_strides(simd_shape)) {} constexpr auto size() const noexcept @@ -349,9 +431,8 @@ namespace nmtools::index constexpr auto operator[](index_type i) const { - auto index_i = i / at(simd_shape,meta::ct_v<1>); - auto index_j = i % at(simd_shape,meta::ct_v<1>); - return outer_simd(n_elem_pack,simd_index_type{index_i,index_j},simd_shape,out_shape,lhs_shape,rhs_shape); + auto index = compute_indices(i,simd_shape,simd_strides); + return outer_simd(n_elem_pack,index,simd_shape,out_strides,out_shape,lhs_shape,rhs_shape,lhs_strides,rhs_strides); } }; // outer_simd_enumerator_t diff --git a/include/nmtools/testing/testing.hpp b/include/nmtools/testing/testing.hpp index 7f8b96e5f..04c9873fc 100644 --- a/include/nmtools/testing/testing.hpp +++ b/include/nmtools/testing/testing.hpp @@ -85,8 +85,8 @@ EXPECT_TRUE(isequal(result,expect)) \ auto result_ = isclose(result,expect,NMTOOLS_TESTING_OUTPUT_PRECISION); \ std::string message {}; \ message = message + \ - + "\n\tActual : " + STRINGIFY(result) \ - + "\n\tExpected: " + STRINGIFY(expect); \ + + "\n\tActual :\n" + STRINGIFY(result) \ + + "\n\tExpected:\n" + STRINGIFY(expect); \ NMTOOLS_CHECK_MESSAGE( result_, message ); \ } diff --git a/tests/simd/CMakeLists.txt b/tests/simd/CMakeLists.txt index 48d85c35f..ab29242dd 100644 --- a/tests/simd/CMakeLists.txt +++ b/tests/simd/CMakeLists.txt @@ -38,6 +38,8 @@ if (NMTOOLS_SIMD_TEST_SSE) x86/binary_sse.cpp x86/matmul_sse.cpp x86/outer_sse.cpp + x86/outer_2d_sse.cpp + x86/outer_nd_sse.cpp ) if (NMTOOLS_SIMD_TEST_REDUCTION) set(NMTOOLS_SIMD_TEST_SOURCES ${NMTOOLS_SIMD_TEST_SOURCES} @@ -54,6 +56,8 @@ if (NMTOOLS_SIMD_TEST_AVX) x86/binary_avx_broadcast.cpp x86/matmul_avx.cpp x86/outer_avx.cpp + x86/outer_2d_avx.cpp + x86/outer_nd_avx.cpp ) if (NMTOOLS_SIMD_TEST_REDUCTION) set(NMTOOLS_SIMD_TEST_SOURCES ${NMTOOLS_SIMD_TEST_SOURCES} diff --git a/tests/simd/index/simd_outer.cpp b/tests/simd/index/simd_outer.cpp index 
8d2798c89..0e1e55863 100644 --- a/tests/simd/index/simd_outer.cpp +++ b/tests/simd/index/simd_outer.cpp @@ -35,7 +35,7 @@ TEST_CASE("outer_simd_shape(case1b)" * doctest::test_suite("simd::index")) NMTOOLS_ASSERT_EQUAL( result, expect ); } -TEST_CASE("outer_simd_shape(casec)" * doctest::test_suite("simd::index")) +TEST_CASE("outer_simd_shape(case1c)" * doctest::test_suite("simd::index")) { auto lhs_shape = nmtools_array{4}; auto rhs_shape = nmtools_array{6}; @@ -74,6 +74,214 @@ TEST_CASE("outer_simd_shape(case1e)" * doctest::test_suite("simd::index")) NMTOOLS_ASSERT_EQUAL( result, expect ); } +TEST_CASE("outer_simd_shape(case2a)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,4}; + auto rhs_shape = nmtools_array{5}; + auto out_shape = nmtools_array{2,4,5}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,4,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case2b)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,4}; + auto rhs_shape = nmtools_array{6}; + auto out_shape = nmtools_array{2,4,6}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,4,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case2c)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,4}; + auto rhs_shape = nmtools_array{7}; + auto out_shape = nmtools_array{2,4,7}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,4,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case2d)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,4}; + auto rhs_shape = nmtools_array{8}; + auto out_shape = 
nmtools_array{2,4,8}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,4,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case2e)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,4}; + auto rhs_shape = nmtools_array{9}; + auto out_shape = nmtools_array{2,4,9}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,4,3}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case3a)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2}; + auto rhs_shape = nmtools_array{4,4}; + auto out_shape = nmtools_array{2,4,4}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,4,1}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case3b)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2}; + auto rhs_shape = nmtools_array{5,4}; + auto out_shape = nmtools_array{2,5,4}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,5,1}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case3c)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{3}; + auto rhs_shape = nmtools_array{5,6}; + auto out_shape = nmtools_array{3,5,6}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{3,5,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case4a)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = 
nmtools_array{5,4}; + auto out_shape = nmtools_array{2,3,5,4}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,3,5,1}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case4b)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{5,5}; + auto out_shape = nmtools_array{2,3,5,5}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,3,5,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case4c)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{5,6}; + auto out_shape = nmtools_array{2,3,5,6}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,3,5,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case4d)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{5,7}; + auto out_shape = nmtools_array{2,3,5,7}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,3,5,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case5a)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{1,2,3}; + auto rhs_shape = nmtools_array{5,4}; + auto out_shape = nmtools_array{1,2,3,5,4}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{1,2,3,5,1}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case5b)" * 
doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{1,2,3}; + auto rhs_shape = nmtools_array{5,5}; + auto out_shape = nmtools_array{1,2,3,5,5}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{1,2,3,5,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case6a)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{2,7,5,6}; + auto out_shape = nmtools_array{2,3,2,7,5,6}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,3,2,7,5,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + +TEST_CASE("outer_simd_shape(case6b)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{2,7,1,5,7}; + auto out_shape = nmtools_array{2,3,2,7,1,5,7}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto result = ix::outer_simd_shape(n_elem_pack,out_shape,lhs_shape,rhs_shape); + auto expect = nmtools_array{2,3,2,7,1,5,2}; + NMTOOLS_ASSERT_EQUAL( result, expect ); +} + /////////////////////////////////////////////////////////////////////////////////////////////// TEST_CASE("outer_simd(case1a)" * doctest::test_suite("simd::index")) @@ -419,4 +627,130 @@ TEST_CASE("outer_simd_enumerator(case2)" * doctest::test_suite("simd::index")) NMTOOLS_ASSERT_EQUAL( lhs_offset, lhs_offsets[i] ); NMTOOLS_ASSERT_EQUAL( rhs_offset, rhs_offsets[i] ); } +} + +TEST_CASE("outer_simd_enumerator(case3)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,4}; + auto rhs_shape = nmtools_array{5}; + auto out_shape = nmtools_array{2,4,5}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto simd_shape = nmtools_array{2,4,2}; + + auto out_tags = nmtools_array{ + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + 
SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + }; + auto lhs_tags = nmtools_array{ + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + }; + auto rhs_tags = nmtools_array{ + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + SIMD::PACKED,SIMD::PAD_3, + }; + auto out_offsets = nmtools_array{0,4,5,9,10,14,15,19,20,24,25,29,30,34,35,39}; + auto lhs_offsets = nmtools_array{0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7}; + auto rhs_offsets = nmtools_array{0,4,0,4,0,4,0,4,0,4,0,4,0,4,0,4}; + + auto enumerator = ix::outer_simd_enumerator(n_elem_pack,out_shape,lhs_shape,rhs_shape); + NMTOOLS_ASSERT_EQUAL( enumerator.size(), out_tags.size() ); + for (size_t i=0; i(out_index); + const auto lhs_tag = nm::get<0>(lhs_index); + const auto rhs_tag = nm::get<0>(rhs_index); + const auto out_offset = nm::get<1>(out_index); + const auto lhs_offset = nm::get<1>(lhs_index); + const auto rhs_offset = nm::get<1>(rhs_index); + CHECK_MESSAGE( out_tag == out_tags[i], i ); + CHECK_MESSAGE( lhs_tag == lhs_tags[i], i ); + CHECK_MESSAGE( rhs_tag == rhs_tags[i], i ); + NMTOOLS_ASSERT_EQUAL( out_offset, out_offsets[i] ); + NMTOOLS_ASSERT_EQUAL( lhs_offset, lhs_offsets[i] ); + NMTOOLS_ASSERT_EQUAL( rhs_offset, rhs_offsets[i] ); + } +} + +TEST_CASE("outer_simd_enumerator(case4)" * doctest::test_suite("simd::index")) +{ + auto lhs_shape = nmtools_array{2,4}; + auto rhs_shape = nmtools_array{6}; + auto out_shape = nmtools_array{2,4,6}; + + auto n_elem_pack = meta::as_type_v<4>; + + auto simd_shape = nmtools_array{2,4,2}; + + 
auto out_tags = nmtools_array{ + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + }; + auto lhs_tags = nmtools_array{ + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + SIMD::BROADCAST,SIMD::BROADCAST, + }; + auto rhs_tags = nmtools_array{ + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + SIMD::PACKED,SIMD::PAD_2, + }; + auto out_offsets = nmtools_array{0,4,6,10,12,16,18,22,24,28,30,34,36,40,42,46}; + auto lhs_offsets = nmtools_array{0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7}; + auto rhs_offsets = nmtools_array{0,4,0,4,0,4,0,4,0,4,0,4,0,4,0,4}; + + auto enumerator = ix::outer_simd_enumerator(n_elem_pack,out_shape,lhs_shape,rhs_shape); + NMTOOLS_ASSERT_EQUAL( enumerator.size(), out_tags.size() ); + for (size_t i=0; i(out_index); + const auto lhs_tag = nm::get<0>(lhs_index); + const auto rhs_tag = nm::get<0>(rhs_index); + const auto out_offset = nm::get<1>(out_index); + const auto lhs_offset = nm::get<1>(lhs_index); + const auto rhs_offset = nm::get<1>(rhs_index); + CHECK_MESSAGE( out_tag == out_tags[i], i ); + CHECK_MESSAGE( lhs_tag == lhs_tags[i], i ); + CHECK_MESSAGE( rhs_tag == rhs_tags[i], i ); + NMTOOLS_ASSERT_EQUAL( out_offset, out_offsets[i] ); + NMTOOLS_ASSERT_EQUAL( lhs_offset, lhs_offsets[i] ); + NMTOOLS_ASSERT_EQUAL( rhs_offset, rhs_offsets[i] ); + } } \ No newline at end of file diff --git a/tests/simd/x86/outer_2d_avx.cpp b/tests/simd/x86/outer_2d_avx.cpp new file mode 100644 index 000000000..8265dc05d --- /dev/null +++ b/tests/simd/x86/outer_2d_avx.cpp 
@@ -0,0 +1,806 @@ +#include "nmtools/array/eval/simd/x86_avx.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + +#define X86_AVX_TEST(fn,...) \ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::x86_AVX); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + +TEST_CASE("add.outer_2d(case1a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = 
na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2e)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = 
na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + 
X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5e)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + 
X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + 
X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8e)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + 
X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + 
X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11e)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + 
X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +// TODO: fix int8 add outer +#if 0 +TEST_CASE("add.outer_2d(case13a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + 
X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14e)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + 
X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("add.outer_2d(case16a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 8; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17b)" * 
doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} \ No newline at end of file diff --git a/tests/simd/x86/outer_2d_sse.cpp b/tests/simd/x86/outer_2d_sse.cpp new file mode 100644 index 000000000..696ed34e2 --- /dev/null +++ b/tests/simd/x86/outer_2d_sse.cpp @@ -0,0 +1,806 @@ +#include "nmtools/array/eval/simd/x86_sse.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + +#define X86_SSE_TEST(fn,...) 
\ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::x86_SSE); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + +TEST_CASE("add.outer_2d(case1a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2c)" * 
doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2e)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + 
+TEST_CASE("add.outer_2d(case4a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + 
+TEST_CASE("add.outer_2d(case5c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5e)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + 
+TEST_CASE("add.outer_2d(case6d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8b)" 
* doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8e)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10a)" * 
doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11e)" * 
doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +// TODO: fix int8 add outer +#if 0 +TEST_CASE("add.outer_2d(case13a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + 
+TEST_CASE("add.outer_2d(case13b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14e)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + 
/* case15a-15d: int8 operands, lhs reshaped to {M,N}, 1-D rhs of length K = 7, 7, 11, 14. X86_SSE_TEST is defined earlier in this file, outside this excerpt. */ +TEST_CASE("add.outer_2d(case15a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} +#endif + /* case16a-16d: both operands are 2-D (lhs {M,N}, rhs {O,P}); arange is called without an explicit dtype. */ +TEST_CASE("add.outer_2d(case16a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs =
na::reshape(na::arange(O*P),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 8; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + /* case17a-17b: the same 2-D by 2-D layout with dtype = nm::int32. */ +TEST_CASE("add.outer_2d(case17a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} \ No newline at end of file diff --git a/tests/simd/x86/outer_nd_avx.cpp b/tests/simd/x86/outer_nd_avx.cpp new file mode 100644 index 000000000..75a36ea0c --- /dev/null +++ b/tests/simd/x86/outer_nd_avx.cpp @@ -0,0 +1,235 @@ +#include "nmtools/array/eval/simd/x86_avx.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp"
+#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + /* outer_nd_avx.cpp: add.outer and multiply.outer tests on N-D operands, registered in the simd::x86_AVX test suite. */ +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + /* SIMD_TEST_EQUAL: asserts that result and expect have equal shapes, then that their values are close. */ +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + /* X86_AVX_TEST: evaluates na::fn(args) as expect and na::fn(args, simd::x86_AVX) as result, then compares them with SIMD_TEST_EQUAL. */ +#define X86_AVX_TEST(fn,...) \ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::x86_AVX); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + /* case1a-1d: lhs shape {2,3}, rhs shape {4,5,6}; the variants use float32, int32, float64 and int64. */ +TEST_CASE("add.outer_nd(case1a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case1b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs =
na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + /* case2a-2d: lhs shape {1,4,2,3}; the last rhs axis is 6, 7, 8, 9 across the variants (float32, int32, float64, int64). */ +TEST_CASE("add.outer_nd(case2a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case2b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto
dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + +/*********************** multiply ******************************/ + /* The multiply cases reuse the add shapes and dtypes; the int64 variants (case1d, case2d) are disabled with #if 0, see the TODO notes. */ +TEST_CASE("multiply.outer_nd(case1a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd int64 multiply +#if 0 +TEST_CASE("multiply.outer_nd(case1d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3};
+ auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(multiply.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("multiply.outer_nd(case2a)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2b)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2c)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd multiply +#if 0 +TEST_CASE("multiply.outer_nd(case2d)" * doctest::test_suite("simd::x86_AVX")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size =
ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_AVX_TEST(multiply.outer,lhs,rhs,nm::None); +} + +#endif \ No newline at end of file diff --git a/tests/simd/x86/outer_nd_sse.cpp b/tests/simd/x86/outer_nd_sse.cpp new file mode 100644 index 000000000..f042fd6fe --- /dev/null +++ b/tests/simd/x86/outer_nd_sse.cpp @@ -0,0 +1,235 @@ +#include "nmtools/array/eval/simd/x86_sse.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + /* outer_nd_sse.cpp: the same N-D outer test layout, registered in the simd::x86_SSE test suite. */ +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + /* SIMD_TEST_EQUAL: asserts that result and expect have equal shapes, then that their values are close. */ +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + /* X86_SSE_TEST: evaluates na::fn(args) as expect and na::fn(args, simd::x86_SSE) as result, then compares them with SIMD_TEST_EQUAL. */ +#define X86_SSE_TEST(fn,...)
\ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::x86_SSE); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + /* case1a-1d: lhs shape {2,3}, rhs shape {4,5,6}; the variants use float32, int32, float64 and int64. */ +TEST_CASE("add.outer_nd(case1a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case1b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif
+ /* case2a-2d (simd::x86_SSE suite): lhs shape {1,4,2,3}; the last rhs axis is 6, 7, 8, 9 across the variants (float32, int32, float64, int64). X86_SSE_TEST is defined near the top of this file. */ +TEST_CASE("add.outer_nd(case2a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case2b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + +/*********************** multiply ******************************/ + /* The multiply cases reuse the add shapes and dtypes; the int64 variants (case1d, case2d) are disabled with #if 0, see the TODO notes. */ +TEST_CASE("multiply.outer_nd(case1a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype =
nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd int64 multiply +#if 0 +TEST_CASE("multiply.outer_nd(case1d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(multiply.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("multiply.outer_nd(case2a)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size =
ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2b)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2c)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd multiply +#if 0 +TEST_CASE("multiply.outer_nd(case2d)" * doctest::test_suite("simd::x86_SSE")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + X86_SSE_TEST(multiply.outer,lhs,rhs,nm::None); +} + +#endif \ No newline at end of file diff --git a/tests/simde/CMakeLists.txt b/tests/simde/CMakeLists.txt index 006ece2c1..0d0ef2844 100644 --- a/tests/simde/CMakeLists.txt +++ b/tests/simde/CMakeLists.txt @@ -36,6 +36,9 @@ if (NMTOOLS_SIMDE_TEST_AVX512) avx512/unary_ufuncs.cpp avx512/binary_ufuncs.cpp avx512/binary_broadcast.cpp +
avx512/outer_2d.cpp + avx512/outer_nd.cpp + avx512/outer_ufuncs.cpp ) if (NMTOOLS_SIMDE_TEST_REDUCTION) set(NMTOOLS_SIMDE_TEST_SOURCES ${NMTOOLS_SIMDE_TEST_SOURCES} diff --git a/tests/simde/avx512/outer_2d.cpp b/tests/simde/avx512/outer_2d.cpp new file mode 100644 index 000000000..2de8857b7 --- /dev/null +++ b/tests/simde/avx512/outer_2d.cpp @@ -0,0 +1,806 @@ +#include "nmtools/array/eval/simd/simde_avx512.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + /* simde avx512 outer_2d.cpp: add.outer tests with a 2-D lhs (arange(M*N) reshaped to {M,N}) and a 1-D rhs (arange(K)), registered in the simd::simde_AVX512 test suite. */ +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + /* SIMD_TEST_EQUAL: asserts that result and expect have equal shapes, then that their values are close. */ +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + /* SIMDE_AVX512_TEST: evaluates na::fn(args) as expect and na::fn(args, simd::simde_AVX512) as result, then compares them with SIMD_TEST_EQUAL. */ +#define SIMDE_AVX512_TEST(fn,...)
\ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::simde_AVX512); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + /* case1a-3d: arange is called without an explicit dtype; case1 uses K = 5, 5, 9, 13. */ +TEST_CASE("add.outer_2d(case1a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + /* case2a-2e: K = 6, 6, 10, 14, 18. */ +TEST_CASE("add.outer_2d(case2a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); +
SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2e)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + /* case3a-3d: K = 7, 7, 11, 14. */ +TEST_CASE("add.outer_2d(case3a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape =
nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + /* case4a-6d: the same M, N, K combinations as case1a-3d, with dtype = nm::float64. */ +TEST_CASE("add.outer_2d(case4a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto
K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5e)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype =
nm::float64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + /* case7a onward: dtype = nm::int32; this excerpt has no case8d. */ +TEST_CASE("add.outer_2d(case7a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8a)" *
doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8e)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} +
+TEST_CASE("add.outer_2d(case9c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + 
SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11e)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + 
auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +// TODO: fix int8 add outer +#if 0 +TEST_CASE("add.outer_2d(case13a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; 
+ auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14e)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("add.outer_2d(case16a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = 
nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 8; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = 
nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} \ No newline at end of file diff --git a/tests/simde/avx512/outer_nd.cpp b/tests/simde/avx512/outer_nd.cpp new file mode 100644 index 000000000..c009828f7 --- /dev/null +++ b/tests/simde/avx512/outer_nd.cpp @@ -0,0 +1,235 @@ +#include "nmtools/array/eval/simd/simde_avx512.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + +#define SIMDE_AVX512_TEST(fn,...) 
\ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::simde_AVX512); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + +TEST_CASE("add.outer_nd(case1a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case1b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + 
SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + +TEST_CASE("add.outer_nd(case2a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case2b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + +/*********************** multiply ******************************/ + 
+TEST_CASE("multiply.outer_nd(case1a)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd int64 multiply +#if 0 +TEST_CASE("multiply.outer_nd(case1d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(multiply.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("multiply.outer_nd(case2a)" * doctest::test_suite("simd::simde_AVX512")) +{ + 
auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2b)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2c)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd multiply +#if 0 +TEST_CASE("multiply.outer_nd(case2d)" * doctest::test_suite("simd::simde_AVX512")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + SIMDE_AVX512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +#endif \ No newline at end of file diff --git a/tests/vector/CMakeLists.txt b/tests/vector/CMakeLists.txt index e3d150bb8..4c51c9a04 100644 --- 
a/tests/vector/CMakeLists.txt +++ b/tests/vector/CMakeLists.txt @@ -39,6 +39,8 @@ if (NMTOOLS_VECTOR_TEST_128) reduction_vector128.cpp reduction_2d_vector128.cpp outer_vector128.cpp + outer_2d_vector128.cpp + outer_nd_vector128.cpp ) endif (NMTOOLS_VECTOR_TEST_128) @@ -51,6 +53,8 @@ if (NMTOOLS_VECTOR_TEST_256) reduction_vector256.cpp reduction_2d_vector256.cpp outer_vector256.cpp + outer_2d_vector256.cpp + outer_nd_vector256.cpp ) endif (NMTOOLS_VECTOR_TEST_256) @@ -63,6 +67,8 @@ if (NMTOOLS_VECTOR_TEST_512) reduction_2d_vector512.cpp unary_vector512.cpp outer_vector512.cpp + outer_2d_vector512.cpp + outer_nd_vector512.cpp ) endif (NMTOOLS_VECTOR_TEST_512) diff --git a/tests/vector/outer_2d_vector128.cpp b/tests/vector/outer_2d_vector128.cpp new file mode 100644 index 000000000..26506ef64 --- /dev/null +++ b/tests/vector/outer_2d_vector128.cpp @@ -0,0 +1,806 @@ +#include "nmtools/array/eval/simd/vector_128.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + +#define VECTOR_128_TEST(fn,...) 
\ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::vector_128); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + +TEST_CASE("add.outer_2d(case1a)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1b)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1c)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1d)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2a)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2b)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + 
+TEST_CASE("add.outer_2d(case2c)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2d)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2e)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3a)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3b)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3c)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3d)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto 
rhs = na::arange(K); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4d)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); 
+ auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5d)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5e)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = 
na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6d)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7d)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs 
= na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8e)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto 
lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; 
+ auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11e)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12d)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +// TODO: fix int8 add outer +#if 0 +TEST_CASE("add.outer_2d(case13a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 
5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14e)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K 
= 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15d)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("add.outer_2d(case16a)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16b)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 
12; + auto N = 14; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16c)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16d)" * doctest::test_suite("simd::vector_128")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 8; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} \ No newline at end of file diff --git a/tests/vector/outer_2d_vector256.cpp b/tests/vector/outer_2d_vector256.cpp new file mode 100644 index 
000000000..393b6e35c --- /dev/null +++ b/tests/vector/outer_2d_vector256.cpp @@ -0,0 +1,806 @@ +#include "nmtools/array/eval/simd/vector_256.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + +#define VECTOR_256_TEST(fn,...) \ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::vector_256); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + +TEST_CASE("add.outer_2d(case1a)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1b)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1c)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1d)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto K = 13; 
+ auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2a)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2b)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2c)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2d)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2e)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3a)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3b)" * 
doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3c)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3d)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4d)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + 
auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5d)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5e)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 2; + 
auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6d)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M 
= 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7d)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8e)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + 
auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = 
nm::int64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11e)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12c)" * doctest::test_suite("simd::vector_256")) +{ + auto 
dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12d)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +// TODO: fix int8 add outer +#if 0 +TEST_CASE("add.outer_2d(case13a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14b)" * 
doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14e)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + 
+TEST_CASE("add.outer_2d(case15d)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("add.outer_2d(case16a)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16b)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16c)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16d)" * doctest::test_suite("simd::vector_256")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 8; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 
6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} \ No newline at end of file diff --git a/tests/vector/outer_2d_vector512.cpp b/tests/vector/outer_2d_vector512.cpp new file mode 100644 index 000000000..6766a2f83 --- /dev/null +++ b/tests/vector/outer_2d_vector512.cpp @@ -0,0 +1,806 @@ +#include "nmtools/array/eval/simd/vector_512.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + +#define VECTOR_512_TEST(fn,...) 
\ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::vector_512); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + +TEST_CASE("add.outer_2d(case1a)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1b)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1c)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case1d)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2a)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2b)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + 
+TEST_CASE("add.outer_2d(case2c)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2d)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case2e)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3a)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3b)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3c)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case3d)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N),new_shape); + auto 
rhs = na::arange(K); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case4d)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); 
+ auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5d)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case5e)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = 
na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case6d)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case7d)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 13; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs 
= na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case8e)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case9c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto 
lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case10c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; 
+ auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case11e)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case12d)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +// TODO: fix int8 add outer +#if 0 +TEST_CASE("add.outer_2d(case13a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 
5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 5; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case13c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 9; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 6; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 10; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case14e)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K 
= 18; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 7; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 12; + auto N = 14; + auto K = 11; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case15d)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int8; + auto M = 2; + auto N = 4; + auto K = 14; + auto new_shape = nmtools_array{M,N}; + auto lhs = na::reshape(na::arange(M*N,dtype),new_shape); + auto rhs = na::arange(K,dtype); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("add.outer_2d(case16a)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16b)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 
12; + auto N = 14; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16c)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case16d)" * doctest::test_suite("simd::vector_512")) +{ + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 8; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N),lhs_shape); + auto rhs = na::reshape(na::arange(O*P),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 2; + auto N = 4; + auto O = 5; + auto P = 6; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_2d(case17b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto M = 12; + auto N = 14; + auto O = 5; + auto P = 7; + auto lhs_shape = nmtools_array{M,N}; + auto rhs_shape = nmtools_array{O,P}; + auto lhs = na::reshape(na::arange(M*N,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(O*P,dtype),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} \ No newline at end of file diff --git a/tests/vector/outer_nd_vector128.cpp b/tests/vector/outer_nd_vector128.cpp new file mode 100644 index 
000000000..4ba8bd51e --- /dev/null +++ b/tests/vector/outer_nd_vector128.cpp @@ -0,0 +1,235 @@ +#include "nmtools/array/eval/simd/vector_128.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + +#define VECTOR_128_TEST(fn,...) \ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::vector_128); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + +TEST_CASE("add.outer_nd(case1a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case1b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = 
nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1d)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + +TEST_CASE("add.outer_nd(case2a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case2b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = 
ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2d)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + +/*********************** multiply ******************************/ + +TEST_CASE("multiply.outer_nd(case1a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = 
na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd int64 multiply +#if 0 +TEST_CASE("multiply.outer_nd(case1d)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(multiply.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("multiply.outer_nd(case2a)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2b)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2c)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = 
na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd multiply +#if 0 +TEST_CASE("multiply.outer_nd(case2d)" * doctest::test_suite("simd::vector_128")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_128_TEST(multiply.outer,lhs,rhs,nm::None); +} + +#endif \ No newline at end of file diff --git a/tests/vector/outer_nd_vector256.cpp b/tests/vector/outer_nd_vector256.cpp new file mode 100644 index 000000000..d4666d984 --- /dev/null +++ b/tests/vector/outer_nd_vector256.cpp @@ -0,0 +1,235 @@ +#include "nmtools/array/eval/simd/vector_256.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + +#define VECTOR_256_TEST(fn,...) 
\ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::vector_256); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + +TEST_CASE("add.outer_nd(case1a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case1b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1d)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + 
VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + +TEST_CASE("add.outer_nd(case2a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case2b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2d)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + +/*********************** multiply ******************************/ + 
+TEST_CASE("multiply.outer_nd(case1a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd int64 multiply +#if 0 +TEST_CASE("multiply.outer_nd(case1d)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(multiply.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("multiply.outer_nd(case2a)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = 
nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2b)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2c)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd multiply +#if 0 +TEST_CASE("multiply.outer_nd(case2d)" * doctest::test_suite("simd::vector_256")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_256_TEST(multiply.outer,lhs,rhs,nm::None); +} + +#endif \ No newline at end of file diff --git a/tests/vector/outer_nd_vector512.cpp b/tests/vector/outer_nd_vector512.cpp new file mode 100644 index 000000000..dc30180ad --- /dev/null +++ 
b/tests/vector/outer_nd_vector512.cpp @@ -0,0 +1,235 @@ +#include "nmtools/array/eval/simd/vector_512.hpp" +#include "nmtools/array/array/arange.hpp" +#include "nmtools/array/array/ufuncs/add.hpp" +#include "nmtools/array/array/ufuncs/multiply.hpp" +#include "nmtools/array/array/ufuncs/subtract.hpp" +#include "nmtools/array/array/ufuncs/divide.hpp" +#include "nmtools/array/array/reshape.hpp" +#include "nmtools/testing/doctest.hpp" + +namespace nm = nmtools; +namespace na = nm::array; +namespace ix = nm::index; +namespace simd = na::simd; + +#define SIMD_TEST_EQUAL(result, expect) \ +{ \ + NMTOOLS_ASSERT_EQUAL( nm::shape(result), nm::shape(expect) ); \ + NMTOOLS_ASSERT_CLOSE( result, expect ); \ +} + +#define VECTOR_512_TEST(fn,...) \ +{ \ + auto expect = na::fn(__VA_ARGS__); \ + auto result = na::fn(__VA_ARGS__,simd::vector_512); \ + SIMD_TEST_EQUAL(result,expect); \ +} + +/*********************** add ******************************/ + +TEST_CASE("add.outer_nd(case1a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case1b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto lhs_shape = 
nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case1d)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + +TEST_CASE("add.outer_nd(case2a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +#if 1 +TEST_CASE("add.outer_nd(case2b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = 
na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +TEST_CASE("add.outer_nd(case2d)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(add.outer,lhs,rhs,nm::None); +} + +#endif + +/*********************** multiply ******************************/ + +TEST_CASE("multiply.outer_nd(case1a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case1c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = 
na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd int64 multiply +#if 0 +TEST_CASE("multiply.outer_nd(case1d)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{2,3}; + auto rhs_shape = nmtools_array{4,5,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(multiply.outer,lhs,rhs,nm::None); +} +#endif + +TEST_CASE("multiply.outer_nd(case2a)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,6}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2b)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int32; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,7}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +TEST_CASE("multiply.outer_nd(case2c)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::float64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,8}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + 
VECTOR_512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +// TODO: fix simd multiply +#if 0 +TEST_CASE("multiply.outer_nd(case2d)" * doctest::test_suite("simd::vector_512")) +{ + auto dtype = nm::int64; + auto lhs_shape = nmtools_array{1,4,2,3}; + auto rhs_shape = nmtools_array{4,5,1,9}; + auto lhs_size = ix::product(lhs_shape); + auto rhs_size = ix::product(rhs_shape); + auto lhs = na::reshape(na::arange(lhs_size,dtype),lhs_shape); + auto rhs = na::reshape(na::arange(rhs_size,dtype),rhs_shape); + VECTOR_512_TEST(multiply.outer,lhs,rhs,nm::None); +} + +#endif \ No newline at end of file