Skip to content

Commit

Permalink
add reduce min/max along with tests. Also optimize i16 abs for sse2 (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
mcroomp committed Sep 26, 2023
1 parent 8a915b0 commit 6270f8d
Show file tree
Hide file tree
Showing 8 changed files with 178 additions and 8 deletions.
23 changes: 21 additions & 2 deletions src/i16x16_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -390,8 +390,27 @@ impl i16x16 {
#[inline]
#[must_use]
pub fn reduce_add(self) -> i16 {
  // View the sixteen lanes as two 8-lane halves, add the halves lane-wise,
  // and let the `i16x8` reduction finish the horizontal sum. Only this one
  // body is kept: a second `[i16; 16]` + `iter().sum()` body in front of it
  // shadowed `arr` and left a dangling tail expression.
  let arr: [i16x8; 2] = cast(self);

  (arr[0] + arr[1]).reduce_add()
}

/// Returns the smallest value found across all lanes of the vector.
#[inline]
#[must_use]
pub fn reduce_min(self) -> i16 {
  // Take the lane-wise minimum of the two 8-lane halves, then reduce the
  // surviving half horizontally.
  let [lower, upper]: [i16x8; 2] = cast(self);
  let lane_min = lower.min(upper);
  lane_min.reduce_min()
}

/// Returns the largest value found across all lanes of the vector.
#[inline]
#[must_use]
pub fn reduce_max(self) -> i16 {
  // Take the lane-wise maximum of the two 8-lane halves, then reduce the
  // surviving half horizontally.
  let [lower, upper]: [i16x8; 2] = cast(self);
  let lane_max = lower.max(upper);
  lane_max.reduce_max()
}

#[inline]
Expand Down
34 changes: 31 additions & 3 deletions src/i16x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -655,14 +655,41 @@ impl i16x8 {
#[must_use]
pub fn reduce_add(self) -> i16 {
  // Pairwise (tree-shaped) sum of the eight lanes. `wrapping_add` is used
  // throughout, so overflow wraps instead of panicking. The stray
  // `arr.iter().sum()` statement that sat before this expression is removed:
  // it was a tail-less expression in the middle of the body.
  let arr: [i16; 8] = cast(self);

  (arr[0].wrapping_add(arr[1]).wrapping_add(arr[2].wrapping_add(arr[3])))
    .wrapping_add(
      arr[4].wrapping_add(arr[5]).wrapping_add(arr[6].wrapping_add(arr[7])),
    )
}

/// Returns the smallest of the eight lanes.
#[inline]
#[must_use]
pub fn reduce_min(self) -> i16 {
  let [a, b, c, d, e, f, g, h]: [i16; 8] = cast(self);

  // Reduce each group of four lanes as a small tree, then combine the two
  // partial results.
  let low_four = a.min(b).min(c.min(d));
  let high_four = e.min(f).min(g.min(h));
  low_four.min(high_four)
}

/// Returns the largest of the eight lanes.
#[inline]
#[must_use]
pub fn reduce_max(self) -> i16 {
  let [a, b, c, d, e, f, g, h]: [i16; 8] = cast(self);

  // Reduce each group of four lanes as a small tree, then combine the two
  // partial results.
  let low_four = a.max(b).max(c.max(d));
  let high_four = e.max(f).max(g.max(h));
  low_four.max(high_four)
}

#[inline]
#[must_use]
pub fn abs(self) -> Self {
pick! {
if #[cfg(target_feature="ssse3")] {
if #[cfg(target_feature="sse2")] {
let mask = shr_imm_i16_m128i::<15>(self.sse);
Self { sse: bitxor_m128i(add_i16_m128i(self.sse, mask), mask) }
} else if #[cfg(target_feature="ssse3")] {
Self { sse: abs_i16_m128i(self.sse) }
} else if #[cfg(target_feature="simd128")] {
Self { simd: i16x8_abs(self.simd) }
Expand Down Expand Up @@ -692,7 +719,7 @@ impl i16x8 {
#[must_use]
pub fn min(self, rhs: Self) -> Self {
pick! {
if #[cfg(target_feature="sse4.1")] {
if #[cfg(target_feature="sse2")] {
Self { sse: min_i16_m128i(self.sse, rhs.sse) }
} else if #[cfg(target_feature="simd128")] {
Self { simd: i16x8_min(self.simd, rhs.simd) }
Expand All @@ -703,6 +730,7 @@ impl i16x8 {
}
}
}

#[inline]
#[must_use]
pub fn saturating_add(self, rhs: Self) -> Self {
Expand Down
29 changes: 28 additions & 1 deletion src/i32x4_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,8 +443,35 @@ impl i32x4 {
#[inline]
#[must_use]
pub fn reduce_add(self) -> i32 {
  pick! {
    if #[cfg(target_feature="sse2")] {
      // Two add steps: first combine the register with its high-64-bit
      // unpack, then combine that with a copy whose low two elements are
      // swapped, and read the result out of the lowest lane.
      // NOTE(review): exact lane semantics of the safe_arch helpers are not
      // visible here - confirm against their documentation.
      let upper_half = unpack_high_i64_m128i(self.sse, self.sse);
      let pair_sums = add_i32_m128i(upper_half, self.sse);
      let swapped = shuffle_ai_f32_all_m128i::<0b10_11_00_01>(pair_sums);
      let total = add_i32_m128i(pair_sums, swapped);
      get_i32_from_m128i_s(total)
    } else {
      // Portable fallback: wrapping pairwise sum of the four lanes.
      let [a, b, c, d]: [i32; 4] = cast(self);
      let low_pair = a.wrapping_add(b);
      let high_pair = c.wrapping_add(d);
      low_pair.wrapping_add(high_pair)
    }
  }
}

/// Returns the largest of the four lanes.
#[inline]
#[must_use]
pub fn reduce_max(self) -> i32 {
  let [a, b, c, d]: [i32; 4] = cast(self);
  // Compare the lanes pairwise, then compare the two winners.
  let low_pair = a.max(b);
  let high_pair = c.max(d);
  low_pair.max(high_pair)
}

/// Returns the smallest of the four lanes.
#[inline]
#[must_use]
pub fn reduce_min(self) -> i32 {
  let arr: [i32; 4] = cast(self);
  // Pairwise minimum of the lanes, then the minimum of the two winners.
  // The stray `arr.iter().sum()` statement that preceded this expression is
  // removed: it belongs to a sum reduction, not a minimum, and was a
  // tail-less expression in the middle of the body.
  arr[0].min(arr[1]).min(arr[2].min(arr[3]))
}

#[inline]
Expand Down
20 changes: 18 additions & 2 deletions src/i32x8_.rs
Original file line number Diff line number Diff line change
Expand Up @@ -335,8 +335,24 @@ impl i32x8 {
#[inline]
#[must_use]
pub fn reduce_add(self) -> i32 {
  // View the eight lanes as two 4-lane halves, add the halves lane-wise,
  // and let the `i32x4` reduction finish the horizontal sum. Only this one
  // body is kept: a second `[i32; 8]` + `iter().sum()` body in front of it
  // shadowed `arr` and left a dangling tail expression.
  let arr: [i32x4; 2] = cast(self);
  (arr[0] + arr[1]).reduce_add()
}

/// Returns the largest value found across all lanes of the vector.
#[inline]
#[must_use]
pub fn reduce_max(self) -> i32 {
  // Lane-wise maximum of the two 4-lane halves, then reduce the survivor.
  let [lower, upper]: [i32x4; 2] = cast(self);
  let lane_max = lower.max(upper);
  lane_max.reduce_max()
}

/// Returns the smallest value found across all lanes of the vector.
#[inline]
#[must_use]
pub fn reduce_min(self) -> i32 {
  // Lane-wise minimum of the two 4-lane halves, then reduce the survivor.
  let [lower, upper]: [i32x4; 2] = cast(self);
  let lane_min = lower.min(upper);
  lane_min.reduce_min()
}

#[inline]
Expand Down
20 changes: 20 additions & 0 deletions tests/all_tests/t_i16x16.rs
Original file line number Diff line number Diff line change
Expand Up @@ -647,3 +647,23 @@ fn impl_i16x16_reduce_add() {
i16x16::from([1, 2, 3, 4, 5, 6, 7, 9, 10, 20, 30, 40, 50, 60, 70, 90]);
assert_eq!(p.reduce_add(), 407);
}

#[test]
fn impl_i16x16_reduce_min() {
  // Place the minimum in every one of the 16 lanes in turn, so both 8-lane
  // halves of the vector are exercised (0..8 only reached the first half).
  for i in 0..16 {
    let mut v = [i16::MAX; 16];
    v[i] = i16::MIN;
    let p = i16x16::from(v);
    assert_eq!(p.reduce_min(), i16::MIN);
  }
}

#[test]
fn impl_i16x16_reduce_max() {
  // Place the maximum in every one of the 16 lanes in turn and check that
  // `reduce_max` finds it. The assertion targets `reduce_max`/`i16::MAX`;
  // checking `reduce_min` here would pass without testing the max path.
  for i in 0..16 {
    let mut v = [i16::MIN; 16];
    v[i] = i16::MAX;
    let p = i16x16::from(v);
    assert_eq!(p.reduce_max(), i16::MAX);
  }
}
20 changes: 20 additions & 0 deletions tests/all_tests/t_i16x8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -316,3 +316,23 @@ fn impl_i16x8_reduce_add() {
let p = i16x8::from([1, 2, 3, 4, 5, 6, 7, 9]);
assert_eq!(p.reduce_add(), 37);
}

#[test]
fn impl_i16x8_reduce_min() {
  // Move a single minimum through every lane position.
  for lane in 0..8 {
    let mut lanes = [i16::MAX; 8];
    lanes[lane] = i16::MIN;
    assert_eq!(i16x8::from(lanes).reduce_min(), i16::MIN);
  }
}

#[test]
fn impl_i16x8_reduce_max() {
  // Place the maximum in each lane in turn and check that `reduce_max`
  // finds it. The assertion targets `reduce_max`/`i16::MAX`; checking
  // `reduce_min` here would pass without testing the max path.
  for i in 0..8 {
    let mut v = [i16::MIN; 8];
    v[i] = i16::MAX;
    let p = i16x8::from(v);
    assert_eq!(p.reduce_max(), i16::MAX);
  }
}
20 changes: 20 additions & 0 deletions tests/all_tests/t_i32x4.rs
Original file line number Diff line number Diff line change
Expand Up @@ -201,3 +201,23 @@ fn impl_i32x4_reduce_add() {
let p = i32x4::from([10000000, 20000000, 30000000, -40000000]);
assert_eq!(p.reduce_add(), 20000000);
}

#[test]
fn impl_i32x4_reduce_min() {
  // Move a single minimum through every lane position.
  for lane in 0..4 {
    let mut lanes = [i32::MAX; 4];
    lanes[lane] = i32::MIN;
    assert_eq!(i32x4::from(lanes).reduce_min(), i32::MIN);
  }
}

#[test]
fn impl_i32x4_reduce_max() {
  // Move a single maximum through every lane position.
  for lane in 0..4 {
    let mut lanes = [i32::MIN; 4];
    lanes[lane] = i32::MAX;
    assert_eq!(i32x4::from(lanes).reduce_max(), i32::MAX);
  }
}
20 changes: 20 additions & 0 deletions tests/all_tests/t_i32x8.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,3 +271,23 @@ fn impl_i32x8_reduce_add() {
]);
assert_eq!(p.reduce_add(), 370000000);
}

#[test]
fn impl_i32x8_reduce_min() {
  // Move a single minimum through every lane position.
  for lane in 0..8 {
    let mut lanes = [i32::MAX; 8];
    lanes[lane] = i32::MIN;
    assert_eq!(i32x8::from(lanes).reduce_min(), i32::MIN);
  }
}

#[test]
fn impl_i32x8_reduce_max() {
  // Move a single maximum through every lane position.
  for lane in 0..8 {
    let mut lanes = [i32::MIN; 8];
    lanes[lane] = i32::MAX;
    assert_eq!(i32x8::from(lanes).reduce_max(), i32::MAX);
  }
}

0 comments on commit 6270f8d

Please sign in to comment.