Skip to content

Commit

Permalink
Implement LLVM x86 bmi intrinsics
Browse files Browse the repository at this point in the history
  • Loading branch information
TDecking committed Jun 20, 2024
1 parent 54d7471 commit 459eada
Show file tree
Hide file tree
Showing 3 changed files with 330 additions and 0 deletions.
108 changes: 108 additions & 0 deletions src/shims/x86/bmi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use rustc_span::Symbol;
use rustc_target::spec::abi::Abi;

use crate::*;

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Emulates the LLVM `llvm.x86.bmi.*` intrinsics (`bextr`, `bzhi`, `pext`
    /// and `pdep`), in both their 32-bit and 64-bit forms.
    ///
    /// `link_name` is the full intrinsic name, `args` holds the two operands,
    /// and the result is written to `dest`. Returns
    /// `EmulateItemResult::NotSupported` for unrecognized names or for 64-bit
    /// variants on non-x86_64 targets.
    fn emulate_x86_bmi_intrinsic(
        &mut self,
        link_name: Symbol,
        abi: Abi,
        args: &[OpTy<'tcx>],
        dest: &MPlaceTy<'tcx>,
    ) -> InterpResult<'tcx, EmulateItemResult> {
        let this = self.eval_context_mut();

        // Prefix should have already been checked.
        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.bmi.").unwrap();

        // The intrinsics are suffixed with the bit size of their operands.
        // A missing `.32`/`.64` separator leaves an empty name, which falls
        // through to `NotSupported` in the match below.
        let (is_64_bit, unprefixed_name) = if unprefixed_name.ends_with("64") {
            (true, unprefixed_name.strip_suffix(".64").unwrap_or(""))
        } else {
            (false, unprefixed_name.strip_suffix(".32").unwrap_or(""))
        };

        // All intrinsics of the "bmi" namespace belong to the "bmi2" ISA extension.
        // The exception is "bextr", which belongs to "bmi1".
        let target_feature = if unprefixed_name == "bextr" { "bmi1" } else { "bmi2" };
        this.expect_target_feature_for_intrinsic(link_name, target_feature)?;

        // 64-bit operand variants only exist on 64-bit targets.
        if is_64_bit && this.tcx.sess.target.arch != "x86_64" {
            return Ok(EmulateItemResult::NotSupported);
        }

        let [left, right] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
        let left = this.read_scalar(left)?;
        let right = this.read_scalar(right)?;

        // Widen 32-bit operands to `u64` so a single implementation covers both sizes.
        let left = if is_64_bit { left.to_u64()? } else { u64::from(left.to_u32()?) };
        let right = if is_64_bit { right.to_u64()? } else { u64::from(right.to_u32()?) };

        let result = match unprefixed_name {
            // Extract a contiguous range of bits from an unsigned integer.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u32
            "bextr" => {
                // Bits 0..8 of the control operand are the start index,
                // bits 8..16 the length; higher bits are ignored.
                let start = u32::try_from(right & 0xff).unwrap();
                let len = u32::try_from((right >> 8) & 0xff).unwrap();
                // A start index at or past the operand width selects no bits at all.
                let shifted = left.checked_shr(start).unwrap_or(0);
                // Keep the `len` lowest bits of `shifted`, or all bits if `len` is too big.
                if len >= 64 { shifted } else { shifted & 1u64.wrapping_shl(len).wrapping_sub(1) }
            }
            // Create a copy of an unsigned integer with bits above a certain index cleared.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u32
            "bzhi" => {
                // Only the low eight bits of the index operand are used.
                let index = u32::try_from(right & 0xff).unwrap();
                // Keep the `index` lowest bits of `left`, or all bits if `index` is too big.
                if index >= 64 { left } else { left & 1u64.wrapping_shl(index).wrapping_sub(1) }
            }
            // Extract bit values of an unsigned integer at positions marked by a mask.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32
            "pext" => {
                let mut mask = right;
                let mut i = 0u32;
                let mut result = 0;
                // Iterate over the mask one 1-bit at a time, from
                // the least significant bit to the most significant bit.
                while mask != 0 {
                    // Extract the bit marked by the mask's least significant set bit
                    // and put it at position `i` of the result.
                    result |= u64::from(left & (1 << mask.trailing_zeros()) != 0) << i;
                    i = i.wrapping_add(1);
                    // Clear the least significant set bit.
                    mask &= mask.wrapping_sub(1);
                }
                result
            }
            // Deposit bit values of an unsigned integer to positions marked by a mask.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32
            "pdep" => {
                let mut mask = right;
                let mut set = left;
                let mut result = 0;
                // Iterate over the mask one 1-bit at a time, from
                // the least significant bit to the most significant bit.
                while mask != 0 {
                    // Put rightmost bit of `set` at the position of the current `mask` bit.
                    result |= (set & 1) << mask.trailing_zeros();
                    // Go to next bit of `set`.
                    set >>= 1;
                    // Clear the least significant set bit.
                    mask &= mask.wrapping_sub(1);
                }
                result
            }
            _ => return Ok(EmulateItemResult::NotSupported),
        };

        // Truncate back to the operand width; for 32-bit intrinsics the upper
        // half of `result` is always zero, so the conversion cannot fail.
        let result = if is_64_bit {
            Scalar::from_u64(result)
        } else {
            Scalar::from_u32(u32::try_from(result).unwrap())
        };
        this.write_scalar(result, dest)?;

        Ok(EmulateItemResult::NeedsReturn)
    }
}
6 changes: 6 additions & 0 deletions src/shims/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use helpers::bool_to_simd_element;
mod aesni;
mod avx;
mod avx2;
mod bmi;
mod sse;
mod sse2;
mod sse3;
Expand Down Expand Up @@ -113,6 +114,11 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
pclmulqdq(this, left, right, imm, dest)?;
}

name if name.starts_with("bmi.") => {
return bmi::EvalContextExt::emulate_x86_bmi_intrinsic(
this, link_name, abi, args, dest,
);
}
name if name.starts_with("sse.") => {
return sse::EvalContextExt::emulate_x86_sse_intrinsic(
this, link_name, abi, args, dest,
Expand Down
216 changes: 216 additions & 0 deletions tests/pass/shims/x86/intrinsics-x86-bmi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
// Ignore everything except x86 and x86_64
// Any new targets that are added to CI should be ignored here.
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@ignore-target-aarch64
//@ignore-target-arm
//@ignore-target-avr
//@ignore-target-s390x
//@ignore-target-thumbv7em
//@ignore-target-wasm32
//@compile-flags: -C target-feature=+bmi1,+bmi2

#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

fn main() {
    // BMI1 and BMI2 are separate CPUID feature bits; detecting one says
    // nothing about the other, so verify each individually before running.
    assert!(is_x86_feature_detected!("bmi1"));
    assert!(is_x86_feature_detected!("bmi2"));

    // SAFETY: the feature checks above guarantee the intrinsics are available.
    unsafe { test_bmi_32() };
    unsafe { test_bmi_64() };
}

/// Exercise the 32-bit flavors of the BMI intrinsics.
unsafe fn test_bmi_32() {
    unsafe fn test_bextr_u32() {
        assert_eq!(_bextr_u32(0b0101_0000u32, 4, 4), 0b0000_0101u32);

        for k in 0..16 {
            assert_eq!(_bextr_u32(u32::MAX, k, 4), 0b1111);
            assert_eq!(_bextr_u32(u32::MAX, 4, k), (1 << k) - 1);
        }

        // Cover start/length values beyond the operand width. Going past 32
        // matters for verifying the instruction's bit-selection behavior.
        for len in 0..256 {
            // An out-of-range length cuts nothing off, hence the `min(32)`.
            assert_eq!(_bextr_u32(u32::MAX, 0, len).count_ones(), len.min(32));
        }

        for start in 0..256 {
            assert_eq!(_bextr_u32(u32::MAX, start, 0), 0);
        }

        // Completely random operands; these also confirm that stray upper bits
        // in the control word do not disturb the result.
        assert_eq!(_bextr2_u32(0x7408a392, 0x54ef705), 0x3a0451c);
        assert_eq!(_bextr2_u32(0xbc5a3494, 0xdd193203), 0x178b4692);
        assert_eq!(_bextr2_u32(0xc0332325, 0xf96e207), 0x1806646);
    }
    test_bextr_u32();

    unsafe fn test_pext_u32() {
        let source = 0b1011_1110_1001_0011u32;

        // Hand-computed mask/result pairs, plus one random-looking case.
        assert_eq!(_pext_u32(source, 0b0110_0011_1000_0101u32), 0b0000_0000_0011_0101u32);
        assert_eq!(_pext_u32(source, 0b1110_1011_1110_1111u32), 0b0001_0111_0100_0011u32);
        assert_eq!(_pext_u32(0x12345678, 0xff00fff0), 0x00012567);

        // Identities: a full mask extracts everything; an empty mask or an
        // all-zero input extracts nothing.
        assert_eq!(_pext_u32(u32::MAX, u32::MAX), u32::MAX);
        assert_eq!(_pext_u32(u32::MAX, 0), 0);
        assert_eq!(_pext_u32(0, u32::MAX), 0);
    }
    test_pext_u32();

    unsafe fn test_pdep_u32() {
        let source = 0b1011_1110_1001_0011u32;

        // Hand-computed mask/result pairs, plus one random-looking case.
        assert_eq!(_pdep_u32(source, 0b0110_0011_1000_0101u32), 0b0000_0010_0000_0101u32);
        assert_eq!(_pdep_u32(source, 0b1110_1011_1110_1111u32), 0b1110_1001_0010_0011u32);
        assert_eq!(_pdep_u32(0x00012567, 0xff00fff0), 0x12005670);

        // Identities: a full mask deposits everything; an all-zero input or an
        // empty mask deposits nothing.
        assert_eq!(_pdep_u32(u32::MAX, u32::MAX), u32::MAX);
        assert_eq!(_pdep_u32(0, u32::MAX), 0);
        assert_eq!(_pdep_u32(u32::MAX, 0), 0);
    }
    test_pdep_u32();

    unsafe fn test_bzhi_u32() {
        assert_eq!(_bzhi_u32(0b1111_0010u32, 5), 0b0001_0010u32);

        // Cover indices well beyond the operand width to verify the
        // instruction's bit-selection behavior.
        for idx in 0..=512 {
            // Only the lowest eight index bits are used, hence `idx & 0xff`;
            // an out-of-range index leaves the input unchanged, hence `min(32)`.
            let expected = 1u32.checked_shl((idx & 0xff).min(32)).unwrap_or(0).wrapping_sub(1);
            assert_eq!(expected, _bzhi_u32(u32::MAX, idx));
        }
    }
    test_bzhi_u32();
}

// Stub for 32-bit x86: the 64-bit intrinsics do not exist there, but `main`
// calls `test_bmi_64` unconditionally, so provide a no-op.
#[cfg(not(target_arch = "x86_64"))]
unsafe fn test_bmi_64() {}

/// Exercise the 64-bit flavors of the BMI intrinsics.
#[cfg(target_arch = "x86_64")]
unsafe fn test_bmi_64() {
    unsafe fn test_bextr_u64() {
        assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);

        for k in 0..16 {
            assert_eq!(_bextr_u64(u64::MAX, k, 4), 0b1111);
            assert_eq!(_bextr_u64(u64::MAX, 32, k), (1 << k) - 1);
        }

        // Cover start/length values beyond the operand width. Going past 64
        // matters for verifying the instruction's bit-selection behavior.
        for len in 0..256 {
            // An out-of-range length cuts nothing off, hence the `min(64)`.
            assert_eq!(_bextr_u64(u64::MAX, 0, len).count_ones(), len.min(64));
        }

        for start in 0..256 {
            assert_eq!(_bextr_u64(u64::MAX, start, 0), 0);
        }

        // Completely random operands; these also confirm that stray upper bits
        // in the control word do not disturb the result.
        assert_eq!(_bextr2_u64(0x4ff6cfbcea75f055, 0x216642e228425719), 0x27fb67de75);
        assert_eq!(_bextr2_u64(0xb05e991e6f6e1b6, 0xc76dd5d7f67dfc14), 0xb05e991e6f);
        assert_eq!(_bextr2_u64(0x5a3a629e323d848f, 0x95ac507d20e7719), 0x2d1d314f19);
    }
    test_bextr_u64();

    unsafe fn test_pext_u64() {
        let source = 0b1011_1110_1001_0011u64;

        // Hand-computed mask/result pairs, plus one random-looking case.
        assert_eq!(_pext_u64(source, 0b0110_0011_1000_0101u64), 0b0000_0000_0011_0101u64);
        assert_eq!(_pext_u64(source, 0b1110_1011_1110_1111u64), 0b0001_0111_0100_0011u64);
        assert_eq!(_pext_u64(0x12345678, 0xff00fff0), 0x00012567);

        // Identities: a full mask extracts everything; an empty mask or an
        // all-zero input extracts nothing.
        assert_eq!(_pext_u64(u64::MAX, u64::MAX), u64::MAX);
        assert_eq!(_pext_u64(u64::MAX, 0), 0);
        assert_eq!(_pext_u64(0, u64::MAX), 0);
    }
    test_pext_u64();

    unsafe fn test_pdep_u64() {
        let source = 0b1011_1110_1001_0011u64;

        // Hand-computed mask/result pairs, plus one random-looking case.
        assert_eq!(_pdep_u64(source, 0b0110_0011_1000_0101u64), 0b0000_0010_0000_0101u64);
        assert_eq!(_pdep_u64(source, 0b1110_1011_1110_1111u64), 0b1110_1001_0010_0011u64);
        assert_eq!(_pdep_u64(0x00012567, 0xff00fff0), 0x12005670);

        // Identities: a full mask deposits everything; an all-zero input or an
        // empty mask deposits nothing.
        assert_eq!(_pdep_u64(u64::MAX, u64::MAX), u64::MAX);
        assert_eq!(_pdep_u64(0, u64::MAX), 0);
        assert_eq!(_pdep_u64(u64::MAX, 0), 0);
    }
    test_pdep_u64();

    unsafe fn test_bzhi_u64() {
        assert_eq!(_bzhi_u64(0b1111_0010u64, 5), 0b0001_0010u64);

        // Cover indices well beyond 255 to verify the instruction's
        // bit-selection behavior.
        for idx in 0..=512 {
            // Only the lowest eight index bits are used, hence `idx & 0xff`;
            // an out-of-range index leaves the input unchanged, hence `min(64)`.
            let expected = 1u64.checked_shl((idx & 0xff).min(64)).unwrap_or(0).wrapping_sub(1);
            assert_eq!(expected, _bzhi_u64(u64::MAX, idx));
        }
    }
    test_bzhi_u64();
}

0 comments on commit 459eada

Please sign in to comment.