Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement LLVM x86 bmi intrinsics #3674

Merged
merged 1 commit into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 108 additions & 0 deletions src/shims/x86/bmi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
use rustc_span::Symbol;
use rustc_target::spec::abi::Abi;

use crate::*;

impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
    /// Emulates the LLVM intrinsics of the `llvm.x86.bmi.` namespace:
    /// `bextr`, `bzhi`, `pext` and `pdep`, each with a `.32` or `.64` suffix.
    ///
    /// `link_name` must already be known to start with `llvm.x86.bmi.`; this
    /// function panics otherwise. `args` must hold exactly two integer operands
    /// of the width named by the suffix.
    ///
    /// Returns `EmulateItemResult::NotSupported` for an unknown intrinsic name and
    /// for a 64-bit intrinsic on a target whose architecture is not `x86_64`.
    /// Otherwise the result is written to `dest` and
    /// `EmulateItemResult::NeedsReturn` is returned.
    fn emulate_x86_bmi_intrinsic(
        &mut self,
        link_name: Symbol,
        abi: Abi,
        args: &[OpTy<'tcx>],
        dest: &MPlaceTy<'tcx>,
    ) -> InterpResult<'tcx, EmulateItemResult> {
        let this = self.eval_context_mut();

        // Prefix should have already been checked.
        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.bmi.").unwrap();

        // The intrinsics are suffixed with the bit size of their operands.
        // A name without a recognized suffix becomes "" and is rejected by the `match` below.
        let (is_64_bit, unprefixed_name) = if unprefixed_name.ends_with("64") {
            (true, unprefixed_name.strip_suffix(".64").unwrap_or(""))
        } else {
            (false, unprefixed_name.strip_suffix(".32").unwrap_or(""))
        };

        // All intrinsics of the "bmi" namespace belong to the "bmi2" ISA extension.
        // The exception is "bextr", which belongs to "bmi1".
        let target_feature = if unprefixed_name == "bextr" { "bmi1" } else { "bmi2" };
        this.expect_target_feature_for_intrinsic(link_name, target_feature)?;

        if is_64_bit && this.tcx.sess.target.arch != "x86_64" {
            return Ok(EmulateItemResult::NotSupported);
        }

        let [left, right] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
        let left = this.read_scalar(left)?;
        let right = this.read_scalar(right)?;

        // Widen 32-bit operands to `u64` so that one implementation serves both widths.
        let left = if is_64_bit { left.to_u64()? } else { u64::from(left.to_u32()?) };
        let right = if is_64_bit { right.to_u64()? } else { u64::from(right.to_u32()?) };

        let result = match unprefixed_name {
            // Extract a contiguous range of bits from an unsigned integer.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u32
            "bextr" => emulate_bextr(left, right),
            // Create a copy of an unsigned integer with bits above a certain index cleared.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u32
            "bzhi" => emulate_bzhi(left, right),
            // Extract bit values of an unsigned integer at positions marked by a mask.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32
            "pext" => emulate_pext(left, right),
            // Deposit bit values of an unsigned integer to positions marked by a mask.
            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32
            "pdep" => emulate_pdep(left, right),
            _ => return Ok(EmulateItemResult::NotSupported),
        };

        let result = if is_64_bit {
            Scalar::from_u64(result)
        } else {
            // Each operation above yields a value that fits in 32 bits when both
            // operands do, so this conversion cannot fail.
            Scalar::from_u32(u32::try_from(result).unwrap())
        };
        this.write_scalar(result, dest)?;

        Ok(EmulateItemResult::NeedsReturn)
    }
}

/// Returns a mask with the `count` lowest bits set, or all bits set if `count >= 64`.
fn low_bits_mask(count: u32) -> u64 {
    // `checked_shl` yields `None` exactly when `count >= 64`.
    1u64.checked_shl(count).map_or(u64::MAX, |bit| bit.wrapping_sub(1))
}

/// `bextr`: extracts `len` bits of `source` starting at bit `start`, where `start` is
/// bits 0..8 of `control` and `len` is bits 8..16 of `control`.
fn emulate_bextr(source: u64, control: u64) -> u64 {
    let start = u32::try_from(control & 0xff).unwrap();
    let len = u32::try_from((control >> 8) & 0xff).unwrap();
    // Shifting by 64 or more leaves no bits of `source`.
    let shifted = source.checked_shr(start).unwrap_or(0);
    // Keep the `len` lowest bits of `shifted`, or all bits if `len` is too big.
    shifted & low_bits_mask(len)
}

/// `bzhi`: clears all bits of `source` at positions at or above the index stored in the
/// lowest eight bits of `index`.
fn emulate_bzhi(source: u64, index: u64) -> u64 {
    let index = u32::try_from(index & 0xff).unwrap();
    // Keep the `index` lowest bits of `source`, or all bits if `index` is too big.
    source & low_bits_mask(index)
}

/// `pext`: gathers the bits of `source` selected by `mask` into the low bits of the result.
fn emulate_pext(source: u64, mut mask: u64) -> u64 {
    let mut result = 0u64;
    let mut i = 0u32;
    // Iterate over the mask one 1-bit at a time, from
    // the least significant bit to the most significant bit.
    while mask != 0 {
        // Extract the bit marked by the mask's least significant set bit
        // and put it at position `i` of the result.
        result |= ((source >> mask.trailing_zeros()) & 1) << i;
        i = i.wrapping_add(1);
        // Clear the least significant set bit.
        mask &= mask.wrapping_sub(1);
    }
    result
}

/// `pdep`: scatters the low bits of `source` to the bit positions selected by `mask`.
fn emulate_pdep(mut source: u64, mut mask: u64) -> u64 {
    let mut result = 0u64;
    // Iterate over the mask one 1-bit at a time, from
    // the least significant bit to the most significant bit.
    while mask != 0 {
        // Put rightmost bit of `source` at the position of the current `mask` bit.
        result |= (source & 1) << mask.trailing_zeros();
        // Go to next bit of `source`.
        source >>= 1;
        // Clear the least significant set bit.
        mask &= mask.wrapping_sub(1);
    }
    result
}
6 changes: 6 additions & 0 deletions src/shims/x86/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use helpers::bool_to_simd_element;
mod aesni;
mod avx;
mod avx2;
mod bmi;
mod sse;
mod sse2;
mod sse3;
Expand Down Expand Up @@ -113,6 +114,11 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
pclmulqdq(this, left, right, imm, dest)?;
}

name if name.starts_with("bmi.") => {
return bmi::EvalContextExt::emulate_x86_bmi_intrinsic(
this, link_name, abi, args, dest,
);
}
name if name.starts_with("sse.") => {
return sse::EvalContextExt::emulate_x86_sse_intrinsic(
this, link_name, abi, args, dest,
Expand Down
216 changes: 216 additions & 0 deletions tests/pass/shims/x86/intrinsics-x86-bmi.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
// Ignore everything except x86 and x86_64
// Any new targets that are added to CI should be ignored here.
// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
//@ignore-target-aarch64
//@ignore-target-arm
//@ignore-target-avr
//@ignore-target-s390x
//@ignore-target-thumbv7em
//@ignore-target-wasm32
//@compile-flags: -C target-feature=+bmi1,+bmi2

#[cfg(target_arch = "x86")]
use std::arch::x86::*;
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::*;

fn main() {
TDecking marked this conversation as resolved.
Show resolved Hide resolved
// BMI1 and BMI2 are independent from each other, so both must be checked.
assert!(is_x86_feature_detected!("bmi1"));
assert!(is_x86_feature_detected!("bmi2"));

unsafe {
test_bmi_32();
test_bmi_64();
}
}

/// Test the 32-bit variants of the intrinsics.
///
/// # Safety
///
/// The `bmi1` and `bmi2` target features must be available.
unsafe fn test_bmi_32() {
    unsafe fn test_bextr_u32() {
        let r = _bextr_u32(0b0101_0000u32, 4, 4);
        assert_eq!(r, 0b0000_0101u32);

        for i in 0..16 {
            assert_eq!(_bextr_u32(u32::MAX, i, 4), 0b1111);
            assert_eq!(_bextr_u32(u32::MAX, 4, i), (1 << i) - 1);
        }

        // Ensure that indices larger than the bit count are covered.
        // It is important to go above 32 in order to verify the bit selection
        // of the instruction.

        for i in 0..256 {
            // If the index is out of bounds, the original input won't be changed, thus the `min(32)`.
            assert_eq!(_bextr_u32(u32::MAX, 0, i).count_ones(), i.min(32));
        }

        for i in 0..256 {
            assert_eq!(_bextr_u32(u32::MAX, i, 0), 0);
        }

        // Test cases with completely random values. These cases also test
        // that the function works even if upper bits of the control value are set.
        assert_eq!(_bextr2_u32(0x7408a392, 0x54ef705), 0x3a0451c);
        assert_eq!(_bextr2_u32(0xbc5a3494, 0xdd193203), 0x178b4692);
        assert_eq!(_bextr2_u32(0xc0332325, 0xf96e207), 0x1806646);
    }
    test_bextr_u32();

    unsafe fn test_pext_u32() {
        let n = 0b1011_1110_1001_0011u32;

        let m0 = 0b0110_0011_1000_0101u32;
        let s0 = 0b0000_0000_0011_0101u32;

        let m1 = 0b1110_1011_1110_1111u32;
        let s1 = 0b0001_0111_0100_0011u32;

        // Testing of random values.
        assert_eq!(_pext_u32(n, m0), s0);
        assert_eq!(_pext_u32(n, m1), s1);
        assert_eq!(_pext_u32(0x12345678, 0xff00fff0), 0x00012567);

        // Testing of various identities.
        assert_eq!(_pext_u32(u32::MAX, u32::MAX), u32::MAX);
        assert_eq!(_pext_u32(u32::MAX, 0), 0);
        assert_eq!(_pext_u32(0, u32::MAX), 0);
    }
    test_pext_u32();

    unsafe fn test_pdep_u32() {
        let n = 0b1011_1110_1001_0011u32;

        let m0 = 0b0110_0011_1000_0101u32;
        let s0 = 0b0000_0010_0000_0101u32;

        let m1 = 0b1110_1011_1110_1111u32;
        let s1 = 0b1110_1001_0010_0011u32;

        // Testing of random values.
        assert_eq!(_pdep_u32(n, m0), s0);
        assert_eq!(_pdep_u32(n, m1), s1);
        assert_eq!(_pdep_u32(0x00012567, 0xff00fff0), 0x12005670);

        // Testing of various identities.
        assert_eq!(_pdep_u32(u32::MAX, u32::MAX), u32::MAX);
        assert_eq!(_pdep_u32(0, u32::MAX), 0);
        assert_eq!(_pdep_u32(u32::MAX, 0), 0);
    }
    test_pdep_u32();

    unsafe fn test_bzhi_u32() {
        let n = 0b1111_0010u32;
        let s = 0b0001_0010u32;
        assert_eq!(_bzhi_u32(n, 5), s);

        // Ensure that indices larger than the bit count are covered.
        // Going above 32 exercises the out-of-bounds case, and going above 255
        // verifies that only the lowest eight bits of the index are used.
        for i in 0..=512 {
            // The instruction only takes the lowest eight bits to generate the index, hence `i & 0xff`.
            // If the index is out of bounds, the original input won't be changed, thus the `min(32)`.
            let expected = 1u32.checked_shl((i & 0xff).min(32)).unwrap_or(0).wrapping_sub(1);
            let actual = _bzhi_u32(u32::MAX, i);
            assert_eq!(expected, actual);
        }
    }
    test_bzhi_u32();
}

/// Empty stand-in used on targets other than `x86_64`, so that the call to
/// `test_bmi_64` in `main` compiles on every target.
#[cfg(not(target_arch = "x86_64"))]
unsafe fn test_bmi_64() {}

/// Test the 64-bit variants of the intrinsics.
#[cfg(target_arch = "x86_64")]
unsafe fn test_bmi_64() {
    unsafe fn test_bextr_u64() {
        assert_eq!(_bextr_u64(0b0101_0000u64, 4, 4), 0b0000_0101u64);

        for k in 0..16u32 {
            // Vary the start position with a fixed length of four bits...
            assert_eq!(_bextr_u64(u64::MAX, k, 4), 0b1111);
            // ...and vary the length with a fixed start position.
            assert_eq!(_bextr_u64(u64::MAX, 32, k), (1 << k) - 1);
        }

        // Cover lengths and start positions beyond the operand width. Values
        // above 64 are needed to check how the instruction selects its bits.

        for len in 0..256u32 {
            // A length past the operand width leaves the input untouched, hence `min(64)`.
            let extracted = _bextr_u64(u64::MAX, 0, len);
            assert_eq!(extracted.count_ones(), len.min(64));
        }

        for start in 0..256u32 {
            assert_eq!(_bextr_u64(u64::MAX, start, 0), 0);
        }

        // Completely random values. These also check that set upper bits
        // in the control value do not affect the result.
        let random_cases = [
            (0x4ff6cfbcea75f055u64, 0x216642e228425719u64, 0x27fb67de75u64),
            (0xb05e991e6f6e1b6, 0xc76dd5d7f67dfc14, 0xb05e991e6f),
            (0x5a3a629e323d848f, 0x95ac507d20e7719, 0x2d1d314f19),
        ];
        for (source, control, expected) in random_cases {
            assert_eq!(_bextr2_u64(source, control), expected);
        }
    }
    test_bextr_u64();

    unsafe fn test_pext_u64() {
        let source = 0b1011_1110_1001_0011u64;
        let cases = [
            // Random values.
            (source, 0b0110_0011_1000_0101u64, 0b0000_0000_0011_0101u64),
            (source, 0b1110_1011_1110_1111, 0b0001_0111_0100_0011),
            (0x12345678, 0xff00fff0, 0x00012567),
            // Various identities.
            (u64::MAX, u64::MAX, u64::MAX),
            (u64::MAX, 0, 0),
            (0, u64::MAX, 0),
        ];
        for (value, mask, expected) in cases {
            assert_eq!(_pext_u64(value, mask), expected);
        }
    }
    test_pext_u64();

    unsafe fn test_pdep_u64() {
        let source = 0b1011_1110_1001_0011u64;
        let cases = [
            // Random values.
            (source, 0b0110_0011_1000_0101u64, 0b0000_0010_0000_0101u64),
            (source, 0b1110_1011_1110_1111, 0b1110_1001_0010_0011),
            (0x00012567, 0xff00fff0, 0x12005670),
            // Various identities.
            (u64::MAX, u64::MAX, u64::MAX),
            (0, u64::MAX, 0),
            (u64::MAX, 0, 0),
        ];
        for (value, mask, expected) in cases {
            assert_eq!(_pdep_u64(value, mask), expected);
        }
    }
    test_pdep_u64();

    unsafe fn test_bzhi_u64() {
        assert_eq!(_bzhi_u64(0b1111_0010u64, 5), 0b0001_0010u64);

        // Cover indices beyond the operand width. Going past 255 is needed
        // to check how the instruction selects its index bits.
        for raw_index in 0..=512u32 {
            // Only the lowest eight bits form the index, hence `& 0xff`.
            // An index past the operand width leaves the input untouched, hence `min(64)`.
            let index = (raw_index & 0xff).min(64);
            let expected = match 1u64.checked_shl(index) {
                Some(bit) => bit.wrapping_sub(1),
                None => u64::MAX,
            };
            let actual = _bzhi_u64(u64::MAX, raw_index);
            assert_eq!(expected, actual);
        }
    }
    test_bzhi_u64();
}