From 459eadaf8a1fdb40603410f4935fd0f3e27b7404 Mon Sep 17 00:00:00 2001
From: Tobias Decking
Date: Sat, 15 Jun 2024 14:43:29 +0200
Subject: [PATCH] Implement LLVM x86 bmi intrinsics

---
 src/shims/x86/bmi.rs                       | 108 +++++++++++
 src/shims/x86/mod.rs                       |   6 +
 tests/pass/shims/x86/intrinsics-x86-bmi.rs | 216 +++++++++++++++++++++
 3 files changed, 330 insertions(+)
 create mode 100644 src/shims/x86/bmi.rs
 create mode 100644 tests/pass/shims/x86/intrinsics-x86-bmi.rs

diff --git a/src/shims/x86/bmi.rs b/src/shims/x86/bmi.rs
new file mode 100644
index 0000000000..e70757f439
--- /dev/null
+++ b/src/shims/x86/bmi.rs
@@ -0,0 +1,108 @@
+use rustc_span::Symbol;
+use rustc_target::spec::abi::Abi;
+
+use crate::*;
+
+impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
+pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
+    fn emulate_x86_bmi_intrinsic(
+        &mut self,
+        link_name: Symbol,
+        abi: Abi,
+        args: &[OpTy<'tcx>],
+        dest: &MPlaceTy<'tcx>,
+    ) -> InterpResult<'tcx, EmulateItemResult> {
+        let this = self.eval_context_mut();
+
+        // Prefix should have already been checked.
+        let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.bmi.").unwrap();
+
+        // The intrinsics are suffixed with the bit size of their operands.
+        let (is_64_bit, unprefixed_name) = if unprefixed_name.ends_with("64") {
+            (true, unprefixed_name.strip_suffix(".64").unwrap_or(""))
+        } else {
+            (false, unprefixed_name.strip_suffix(".32").unwrap_or(""))
+        };
+
+        // All intrinsics of the "bmi" namespace belong to the "bmi2" ISA extension.
+        // The exception is "bextr", which belongs to "bmi1".
+        let target_feature = if unprefixed_name == "bextr" { "bmi1" } else { "bmi2" };
+        this.expect_target_feature_for_intrinsic(link_name, target_feature)?;
+
+        if is_64_bit && this.tcx.sess.target.arch != "x86_64" {
+            return Ok(EmulateItemResult::NotSupported);
+        }
+
+        let [left, right] = this.check_shim(abi, Abi::C { unwind: false }, link_name, args)?;
+        let left = this.read_scalar(left)?;
+        let right = this.read_scalar(right)?;
+
+        let left = if is_64_bit { left.to_u64()? } else { u64::from(left.to_u32()?) };
+        let right = if is_64_bit { right.to_u64()? } else { u64::from(right.to_u32()?) };
+
+        let result = match unprefixed_name {
+            // Extract a contiguous range of bits from an unsigned integer.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bextr_u32
+            "bextr" => {
+                let start = u32::try_from(right & 0xff).unwrap();
+                let len = u32::try_from((right >> 8) & 0xff).unwrap();
+                let shifted = left.checked_shr(start).unwrap_or(0);
+                // Keep the `len` lowest bits of `shifted`, or all bits if `len` is too big.
+                if len >= 64 { shifted } else { shifted & 1u64.wrapping_shl(len).wrapping_sub(1) }
+            }
+            // Create a copy of an unsigned integer with bits above a certain index cleared.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_bzhi_u32
+            "bzhi" => {
+                let index = u32::try_from(right & 0xff).unwrap();
+                // Keep the `index` lowest bits of `left`, or all bits if `index` is too big.
+                if index >= 64 { left } else { left & 1u64.wrapping_shl(index).wrapping_sub(1) }
+            }
+            // Extract bit values of an unsigned integer at positions marked by a mask.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pext_u32
+            "pext" => {
+                let mut mask = right;
+                let mut i = 0u32;
+                let mut result = 0;
+                // Iterate over the mask one 1-bit at a time, from
+                // the least significant bit to the most significant bit.
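+                //
+                // For example, `pext` of `left = 0b1001` with `mask = 0b1010`
+                // visits the mask's set bits at positions 1 and 3, extracts
+                // `left` bit 1 (`0`) and bit 3 (`1`), and packs them into the
+                // low bits of the result, yielding `0b10`.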
+                while mask != 0 {
+                    // Extract the bit marked by the mask's least significant set bit
+                    // and put it at position `i` of the result.
+                    result |= u64::from(left & (1 << mask.trailing_zeros()) != 0) << i;
+                    i = i.wrapping_add(1);
+                    // Clear the least significant set bit.
+                    mask &= mask.wrapping_sub(1);
+                }
+                result
+            }
+            // Deposit bit values of an unsigned integer to positions marked by a mask.
+            // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_pdep_u32
+            "pdep" => {
+                let mut mask = right;
+                let mut set = left;
+                let mut result = 0;
+                // Iterate over the mask one 1-bit at a time, from
+                // the least significant bit to the most significant bit.
+                while mask != 0 {
+                    // Put the rightmost bit of `set` at the position of the current `mask` bit.
+                    result |= (set & 1) << mask.trailing_zeros();
+                    // Go to the next bit of `set`.
+                    set >>= 1;
+                    // Clear the least significant set bit.
+                    mask &= mask.wrapping_sub(1);
+                }
+                result
+            }
+            _ => return Ok(EmulateItemResult::NotSupported),
+        };
+
+        let result = if is_64_bit {
+            Scalar::from_u64(result)
+        } else {
+            Scalar::from_u32(u32::try_from(result).unwrap())
+        };
+        this.write_scalar(result, dest)?;
+
+        Ok(EmulateItemResult::NeedsReturn)
+    }
+}
diff --git a/src/shims/x86/mod.rs b/src/shims/x86/mod.rs
index b71aec0216..704c45fdd6 100644
--- a/src/shims/x86/mod.rs
+++ b/src/shims/x86/mod.rs
@@ -14,6 +14,7 @@ use helpers::bool_to_simd_element;
 mod aesni;
 mod avx;
 mod avx2;
+mod bmi;
 mod sse;
 mod sse2;
 mod sse3;
@@ -113,6 +114,11 @@ pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
                 pclmulqdq(this, left, right, imm, dest)?;
             }
 
+            name if name.starts_with("bmi.") => {
+                return bmi::EvalContextExt::emulate_x86_bmi_intrinsic(
+                    this, link_name, abi, args, dest,
+                );
+            }
             name if name.starts_with("sse.") => {
                 return sse::EvalContextExt::emulate_x86_sse_intrinsic(
                     this, link_name, abi, args, dest,
diff --git a/tests/pass/shims/x86/intrinsics-x86-bmi.rs b/tests/pass/shims/x86/intrinsics-x86-bmi.rs
new file mode 100644
index 0000000000..33424117c4
--- /dev/null
+++ b/tests/pass/shims/x86/intrinsics-x86-bmi.rs
@@ -0,0 +1,216 @@
+// Ignore everything except x86 and x86_64
+// Any new targets that are added to CI should be ignored here.
+// (We cannot use `cfg`-based tricks here since the `target-feature` flags below only work on x86.)
+//@ignore-target-aarch64
+//@ignore-target-arm
+//@ignore-target-avr
+//@ignore-target-s390x
+//@ignore-target-thumbv7em
+//@ignore-target-wasm32
+//@compile-flags: -C target-feature=+bmi1,+bmi2
+
+#[cfg(target_arch = "x86")]
+use std::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use std::arch::x86_64::*;
+
+fn main() {
+    // BMI1 and BMI2 are independent from each other, so both must be checked.
+    assert!(is_x86_feature_detected!("bmi1"));
+    assert!(is_x86_feature_detected!("bmi2"));
+
+    unsafe {
+        test_bmi_32();
+        test_bmi_64();
+    }
+}
+
+/// Test the 32-bit variants of the intrinsics.
+unsafe fn test_bmi_32() {
+    unsafe fn test_bextr_u32() {
+        let r = _bextr_u32(0b0101_0000u32, 4, 4);
+        assert_eq!(r, 0b0000_0101u32);
+
+        for i in 0..16 {
+            assert_eq!(_bextr_u32(u32::MAX, i, 4), 0b1111);
+            assert_eq!(_bextr_u32(u32::MAX, 4, i), (1 << i) - 1);
+        }
+
+        // Ensure that indices larger than the bit count are covered.
+        // It is important to go above 32 in order to verify the bit selection
+        // of the instruction.
+
+        for i in 0..256 {
+            // If the length is out of bounds, the whole input is returned, thus the `min(32)`.
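+            // For example, `_bextr_u32(u32::MAX, 0, 40)` extracts all 32 set bits,
+            // so its `count_ones()` is 32.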
+            assert_eq!(_bextr_u32(u32::MAX, 0, i).count_ones(), i.min(32));
+        }
+
+        for i in 0..256 {
+            assert_eq!(_bextr_u32(u32::MAX, i, 0), 0);
+        }
+
+        // Test cases with completely random values. These cases also test
+        // that the function works even if upper bits of the control value are set.
+        assert_eq!(_bextr2_u32(0x7408a392, 0x54ef705), 0x3a0451c);
+        assert_eq!(_bextr2_u32(0xbc5a3494, 0xdd193203), 0x178b4692);
+        assert_eq!(_bextr2_u32(0xc0332325, 0xf96e207), 0x1806646);
+    }
+    test_bextr_u32();
+
+    unsafe fn test_pext_u32() {
+        let n = 0b1011_1110_1001_0011u32;
+
+        let m0 = 0b0110_0011_1000_0101u32;
+        let s0 = 0b0000_0000_0011_0101u32;
+
+        let m1 = 0b1110_1011_1110_1111u32;
+        let s1 = 0b0001_0111_0100_0011u32;
+
+        // Testing of random values.
+        assert_eq!(_pext_u32(n, m0), s0);
+        assert_eq!(_pext_u32(n, m1), s1);
+        assert_eq!(_pext_u32(0x12345678, 0xff00fff0), 0x00012567);
+
+        // Testing of various identities.
+        assert_eq!(_pext_u32(u32::MAX, u32::MAX), u32::MAX);
+        assert_eq!(_pext_u32(u32::MAX, 0), 0);
+        assert_eq!(_pext_u32(0, u32::MAX), 0);
+    }
+    test_pext_u32();
+
+    unsafe fn test_pdep_u32() {
+        let n = 0b1011_1110_1001_0011u32;
+
+        let m0 = 0b0110_0011_1000_0101u32;
+        let s0 = 0b0000_0010_0000_0101u32;
+
+        let m1 = 0b1110_1011_1110_1111u32;
+        let s1 = 0b1110_1001_0010_0011u32;
+
+        // Testing of random values.
+        assert_eq!(_pdep_u32(n, m0), s0);
+        assert_eq!(_pdep_u32(n, m1), s1);
+        assert_eq!(_pdep_u32(0x00012567, 0xff00fff0), 0x12005670);
+
+        // Testing of various identities.
+        assert_eq!(_pdep_u32(u32::MAX, u32::MAX), u32::MAX);
+        assert_eq!(_pdep_u32(0, u32::MAX), 0);
+        assert_eq!(_pdep_u32(u32::MAX, 0), 0);
+    }
+    test_pdep_u32();
+
+    unsafe fn test_bzhi_u32() {
+        let n = 0b1111_0010u32;
+        let s = 0b0001_0010u32;
+        assert_eq!(_bzhi_u32(n, 5), s);
+
+        // Ensure that indices larger than the bit count are covered.
+        // It is important to go above 255 in order to verify the bit selection
+        // of the instruction.
+        for i in 0..=512 {
+            // The instruction only takes the lowest eight bits to generate the index, hence `i & 0xff`.
+            // If the index is out of bounds, the original input won't be changed, thus the `min(32)`.
+            let expected = 1u32.checked_shl((i & 0xff).min(32)).unwrap_or(0).wrapping_sub(1);
+            let actual = _bzhi_u32(u32::MAX, i);
+            assert_eq!(expected, actual);
+        }
+    }
+    test_bzhi_u32();
+}
+
+#[cfg(not(target_arch = "x86_64"))]
+unsafe fn test_bmi_64() {}
+
+/// Test the 64-bit variants of the intrinsics.
+#[cfg(target_arch = "x86_64")]
+unsafe fn test_bmi_64() {
+    unsafe fn test_bextr_u64() {
+        let r = _bextr_u64(0b0101_0000u64, 4, 4);
+        assert_eq!(r, 0b0000_0101u64);
+
+        for i in 0..16 {
+            assert_eq!(_bextr_u64(u64::MAX, i, 4), 0b1111);
+            assert_eq!(_bextr_u64(u64::MAX, 32, i), (1 << i) - 1);
+        }
+
+        // Ensure that indices larger than the bit count are covered.
+        // It is important to go above 64 in order to verify the bit selection
+        // of the instruction.
+
+        for i in 0..256 {
+            // If the length is out of bounds, the whole input is returned, thus the `min(64)`.
+            assert_eq!(_bextr_u64(u64::MAX, 0, i).count_ones(), i.min(64));
+        }
+
+        for i in 0..256 {
+            assert_eq!(_bextr_u64(u64::MAX, i, 0), 0);
+        }
+
+        // Test cases with completely random values. These cases also test
+        // that the function works even if upper bits of the control value are set.
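+        // `_bextr2` takes the control value as a single operand: the starting
+        // index lives in its bits 0..=7 and the extraction length in bits 8..=15;
+        // all higher bits are ignored by the instruction.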
+        assert_eq!(_bextr2_u64(0x4ff6cfbcea75f055, 0x216642e228425719), 0x27fb67de75);
+        assert_eq!(_bextr2_u64(0xb05e991e6f6e1b6, 0xc76dd5d7f67dfc14), 0xb05e991e6f);
+        assert_eq!(_bextr2_u64(0x5a3a629e323d848f, 0x95ac507d20e7719), 0x2d1d314f19);
+    }
+    test_bextr_u64();
+
+    unsafe fn test_pext_u64() {
+        let n = 0b1011_1110_1001_0011u64;
+
+        let m0 = 0b0110_0011_1000_0101u64;
+        let s0 = 0b0000_0000_0011_0101u64;
+
+        let m1 = 0b1110_1011_1110_1111u64;
+        let s1 = 0b0001_0111_0100_0011u64;
+
+        // Testing of random values.
+        assert_eq!(_pext_u64(n, m0), s0);
+        assert_eq!(_pext_u64(n, m1), s1);
+        assert_eq!(_pext_u64(0x12345678, 0xff00fff0), 0x00012567);
+
+        // Testing of various identities.
+        assert_eq!(_pext_u64(u64::MAX, u64::MAX), u64::MAX);
+        assert_eq!(_pext_u64(u64::MAX, 0), 0);
+        assert_eq!(_pext_u64(0, u64::MAX), 0);
+    }
+    test_pext_u64();
+
+    unsafe fn test_pdep_u64() {
+        let n = 0b1011_1110_1001_0011u64;
+
+        let m0 = 0b0110_0011_1000_0101u64;
+        let s0 = 0b0000_0010_0000_0101u64;
+
+        let m1 = 0b1110_1011_1110_1111u64;
+        let s1 = 0b1110_1001_0010_0011u64;
+
+        // Testing of random values.
+        assert_eq!(_pdep_u64(n, m0), s0);
+        assert_eq!(_pdep_u64(n, m1), s1);
+        assert_eq!(_pdep_u64(0x00012567, 0xff00fff0), 0x12005670);
+
+        // Testing of various identities.
+        assert_eq!(_pdep_u64(u64::MAX, u64::MAX), u64::MAX);
+        assert_eq!(_pdep_u64(0, u64::MAX), 0);
+        assert_eq!(_pdep_u64(u64::MAX, 0), 0);
+    }
+    test_pdep_u64();
+
+    unsafe fn test_bzhi_u64() {
+        let n = 0b1111_0010u64;
+        let s = 0b0001_0010u64;
+        assert_eq!(_bzhi_u64(n, 5), s);
+
+        // Ensure that indices larger than the bit count are covered.
+        // It is important to go above 255 in order to verify the bit selection
+        // of the instruction.
+        for i in 0..=512 {
+            // The instruction only takes the lowest eight bits to generate the index, hence `i & 0xff`.
+            // If the index is out of bounds, the original input won't be changed, thus the `min(64)`.
+            let expected = 1u64.checked_shl((i & 0xff).min(64)).unwrap_or(0).wrapping_sub(1);
+            let actual = _bzhi_u64(u64::MAX, i);
+            assert_eq!(expected, actual);
+        }
+    }
+    test_bzhi_u64();
+}
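+
+// Note: `_bzhi` reads its index from the lowest eight bits of the second
+// operand, so `_bzhi_u64(u64::MAX, 256)` behaves like an index of 0 and
+// returns 0, while `_bzhi_u64(u64::MAX, 64)` returns the input unchanged.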