Skip to content

Commit c1b478e

Browse files
committed
Auto merge of #121223 - RalfJung:simd-intrinsics, r=Amanieu
intrinsics::simd: add missing functions, avoid UB-triggering fast-math

Turns out stdarch declares a bunch more SIMD intrinsics that are still missing from libcore. I hope I got the docs and in particular the safety requirements right for these "unordered" and "nanless" intrinsics. Many of these are unused even in stdarch, but they are implemented in the codegen backend, so we may as well list them here.

r? `@Amanieu`
Cc `@calebzulawski` `@workingjubilee`
2 parents c5f69bd + 07b6240 commit c1b478e

File tree

12 files changed

+98
-46
lines changed

12 files changed

+98
-46
lines changed

compiler/rustc_codegen_cranelift/src/intrinsics/simd.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -743,7 +743,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
743743
simd_reduce(fx, v, None, ret, &|fx, _ty, a, b| fx.bcx.ins().bxor(a, b));
744744
}
745745

746-
sym::simd_reduce_min | sym::simd_reduce_min_nanless => {
746+
sym::simd_reduce_min => {
747747
intrinsic_args!(fx, args => (v); intrinsic);
748748

749749
if !v.layout().ty.is_simd() {
@@ -762,7 +762,7 @@ pub(super) fn codegen_simd_intrinsic_call<'tcx>(
762762
});
763763
}
764764

765-
sym::simd_reduce_max | sym::simd_reduce_max_nanless => {
765+
sym::simd_reduce_max => {
766766
intrinsic_args!(fx, args => (v); intrinsic);
767767

768768
if !v.layout().ty.is_simd() {

compiler/rustc_codegen_gcc/src/builder.rs

+2-2
Original file line numberDiff line numberDiff line change
@@ -1752,7 +1752,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
17521752
self.vector_reduce(src, |a, b, context| context.new_binary_op(None, op, a.get_type(), a, b))
17531753
}
17541754

1755-
pub fn vector_reduce_fadd_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
1755+
pub fn vector_reduce_fadd_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
17561756
unimplemented!();
17571757
}
17581758

@@ -1772,7 +1772,7 @@ impl<'a, 'gcc, 'tcx> Builder<'a, 'gcc, 'tcx> {
17721772
unimplemented!();
17731773
}
17741774

1775-
pub fn vector_reduce_fmul_fast(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
1775+
pub fn vector_reduce_fmul_reassoc(&mut self, _acc: RValue<'gcc>, _src: RValue<'gcc>) -> RValue<'gcc> {
17761776
unimplemented!();
17771777
}
17781778

compiler/rustc_codegen_gcc/src/intrinsic/simd.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -989,14 +989,14 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
989989

990990
arith_red!(
991991
simd_reduce_add_unordered: BinaryOp::Plus,
992-
vector_reduce_fadd_fast,
992+
vector_reduce_fadd_reassoc,
993993
false,
994994
add,
995995
0.0 // TODO: Use this argument.
996996
);
997997
arith_red!(
998998
simd_reduce_mul_unordered: BinaryOp::Mult,
999-
vector_reduce_fmul_fast,
999+
vector_reduce_fmul_reassoc,
10001000
false,
10011001
mul,
10021002
1.0
@@ -1041,9 +1041,6 @@ pub fn generic_simd_intrinsic<'a, 'gcc, 'tcx>(
10411041

10421042
minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
10431043
minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
1044-
// TODO(sadlerap): revisit these intrinsics to generate more optimal reductions
1045-
minmax_red!(simd_reduce_min_nanless: vector_reduce_min, vector_reduce_fmin);
1046-
minmax_red!(simd_reduce_max_nanless: vector_reduce_max, vector_reduce_fmax);
10471044

10481045
macro_rules! bitwise_red {
10491046
($name:ident : $op:expr, $boolean:expr) => {

compiler/rustc_codegen_llvm/src/builder.rs

+4-20
Original file line numberDiff line numberDiff line change
@@ -1367,17 +1367,17 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
13671367
pub fn vector_reduce_fmul(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
13681368
unsafe { llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src) }
13691369
}
1370-
pub fn vector_reduce_fadd_algebraic(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
1370+
pub fn vector_reduce_fadd_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
13711371
unsafe {
13721372
let instr = llvm::LLVMRustBuildVectorReduceFAdd(self.llbuilder, acc, src);
1373-
llvm::LLVMRustSetAlgebraicMath(instr);
1373+
llvm::LLVMRustSetAllowReassoc(instr);
13741374
instr
13751375
}
13761376
}
1377-
pub fn vector_reduce_fmul_algebraic(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
1377+
pub fn vector_reduce_fmul_reassoc(&mut self, acc: &'ll Value, src: &'ll Value) -> &'ll Value {
13781378
unsafe {
13791379
let instr = llvm::LLVMRustBuildVectorReduceFMul(self.llbuilder, acc, src);
1380-
llvm::LLVMRustSetAlgebraicMath(instr);
1380+
llvm::LLVMRustSetAllowReassoc(instr);
13811381
instr
13821382
}
13831383
}
@@ -1406,22 +1406,6 @@ impl<'a, 'll, 'tcx> Builder<'a, 'll, 'tcx> {
14061406
llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, /*NoNaNs:*/ false)
14071407
}
14081408
}
1409-
pub fn vector_reduce_fmin_fast(&mut self, src: &'ll Value) -> &'ll Value {
1410-
unsafe {
1411-
let instr =
1412-
llvm::LLVMRustBuildVectorReduceFMin(self.llbuilder, src, /*NoNaNs:*/ true);
1413-
llvm::LLVMRustSetFastMath(instr);
1414-
instr
1415-
}
1416-
}
1417-
pub fn vector_reduce_fmax_fast(&mut self, src: &'ll Value) -> &'ll Value {
1418-
unsafe {
1419-
let instr =
1420-
llvm::LLVMRustBuildVectorReduceFMax(self.llbuilder, src, /*NoNaNs:*/ true);
1421-
llvm::LLVMRustSetFastMath(instr);
1422-
instr
1423-
}
1424-
}
14251409
pub fn vector_reduce_min(&mut self, src: &'ll Value, is_signed: bool) -> &'ll Value {
14261410
unsafe { llvm::LLVMRustBuildVectorReduceMin(self.llbuilder, src, is_signed) }
14271411
}

compiler/rustc_codegen_llvm/src/intrinsic.rs

+2-5
Original file line numberDiff line numberDiff line change
@@ -1880,14 +1880,14 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
18801880
arith_red!(simd_reduce_mul_ordered: vector_reduce_mul, vector_reduce_fmul, true, mul, 1.0);
18811881
arith_red!(
18821882
simd_reduce_add_unordered: vector_reduce_add,
1883-
vector_reduce_fadd_algebraic,
1883+
vector_reduce_fadd_reassoc,
18841884
false,
18851885
add,
18861886
0.0
18871887
);
18881888
arith_red!(
18891889
simd_reduce_mul_unordered: vector_reduce_mul,
1890-
vector_reduce_fmul_algebraic,
1890+
vector_reduce_fmul_reassoc,
18911891
false,
18921892
mul,
18931893
1.0
@@ -1920,9 +1920,6 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
19201920
minmax_red!(simd_reduce_min: vector_reduce_min, vector_reduce_fmin);
19211921
minmax_red!(simd_reduce_max: vector_reduce_max, vector_reduce_fmax);
19221922

1923-
minmax_red!(simd_reduce_min_nanless: vector_reduce_min, vector_reduce_fmin_fast);
1924-
minmax_red!(simd_reduce_max_nanless: vector_reduce_max, vector_reduce_fmax_fast);
1925-
19261923
macro_rules! bitwise_red {
19271924
($name:ident : $red:ident, $boolean:expr) => {
19281925
if name == sym::$name {

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

+1
Original file line numberDiff line numberDiff line change
@@ -1619,6 +1619,7 @@ extern "C" {
16191619

16201620
pub fn LLVMRustSetFastMath(Instr: &Value);
16211621
pub fn LLVMRustSetAlgebraicMath(Instr: &Value);
1622+
pub fn LLVMRustSetAllowReassoc(Instr: &Value);
16221623

16231624
// Miscellaneous instructions
16241625
pub fn LLVMRustGetInstrProfIncrementIntrinsic(M: &Module) -> &Value;

compiler/rustc_hir_analysis/src/check/intrinsic.rs

+1-3
Original file line numberDiff line numberDiff line change
@@ -606,9 +606,7 @@ pub fn check_platform_intrinsic_type(
606606
| sym::simd_reduce_or
607607
| sym::simd_reduce_xor
608608
| sym::simd_reduce_min
609-
| sym::simd_reduce_max
610-
| sym::simd_reduce_min_nanless
611-
| sym::simd_reduce_max_nanless => (2, 0, vec![param(0)], param(1)),
609+
| sym::simd_reduce_max => (2, 0, vec![param(0)], param(1)),
612610
sym::simd_shuffle => (3, 0, vec![param(0), param(0), param(1)], param(2)),
613611
sym::simd_shuffle_generic => (2, 1, vec![param(0), param(0)], param(1)),
614612
_ => {

compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp

+14
Original file line numberDiff line numberDiff line change
@@ -450,6 +450,20 @@ extern "C" void LLVMRustSetAlgebraicMath(LLVMValueRef V) {
450450
}
451451
}
452452

453+
// Enable the reassoc fast-math flag, allowing transformations that pretend
454+
// floating-point addition and multiplication are associative.
455+
//
456+
// Note that this does NOT enable any flags which can cause a floating-point operation on
457+
// well-defined inputs to return poison, and therefore this function can be used to build
458+
// safe Rust intrinsics (such as fadd_algebraic).
459+
//
460+
// https://llvm.org/docs/LangRef.html#fast-math-flags
461+
extern "C" void LLVMRustSetAllowReassoc(LLVMValueRef V) {
462+
if (auto I = dyn_cast<Instruction>(unwrap<Value>(V))) {
463+
I->setHasAllowReassoc(true);
464+
}
465+
}
466+
453467
extern "C" LLVMValueRef
454468
LLVMRustBuildAtomicLoad(LLVMBuilderRef B, LLVMTypeRef Ty, LLVMValueRef Source,
455469
const char *Name, LLVMAtomicOrdering Order) {

compiler/rustc_span/src/symbol.rs

-2
Original file line numberDiff line numberDiff line change
@@ -1553,9 +1553,7 @@ symbols! {
15531553
simd_reduce_and,
15541554
simd_reduce_any,
15551555
simd_reduce_max,
1556-
simd_reduce_max_nanless,
15571556
simd_reduce_min,
1558-
simd_reduce_min_nanless,
15591557
simd_reduce_mul_ordered,
15601558
simd_reduce_mul_unordered,
15611559
simd_reduce_or,

library/core/src/intrinsics/simd.rs

+69
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,24 @@
33
//! In this module, a "vector" is any `repr(simd)` type.
44
55
extern "platform-intrinsic" {
6+
/// Insert an element into a vector, returning the updated vector.
7+
///
8+
/// `T` must be a vector with element type `U`.
9+
///
10+
/// # Safety
11+
///
12+
/// `idx` must be in-bounds of the vector.
13+
pub fn simd_insert<T, U>(x: T, idx: u32, val: U) -> T;
14+
15+
/// Extract an element from a vector.
16+
///
17+
/// `T` must be a vector with element type `U`.
18+
///
19+
/// # Safety
20+
///
21+
/// `idx` must be in-bounds of the vector.
22+
pub fn simd_extract<T, U>(x: T, idx: u32) -> U;
23+
624
/// Add two simd vectors elementwise.
725
///
826
/// `T` must be a vector of integer or floating point primitive types.
@@ -317,6 +335,14 @@ extern "platform-intrinsic" {
317335
/// Starting with the value `y`, add the elements of `x` and accumulate.
318336
pub fn simd_reduce_add_ordered<T, U>(x: T, y: U) -> U;
319337

338+
/// Add elements within a vector in arbitrary order. May also be re-associated with
339+
/// unordered additions on the inputs/outputs.
340+
///
341+
/// `T` must be a vector of integer or floating-point primitive types.
342+
///
343+
/// `U` must be the element type of `T`.
344+
pub fn simd_reduce_add_unordered<T, U>(x: T) -> U;
345+
320346
/// Multiply elements within a vector from left to right.
321347
///
322348
/// `T` must be a vector of integer or floating-point primitive types.
@@ -326,6 +352,14 @@ extern "platform-intrinsic" {
326352
/// Starting with the value `y`, multiply the elements of `x` and accumulate.
327353
pub fn simd_reduce_mul_ordered<T, U>(x: T, y: U) -> U;
328354

355+
/// Multiply elements within a vector in arbitrary order. May also be re-associated with
356+
/// unordered multiplications on the inputs/outputs.
357+
///
358+
/// `T` must be a vector of integer or floating-point primitive types.
359+
///
360+
/// `U` must be the element type of `T`.
361+
pub fn simd_reduce_mul_unordered<T, U>(x: T) -> U;
362+
329363
/// Check if all mask values are true.
330364
///
331365
/// `T` must be a vector of integer primitive types.
@@ -518,4 +552,39 @@ extern "platform-intrinsic" {
518552
///
519553
/// `T` must be a vector of floats.
520554
pub fn simd_fma<T>(x: T, y: T, z: T) -> T;
555+
556+
/// Computes the sine of each element.
557+
///
558+
/// `T` must be a vector of floats.
559+
pub fn simd_fsin<T>(a: T) -> T;
560+
561+
/// Computes the cosine of each element.
562+
///
563+
/// `T` must be a vector of floats.
564+
pub fn simd_fcos<T>(a: T) -> T;
565+
566+
/// Computes the exponential function of each element.
567+
///
568+
/// `T` must be a vector of floats.
569+
pub fn simd_fexp<T>(a: T) -> T;
570+
571+
/// Computes 2 raised to the power of each element.
572+
///
573+
/// `T` must be a vector of floats.
574+
pub fn simd_fexp2<T>(a: T) -> T;
575+
576+
/// Computes the base 10 logarithm of each element.
577+
///
578+
/// `T` must be a vector of floats.
579+
pub fn simd_flog10<T>(a: T) -> T;
580+
581+
/// Computes the base 2 logarithm of each element.
582+
///
583+
/// `T` must be a vector of floats.
584+
pub fn simd_flog2<T>(a: T) -> T;
585+
586+
/// Computes the natural logarithm of each element.
587+
///
588+
/// `T` must be a vector of floats.
589+
pub fn simd_flog<T>(a: T) -> T;
521590
}

tests/codegen/simd/issue-120720-reduce-nan.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ use std::arch::x86_64::*;
1212
#[no_mangle]
1313
#[target_feature(enable = "avx512f")] // Function-level target feature mismatches inhibit inlining
1414
pub unsafe fn demo() -> bool {
15-
// CHECK: %0 = tail call reassoc nsz arcp contract double @llvm.vector.reduce.fadd.v8f64(
15+
// CHECK: %0 = tail call reassoc double @llvm.vector.reduce.fadd.v8f64(
1616
// CHECK: %_0.i = fcmp uno double %0, 0.000000e+00
1717
// CHECK: ret i1 %_0.i
1818
let res = unsafe {

tests/ui/simd/intrinsic/generic-reduction-pass.rs

-6
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@ extern "platform-intrinsic" {
3131
fn simd_reduce_mul_ordered<T, U>(x: T, acc: U) -> U;
3232
fn simd_reduce_min<T, U>(x: T) -> U;
3333
fn simd_reduce_max<T, U>(x: T) -> U;
34-
fn simd_reduce_min_nanless<T, U>(x: T) -> U;
35-
fn simd_reduce_max_nanless<T, U>(x: T) -> U;
3634
fn simd_reduce_and<T, U>(x: T) -> U;
3735
fn simd_reduce_or<T, U>(x: T) -> U;
3836
fn simd_reduce_xor<T, U>(x: T) -> U;
@@ -127,10 +125,6 @@ fn main() {
127125
assert_eq!(r, -2_f32);
128126
let r: f32 = simd_reduce_max(x);
129127
assert_eq!(r, 4_f32);
130-
let r: f32 = simd_reduce_min_nanless(x);
131-
assert_eq!(r, -2_f32);
132-
let r: f32 = simd_reduce_max_nanless(x);
133-
assert_eq!(r, 4_f32);
134128
}
135129

136130
unsafe {

0 commit comments

Comments
 (0)