Skip to content

Commit 0c1451c

Browse files
committed
Revert "Revert "Use nbdd0121 suggestion for reducing the perf impact""
This reverts commit e136c3a9348200c261b9b3c1c50a2f6f6a68b4bd.
1 parent a84f4c9 commit 0c1451c

File tree

3 files changed

+36
-35
lines changed

3 files changed

+36
-35
lines changed

compiler/rustc_middle/src/ty/layout.rs

+28-30
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use rustc_session::{config::OptLevel, DataTypeKind, FieldInfo, SizeKind, Variant
1414
use rustc_span::symbol::Symbol;
1515
use rustc_span::{Span, DUMMY_SP};
1616
use rustc_target::abi::call::{
17-
ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, /* Reg, RegKind, */
17+
ArgAbi, ArgAttribute, ArgAttributes, ArgExtension, Conv, FnAbi, PassMode, Reg, RegKind,
1818
};
1919
use rustc_target::abi::*;
2020
use rustc_target::spec::{abi::Abi as SpecAbi, HasTargetSpec, PanicStrategy, Target};
@@ -3340,16 +3340,16 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
33403340
Ok(self.tcx.arena.alloc(fn_abi))
33413341
}
33423342

3343-
// /// Small heuristic for determining if layout has any float primitive
3344-
// fn has_all_float(&self, layout: &'_ TyAndLayout<'tcx>) -> bool {
3345-
// match layout.abi {
3346-
// Abi::Uninhabited | Abi::Vector { .. } => false,
3347-
// Abi::Scalar(scalar) => matches!(scalar.primitive(), Primitive::F32 | Primitive::F64),
3348-
// Abi::ScalarPair(..) | Abi::Aggregate { .. } => {
3349-
// (0..layout.fields.count()).all(|i| self.has_all_float(&layout.field(self, i)))
3350-
// }
3351-
// }
3352-
// }
3343+
/// Small heuristic for determining if every primitive in the layout is a float (`f32` or `f64`)
3344+
fn has_all_float(&self, layout: &'_ TyAndLayout<'tcx>) -> bool {
3345+
match layout.abi {
3346+
Abi::Uninhabited | Abi::Vector { .. } => false,
3347+
Abi::Scalar(scalar) => matches!(scalar.primitive(), Primitive::F32 | Primitive::F64),
3348+
Abi::ScalarPair(..) | Abi::Aggregate { .. } => {
3349+
(0..layout.fields.count()).all(|i| self.has_all_float(&layout.field(self, i)))
3350+
}
3351+
}
3352+
}
33533353

33543354
fn fn_abi_adjust_for_abi(
33553355
&self,
@@ -3375,29 +3375,27 @@ impl<'tcx> LayoutCx<'tcx, TyCtxt<'tcx>> {
33753375
// Pass and return structures up to 2 pointers in size by value,
33763376
// matching `ScalarPair`. LLVM will usually pass these in 2 registers
33773377
// which is more efficient than by-ref.
3378-
let max_by_val_size = Pointer.size(self) * 2;
3378+
let ptr_size = Pointer.size(self);
3379+
let max_by_val_size = ptr_size * 2;
33793380
let size = arg.layout.size;
33803381

33813382
if arg.layout.is_unsized() || size > max_by_val_size {
33823383
arg.make_indirect();
3383-
// } else if self.has_all_float(&arg.layout) {
3384-
// // We don't want to aggregate floats as an aggregates of Integer
3385-
// // because this will hurt the generated assembly (#93490)
3386-
// //
3387-
// // As an optimization we want to pass homogeneous aggregate of floats
3388-
// // greater than pointer size as indirect
3389-
// if size > Pointer.size(self) {
3390-
// arg.make_indirect();
3391-
// }
3392-
// } else {
3393-
// // We want to pass small aggregates as immediates, but using
3394-
// // a LLVM aggregate type for this leads to bad optimizations,
3395-
// // so we pick an appropriately sized integer type instead.
3396-
// //
3397-
// // NOTE: This is sub-optimal because in the case of (f32, f32, u32, u32)
3398-
// // we could do ([f32; 2], u64) which is better but this is the best we
3399-
// // can do right now.
3400-
// arg.cast_to(Reg { kind: RegKind::Integer, size });
3384+
} else if size > ptr_size && self.has_all_float(&arg.layout) {
3385+
// We don't want to pass an aggregate of floats as an aggregate of Integer
3385+
// because this will hurt the generated assembly (#93490), but as an
3386+
// optimization we want to pass homogeneous aggregates of floats
3387+
// greater than pointer size as indirect.
3389+
arg.make_indirect();
3390+
} else {
3391+
// We want to pass small aggregates as immediates, but using
3392+
// an LLVM aggregate type for this leads to bad optimizations,
3393+
// so we pick an appropriately sized integer type instead.
3394+
//
3395+
// NOTE: This is sub-optimal because in the case of (f32, f32, u32, u32)
3396+
// we could do ([f32; 2], u64) which is better but this is the best we
3397+
// can do right now.
3398+
arg.cast_to(Reg { kind: RegKind::Integer, size });
34013399
}
34023400
}
34033401

src/test/assembly/x86-64-homogenous-floats.rs

+7-4
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,15 @@ pub fn sum_f32(a: f32, b: f32) -> f32 {
1515
a + b
1616
}
1717

18-
// CHECK-LABEL: sum_f32x2:
19-
// CHECK: addss xmm{{[0-9]}}, xmm{{[0-9]}}
20-
// CHECK-NEXT: addss xmm{{[0-9]}}, xmm{{[0-9]}}
18+
// CHECK-LABEL: sum_f64x2:
19+
// CHECK: mov rax, [[PTR_IN:.*]]
20+
// CHECK-NEXT: movupd [[XMMA:xmm[0-9]]], xmmword ptr [rsi]
21+
// CHECK-NEXT: movupd [[XMMB:xmm[0-9]]], xmmword ptr [rdx]
22+
// CHECK-NEXT: addpd [[XMMB]], [[XMMA]]
23+
// CHECK-NEXT: movupd xmmword ptr {{\[}}[[PTR_IN]]{{\]}}, [[XMMB]]
2124
// CHECK-NEXT: ret
2225
#[no_mangle]
23-
pub fn sum_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
26+
pub fn sum_f64x2(a: [f64; 2], b: [f64; 2]) -> [f64; 2] {
2427
[
2528
a[0] + b[0],
2629
a[1] + b[1],

src/test/codegen/homogeneous-floats.rs

+1-1
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ pub struct Foo {
1313
bar4: f32,
1414
}
1515

16-
// CHECK: define [2 x float] @array_f32x2([2 x float] %0, [2 x float] %1)
16+
// CHECK: define i64 @array_f32x2(i64 %0, i64 %1)
1717
#[no_mangle]
1818
pub fn array_f32x2(a: [f32; 2], b: [f32; 2]) -> [f32; 2] {
1919
todo!()

0 commit comments

Comments
 (0)