diff --git a/compiler/rustc_codegen_llvm/src/back/lto.rs b/compiler/rustc_codegen_llvm/src/back/lto.rs index 655e1c9537376..84302009da999 100644 --- a/compiler/rustc_codegen_llvm/src/back/lto.rs +++ b/compiler/rustc_codegen_llvm/src/back/lto.rs @@ -654,6 +654,7 @@ pub(crate) fn run_pass_manager( // We then run the llvm_optimize function a second time, to optimize the code which we generated // in the enzyme differentiation pass. let enable_ad = config.autodiff.contains(&config::AutoDiff::Enable); + let enable_gpu = config.offload.contains(&config::Offload::Enable); let stage = if thin { write::AutodiffStage::PreAD } else { @@ -668,6 +669,12 @@ pub(crate) fn run_pass_manager( write::llvm_optimize(cgcx, dcx, module, None, config, opt_level, opt_stage, stage)?; } + if enable_gpu && !thin { + let cx = + SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size); + crate::builder::gpu_offload::handle_gpu_code(cgcx, &cx); + } + if cfg!(llvm_enzyme) && enable_ad && !thin { let cx = SimpleCx::new(module.module_llvm.llmod(), &module.module_llvm.llcx, cgcx.pointer_size); diff --git a/compiler/rustc_codegen_llvm/src/builder.rs b/compiler/rustc_codegen_llvm/src/builder.rs index 514923ad6f37f..0ade9edb0d2ea 100644 --- a/compiler/rustc_codegen_llvm/src/builder.rs +++ b/compiler/rustc_codegen_llvm/src/builder.rs @@ -3,6 +3,7 @@ use std::ops::Deref; use std::{iter, ptr}; pub(crate) mod autodiff; +pub(crate) mod gpu_offload; use libc::{c_char, c_uint, size_t}; use rustc_abi as abi; @@ -117,6 +118,74 @@ impl<'a, 'll, CX: Borrow>> GenericBuilder<'a, 'll, CX> { } bx } + + // The generic builder has less functionality and thus (unlike the other alloca) we can not + // easily jump to the beginning of the function to place our allocas there. We trust the user + // to manually do that. FIXME(offload): improve the genericCx and add more llvm wrappers to + // handle this. + pub(crate) fn direct_alloca(&mut self, ty: &'ll Type, align: Align, name: &str) -> &'ll Value { + let val = unsafe { + let alloca = llvm::LLVMBuildAlloca(self.llbuilder, ty, UNNAMED); + llvm::LLVMSetAlignment(alloca, align.bytes() as c_uint); + // Cast to default addrspace if necessary + llvm::LLVMBuildPointerCast(self.llbuilder, alloca, self.cx.type_ptr(), UNNAMED) + }; + if name != "" { + let name = std::ffi::CString::new(name).unwrap(); + llvm::set_value_name(val, &name.as_bytes()); + } + val + } + + pub(crate) fn inbounds_gep( + &mut self, + ty: &'ll Type, + ptr: &'ll Value, + indices: &[&'ll Value], + ) -> &'ll Value { + unsafe { + llvm::LLVMBuildGEPWithNoWrapFlags( + self.llbuilder, + ty, + ptr, + indices.as_ptr(), + indices.len() as c_uint, + UNNAMED, + GEPNoWrapFlags::InBounds, + ) + } + } + + pub(crate) fn store(&mut self, val: &'ll Value, ptr: &'ll Value, align: Align) -> &'ll Value { + debug!("Store {:?} -> {:?}", val, ptr); + assert_eq!(self.cx.type_kind(self.cx.val_ty(ptr)), TypeKind::Pointer); + unsafe { + let store = llvm::LLVMBuildStore(self.llbuilder, val, ptr); + llvm::LLVMSetAlignment(store, align.bytes() as c_uint); + store + } + } + + pub(crate) fn load(&mut self, ty: &'ll Type, ptr: &'ll Value, align: Align) -> &'ll Value { + unsafe { + let load = llvm::LLVMBuildLoad2(self.llbuilder, ty, ptr, UNNAMED); + llvm::LLVMSetAlignment(load, align.bytes() as c_uint); + load + } + } + + fn memset(&mut self, ptr: &'ll Value, fill_byte: &'ll Value, size: &'ll Value, align: Align) { + unsafe { + llvm::LLVMRustBuildMemSet( + self.llbuilder, + ptr, + align.bytes() as c_uint, + fill_byte, + size, + false, + ); + } + } } /// Empty string, to be used where LLVM expects an instruction name, indicating diff --git a/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs new file mode 100644 index 0000000000000..1280ab1442a09 --- /dev/null +++ b/compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs @@ -0,0 +1,439 @@ +use std::ffi::CString; + +use llvm::Linkage::*; +use rustc_abi::Align; +use rustc_codegen_ssa::back::write::CodegenContext; +use rustc_codegen_ssa::traits::BaseTypeCodegenMethods; + +use crate::builder::SBuilder; +use crate::common::AsCCharPtr; +use crate::llvm::AttributePlace::Function; +use crate::llvm::{self, Linkage, Type, Value}; +use crate::{LlvmCodegenBackend, SimpleCx, attributes}; + +pub(crate) fn handle_gpu_code<'ll>( + _cgcx: &CodegenContext, + cx: &'ll SimpleCx<'_>, +) { + // The offload memory transfer type for each kernel + let mut o_types = vec![]; + let mut kernels = vec![]; + let offload_entry_ty = add_tgt_offload_entry(&cx); + for num in 0..9 { + let kernel = cx.get_function(&format!("kernel_{num}")); + if let Some(kernel) = kernel { + o_types.push(gen_define_handling(&cx, kernel, offload_entry_ty, num)); + kernels.push(kernel); + } + } + + gen_call_handling(&cx, &kernels, &o_types); +} + +// What is our @1 here? A magic global, used in our data_{begin/update/end}_mapper: +// @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +// @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 +fn generate_at_one<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Value { + // @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 + let unknown_txt = ";unknown;unknown;0;0;;"; + let c_entry_name = CString::new(unknown_txt).unwrap(); + let c_val = c_entry_name.as_bytes_with_nul(); + let initializer = crate::common::bytes_in_context(cx.llcx, c_val); + let at_zero = add_unnamed_global(&cx, &"", initializer, PrivateLinkage); + llvm::set_alignment(at_zero, Align::ONE); + + // @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 + let struct_ident_ty = cx.type_named_struct("struct.ident_t"); + let struct_elems = vec![ + cx.get_const_i32(0), + cx.get_const_i32(2), + cx.get_const_i32(0), + cx.get_const_i32(22), + at_zero, + ]; + let struct_elems_ty: Vec<_> = struct_elems.iter().map(|&x| cx.val_ty(x)).collect(); + let initializer = crate::common::named_struct(struct_ident_ty, &struct_elems); + cx.set_struct_body(struct_ident_ty, &struct_elems_ty, false); + let at_one = add_unnamed_global(&cx, &"", initializer, PrivateLinkage); + llvm::set_alignment(at_one, Align::EIGHT); + at_one +} + +pub(crate) fn add_tgt_offload_entry<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Type { + let offload_entry_ty = cx.type_named_struct("struct.__tgt_offload_entry"); + let tptr = cx.type_ptr(); + let ti64 = cx.type_i64(); + let ti32 = cx.type_i32(); + let ti16 = cx.type_i16(); + // For each kernel to run on the gpu, we will later generate one entry of this type. + // copied from LLVM + // typedef struct { + // uint64_t Reserved; + // uint16_t Version; + // uint16_t Kind; + // uint32_t Flags; Flags associated with the entry (see Target Region Entry Flags) + // void *Address; Address of global symbol within device image (function or global) + // char *SymbolName; + // uint64_t Size; Size of the entry info (0 if it is a function) + // uint64_t Data; + // void *AuxAddr; + // } __tgt_offload_entry; + let entry_elements = vec![ti64, ti16, ti16, ti32, tptr, tptr, ti64, ti64, tptr]; + cx.set_struct_body(offload_entry_ty, &entry_elements, false); + offload_entry_ty +} + +fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) { + let kernel_arguments_ty = cx.type_named_struct("struct.__tgt_kernel_arguments"); + let tptr = cx.type_ptr(); + let ti64 = cx.type_i64(); + let ti32 = cx.type_i32(); + let tarr = cx.type_array(ti32, 3); + + // Taken from the LLVM APITypes.h declaration: + //struct KernelArgsTy { + // uint32_t Version = 0; // Version of this struct for ABI compatibility. + // uint32_t NumArgs = 0; // Number of arguments in each input pointer. + // void **ArgBasePtrs = + // nullptr; // Base pointer of each argument (e.g. a struct). + // void **ArgPtrs = nullptr; // Pointer to the argument data. + // int64_t *ArgSizes = nullptr; // Size of the argument data in bytes. + // int64_t *ArgTypes = nullptr; // Type of the data (e.g. to / from). + // void **ArgNames = nullptr; // Name of the data for debugging, possibly null. + // void **ArgMappers = nullptr; // User-defined mappers, possibly null. + // uint64_t Tripcount = + // 0; // Tripcount for the teams / distribute loop, 0 otherwise. + // struct { + // uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause. + // uint64_t IsCUDA : 1; // Was this kernel spawned via CUDA. + // uint64_t Unused : 62; + // } Flags = {0, 0, 0}; + // // The number of teams (for x,y,z dimension). + // uint32_t NumTeams[3] = {0, 0, 0}; + // // The number of threads (for x,y,z dimension). + // uint32_t ThreadLimit[3] = {0, 0, 0}; + // uint32_t DynCGroupMem = 0; // Amount of dynamic cgroup memory requested. + //}; + let kernel_elements = + vec![ti32, ti32, tptr, tptr, tptr, tptr, tptr, tptr, ti64, ti64, tarr, tarr, ti32]; + + cx.set_struct_body(kernel_arguments_ty, &kernel_elements, false); + // For now we don't handle kernels, so for now we just add a global dummy + // to make sure that the __tgt_offload_entry is defined and handled correctly. + cx.declare_global("my_struct_global2", kernel_arguments_ty); +} + +fn gen_tgt_data_mappers<'ll>( + cx: &'ll SimpleCx<'_>, +) -> (&'ll llvm::Value, &'ll llvm::Value, &'ll llvm::Value, &'ll llvm::Type) { + let tptr = cx.type_ptr(); + let ti64 = cx.type_i64(); + let ti32 = cx.type_i32(); + + let args = vec![tptr, ti64, ti32, tptr, tptr, tptr, tptr, tptr, tptr]; + let mapper_fn_ty = cx.type_func(&args, cx.type_void()); + let mapper_begin = "__tgt_target_data_begin_mapper"; + let mapper_update = "__tgt_target_data_update_mapper"; + let mapper_end = "__tgt_target_data_end_mapper"; + let begin_mapper_decl = declare_offload_fn(&cx, mapper_begin, mapper_fn_ty); + let update_mapper_decl = declare_offload_fn(&cx, mapper_update, mapper_fn_ty); + let end_mapper_decl = declare_offload_fn(&cx, mapper_end, mapper_fn_ty); + + let nounwind = llvm::AttributeKind::NoUnwind.create_attr(cx.llcx); + attributes::apply_to_llfn(begin_mapper_decl, Function, &[nounwind]); + attributes::apply_to_llfn(update_mapper_decl, Function, &[nounwind]); + attributes::apply_to_llfn(end_mapper_decl, Function, &[nounwind]); + + (begin_mapper_decl, update_mapper_decl, end_mapper_decl, mapper_fn_ty) +} + +fn add_priv_unnamed_arr<'ll>(cx: &SimpleCx<'ll>, name: &str, vals: &[u64]) -> &'ll llvm::Value { + let ti64 = cx.type_i64(); + let mut size_val = Vec::with_capacity(vals.len()); + for &val in vals { + size_val.push(cx.get_const_i64(val)); + } + let initializer = cx.const_array(ti64, &size_val); + add_unnamed_global(cx, name, initializer, PrivateLinkage) +} + +pub(crate) fn add_unnamed_global<'ll>( + cx: &SimpleCx<'ll>, + name: &str, + initializer: &'ll llvm::Value, + l: Linkage, +) -> &'ll llvm::Value { + let llglobal = add_global(cx, name, initializer, l); + llvm::LLVMSetUnnamedAddress(llglobal, llvm::UnnamedAddr::Global); + llglobal +} + +pub(crate) fn add_global<'ll>( + cx: &SimpleCx<'ll>, + name: &str, + initializer: &'ll llvm::Value, + l: Linkage, +) -> &'ll llvm::Value { + let c_name = CString::new(name).unwrap(); + let llglobal: &'ll llvm::Value = llvm::add_global(cx.llmod, cx.val_ty(initializer), &c_name); + llvm::set_global_constant(llglobal, true); + llvm::set_linkage(llglobal, l); + llvm::set_initializer(llglobal, initializer); + llglobal +} + +fn gen_define_handling<'ll>( + cx: &'ll SimpleCx<'_>, + kernel: &'ll llvm::Value, + offload_entry_ty: &'ll llvm::Type, + num: i64, +) -> &'ll llvm::Value { + let types = cx.func_params_types(cx.get_type_of_global(kernel)); + // It seems like non-pointer values are automatically mapped. So here, we focus on pointer (or + // reference) types. + let num_ptr_types = types + .iter() + .map(|&x| matches!(cx.type_kind(x), rustc_codegen_ssa::common::TypeKind::Pointer)) + .count(); + + // We do not know their size anymore at this level, so hardcode a placeholder. + // A follow-up pr will track these from the frontend, where we still have Rust types. + // Then, we will be able to figure out that e.g. `&[f32;256]` will result in 4*256 bytes. + // I decided that 1024 bytes is a great placeholder value for now. + add_priv_unnamed_arr(&cx, &format!(".offload_sizes.{num}"), &vec![1024; num_ptr_types]); + // Here we figure out whether something needs to be copied to the gpu (=1), from the gpu (=2), + // or both to and from the gpu (=3). Other values shouldn't affect us for now. + // A non-mutable reference or pointer will be 1, an array that's not read, but fully overwritten + // will be 2. For now, everything is 3, until we have our frontend set up. + let o_types = + add_priv_unnamed_arr(&cx, &format!(".offload_maptypes.{num}"), &vec![3; num_ptr_types]); + // Next: For each function, generate these three entries. A weak constant, + // the llvm.rodata entry name, and the omp_offloading_entries value + + let name = format!(".kernel_{num}.region_id"); + let initializer = cx.get_const_i8(0); + let region_id = add_unnamed_global(&cx, &name, initializer, WeakAnyLinkage); + + let c_entry_name = CString::new(format!("kernel_{num}")).unwrap(); + let c_val = c_entry_name.as_bytes_with_nul(); + let offload_entry_name = format!(".offloading.entry_name.{num}"); + + let initializer = crate::common::bytes_in_context(cx.llcx, c_val); + let llglobal = add_unnamed_global(&cx, &offload_entry_name, initializer, InternalLinkage); + llvm::set_alignment(llglobal, Align::ONE); + llvm::set_section(llglobal, c".llvm.rodata.offloading"); + + // Not actively used yet, for calling real kernels + let name = format!(".offloading.entry.kernel_{num}"); + + // See the __tgt_offload_entry documentation above. + let reserved = cx.get_const_i64(0); + let version = cx.get_const_i16(1); + let kind = cx.get_const_i16(1); + let flags = cx.get_const_i32(0); + let size = cx.get_const_i64(0); + let data = cx.get_const_i64(0); + let aux_addr = cx.const_null(cx.type_ptr()); + let elems = vec![reserved, version, kind, flags, region_id, llglobal, size, data, aux_addr]; + + let initializer = crate::common::named_struct(offload_entry_ty, &elems); + let c_name = CString::new(name).unwrap(); + let llglobal = llvm::add_global(cx.llmod, offload_entry_ty, &c_name); + llvm::set_global_constant(llglobal, true); + llvm::set_linkage(llglobal, WeakAnyLinkage); + llvm::set_initializer(llglobal, initializer); + llvm::set_alignment(llglobal, Align::ONE); + let c_section_name = CString::new(".omp_offloading_entries").unwrap(); + llvm::set_section(llglobal, &c_section_name); + o_types +} + +fn declare_offload_fn<'ll>( + cx: &'ll SimpleCx<'_>, + name: &str, + ty: &'ll llvm::Type, +) -> &'ll llvm::Value { + crate::declare::declare_simple_fn( + cx, + name, + llvm::CallConv::CCallConv, + llvm::UnnamedAddr::No, + llvm::Visibility::Default, + ty, + ) +} + +// For each kernel *call*, we now use some of our previous declared globals to move data to and from +// the gpu. We don't have a proper frontend yet, so we assume that every call to a kernel function +// from main is intended to run on the GPU. For now, we only handle the data transfer part of it. +// If two consecutive kernels use the same memory, we still move it to the host and back to the gpu. +// Since in our frontend users (by default) don't have to specify data transfer, this is something +// we should optimize in the future! We also assume that everything should be copied back and forth, +// but sometimes we can directly zero-allocate on the device and only move back, or if something is +// immutable, we might only copy it to the device, but not back. +// +// Current steps: +// 0. Alloca some variables for the following steps +// 1. set insert point before kernel call. +// 2. generate all the GEPS and stores, to be used in 3) +// 3. generate __tgt_target_data_begin calls to move data to the GPU +// +// unchanged: keep kernel call. Later move the kernel to the GPU +// +// 4. set insert point after kernel call. +// 5. generate all the GEPS and stores, to be used in 6) +// 6. generate __tgt_target_data_end calls to move data from the GPU +fn gen_call_handling<'ll>( + cx: &'ll SimpleCx<'_>, + _kernels: &[&'ll llvm::Value], + o_types: &[&'ll llvm::Value], +) { + // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } + let tptr = cx.type_ptr(); + let ti32 = cx.type_i32(); + let tgt_bin_desc_ty = vec![ti32, tptr, tptr, tptr]; + let tgt_bin_desc = cx.type_named_struct("struct.__tgt_bin_desc"); + cx.set_struct_body(tgt_bin_desc, &tgt_bin_desc_ty, false); + + gen_tgt_kernel_global(&cx); + let (begin_mapper_decl, _, end_mapper_decl, fn_ty) = gen_tgt_data_mappers(&cx); + + let main_fn = cx.get_function("main"); + let Some(main_fn) = main_fn else { return }; + let kernel_name = "kernel_1"; + let call = unsafe { + llvm::LLVMRustGetFunctionCall(main_fn, kernel_name.as_c_char_ptr(), kernel_name.len()) + }; + let Some(kernel_call) = call else { + return; + }; + let kernel_call_bb = unsafe { llvm::LLVMGetInstructionParent(kernel_call) }; + let called = unsafe { llvm::LLVMGetCalledValue(kernel_call).unwrap() }; + let mut builder = SBuilder::build(cx, kernel_call_bb); + + let types = cx.func_params_types(cx.get_type_of_global(called)); + let num_args = types.len() as u64; + + // Step 0) + // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } + // %6 = alloca %struct.__tgt_bin_desc, align 8 + unsafe { llvm::LLVMRustPositionBuilderPastAllocas(builder.llbuilder, main_fn) }; + + let tgt_bin_desc_alloca = builder.direct_alloca(tgt_bin_desc, Align::EIGHT, "EmptyDesc"); + + let ty = cx.type_array(cx.type_ptr(), num_args); + // Baseptr are just the input pointer to the kernel, stored in a local alloca + let a1 = builder.direct_alloca(ty, Align::EIGHT, ".offload_baseptrs"); + // Ptrs are the result of a gep into the baseptr, at least for our trivial types. + let a2 = builder.direct_alloca(ty, Align::EIGHT, ".offload_ptrs"); + // These represent the sizes in bytes, e.g. the entry for `&[f64; 16]` will be 8*16. + let ty2 = cx.type_array(cx.type_i64(), num_args); + let a4 = builder.direct_alloca(ty2, Align::EIGHT, ".offload_sizes"); + // Now we allocate once per function param, a copy to be passed to one of our maps. + let mut vals = vec![]; + let mut geps = vec![]; + let i32_0 = cx.get_const_i32(0); + for (index, in_ty) in types.iter().enumerate() { + // get function arg, store it into the alloca, and read it. + let p = llvm::get_param(called, index as u32); + let name = llvm::get_value_name(p); + let name = str::from_utf8(&name).unwrap(); + let arg_name = format!("{name}.addr"); + let alloca = builder.direct_alloca(in_ty, Align::EIGHT, &arg_name); + + builder.store(p, alloca, Align::EIGHT); + let val = builder.load(in_ty, alloca, Align::EIGHT); + let gep = builder.inbounds_gep(cx.type_f32(), val, &[i32_0]); + vals.push(val); + geps.push(gep); + } + + // Step 1) + unsafe { llvm::LLVMRustPositionBefore(builder.llbuilder, kernel_call) }; + builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT); + + let mapper_fn_ty = cx.type_func(&[cx.type_ptr()], cx.type_void()); + let register_lib_decl = declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty); + let unregister_lib_decl = declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty); + let init_ty = cx.type_func(&[], cx.type_void()); + let init_rtls_decl = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty); + + // call void @__tgt_register_lib(ptr noundef %6) + builder.call(mapper_fn_ty, register_lib_decl, &[tgt_bin_desc_alloca], None); + // call void @__tgt_init_all_rtls() + builder.call(init_ty, init_rtls_decl, &[], None); + + for i in 0..num_args { + let idx = cx.get_const_i32(i); + let gep1 = builder.inbounds_gep(ty, a1, &[i32_0, idx]); + builder.store(vals[i as usize], gep1, Align::EIGHT); + let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, idx]); + builder.store(geps[i as usize], gep2, Align::EIGHT); + let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, idx]); + // As mentioned above, we don't use Rust type information yet. So for now we will just + // assume that we have 1024 bytes, 256 f32 values. + // FIXME(offload): write an offload frontend and handle arbitrary types. + builder.store(cx.get_const_i64(1024), gep3, Align::EIGHT); + } + + // For now we have a very simplistic indexing scheme into our + // offload_{baseptrs,ptrs,sizes}. We will probably improve this along with our gpu frontend pr. + fn get_geps<'a, 'll>( + builder: &mut SBuilder<'a, 'll>, + cx: &'ll SimpleCx<'ll>, + ty: &'ll Type, + ty2: &'ll Type, + a1: &'ll Value, + a2: &'ll Value, + a4: &'ll Value, + ) -> (&'ll Value, &'ll Value, &'ll Value) { + let i32_0 = cx.get_const_i32(0); + + let gep1 = builder.inbounds_gep(ty, a1, &[i32_0, i32_0]); + let gep2 = builder.inbounds_gep(ty, a2, &[i32_0, i32_0]); + let gep3 = builder.inbounds_gep(ty2, a4, &[i32_0, i32_0]); + (gep1, gep2, gep3) + } + + fn generate_mapper_call<'a, 'll>( + builder: &mut SBuilder<'a, 'll>, + cx: &'ll SimpleCx<'ll>, + geps: (&'ll Value, &'ll Value, &'ll Value), + o_type: &'ll Value, + fn_to_call: &'ll Value, + fn_ty: &'ll Type, + num_args: u64, + s_ident_t: &'ll Value, + ) { + let nullptr = cx.const_null(cx.type_ptr()); + let i64_max = cx.get_const_i64(u64::MAX); + let num_args = cx.get_const_i32(num_args); + let args = + vec![s_ident_t, i64_max, num_args, geps.0, geps.1, geps.2, o_type, nullptr, nullptr]; + builder.call(fn_ty, fn_to_call, &args, None); + } + + // Step 2) + let s_ident_t = generate_at_one(&cx); + let o = o_types[0]; + let geps = get_geps(&mut builder, &cx, ty, ty2, a1, a2, a4); + generate_mapper_call(&mut builder, &cx, geps, o, begin_mapper_decl, fn_ty, num_args, s_ident_t); + + // Step 3) + // Here we will add code for the actual kernel launches in a follow-up PR. + // FIXME(offload): launch kernels + + // Step 4) + unsafe { llvm::LLVMRustPositionAfter(builder.llbuilder, kernel_call) }; + + let geps = get_geps(&mut builder, &cx, ty, ty2, a1, a2, a4); + generate_mapper_call(&mut builder, &cx, geps, o, end_mapper_decl, fn_ty, num_args, s_ident_t); + + builder.call(mapper_fn_ty, unregister_lib_decl, &[tgt_bin_desc_alloca], None); + + // With this we generated the following begin and end mappers. We could easily generate the + // update mapper in an update. + // call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 3, ptr %27, ptr %28, ptr %29, ptr @.offload_maptypes, ptr null, ptr null) + // call void @__tgt_target_data_update_mapper(ptr @1, i64 -1, i32 2, ptr %46, ptr %47, ptr %48, ptr @.offload_maptypes.1, ptr null, ptr null) + // call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 3, ptr %49, ptr %50, ptr %51, ptr @.offload_maptypes, ptr null, ptr null) +} diff --git a/compiler/rustc_codegen_llvm/src/common.rs b/compiler/rustc_codegen_llvm/src/common.rs index f9ab96b578951..f29fefb66f0fe 100644 --- a/compiler/rustc_codegen_llvm/src/common.rs +++ b/compiler/rustc_codegen_llvm/src/common.rs @@ -118,6 +118,10 @@ impl<'ll, CX: Borrow>> GenericCx<'ll, CX> { r } } + + pub(crate) fn const_null(&self, t: &'ll Type) -> &'ll Value { + unsafe { llvm::LLVMConstNull(t) } + } } impl<'ll, 'tcx> ConstCodegenMethods for CodegenCx<'ll, 'tcx> { @@ -377,6 +381,11 @@ pub(crate) fn bytes_in_context<'ll>(llcx: &'ll llvm::Context, bytes: &[u8]) -> & } } +pub(crate) fn named_struct<'ll>(ty: &'ll Type, elts: &[&'ll Value]) -> &'ll Value { + let len = c_uint::try_from(elts.len()).expect("LLVMConstStructInContext elements len overflow"); + unsafe { llvm::LLVMConstNamedStruct(ty, elts.as_ptr(), len) } +} + fn struct_in_context<'ll>( llcx: &'ll llvm::Context, elts: &[&'ll Value], diff --git a/compiler/rustc_codegen_llvm/src/context.rs b/compiler/rustc_codegen_llvm/src/context.rs index 34bed2a1d2a3d..ee77774c68832 100644 --- a/compiler/rustc_codegen_llvm/src/context.rs +++ b/compiler/rustc_codegen_llvm/src/context.rs @@ -216,7 +216,7 @@ pub(crate) unsafe fn create_module<'ll>( // Ensure the data-layout values hardcoded remain the defaults. { - let tm = crate::back::write::create_informational_target_machine(tcx.sess, false); + let tm = crate::back::write::create_informational_target_machine(sess, false); unsafe { llvm::LLVMRustSetDataLayoutFromTargetMachine(llmod, tm.raw()); } @@ -685,6 +685,22 @@ impl<'ll, CX: Borrow>> GenericCx<'ll, CX> { unsafe { llvm::LLVMConstInt(ty, val, llvm::False) } } + pub(crate) fn get_const_i64(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i64(), n) + } + + pub(crate) fn get_const_i32(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i32(), n) + } + + pub(crate) fn get_const_i16(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i16(), n) + } + + pub(crate) fn get_const_i8(&self, n: u64) -> &'ll Value { + self.get_const_int(self.type_i8(), n) + } + pub(crate) fn get_function(&self, name: &str) -> Option<&'ll Value> { let name = SmallCStr::new(name); unsafe { llvm::LLVMGetNamedFunction((**self).borrow().llmod, name.as_ptr()) } diff --git a/compiler/rustc_codegen_llvm/src/declare.rs b/compiler/rustc_codegen_llvm/src/declare.rs index eb75716d768bb..960a895a2031c 100644 --- a/compiler/rustc_codegen_llvm/src/declare.rs +++ b/compiler/rustc_codegen_llvm/src/declare.rs @@ -215,7 +215,9 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { llfn } +} +impl<'ll, CX: Borrow>> GenericCx<'ll, CX> { /// Declare a global with an intention to define it. /// /// Use this function when you intend to define a global. This function will @@ -234,13 +236,13 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { /// /// Use this function when you intend to define a global without a name. pub(crate) fn define_private_global(&self, ty: &'ll Type) -> &'ll Value { - unsafe { llvm::LLVMRustInsertPrivateGlobal(self.llmod, ty) } + unsafe { llvm::LLVMRustInsertPrivateGlobal(self.llmod(), ty) } } /// Gets declared value by name. pub(crate) fn get_declared_value(&self, name: &str) -> Option<&'ll Value> { debug!("get_declared_value(name={:?})", name); - unsafe { llvm::LLVMRustGetNamedValue(self.llmod, name.as_c_char_ptr(), name.len()) } + unsafe { llvm::LLVMRustGetNamedValue(self.llmod(), name.as_c_char_ptr(), name.len()) } } /// Gets defined or externally defined (AvailableExternally linkage) value by diff --git a/compiler/rustc_codegen_llvm/src/lib.rs b/compiler/rustc_codegen_llvm/src/lib.rs index 6db4e122ad6e8..aaf21f9ada9a5 100644 --- a/compiler/rustc_codegen_llvm/src/lib.rs +++ b/compiler/rustc_codegen_llvm/src/lib.rs @@ -412,6 +412,20 @@ impl ModuleLlvm { } } + fn tm_from_cgcx( + cgcx: &CodegenContext, + name: &str, + dcx: DiagCtxtHandle<'_>, + ) -> Result { + let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, name); + match (cgcx.tm_factory)(tm_factory_config) { + Ok(m) => Ok(m), + Err(e) => { + return Err(dcx.emit_almost_fatal(ParseTargetMachineConfig(e))); + } + } + } + fn parse( cgcx: &CodegenContext, name: &CStr, @@ -421,13 +435,7 @@ impl ModuleLlvm { unsafe { let llcx = llvm::LLVMRustContextCreate(cgcx.fewer_names); let llmod_raw = back::lto::parse_module(llcx, name, buffer, dcx)?; - let tm_factory_config = TargetMachineFactoryConfig::new(cgcx, name.to_str().unwrap()); - let tm = match (cgcx.tm_factory)(tm_factory_config) { - Ok(m) => m, - Err(e) => { - return Err(dcx.emit_almost_fatal(ParseTargetMachineConfig(e))); - } - }; + let tm = ModuleLlvm::tm_from_cgcx(cgcx, name.to_str().unwrap(), dcx)?; Ok(ModuleLlvm { llmod_raw, llcx, tm: ManuallyDrop::new(tm) }) } diff --git a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs index c696b8d8ff25f..56d756e52cce1 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/enzyme_ffi.rs @@ -4,7 +4,7 @@ use libc::{c_char, c_uint}; use super::MetadataKindId; use super::ffi::{AttributeKind, BasicBlock, Metadata, Module, Type, Value}; -use crate::llvm::Bool; +use crate::llvm::{Bool, Builder}; #[link(name = "llvm-wrapper", kind = "static")] unsafe extern "C" { @@ -31,6 +31,14 @@ unsafe extern "C" { index: c_uint, kind: AttributeKind, ); + pub(crate) fn LLVMRustPositionBefore<'a>(B: &'a Builder<'_>, I: &'a Value); + pub(crate) fn LLVMRustPositionAfter<'a>(B: &'a Builder<'_>, I: &'a Value); + pub(crate) fn LLVMRustGetFunctionCall( + F: &Value, + name: *const c_char, + NameLen: libc::size_t, + ) -> Option<&Value>; + } unsafe extern "C" { diff --git a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs index 80a0e5c5accc2..edfb29dd1be72 100644 --- a/compiler/rustc_codegen_llvm/src/llvm/ffi.rs +++ b/compiler/rustc_codegen_llvm/src/llvm/ffi.rs @@ -1138,6 +1138,11 @@ unsafe extern "C" { Count: c_uint, Packed: Bool, ) -> &'a Value; + pub(crate) fn LLVMConstNamedStruct<'a>( + StructTy: &'a Type, + ConstantVals: *const &'a Value, + Count: c_uint, + ) -> &'a Value; pub(crate) fn LLVMConstVector(ScalarConstantVals: *const &Value, Size: c_uint) -> &Value; // Constant expressions @@ -1217,6 +1222,8 @@ unsafe extern "C" { ) -> &'a BasicBlock; // Operations on instructions + pub(crate) fn LLVMGetInstructionParent(Inst: &Value) -> &BasicBlock; + pub(crate) fn LLVMGetCalledValue(CallInst: &Value) -> Option<&Value>; pub(crate) fn LLVMIsAInstruction(Val: &Value) -> Option<&Value>; pub(crate) fn LLVMGetFirstBasicBlock(Fn: &Value) -> &BasicBlock; pub(crate) fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>; @@ -2557,6 +2564,7 @@ unsafe extern "C" { pub(crate) fn LLVMRustSetDataLayoutFromTargetMachine<'a>(M: &'a Module, TM: &'a TargetMachine); + pub(crate) fn LLVMRustPositionBuilderPastAllocas<'a>(B: &Builder<'a>, Fn: &'a Value); pub(crate) fn LLVMRustPositionBuilderAtStart<'a>(B: &Builder<'a>, BB: &'a BasicBlock); pub(crate) fn LLVMRustSetModulePICLevel(M: &Module); diff --git a/compiler/rustc_codegen_ssa/src/back/write.rs b/compiler/rustc_codegen_ssa/src/back/write.rs index 50a7cba300b4b..24e0a4eb53331 100644 --- a/compiler/rustc_codegen_ssa/src/back/write.rs +++ b/compiler/rustc_codegen_ssa/src/back/write.rs @@ -120,6 +120,7 @@ pub struct ModuleConfig { pub emit_lifetime_markers: bool, pub llvm_plugins: Vec, pub autodiff: Vec, + pub offload: Vec, } impl ModuleConfig { @@ -268,6 +269,7 @@ impl ModuleConfig { emit_lifetime_markers: sess.emit_lifetime_markers(), llvm_plugins: if_regular!(sess.opts.unstable_opts.llvm_plugins.clone(), vec![]), autodiff: if_regular!(sess.opts.unstable_opts.autodiff.clone(), vec![]), + offload: if_regular!(sess.opts.unstable_opts.offload.clone(), vec![]), } } diff --git a/compiler/rustc_interface/src/tests.rs b/compiler/rustc_interface/src/tests.rs index 360b5629e9d6e..8771bb4405049 100644 --- a/compiler/rustc_interface/src/tests.rs +++ b/compiler/rustc_interface/src/tests.rs @@ -13,10 +13,10 @@ use rustc_session::config::{ CoverageOptions, DebugInfo, DumpMonoStatsFormat, ErrorOutputType, ExternEntry, ExternLocation, Externs, FmtDebug, FunctionReturn, InliningThreshold, Input, InstrumentCoverage, InstrumentXRay, LinkSelfContained, LinkerPluginLto, LocationDetail, LtoCli, MirIncludeSpans, - NextSolverConfig, OomStrategy, Options, OutFileName, OutputType, OutputTypes, PAuthKey, PacRet, - Passes, PatchableFunctionEntry, Polonius, ProcMacroExecutionStrategy, Strip, SwitchWithOptPath, - SymbolManglingVersion, WasiExecModel, build_configuration, build_session_options, - rustc_optgroups, + NextSolverConfig, Offload, OomStrategy, Options, OutFileName, OutputType, OutputTypes, + PAuthKey, PacRet, Passes, PatchableFunctionEntry, Polonius, ProcMacroExecutionStrategy, Strip, + SwitchWithOptPath, SymbolManglingVersion, WasiExecModel, build_configuration, + build_session_options, rustc_optgroups, }; use rustc_session::lint::Level; use rustc_session::search_paths::SearchPath; @@ -833,6 +833,7 @@ fn test_unstable_options_tracking_hash() { tracked!(no_profiler_runtime, true); tracked!(no_trait_vptr, true); tracked!(no_unique_section_names, true); + tracked!(offload, vec![Offload::Enable]); tracked!(on_broken_pipe, OnBrokenPipe::Kill); tracked!(oom, OomStrategy::Panic); tracked!(osx_rpath_install_name, true); diff --git a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp index 90aa9188c8300..82568ed4ae177 100644 --- a/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp +++ b/compiler/rustc_llvm/llvm-wrapper/RustWrapper.cpp @@ -1591,12 +1591,49 @@ extern "C" LLVMValueRef LLVMRustBuildMemSet(LLVMBuilderRef B, LLVMValueRef Dst, MaybeAlign(DstAlign), IsVolatile)); } +extern "C" void LLVMRustPositionBuilderPastAllocas(LLVMBuilderRef B, + LLVMValueRef Fn) { + Function *F = unwrap(Fn); + unwrap(B)->SetInsertPointPastAllocas(F); +} extern "C" void LLVMRustPositionBuilderAtStart(LLVMBuilderRef B, LLVMBasicBlockRef BB) { auto Point = unwrap(BB)->getFirstInsertionPt(); unwrap(B)->SetInsertPoint(unwrap(BB), Point); } +extern "C" void LLVMRustPositionBefore(LLVMBuilderRef B, LLVMValueRef Instr) { + if (auto I = dyn_cast(unwrap(Instr))) { + unwrap(B)->SetInsertPoint(I); + } +} + +extern "C" void LLVMRustPositionAfter(LLVMBuilderRef B, LLVMValueRef Instr) { + if (auto I = dyn_cast(unwrap(Instr))) { + auto J = I->getNextNonDebugInstruction(); + unwrap(B)->SetInsertPoint(J); + } +} + +extern "C" LLVMValueRef +LLVMRustGetFunctionCall(LLVMValueRef Fn, const char *Name, size_t NameLen) { + auto targetName = StringRef(Name, NameLen); + Function *F = unwrap(Fn); + for (auto &BB : *F) { + for (auto &I : BB) { + if (auto *callInst = llvm::dyn_cast(&I)) { + const llvm::Function *calledFunc = callInst->getCalledFunction(); + if (calledFunc && calledFunc->getName() == targetName) { + // Found a call to the target function + return wrap(callInst); + } + } + } + } + + return nullptr; +} + extern "C" bool LLVMRustConstIntGetZExtValue(LLVMValueRef CV, uint64_t *value) { auto C = unwrap(CV); if (C->getBitWidth() > 64) diff --git a/compiler/rustc_middle/src/ty/inhabitedness/mod.rs b/compiler/rustc_middle/src/ty/inhabitedness/mod.rs index 2a336cc21f492..7eb74b52b44c6 100644 --- a/compiler/rustc_middle/src/ty/inhabitedness/mod.rs +++ b/compiler/rustc_middle/src/ty/inhabitedness/mod.rs @@ -43,7 +43,6 @@ //! This code should only compile in modules where the uninhabitedness of `Foo` //! is visible. -use rustc_span::sym; use rustc_type_ir::TyKind::*; use tracing::instrument; @@ -85,21 +84,6 @@ impl<'tcx> VariantDef { InhabitedPredicate::all( tcx, self.fields.iter().map(|field| { - // Unstable fields are always considered to be inhabited. In the future, - // this could be extended to be conditional on the field being unstable - // only within the module that's querying the inhabitedness, like: - // `let pred = pred.or(InhabitedPredicate::IsUnstable(field.did));` - // but this is unnecessary for now, since it would only affect nightly-only - // code or code within the standard library itself. - // HACK: We filter out `rustc_private` fields since with the flag - // `-Zforce-unstable-if-unmarked` we consider all unmarked fields to be - // unstable when building the compiler. - if tcx - .lookup_stability(field.did) - .is_some_and(|stab| stab.is_unstable() && stab.feature != sym::rustc_private) - { - return InhabitedPredicate::True; - } let pred = tcx.type_of(field.did).instantiate_identity().inhabited_predicate(tcx); if adt.is_enum() { return pred; diff --git a/compiler/rustc_mir_transform/src/elaborate_drops.rs b/compiler/rustc_mir_transform/src/elaborate_drops.rs index b4fa2be1d00e7..58dff4514a04d 100644 --- a/compiler/rustc_mir_transform/src/elaborate_drops.rs +++ b/compiler/rustc_mir_transform/src/elaborate_drops.rs @@ -253,8 +253,8 @@ struct ElaborateDropsCtxt<'a, 'tcx> { } impl fmt::Debug for ElaborateDropsCtxt<'_, '_> { - fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> fmt::Result { - Ok(()) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("ElaborateDropsCtxt").finish_non_exhaustive() } } diff --git a/compiler/rustc_mir_transform/src/shim.rs b/compiler/rustc_mir_transform/src/shim.rs index cdbc74cdfa841..6c65b072bec08 100644 --- a/compiler/rustc_mir_transform/src/shim.rs +++ b/compiler/rustc_mir_transform/src/shim.rs @@ -434,8 +434,8 @@ pub(super) struct DropShimElaborator<'a, 'tcx> { } impl fmt::Debug for DropShimElaborator<'_, '_> { - fn fmt(&self, _f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { - Ok(()) + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> { + f.debug_struct("DropShimElaborator").finish_non_exhaustive() } } diff --git a/compiler/rustc_pattern_analysis/src/rustc.rs b/compiler/rustc_pattern_analysis/src/rustc.rs index ee72b676b38e1..0c1b0d622f229 100644 --- a/compiler/rustc_pattern_analysis/src/rustc.rs +++ b/compiler/rustc_pattern_analysis/src/rustc.rs @@ -16,7 +16,7 @@ use rustc_middle::ty::{ }; use rustc_middle::{bug, span_bug}; use rustc_session::lint; -use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span, sym}; +use rustc_span::{DUMMY_SP, ErrorGuaranteed, Span}; use crate::constructor::Constructor::*; use crate::constructor::{ @@ -238,10 +238,7 @@ impl<'p, 'tcx: 'p> RustcPatCtxt<'p, 'tcx> { let is_visible = adt.is_enum() || field.vis.is_accessible_from(cx.module, cx.tcx); let is_uninhabited = cx.is_uninhabited(*ty); - let is_unstable = cx.tcx.lookup_stability(field.did).is_some_and(|stab| { - stab.is_unstable() && stab.feature != sym::rustc_private - }); - let skip = is_uninhabited && (!is_visible || is_unstable); + let skip = is_uninhabited && !is_visible; (ty, PrivateUninhabitedField(skip)) }); cx.dropless_arena.alloc_from_iter(tys) diff --git a/compiler/rustc_session/src/config.rs b/compiler/rustc_session/src/config.rs index d6215e1de043a..7bea8685724ad 100644 --- a/compiler/rustc_session/src/config.rs +++ b/compiler/rustc_session/src/config.rs @@ -226,6 +226,13 @@ pub enum CoverageLevel { Mcdc, } +// The different settings that the `-Z offload` flag can have. +#[derive(Clone, Copy, PartialEq, Hash, Debug)] +pub enum Offload { + /// Enable the llvm offload pipeline + Enable, +} + /// The different settings that the `-Z autodiff` flag can have. #[derive(Clone, PartialEq, Hash, Debug)] pub enum AutoDiff { @@ -2706,6 +2713,15 @@ pub fn build_session_options(early_dcx: &mut EarlyDiagCtxt, matches: &getopts::M ) } + if !nightly_options::is_unstable_enabled(matches) + && unstable_opts.offload.contains(&Offload::Enable) + { + early_dcx.early_fatal( + "`-Zoffload=Enable` also requires `-Zunstable-options` \ + and a nightly compiler", + ) + } + let target_triple = parse_target_triple(early_dcx, matches); // Ensure `-Z unstable-options` is required when using the unstable `-C link-self-contained` and @@ -3178,7 +3194,7 @@ pub(crate) mod dep_tracking { AutoDiff, BranchProtection, CFGuard, CFProtection, CollapseMacroDebuginfo, CoverageOptions, CrateType, DebugInfo, DebugInfoCompression, ErrorOutputType, FmtDebug, FunctionReturn, InliningThreshold, InstrumentCoverage, InstrumentXRay, LinkerPluginLto, LocationDetail, - LtoCli, MirStripDebugInfo, NextSolverConfig, OomStrategy, OptLevel, OutFileName, + LtoCli, MirStripDebugInfo, NextSolverConfig, Offload, OomStrategy, OptLevel, OutFileName, OutputType, OutputTypes, PatchableFunctionEntry, Polonius, RemapPathScopeComponents, ResolveDocLinks, SourceFileHashAlgorithm, SplitDwarfKind, SwitchWithOptPath, SymbolManglingVersion, WasiExecModel, @@ -3225,6 +3241,7 @@ pub(crate) mod dep_tracking { impl_dep_tracking_hash_via_hash!( (), AutoDiff, + Offload, bool, usize, NonZero, diff --git a/compiler/rustc_session/src/options.rs b/compiler/rustc_session/src/options.rs index 2bdde2f887a30..b33e3815ea449 100644 --- a/compiler/rustc_session/src/options.rs +++ b/compiler/rustc_session/src/options.rs @@ -726,6 +726,7 @@ mod desc { pub(crate) const parse_list_with_polarity: &str = "a comma-separated list of strings, with elements beginning with + or -"; pub(crate) const parse_autodiff: &str = "a comma separated list of settings: `Enable`, `PrintSteps`, `PrintTA`, `PrintTAFn`, `PrintAA`, `PrintPerf`, `PrintModBefore`, `PrintModAfter`, `PrintModFinal`, `PrintPasses`, `NoPostopt`, `LooseTypes`, `Inline`"; + pub(crate) const parse_offload: &str = "a comma separated list of settings: `Enable`"; pub(crate) const parse_comma_list: &str = "a comma-separated list of strings"; pub(crate) const parse_opt_comma_list: &str = parse_comma_list; pub(crate) const parse_number: &str = "a number"; @@ -1357,6 +1358,27 @@ pub mod parse { } } + pub(crate) fn parse_offload(slot: &mut Vec, v: Option<&str>) -> bool { + let Some(v) = v else { + *slot = vec![]; + return true; + }; + let mut v: Vec<&str> = v.split(",").collect(); + v.sort_unstable(); + for &val in v.iter() { + let variant = match val { + "Enable" => Offload::Enable, + _ => { + // FIXME(ZuseZ4): print an error saying which value is not recognized + return false; + } + }; + slot.push(variant); + } + + true + } + pub(crate) fn parse_autodiff(slot: &mut Vec, v: Option<&str>) -> bool { let Some(v) = v else { *slot = vec![]; @@ -2401,6 +2423,11 @@ options! { "do not use unique names for text and data sections when -Z function-sections is used"), normalize_docs: bool = (false, parse_bool, [TRACKED], "normalize associated items in rustdoc when generating documentation"), + offload: Vec = (Vec::new(), parse_offload, [TRACKED], + "a list of offload flags to enable + Mandatory setting: + `=Enable` + Currently the only option available"), on_broken_pipe: OnBrokenPipe = (OnBrokenPipe::Default, parse_on_broken_pipe, [TRACKED], "behavior of std::io::ErrorKind::BrokenPipe (SIGPIPE)"), oom: OomStrategy = (OomStrategy::Abort, parse_oom_strategy, [TRACKED], diff --git a/library/core/src/num/f32.rs b/library/core/src/num/f32.rs index f8344da79ad40..6c7ba491971c5 100644 --- a/library/core/src/num/f32.rs +++ b/library/core/src/num/f32.rs @@ -1936,8 +1936,8 @@ pub mod math { /// let abs_difference_x = (f32::math::abs_sub(x, 1.0) - 2.0).abs(); /// let abs_difference_y = (f32::math::abs_sub(y, 1.0) - 0.0).abs(); /// - /// assert!(abs_difference_x <= f32::EPSILON); - /// assert!(abs_difference_y <= f32::EPSILON); + /// assert!(abs_difference_x <= 1e-6); + /// assert!(abs_difference_y <= 1e-6); /// ``` /// /// _This standalone function is for testing only. @@ -1982,7 +1982,7 @@ pub mod math { /// // x^(1/3) - 2 == 0 /// let abs_difference = (f32::math::cbrt(x) - 2.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` /// /// _This standalone function is for testing only. diff --git a/library/std/src/num/f32.rs b/library/std/src/num/f32.rs index 2bff73add33d6..2c7752cc137f3 100644 --- a/library/std/src/num/f32.rs +++ b/library/std/src/num/f32.rs @@ -582,8 +582,8 @@ impl f32 { /// let abs_difference_x = (x.abs_sub(1.0) - 2.0).abs(); /// let abs_difference_y = (y.abs_sub(1.0) - 0.0).abs(); /// - /// assert!(abs_difference_x <= f32::EPSILON); - /// assert!(abs_difference_y <= f32::EPSILON); + /// assert!(abs_difference_x <= 1e-6); + /// assert!(abs_difference_y <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -621,7 +621,7 @@ impl f32 { /// // x^(1/3) - 2 == 0 /// let abs_difference = (x.cbrt() - 2.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -652,7 +652,7 @@ impl f32 { /// // sqrt(x^2 + y^2) /// let abs_difference = (x.hypot(y) - (x.powi(2) + y.powi(2)).sqrt()).abs(); /// - /// assert!(abs_difference <= 1e-6); + /// assert!(abs_difference <= 1e-5); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -725,7 +725,7 @@ impl f32 { /// let x = std::f32::consts::FRAC_PI_4; /// let abs_difference = (x.tan() - 1.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -813,7 +813,7 @@ impl f32 { /// // atan(tan(1)) /// let abs_difference = (f.tan().atan() - 1.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[doc(alias = "arctan")] #[rustc_allow_incoherent_impl] @@ -854,8 +854,8 @@ impl f32 { /// let abs_difference_1 = (y1.atan2(x1) - (-std::f32::consts::FRAC_PI_4)).abs(); /// let abs_difference_2 = (y2.atan2(x2) - (3.0 * std::f32::consts::FRAC_PI_4)).abs(); /// - /// assert!(abs_difference_1 <= f32::EPSILON); - /// assert!(abs_difference_2 <= f32::EPSILON); + /// assert!(abs_difference_1 <= 1e-5); + /// assert!(abs_difference_2 <= 1e-5); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -982,7 +982,7 @@ impl f32 { /// let g = ((e * e) - 1.0) / (2.0 * e); /// let abs_difference = (f - g).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -1012,7 +1012,7 @@ impl f32 { /// let abs_difference = (f - g).abs(); /// /// // Same result - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -1042,7 +1042,7 @@ impl f32 { /// let g = (1.0 - e.powi(-2)) / (1.0 + e.powi(-2)); /// let abs_difference = (f - g).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -1095,7 +1095,7 @@ impl f32 { /// /// let abs_difference = (f - x).abs(); /// - /// assert!(abs_difference <= 1e-6); + /// assert!(abs_difference <= 1e-5); /// ``` #[doc(alias = "arccosh")] #[rustc_allow_incoherent_impl] @@ -1125,7 +1125,7 @@ impl f32 { /// /// let abs_difference = (f - e).abs(); /// - /// assert!(abs_difference <= 1e-5); + /// assert!(abs_difference <= 1e-4); /// ``` #[doc(alias = "arctanh")] #[rustc_allow_incoherent_impl] @@ -1153,7 +1153,7 @@ impl f32 { /// /// let abs_difference = (x.gamma() - 24.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-4); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -1248,7 +1248,7 @@ impl f32 { /// let one = x.erf() + x.erfc(); /// let abs_difference = (one - 1.0).abs(); /// - /// assert!(abs_difference <= f32::EPSILON); + /// assert!(abs_difference <= 1e-6); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] diff --git a/library/std/src/num/f64.rs b/library/std/src/num/f64.rs index b71e319f4074f..76c45c5c2e806 100644 --- a/library/std/src/num/f64.rs +++ b/library/std/src/num/f64.rs @@ -1153,7 +1153,7 @@ impl f64 { /// /// let abs_difference = (x.gamma() - 24.0).abs(); /// - /// assert!(abs_difference <= f64::EPSILON); + /// assert!(abs_difference <= 1e-13); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] @@ -1248,7 +1248,7 @@ impl f64 { /// let one = x.erf() + x.erfc(); /// let abs_difference = (one - 1.0).abs(); /// - /// assert!(abs_difference <= f64::EPSILON); + /// assert!(abs_difference <= 1e-13); /// ``` #[rustc_allow_incoherent_impl] #[must_use = "method returns a new number and does not mutate the original value"] diff --git a/library/std/tests/floats/f32.rs b/library/std/tests/floats/f32.rs index 38c906c1d8771..bea9e8282a690 100644 --- a/library/std/tests/floats/f32.rs +++ b/library/std/tests/floats/f32.rs @@ -79,7 +79,7 @@ fn test_log() { let nan: f32 = f32::NAN; let inf: f32 = f32::INFINITY; let neg_inf: f32 = f32::NEG_INFINITY; - assert_approx_eq!(10.0f32.log(10.0), 1.0); + assert_approx_eq!(10.0f32.log(10.0), 1.0, APPROX_DELTA); assert_approx_eq!(2.3f32.log(3.5), 0.664858); assert_approx_eq!(1.0f32.exp().log(1.0f32.exp()), 1.0, APPROX_DELTA); assert!(1.0f32.log(1.0).is_nan()); @@ -140,10 +140,10 @@ fn test_asinh() { assert_approx_eq!(2.0f32.asinh(), 1.443635475178810342493276740273105f32); assert_approx_eq!((-2.0f32).asinh(), -1.443635475178810342493276740273105f32); // regression test for the catastrophic cancellation fixed in 72486 - assert_approx_eq!((-3000.0f32).asinh(), -8.699514775987968673236893537700647f32); + assert_approx_eq!((-3000.0f32).asinh(), -8.699514775987968673236893537700647f32, APPROX_DELTA); // test for low accuracy from issue 104548 - assert_approx_eq!(60.0f32, 60.0f32.sinh().asinh()); + assert_approx_eq!(60.0f32, 60.0f32.sinh().asinh(), APPROX_DELTA); // mul needed for approximate comparison to be meaningful assert_approx_eq!(1.0f32, 1e-15f32.sinh().asinh() * 1e15f32); } @@ -196,10 +196,10 @@ fn test_gamma() { assert_approx_eq!(1.0f32.gamma(), 1.0f32); assert_approx_eq!(2.0f32.gamma(), 1.0f32); assert_approx_eq!(3.0f32.gamma(), 2.0f32); - assert_approx_eq!(4.0f32.gamma(), 6.0f32); - assert_approx_eq!(5.0f32.gamma(), 24.0f32); + assert_approx_eq!(4.0f32.gamma(), 6.0f32, APPROX_DELTA); + assert_approx_eq!(5.0f32.gamma(), 24.0f32, APPROX_DELTA); assert_approx_eq!(0.5f32.gamma(), consts::PI.sqrt()); - assert_approx_eq!((-0.5f32).gamma(), -2.0 * consts::PI.sqrt()); + assert_approx_eq!((-0.5f32).gamma(), -2.0 * consts::PI.sqrt(), APPROX_DELTA); assert_eq!(0.0f32.gamma(), f32::INFINITY); assert_eq!((-0.0f32).gamma(), f32::NEG_INFINITY); assert!((-1.0f32).gamma().is_nan()); @@ -218,7 +218,7 @@ fn test_ln_gamma() { assert_eq!(2.0f32.ln_gamma().1, 1); assert_approx_eq!(3.0f32.ln_gamma().0, 2.0f32.ln()); assert_eq!(3.0f32.ln_gamma().1, 1); - assert_approx_eq!((-0.5f32).ln_gamma().0, (2.0 * consts::PI.sqrt()).ln()); + assert_approx_eq!((-0.5f32).ln_gamma().0, (2.0 * consts::PI.sqrt()).ln(), APPROX_DELTA); assert_eq!((-0.5f32).ln_gamma().1, -1); } diff --git a/src/doc/unstable-book/src/compiler-flags/offload.md b/src/doc/unstable-book/src/compiler-flags/offload.md new file mode 100644 index 0000000000000..4266e8c11a285 --- /dev/null +++ b/src/doc/unstable-book/src/compiler-flags/offload.md @@ -0,0 +1,8 @@ +# `offload` + +The tracking issue for this feature is: [#131513](https://github.com/rust-lang/rust/issues/131513). + +------------------------ + +This feature will later allow you to run functions on GPUs. It is work in progress. +Set the `-Zoffload=Enable` compiler flag to experiment with it. diff --git a/src/tools/enzyme b/src/tools/enzyme index b5098d515d5e1..2cccfba93c165 160000 --- a/src/tools/enzyme +++ b/src/tools/enzyme @@ -1 +1 @@ -Subproject commit b5098d515d5e1bd0f5470553bc0d18da9794ca8b +Subproject commit 2cccfba93c1650f26f1cf8be8aa875a7c1d23fb3 diff --git a/src/tools/miri/src/intrinsics/mod.rs b/src/tools/miri/src/intrinsics/mod.rs index 4efa7dd4dcf81..1ffba2726a2a9 100644 --- a/src/tools/miri/src/intrinsics/mod.rs +++ b/src/tools/miri/src/intrinsics/mod.rs @@ -3,20 +3,16 @@ mod atomic; mod simd; -use std::ops::Neg; - use rand::Rng; use rustc_abi::Size; -use rustc_apfloat::ieee::{IeeeFloat, Semantics}; use rustc_apfloat::{self, Float, Round}; use rustc_middle::mir; -use rustc_middle::ty::{self, FloatTy, ScalarInt}; +use rustc_middle::ty::{self, FloatTy}; use rustc_span::{Symbol, sym}; use self::atomic::EvalContextExt as _; use self::helpers::{ToHost, ToSoft, check_intrinsic_arg_count}; use self::simd::EvalContextExt as _; -use crate::math::{IeeeExt, apply_random_float_error_ulp}; use crate::*; impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {} @@ -191,7 +187,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let [f] = check_intrinsic_arg_count(args)?; let f = this.read_scalar(f)?.to_f32()?; - let res = fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| { + let res = math::fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| { // Using host floats (but it's fine, these operations do not have // guaranteed precision). let host = f.to_host(); @@ -209,7 +205,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - let res = apply_random_float_error_ulp( + let res = math::apply_random_float_error_ulp( this, res, 2, // log2(4) @@ -217,7 +213,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Clamp the result to the guaranteed range of this function according to the C standard, // if any. - clamp_float_value(intrinsic_name, res) + math::clamp_float_value(intrinsic_name, res) }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; @@ -235,7 +231,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let [f] = check_intrinsic_arg_count(args)?; let f = this.read_scalar(f)?.to_f64()?; - let res = fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| { + let res = math::fixed_float_value(this, intrinsic_name, &[f]).unwrap_or_else(|| { // Using host floats (but it's fine, these operations do not have // guaranteed precision). let host = f.to_host(); @@ -253,7 +249,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - let res = apply_random_float_error_ulp( + let res = math::apply_random_float_error_ulp( this, res, 2, // log2(4) @@ -261,7 +257,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { // Clamp the result to the guaranteed range of this function according to the C standard, // if any. - clamp_float_value(intrinsic_name, res) + math::clamp_float_value(intrinsic_name, res) }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; @@ -312,16 +308,17 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let f1 = this.read_scalar(f1)?.to_f32()?; let f2 = this.read_scalar(f2)?.to_f32()?; - let res = fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| { - // Using host floats (but it's fine, this operation does not have guaranteed precision). - let res = f1.to_host().powf(f2.to_host()).to_soft(); + let res = + math::fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| { + // Using host floats (but it's fine, this operation does not have guaranteed precision). + let res = f1.to_host().powf(f2.to_host()).to_soft(); - // Apply a relative error of 4ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - apply_random_float_error_ulp( - this, res, 2, // log2(4) - ) - }); + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + math::apply_random_float_error_ulp( + this, res, 2, // log2(4) + ) + }); let res = this.adjust_nan(res, &[f1, f2]); this.write_scalar(res, dest)?; } @@ -330,16 +327,17 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let f1 = this.read_scalar(f1)?.to_f64()?; let f2 = this.read_scalar(f2)?.to_f64()?; - let res = fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| { - // Using host floats (but it's fine, this operation does not have guaranteed precision). - let res = f1.to_host().powf(f2.to_host()).to_soft(); + let res = + math::fixed_float_value(this, intrinsic_name, &[f1, f2]).unwrap_or_else(|| { + // Using host floats (but it's fine, this operation does not have guaranteed precision). + let res = f1.to_host().powf(f2.to_host()).to_soft(); - // Apply a relative error of 4ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - apply_random_float_error_ulp( - this, res, 2, // log2(4) - ) - }); + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + math::apply_random_float_error_ulp( + this, res, 2, // log2(4) + ) + }); let res = this.adjust_nan(res, &[f1, f2]); this.write_scalar(res, dest)?; } @@ -349,13 +347,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let f = this.read_scalar(f)?.to_f32()?; let i = this.read_scalar(i)?.to_i32()?; - let res = fixed_powi_float_value(this, f, i).unwrap_or_else(|| { + let res = math::fixed_powi_value(this, f, i).unwrap_or_else(|| { // Using host floats (but it's fine, this operation does not have guaranteed precision). let res = f.to_host().powi(i).to_soft(); // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - apply_random_float_error_ulp( + math::apply_random_float_error_ulp( this, res, 2, // log2(4) ) }); @@ -367,13 +365,13 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { let f = this.read_scalar(f)?.to_f64()?; let i = this.read_scalar(i)?.to_i32()?; - let res = fixed_powi_float_value(this, f, i).unwrap_or_else(|| { + let res = math::fixed_powi_value(this, f, i).unwrap_or_else(|| { // Using host floats (but it's fine, this operation does not have guaranteed precision). let res = f.to_host().powi(i).to_soft(); // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - apply_random_float_error_ulp( + math::apply_random_float_error_ulp( this, res, 2, // log2(4) ) }); @@ -430,7 +428,7 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { } // Apply a relative error of 4ULP to simulate non-deterministic precision loss // due to optimizations. - let res = apply_random_float_error_to_imm(this, res, 2 /* log2(4) */)?; + let res = math::apply_random_float_error_to_imm(this, res, 2 /* log2(4) */)?; this.write_immediate(*res, dest)?; } @@ -467,133 +465,3 @@ pub trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> { interp_ok(EmulateItemResult::NeedsReturn) } } - -/// Applies a random ULP floating point error to `val` and returns the new value. -/// So if you want an X ULP error, `ulp_exponent` should be log2(X). -/// -/// Will fail if `val` is not a floating point number. -fn apply_random_float_error_to_imm<'tcx>( - ecx: &mut MiriInterpCx<'tcx>, - val: ImmTy<'tcx>, - ulp_exponent: u32, -) -> InterpResult<'tcx, ImmTy<'tcx>> { - let scalar = val.to_scalar_int()?; - let res: ScalarInt = match val.layout.ty.kind() { - ty::Float(FloatTy::F16) => - apply_random_float_error_ulp(ecx, scalar.to_f16(), ulp_exponent).into(), - ty::Float(FloatTy::F32) => - apply_random_float_error_ulp(ecx, scalar.to_f32(), ulp_exponent).into(), - ty::Float(FloatTy::F64) => - apply_random_float_error_ulp(ecx, scalar.to_f64(), ulp_exponent).into(), - ty::Float(FloatTy::F128) => - apply_random_float_error_ulp(ecx, scalar.to_f128(), ulp_exponent).into(), - _ => bug!("intrinsic called with non-float input type"), - }; - - interp_ok(ImmTy::from_scalar_int(res, val.layout)) -} - -/// For the intrinsics: -/// - sinf32, sinf64 -/// - cosf32, cosf64 -/// - expf32, expf64, exp2f32, exp2f64 -/// - logf32, logf64, log2f32, log2f64, log10f32, log10f64 -/// - powf32, powf64 -/// -/// # Return -/// -/// Returns `Some(output)` if the `intrinsic` results in a defined fixed `output` specified in the C standard -/// (specifically, C23 annex F.10) when given `args` as arguments. Outputs that are unaffected by a relative error -/// (such as INF and zero) are not handled here, they are assumed to be handled by the underlying -/// implementation. Returns `None` if no specific value is guaranteed. -/// -/// # Note -/// -/// For `powf*` operations of the form: -/// -/// - `(SNaN)^(±0)` -/// - `1^(SNaN)` -/// -/// The result is implementation-defined: -/// - musl returns for both `1.0` -/// - glibc returns for both `NaN` -/// -/// This discrepancy exists because SNaN handling is not consistently defined across platforms, -/// and the C standard leaves behavior for SNaNs unspecified. -/// -/// Miri chooses to adhere to both implementations and returns either one of them non-deterministically. -fn fixed_float_value( - ecx: &mut MiriInterpCx<'_>, - intrinsic_name: &str, - args: &[IeeeFloat], -) -> Option> { - let one = IeeeFloat::::one(); - Some(match (intrinsic_name, args) { - // cos(+- 0) = 1 - ("cosf32" | "cosf64", [input]) if input.is_zero() => one, - - // e^0 = 1 - ("expf32" | "expf64" | "exp2f32" | "exp2f64", [input]) if input.is_zero() => one, - - // (-1)^(±INF) = 1 - ("powf32" | "powf64", [base, exp]) if *base == -one && exp.is_infinite() => one, - - // 1^y = 1 for any y, even a NaN - ("powf32" | "powf64", [base, exp]) if *base == one => { - let rng = ecx.machine.rng.get_mut(); - // SNaN exponents get special treatment: they might return 1, or a NaN. - let return_nan = exp.is_signaling() && ecx.machine.float_nondet && rng.random(); - // Handle both the musl and glibc cases non-deterministically. - if return_nan { ecx.generate_nan(args) } else { one } - } - - // x^(±0) = 1 for any x, even a NaN - ("powf32" | "powf64", [base, exp]) if exp.is_zero() => { - let rng = ecx.machine.rng.get_mut(); - // SNaN bases get special treatment: they might return 1, or a NaN. - let return_nan = base.is_signaling() && ecx.machine.float_nondet && rng.random(); - // Handle both the musl and glibc cases non-deterministically. - if return_nan { ecx.generate_nan(args) } else { one } - } - - // There are a lot of cases for fixed outputs according to the C Standard, but these are - // mainly INF or zero which are not affected by the applied error. - _ => return None, - }) -} - -/// Returns `Some(output)` if `powi` (called `pown` in C) results in a fixed value specified in the -/// C standard (specifically, C23 annex F.10.4.6) when doing `base^exp`. Otherwise, returns `None`. -fn fixed_powi_float_value( - ecx: &mut MiriInterpCx<'_>, - base: IeeeFloat, - exp: i32, -) -> Option> { - Some(match exp { - 0 => { - let one = IeeeFloat::::one(); - let rng = ecx.machine.rng.get_mut(); - let return_nan = ecx.machine.float_nondet && rng.random() && base.is_signaling(); - // For SNaN treatment, we are consistent with `powf`above. - // (We wouldn't have two, unlike powf all implementations seem to agree for powi, - // but for now we are maximally conservative.) - if return_nan { ecx.generate_nan(&[base]) } else { one } - } - - _ => return None, - }) -} - -/// Given an floating-point operation and a floating-point value, clamps the result to the output -/// range of the given operation. -fn clamp_float_value(intrinsic_name: &str, val: IeeeFloat) -> IeeeFloat { - match intrinsic_name { - // sin and cos: [-1, 1] - "sinf32" | "cosf32" | "sinf64" | "cosf64" => - val.clamp(IeeeFloat::::one().neg(), IeeeFloat::::one()), - // exp: [0, +INF] - "expf32" | "exp2f32" | "expf64" | "exp2f64" => - IeeeFloat::::maximum(val, IeeeFloat::::ZERO), - _ => val, - } -} diff --git a/src/tools/miri/src/lib.rs b/src/tools/miri/src/lib.rs index a591d21071db6..c9086922a51fe 100644 --- a/src/tools/miri/src/lib.rs +++ b/src/tools/miri/src/lib.rs @@ -16,6 +16,8 @@ #![feature(derive_coerce_pointee)] #![feature(arbitrary_self_types)] #![feature(iter_advance_by)] +#![feature(f16)] +#![feature(f128)] // Configure clippy and other lints #![allow( clippy::collapsible_else_if, diff --git a/src/tools/miri/src/math.rs b/src/tools/miri/src/math.rs index cf16a5676d680..6a86e4276f253 100644 --- a/src/tools/miri/src/math.rs +++ b/src/tools/miri/src/math.rs @@ -1,6 +1,9 @@ +use std::ops::Neg; +use std::{f16, f32, f64, f128}; + use rand::Rng as _; use rustc_apfloat::Float as _; -use rustc_apfloat::ieee::IeeeFloat; +use rustc_apfloat::ieee::{DoubleS, HalfS, IeeeFloat, QuadS, Semantics, SingleS}; use rustc_middle::ty::{self, FloatTy, ScalarInt}; use crate::*; @@ -50,29 +53,249 @@ pub(crate) fn apply_random_float_error_ulp( apply_random_float_error(ecx, val, err_scale) } -/// Applies a random 16ULP floating point error to `val` and returns the new value. +/// Applies a random ULP floating point error to `val` and returns the new value. +/// So if you want an X ULP error, `ulp_exponent` should be log2(X). +/// /// Will fail if `val` is not a floating point number. pub(crate) fn apply_random_float_error_to_imm<'tcx>( ecx: &mut MiriInterpCx<'tcx>, val: ImmTy<'tcx>, ulp_exponent: u32, ) -> InterpResult<'tcx, ImmTy<'tcx>> { + let this = ecx.eval_context_mut(); let scalar = val.to_scalar_int()?; let res: ScalarInt = match val.layout.ty.kind() { ty::Float(FloatTy::F16) => - apply_random_float_error_ulp(ecx, scalar.to_f16(), ulp_exponent).into(), + apply_random_float_error_ulp(this, scalar.to_f16(), ulp_exponent).into(), ty::Float(FloatTy::F32) => - apply_random_float_error_ulp(ecx, scalar.to_f32(), ulp_exponent).into(), + apply_random_float_error_ulp(this, scalar.to_f32(), ulp_exponent).into(), ty::Float(FloatTy::F64) => - apply_random_float_error_ulp(ecx, scalar.to_f64(), ulp_exponent).into(), + apply_random_float_error_ulp(this, scalar.to_f64(), ulp_exponent).into(), ty::Float(FloatTy::F128) => - apply_random_float_error_ulp(ecx, scalar.to_f128(), ulp_exponent).into(), + apply_random_float_error_ulp(this, scalar.to_f128(), ulp_exponent).into(), _ => bug!("intrinsic called with non-float input type"), }; interp_ok(ImmTy::from_scalar_int(res, val.layout)) } +/// Given a floating-point operation and a floating-point value, clamps the result to the output +/// range of the given operation according to the C standard, if any. +pub(crate) fn clamp_float_value( + intrinsic_name: &str, + val: IeeeFloat, +) -> IeeeFloat +where + IeeeFloat: IeeeExt, +{ + let zero = IeeeFloat::::ZERO; + let one = IeeeFloat::::one(); + let two = IeeeFloat::::two(); + let pi = IeeeFloat::::pi(); + let pi_over_2 = (pi / two).value; + + match intrinsic_name { + // sin, cos, tanh: [-1, 1] + #[rustfmt::skip] + | "sinf32" + | "sinf64" + | "cosf32" + | "cosf64" + | "tanhf" + | "tanh" + => val.clamp(one.neg(), one), + + // exp: [0, +INF) + "expf32" | "exp2f32" | "expf64" | "exp2f64" => val.maximum(zero), + + // cosh: [1, +INF) + "coshf" | "cosh" => val.maximum(one), + + // acos: [0, π] + "acosf" | "acos" => val.clamp(zero, pi), + + // asin: [-π, +π] + "asinf" | "asin" => val.clamp(pi.neg(), pi), + + // atan: (-π/2, +π/2) + "atanf" | "atan" => val.clamp(pi_over_2.neg(), pi_over_2), + + // erfc: (-1, 1) + "erff" | "erf" => val.clamp(one.neg(), one), + + // erfc: (0, 2) + "erfcf" | "erfc" => val.clamp(zero, two), + + // atan2(y, x): arctan(y/x) in [−π, +π] + "atan2f" | "atan2" => val.clamp(pi.neg(), pi), + + _ => val, + } +} + +/// For the intrinsics: +/// - sinf32, sinf64, sinhf, sinh +/// - cosf32, cosf64, coshf, cosh +/// - tanhf, tanh, atanf, atan, atan2f, atan2 +/// - expf32, expf64, exp2f32, exp2f64 +/// - logf32, logf64, log2f32, log2f64, log10f32, log10f64 +/// - powf32, powf64 +/// - erff, erf, erfcf, erfc +/// - hypotf, hypot +/// +/// # Return +/// +/// Returns `Some(output)` if the `intrinsic` results in a defined fixed `output` specified in the C standard +/// (specifically, C23 annex F.10) when given `args` as arguments. Outputs that are unaffected by a relative error +/// (such as INF and zero) are not handled here, they are assumed to be handled by the underlying +/// implementation. Returns `None` if no specific value is guaranteed. +/// +/// # Note +/// +/// For `powf*` operations of the form: +/// +/// - `(SNaN)^(±0)` +/// - `1^(SNaN)` +/// +/// The result is implementation-defined: +/// - musl returns for both `1.0` +/// - glibc returns for both `NaN` +/// +/// This discrepancy exists because SNaN handling is not consistently defined across platforms, +/// and the C standard leaves behavior for SNaNs unspecified. +/// +/// Miri chooses to adhere to both implementations and returns either one of them non-deterministically. +pub(crate) fn fixed_float_value( + ecx: &mut MiriInterpCx<'_>, + intrinsic_name: &str, + args: &[IeeeFloat], +) -> Option> +where + IeeeFloat: IeeeExt, +{ + let this = ecx.eval_context_mut(); + let one = IeeeFloat::::one(); + let two = IeeeFloat::::two(); + let three = IeeeFloat::::three(); + let pi = IeeeFloat::::pi(); + let pi_over_2 = (pi / two).value; + let pi_over_4 = (pi_over_2 / two).value; + + Some(match (intrinsic_name, args) { + // cos(±0) and cosh(±0)= 1 + ("cosf32" | "cosf64" | "coshf" | "cosh", [input]) if input.is_zero() => one, + + // e^0 = 1 + ("expf32" | "expf64" | "exp2f32" | "exp2f64", [input]) if input.is_zero() => one, + + // tanh(±INF) = ±1 + ("tanhf" | "tanh", [input]) if input.is_infinite() => one.copy_sign(*input), + + // atan(±INF) = ±π/2 + ("atanf" | "atan", [input]) if input.is_infinite() => pi_over_2.copy_sign(*input), + + // erf(±INF) = ±1 + ("erff" | "erf", [input]) if input.is_infinite() => one.copy_sign(*input), + + // erfc(-INF) = 2 + ("erfcf" | "erfc", [input]) if input.is_neg_infinity() => (one + one).value, + + // hypot(x, ±0) = abs(x), if x is not a NaN. + ("_hypotf" | "hypotf" | "_hypot" | "hypot", [x, y]) if !x.is_nan() && y.is_zero() => + x.abs(), + + // atan2(±0,−0) = ±π. + // atan2(±0, y) = ±π for y < 0. + // Must check for non NaN because `y.is_negative()` also applies to NaN. + ("atan2f" | "atan2", [x, y]) if (x.is_zero() && (y.is_negative() && !y.is_nan())) => + pi.copy_sign(*x), + + // atan2(±x,−∞) = ±π for finite x > 0. + ("atan2f" | "atan2", [x, y]) + if (!x.is_zero() && !x.is_infinite()) && y.is_neg_infinity() => + pi.copy_sign(*x), + + // atan2(x, ±0) = −π/2 for x < 0. + // atan2(x, ±0) = π/2 for x > 0. + ("atan2f" | "atan2", [x, y]) if !x.is_zero() && y.is_zero() => pi_over_2.copy_sign(*x), + + //atan2(±∞, −∞) = ±3π/4 + ("atan2f" | "atan2", [x, y]) if x.is_infinite() && y.is_neg_infinity() => + (pi_over_4 * three).value.copy_sign(*x), + + //atan2(±∞, +∞) = ±π/4 + ("atan2f" | "atan2", [x, y]) if x.is_infinite() && y.is_pos_infinity() => + pi_over_4.copy_sign(*x), + + // atan2(±∞, y) returns ±π/2 for finite y. + ("atan2f" | "atan2", [x, y]) if x.is_infinite() && (!y.is_infinite() && !y.is_nan()) => + pi_over_2.copy_sign(*x), + + // (-1)^(±INF) = 1 + ("powf32" | "powf64", [base, exp]) if *base == -one && exp.is_infinite() => one, + + // 1^y = 1 for any y, even a NaN + ("powf32" | "powf64", [base, exp]) if *base == one => { + let rng = this.machine.rng.get_mut(); + // SNaN exponents get special treatment: they might return 1, or a NaN. + let return_nan = exp.is_signaling() && this.machine.float_nondet && rng.random(); + // Handle both the musl and glibc cases non-deterministically. + if return_nan { this.generate_nan(args) } else { one } + } + + // x^(±0) = 1 for any x, even a NaN + ("powf32" | "powf64", [base, exp]) if exp.is_zero() => { + let rng = this.machine.rng.get_mut(); + // SNaN bases get special treatment: they might return 1, or a NaN. + let return_nan = base.is_signaling() && this.machine.float_nondet && rng.random(); + // Handle both the musl and glibc cases non-deterministically. + if return_nan { this.generate_nan(args) } else { one } + } + + // There are a lot of cases for fixed outputs according to the C Standard, but these are + // mainly INF or zero which are not affected by the applied error. + _ => return None, + }) +} + +/// Returns `Some(output)` if `powi` (called `pown` in C) results in a fixed value specified in the +/// C standard (specifically, C23 annex F.10.4.6) when doing `base^exp`. Otherwise, returns `None`. +pub(crate) fn fixed_powi_value( + ecx: &mut MiriInterpCx<'_>, + base: IeeeFloat, + exp: i32, +) -> Option> +where + IeeeFloat: IeeeExt, +{ + match exp { + 0 => { + let one = IeeeFloat::::one(); + let rng = ecx.machine.rng.get_mut(); + let return_nan = ecx.machine.float_nondet && rng.random() && base.is_signaling(); + // For SNaN treatment, we are consistent with `powf`above. + // (We wouldn't have two, unlike powf all implementations seem to agree for powi, + // but for now we are maximally conservative.) + Some(if return_nan { ecx.generate_nan(&[base]) } else { one }) + } + + _ => return None, + } +} + +/// Returns `Some(output)` if `scalbn` (called `ldexp` in C) results in a fixed value specified in the +/// C standard (specifically, C23 annex F.10.3.19) when doing `arg*(2^exp)`. Otherwise, returns `None`. +pub(crate) fn fixed_scalbn_value( + arg: IeeeFloat, + exp: i32, +) -> Option> { + match exp { + // ldexp(x, 0) = x. + 0 => Some(arg), + _ => return None, + } +} + pub(crate) fn sqrt(x: IeeeFloat) -> IeeeFloat { match x.category() { // preserve zero sign @@ -155,19 +378,49 @@ pub(crate) fn sqrt(x: IeeeFloat) -> IeeeFl } } -/// Extend functionality of rustc_apfloat softfloats +/// Extend functionality of `rustc_apfloat` softfloats for IEEE float types. pub trait IeeeExt: rustc_apfloat::Float { + // Some values we use: + #[inline] fn one() -> Self { Self::from_u128(1).value } + #[inline] + fn two() -> Self { + Self::from_u128(2).value + } + + #[inline] + fn three() -> Self { + Self::from_u128(3).value + } + + fn pi() -> Self; + #[inline] fn clamp(self, min: Self, max: Self) -> Self { self.maximum(min).minimum(max) } } -impl IeeeExt for IeeeFloat {} + +macro_rules! impl_ieee_pi { + ($float_ty:ident, $semantic:ty) => { + impl IeeeExt for IeeeFloat<$semantic> { + #[inline] + fn pi() -> Self { + // We take the value from the standard library as the most reasonable source for an exact π here. + Self::from_bits($float_ty::consts::PI.to_bits() as _) + } + } + }; +} + +impl_ieee_pi!(f16, HalfS); +impl_ieee_pi!(f32, SingleS); +impl_ieee_pi!(f64, DoubleS); +impl_ieee_pi!(f128, QuadS); #[cfg(test)] mod tests { diff --git a/src/tools/miri/src/shims/foreign_items.rs b/src/tools/miri/src/shims/foreign_items.rs index 9ddba8c2b48d8..907a858a15f6c 100644 --- a/src/tools/miri/src/shims/foreign_items.rs +++ b/src/tools/miri/src/shims/foreign_items.rs @@ -17,6 +17,7 @@ use rustc_target::callconv::FnAbi; use self::helpers::{ToHost, ToSoft}; use super::alloc::EvalContextExt as _; use super::backtrace::EvalContextExt as _; +use crate::helpers::EvalContextExt as _; use crate::*; /// Type of dynamic symbols (for `dlsym` et al) @@ -766,33 +767,38 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { => { let [f] = this.check_shim(abi, CanonAbi::C , link_name, args)?; let f = this.read_scalar(f)?.to_f32()?; - // Using host floats (but it's fine, these operations do not have guaranteed precision). - let f_host = f.to_host(); - let res = match link_name.as_str() { - "cbrtf" => f_host.cbrt(), - "coshf" => f_host.cosh(), - "sinhf" => f_host.sinh(), - "tanf" => f_host.tan(), - "tanhf" => f_host.tanh(), - "acosf" => f_host.acos(), - "asinf" => f_host.asin(), - "atanf" => f_host.atan(), - "log1pf" => f_host.ln_1p(), - "expm1f" => f_host.exp_m1(), - "tgammaf" => f_host.gamma(), - "erff" => f_host.erf(), - "erfcf" => f_host.erfc(), - _ => bug!(), - }; - let res = res.to_soft(); - // Apply a relative error of 16ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp( - // this, - // res, - // 4, // log2(16) - // ); + + let res = math::fixed_float_value(this, link_name.as_str(), &[f]).unwrap_or_else(|| { + // Using host floats (but it's fine, these operations do not have + // guaranteed precision). + let f_host = f.to_host(); + let res = match link_name.as_str() { + "cbrtf" => f_host.cbrt(), + "coshf" => f_host.cosh(), + "sinhf" => f_host.sinh(), + "tanf" => f_host.tan(), + "tanhf" => f_host.tanh(), + "acosf" => f_host.acos(), + "asinf" => f_host.asin(), + "atanf" => f_host.atan(), + "log1pf" => f_host.ln_1p(), + "expm1f" => f_host.exp_m1(), + "tgammaf" => f_host.gamma(), + "erff" => f_host.erf(), + "erfcf" => f_host.erfc(), + _ => bug!(), + }; + let res = res.to_soft(); + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + let res = math::apply_random_float_error_ulp( + this, res, 2, // log2(4) + ); + + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + math::clamp_float_value(link_name.as_str(), res) + }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; } @@ -805,24 +811,28 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { let [f1, f2] = this.check_shim(abi, CanonAbi::C , link_name, args)?; let f1 = this.read_scalar(f1)?.to_f32()?; let f2 = this.read_scalar(f2)?.to_f32()?; - // underscore case for windows, here and below - // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019) - // Using host floats (but it's fine, these operations do not have guaranteed precision). - let res = match link_name.as_str() { - "_hypotf" | "hypotf" => f1.to_host().hypot(f2.to_host()).to_soft(), - "atan2f" => f1.to_host().atan2(f2.to_host()).to_soft(), - #[allow(deprecated)] - "fdimf" => f1.to_host().abs_sub(f2.to_host()).to_soft(), - _ => bug!(), - }; - // Apply a relative error of 16ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp( - // this, - // res, - // 4, // log2(16) - // ); + + let res = math::fixed_float_value(this, link_name.as_str(), &[f1, f2]).unwrap_or_else(|| { + let res = match link_name.as_str() { + // underscore case for windows, here and below + // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019) + // Using host floats (but it's fine, these operations do not have guaranteed precision). + "_hypotf" | "hypotf" => f1.to_host().hypot(f2.to_host()).to_soft(), + "atan2f" => f1.to_host().atan2(f2.to_host()).to_soft(), + #[allow(deprecated)] + "fdimf" => f1.to_host().abs_sub(f2.to_host()).to_soft(), + _ => bug!(), + }; + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + let res = math::apply_random_float_error_ulp( + this, res, 2, // log2(4) + ); + + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + math::clamp_float_value(link_name.as_str(), res) + }); let res = this.adjust_nan(res, &[f1, f2]); this.write_scalar(res, dest)?; } @@ -843,33 +853,38 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { => { let [f] = this.check_shim(abi, CanonAbi::C , link_name, args)?; let f = this.read_scalar(f)?.to_f64()?; - // Using host floats (but it's fine, these operations do not have guaranteed precision). - let f_host = f.to_host(); - let res = match link_name.as_str() { - "cbrt" => f_host.cbrt(), - "cosh" => f_host.cosh(), - "sinh" => f_host.sinh(), - "tan" => f_host.tan(), - "tanh" => f_host.tanh(), - "acos" => f_host.acos(), - "asin" => f_host.asin(), - "atan" => f_host.atan(), - "log1p" => f_host.ln_1p(), - "expm1" => f_host.exp_m1(), - "tgamma" => f_host.gamma(), - "erf" => f_host.erf(), - "erfc" => f_host.erfc(), - _ => bug!(), - }; - let res = res.to_soft(); - // Apply a relative error of 16ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp( - // this, - // res.to_soft(), - // 4, // log2(16) - // ); + + let res = math::fixed_float_value(this, link_name.as_str(), &[f]).unwrap_or_else(|| { + // Using host floats (but it's fine, these operations do not have + // guaranteed precision). + let f_host = f.to_host(); + let res = match link_name.as_str() { + "cbrt" => f_host.cbrt(), + "cosh" => f_host.cosh(), + "sinh" => f_host.sinh(), + "tan" => f_host.tan(), + "tanh" => f_host.tanh(), + "acos" => f_host.acos(), + "asin" => f_host.asin(), + "atan" => f_host.atan(), + "log1p" => f_host.ln_1p(), + "expm1" => f_host.exp_m1(), + "tgamma" => f_host.gamma(), + "erf" => f_host.erf(), + "erfc" => f_host.erfc(), + _ => bug!(), + }; + let res = res.to_soft(); + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + let res = math::apply_random_float_error_ulp( + this, res, 2, // log2(4) + ); + + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + math::clamp_float_value(link_name.as_str(), res) + }); let res = this.adjust_nan(res, &[f]); this.write_scalar(res, dest)?; } @@ -882,24 +897,28 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { let [f1, f2] = this.check_shim(abi, CanonAbi::C , link_name, args)?; let f1 = this.read_scalar(f1)?.to_f64()?; let f2 = this.read_scalar(f2)?.to_f64()?; - // underscore case for windows, here and below - // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019) - // Using host floats (but it's fine, these operations do not have guaranteed precision). - let res = match link_name.as_str() { - "_hypot" | "hypot" => f1.to_host().hypot(f2.to_host()).to_soft(), - "atan2" => f1.to_host().atan2(f2.to_host()).to_soft(), - #[allow(deprecated)] - "fdim" => f1.to_host().abs_sub(f2.to_host()).to_soft(), - _ => bug!(), - }; - // Apply a relative error of 16ULP to introduce some non-determinism - // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp( - // this, - // res, - // 4, // log2(16) - // ); + + let res = math::fixed_float_value(this, link_name.as_str(), &[f1, f2]).unwrap_or_else(|| { + let res = match link_name.as_str() { + // underscore case for windows, here and below + // (see https://docs.microsoft.com/en-us/cpp/c-runtime-library/reference/floating-point-primitives?view=vs-2019) + // Using host floats (but it's fine, these operations do not have guaranteed precision). + "_hypot" | "hypot" => f1.to_host().hypot(f2.to_host()).to_soft(), + "atan2" => f1.to_host().atan2(f2.to_host()).to_soft(), + #[allow(deprecated)] + "fdim" => f1.to_host().abs_sub(f2.to_host()).to_soft(), + _ => bug!(), + }; + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + let res = math::apply_random_float_error_ulp( + this, res, 2, // log2(4) + ); + + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + math::clamp_float_value(link_name.as_str(), res) + }); let res = this.adjust_nan(res, &[f1, f2]); this.write_scalar(res, dest)?; } @@ -913,7 +932,13 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { let x = this.read_scalar(x)?.to_f64()?; let exp = this.read_scalar(exp)?.to_i32()?; - let res = x.scalbn(exp); + let res = + math::fixed_scalbn_value(x, exp).unwrap_or_else(|| { + let res = x.scalbn(exp); + // Apply a relative error of 4ULP to introduce some non-determinism + // simulating imprecise implementations and optimizations. + math::apply_random_float_error_ulp(this, res, 2 /* log2(4) */) + }); let res = this.adjust_nan(res, &[x]); this.write_scalar(res, dest)?; } @@ -925,11 +950,14 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { // Using host floats (but it's fine, these operations do not have guaranteed precision). let (res, sign) = x.to_host().ln_gamma(); this.write_int(sign, &signp)?; + let res = res.to_soft(); - // Apply a relative error of 16ULP to introduce some non-determinism + // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp(this, res, 4 /* log2(16) */); + let res = math::apply_random_float_error_ulp(this, res, 2 /* log2(4) */); + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + let res = math::clamp_float_value(link_name.as_str(), res); let res = this.adjust_nan(res, &[x]); this.write_scalar(res, dest)?; } @@ -941,11 +969,14 @@ trait EvalContextExtPriv<'tcx>: crate::MiriInterpCxExt<'tcx> { // Using host floats (but it's fine, these operations do not have guaranteed precision). let (res, sign) = x.to_host().ln_gamma(); this.write_int(sign, &signp)?; + let res = res.to_soft(); - // Apply a relative error of 16ULP to introduce some non-determinism + // Apply a relative error of 4ULP to introduce some non-determinism // simulating imprecise implementations and optimizations. - // FIXME: temporarily disabled as it breaks std tests. - // let res = math::apply_random_float_error_ulp(this, res, 4 /* log2(16) */); + let res = math::apply_random_float_error_ulp(this, res, 2 /* log2(4) */); + // Clamp the result to the guaranteed range of this function according to the C standard, + // if any. + let res = math::clamp_float_value(link_name.as_str(), res); let res = this.adjust_nan(res, &[x]); this.write_scalar(res, dest)?; } diff --git a/src/tools/miri/tests/pass/float.rs b/src/tools/miri/tests/pass/float.rs index fe7316c6680df..5c9e11dcce0ff 100644 --- a/src/tools/miri/tests/pass/float.rs +++ b/src/tools/miri/tests/pass/float.rs @@ -1088,6 +1088,8 @@ pub fn libm() { assert_approx_eq!(1f32.exp_m1(), f32::consts::E - 1.0); assert_approx_eq!(1f64.exp_m1(), f64::consts::E - 1.0); + assert_approx_eq!(f32::NEG_INFINITY.exp_m1(), -1.0); + assert_approx_eq!(f64::NEG_INFINITY.exp_m1(), -1.0); assert_approx_eq!(10f32.exp2(), 1024f32); assert_approx_eq!(50f64.exp2(), 1125899906842624f64); @@ -1120,9 +1122,10 @@ pub fn libm() { assert_approx_eq!(3.0f32.hypot(4.0f32), 5.0f32); assert_approx_eq!(3.0f64.hypot(4.0f64), 5.0f64); - assert_eq!(ldexp(0.65f64, 3i32), 5.2f64); + assert_approx_eq!(ldexp(0.65f64, 3i32), 5.2f64); assert_eq!(ldexp(1.42, 0xFFFF), f64::INFINITY); assert_eq!(ldexp(1.42, -0xFFFF), 0f64); + assert_eq!(ldexp(42.0, 0), 42.0); // Trigonometric functions. @@ -1131,8 +1134,14 @@ pub fn libm() { assert_approx_eq!((f64::consts::PI / 2f64).sin(), 1f64); assert_approx_eq!(f32::consts::FRAC_PI_6.sin(), 0.5); assert_approx_eq!(f64::consts::FRAC_PI_6.sin(), 0.5); - assert_approx_eq!(f32::consts::FRAC_PI_4.sin().asin(), f32::consts::FRAC_PI_4); - assert_approx_eq!(f64::consts::FRAC_PI_4.sin().asin(), f64::consts::FRAC_PI_4); + // Increase error tolerance from 12ULP to 16ULP because of the extra operation. + assert_approx_eq!(f32::consts::FRAC_PI_4.sin().asin(), f32::consts::FRAC_PI_4, 16); + assert_approx_eq!(f64::consts::FRAC_PI_4.sin().asin(), f64::consts::FRAC_PI_4, 16); + assert_biteq(0.0f32.asin(), 0.0f32, "asin(+0) = +0"); + assert_biteq((-0.0f32).asin(), -0.0, "asin(-0) = -0"); + assert_biteq(0.0f64.asin(), 0.0, "asin(+0) = +0"); + assert_biteq((-0.0f64).asin(), -0.0, "asin(-0) = -0"); + assert_approx_eq!(1.0f32.sinh(), 1.1752012f32); assert_approx_eq!(1.0f64.sinh(), 1.1752011936438014f64); @@ -1159,11 +1168,18 @@ pub fn libm() { assert_approx_eq!((f64::consts::PI * 2f64).cos(), 1f64); assert_approx_eq!(f32::consts::FRAC_PI_3.cos(), 0.5); assert_approx_eq!(f64::consts::FRAC_PI_3.cos(), 0.5); - assert_approx_eq!(f32::consts::FRAC_PI_4.cos().acos(), f32::consts::FRAC_PI_4); - assert_approx_eq!(f64::consts::FRAC_PI_4.cos().acos(), f64::consts::FRAC_PI_4); + // Increase error tolerance from 12ULP to 16ULP because of the extra operation. + assert_approx_eq!(f32::consts::FRAC_PI_4.cos().acos(), f32::consts::FRAC_PI_4, 16); + assert_approx_eq!(f64::consts::FRAC_PI_4.cos().acos(), f64::consts::FRAC_PI_4, 16); + assert_biteq(1.0f32.acos(), 0.0, "acos(1) = 0"); + assert_biteq(1.0f64.acos(), 0.0, "acos(1) = 0"); assert_approx_eq!(1.0f32.cosh(), 1.54308f32); assert_approx_eq!(1.0f64.cosh(), 1.5430806348152437f64); + assert_eq!(0.0f32.cosh(), 1.0); + assert_eq!(0.0f64.cosh(), 1.0); + assert_eq!((-0.0f32).cosh(), 1.0); + assert_eq!((-0.0f64).cosh(), 1.0); assert_approx_eq!(2.0f32.acosh(), 1.31695789692481670862504634730796844f32); assert_approx_eq!(3.0f64.acosh(), 1.76274717403908605046521864995958461f64); @@ -1173,6 +1189,47 @@ pub fn libm() { assert_approx_eq!(1.0_f64, 1.0_f64.tan().atan()); assert_approx_eq!(1.0f32.atan2(2.0f32), 0.46364761f32); assert_approx_eq!(1.0f32.atan2(2.0f32), 0.46364761f32); + // C standard defines a bunch of fixed outputs for atan2 + macro_rules! fixed_atan2_cases{ + ($float_type:ident) => {{ + use std::$float_type::consts::{PI, FRAC_PI_2, FRAC_PI_4}; + use $float_type::{INFINITY, NEG_INFINITY}; + + // atan2(±0,−0) = ±π. + assert_eq!($float_type::atan2(0.0, -0.0), PI, "atan2(0,−0) = π"); + assert_eq!($float_type::atan2(-0.0, -0.0), -PI, "atan2(-0,−0) = -π"); + + // atan2(±0, y) = ±π for y < 0. + assert_eq!($float_type::atan2(0.0, -1.0), PI, "atan2(0, y) = π for y < 0."); + assert_eq!($float_type::atan2(-0.0, -1.0), -PI, "atan2(-0, y) = -π for y < 0."); + + // atan2(x, ±0) = −π/2 for x < 0. + assert_eq!($float_type::atan2(-1.0, 0.0), -FRAC_PI_2, "atan2(x, 0) = −π/2 for x < 0"); + assert_eq!($float_type::atan2(-1.0, -0.0), -FRAC_PI_2, "atan2(x, -0) = −π/2 for x < 0"); + + // atan2(x, ±0) = π/2 for x > 0. + assert_eq!($float_type::atan2(1.0, 0.0), FRAC_PI_2, "atan2(x, 0) = π/2 for x > 0."); + assert_eq!($float_type::atan2(1.0, -0.0), FRAC_PI_2, "atan2(x, -0) = π/2 for x > 0."); + + // atan2(±x,−∞) = ±π for finite x > 0. + assert_eq!($float_type::atan2(1.0, NEG_INFINITY), PI, "atan2(x, −∞) = π for finite x > 0"); + assert_eq!($float_type::atan2(-1.0, NEG_INFINITY), -PI, "atan2(-x, −∞) = -π for finite x > 0"); + + // atan2(±∞, y) returns ±π/2 for finite y. + assert_eq!($float_type::atan2(INFINITY, 1.0), FRAC_PI_2, "atan2(+∞, y) returns π/2 for finite y"); + assert_eq!($float_type::atan2(NEG_INFINITY, 1.0), -FRAC_PI_2, "atan2(-∞, y) returns -π/2 for finite y"); + + // atan2(±∞, −∞) = ±3π/4 + assert_eq!($float_type::atan2(INFINITY, NEG_INFINITY), 3.0 * FRAC_PI_4, "atan2(+∞, −∞) = 3π/4"); + assert_eq!($float_type::atan2(NEG_INFINITY, NEG_INFINITY), -3.0 * FRAC_PI_4, "atan2(-∞, −∞) = -3π/4"); + + // atan2(±∞, +∞) = ±π/4 + assert_eq!($float_type::atan2(INFINITY, INFINITY), FRAC_PI_4, "atan2(+∞, +∞) = π/4"); + assert_eq!($float_type::atan2(NEG_INFINITY, INFINITY), -FRAC_PI_4, "atan2(-∞, +∞) = -π/4"); + }} + } + fixed_atan2_cases!(f32); + fixed_atan2_cases!(f64); assert_approx_eq!( 1.0f32.tanh(), @@ -1182,6 +1239,11 @@ pub fn libm() { 1.0f64.tanh(), (1.0 - f64::consts::E.powi(-2)) / (1.0 + f64::consts::E.powi(-2)) ); + assert_eq!(f32::INFINITY.tanh(), 1.0); + assert_eq!(f32::NEG_INFINITY.tanh(), -1.0); + assert_eq!(f64::INFINITY.tanh(), 1.0); + assert_eq!(f64::NEG_INFINITY.tanh(), -1.0); + assert_approx_eq!(0.5f32.atanh(), 0.54930614433405484569762261846126285f32); assert_approx_eq!(0.5f64.atanh(), 0.54930614433405484569762261846126285f64); @@ -1202,8 +1264,14 @@ pub fn libm() { assert_approx_eq!(1.0f32.erf(), 0.84270079294971486934122063508260926f32); assert_approx_eq!(1.0f64.erf(), 0.84270079294971486934122063508260926f64); + assert_eq!(f32::INFINITY.erf(), 1.0); + assert_eq!(f64::INFINITY.erf(), 1.0); assert_approx_eq!(1.0f32.erfc(), 0.15729920705028513065877936491739074f32); assert_approx_eq!(1.0f64.erfc(), 0.15729920705028513065877936491739074f64); + assert_eq!(f32::NEG_INFINITY.erfc(), 2.0); + assert_eq!(f64::NEG_INFINITY.erfc(), 2.0); + assert_eq!(f32::INFINITY.erfc(), 0.0); + assert_eq!(f64::INFINITY.erfc(), 0.0); } fn test_fast() { @@ -1413,7 +1481,6 @@ fn test_non_determinism() { } pub fn test_operations_f32(a: f32, b: f32) { test_operations_f!(a, b); - // FIXME: some are temporarily disabled as it breaks std tests. ensure_nondet(|| a.powf(b)); ensure_nondet(|| a.powi(2)); ensure_nondet(|| a.log(b)); @@ -1422,35 +1489,34 @@ fn test_non_determinism() { ensure_nondet(|| f32::consts::E.ln()); ensure_nondet(|| 10f32.log10()); ensure_nondet(|| 8f32.log2()); - // ensure_nondet(|| 1f32.ln_1p()); - // ensure_nondet(|| 27.0f32.cbrt()); - // ensure_nondet(|| 3.0f32.hypot(4.0f32)); + ensure_nondet(|| 1f32.ln_1p()); + ensure_nondet(|| 27.0f32.cbrt()); + ensure_nondet(|| 3.0f32.hypot(4.0f32)); ensure_nondet(|| 1f32.sin()); ensure_nondet(|| 1f32.cos()); // On i686-pc-windows-msvc , these functions are implemented by calling the `f64` version, // which means the little rounding errors Miri introduces are discarded by the cast down to // `f32`. Just skip the test for them. - // if !cfg!(all(target_os = "windows", target_env = "msvc", target_arch = "x86")) { - // ensure_nondet(|| 1.0f32.tan()); - // ensure_nondet(|| 1.0f32.asin()); - // ensure_nondet(|| 5.0f32.acos()); - // ensure_nondet(|| 1.0f32.atan()); - // ensure_nondet(|| 1.0f32.atan2(2.0f32)); - // ensure_nondet(|| 1.0f32.sinh()); - // ensure_nondet(|| 1.0f32.cosh()); - // ensure_nondet(|| 1.0f32.tanh()); - // } - // ensure_nondet(|| 1.0f32.asinh()); - // ensure_nondet(|| 2.0f32.acosh()); - // ensure_nondet(|| 0.5f32.atanh()); - // ensure_nondet(|| 5.0f32.gamma()); - // ensure_nondet(|| 5.0f32.ln_gamma()); - // ensure_nondet(|| 5.0f32.erf()); - // ensure_nondet(|| 5.0f32.erfc()); + if !cfg!(all(target_os = "windows", target_env = "msvc", target_arch = "x86")) { + ensure_nondet(|| 1.0f32.tan()); + ensure_nondet(|| 1.0f32.asin()); + ensure_nondet(|| 5.0f32.acos()); + ensure_nondet(|| 1.0f32.atan()); + ensure_nondet(|| 1.0f32.atan2(2.0f32)); + ensure_nondet(|| 1.0f32.sinh()); + ensure_nondet(|| 1.0f32.cosh()); + ensure_nondet(|| 1.0f32.tanh()); + } + ensure_nondet(|| 1.0f32.asinh()); + ensure_nondet(|| 2.0f32.acosh()); + ensure_nondet(|| 0.5f32.atanh()); + ensure_nondet(|| 5.0f32.gamma()); + ensure_nondet(|| 5.0f32.ln_gamma()); + ensure_nondet(|| 5.0f32.erf()); + ensure_nondet(|| 5.0f32.erfc()); } pub fn test_operations_f64(a: f64, b: f64) { test_operations_f!(a, b); - // FIXME: some are temporarily disabled as it breaks std tests. ensure_nondet(|| a.powf(b)); ensure_nondet(|| a.powi(2)); ensure_nondet(|| a.log(b)); @@ -1459,26 +1525,26 @@ fn test_non_determinism() { ensure_nondet(|| 3f64.ln()); ensure_nondet(|| f64::consts::E.log10()); ensure_nondet(|| f64::consts::E.log2()); - // ensure_nondet(|| 1f64.ln_1p()); - // ensure_nondet(|| 27.0f64.cbrt()); - // ensure_nondet(|| 3.0f64.hypot(4.0f64)); + ensure_nondet(|| 1f64.ln_1p()); + ensure_nondet(|| 27.0f64.cbrt()); + ensure_nondet(|| 3.0f64.hypot(4.0f64)); ensure_nondet(|| 1f64.sin()); ensure_nondet(|| 1f64.cos()); - // ensure_nondet(|| 1.0f64.tan()); - // ensure_nondet(|| 1.0f64.asin()); - // ensure_nondet(|| 5.0f64.acos()); - // ensure_nondet(|| 1.0f64.atan()); - // ensure_nondet(|| 1.0f64.atan2(2.0f64)); - // ensure_nondet(|| 1.0f64.sinh()); - // ensure_nondet(|| 1.0f64.cosh()); - // ensure_nondet(|| 1.0f64.tanh()); - // ensure_nondet(|| 1.0f64.asinh()); - // ensure_nondet(|| 3.0f64.acosh()); - // ensure_nondet(|| 0.5f64.atanh()); - // ensure_nondet(|| 5.0f64.gamma()); - // ensure_nondet(|| 5.0f64.ln_gamma()); - // ensure_nondet(|| 5.0f64.erf()); - // ensure_nondet(|| 5.0f64.erfc()); + ensure_nondet(|| 1.0f64.tan()); + ensure_nondet(|| 1.0f64.asin()); + ensure_nondet(|| 5.0f64.acos()); + ensure_nondet(|| 1.0f64.atan()); + ensure_nondet(|| 1.0f64.atan2(2.0f64)); + ensure_nondet(|| 1.0f64.sinh()); + ensure_nondet(|| 1.0f64.cosh()); + ensure_nondet(|| 1.0f64.tanh()); + ensure_nondet(|| 1.0f64.asinh()); + ensure_nondet(|| 3.0f64.acosh()); + ensure_nondet(|| 0.5f64.atanh()); + ensure_nondet(|| 5.0f64.gamma()); + ensure_nondet(|| 5.0f64.ln_gamma()); + ensure_nondet(|| 5.0f64.erf()); + ensure_nondet(|| 5.0f64.erfc()); } pub fn test_operations_f128(a: f128, b: f128) { test_operations_f!(a, b); diff --git a/tests/codegen/gpu_offload/gpu_host.rs b/tests/codegen/gpu_offload/gpu_host.rs new file mode 100644 index 0000000000000..513e27426bc0e --- /dev/null +++ b/tests/codegen/gpu_offload/gpu_host.rs @@ -0,0 +1,80 @@ +//@ compile-flags: -Zoffload=Enable -Zunstable-options -C opt-level=3 -Clto=fat +//@ no-prefer-dynamic +//@ needs-enzyme + +// This test is verifying that we generate __tgt_target_data_*_mapper before and after a call to the +// kernel_1. Better documentation to what each global or variable means is available in the gpu +// offlaod code, or the LLVM offload documentation. This code does not launch any GPU kernels yet, +// and will be rewritten once a proper offload frontend has landed. +// +// We currently only handle memory transfer for specific calls to functions named `kernel_{num}`, +// when inside of a function called main. This, too, is a temporary workaround for not having a +// frontend. + +#![no_main] + +#[unsafe(no_mangle)] +fn main() { + let mut x = [3.0; 256]; + kernel_1(&mut x); + core::hint::black_box(&x); +} + +// CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr } +// CHECK: %struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 } +// CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr } +// CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr } + +// CHECK: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 1024] +// CHECK: @.offload_maptypes.1 = private unnamed_addr constant [1 x i64] [i64 3] +// CHECK: @.kernel_1.region_id = weak unnamed_addr constant i8 0 +// CHECK: @.offloading.entry_name.1 = internal unnamed_addr constant [9 x i8] c"kernel_1\00", section ".llvm.rodata.offloading", align 1 +// CHECK: @.offloading.entry.kernel_1 = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.kernel_1.region_id, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section ".omp_offloading_entries", align 1 +// CHECK: @my_struct_global2 = external global %struct.__tgt_kernel_arguments +// CHECK: @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1 +// CHECK: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8 + +// CHECK: Function Attrs: +// CHECK-NEXT: define{{( dso_local)?}} void @main() +// CHECK-NEXT: start: +// CHECK-NEXT: %0 = alloca [8 x i8], align 8 +// CHECK-NEXT: %x = alloca [1024 x i8], align 16 +// CHECK-NEXT: %EmptyDesc = alloca %struct.__tgt_bin_desc, align 8 +// CHECK-NEXT: %.offload_baseptrs = alloca [1 x ptr], align 8 +// CHECK-NEXT: %.offload_ptrs = alloca [1 x ptr], align 8 +// CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8 +// CHECK-NEXT: %x.addr = alloca ptr, align 8 +// CHECK-NEXT: store ptr %x, ptr %x.addr, align 8 +// CHECK-NEXT: %1 = load ptr, ptr %x.addr, align 8 +// CHECK-NEXT: %2 = getelementptr inbounds float, ptr %1, i32 0 +// CHECK: call void @llvm.memset.p0.i64(ptr align 8 %EmptyDesc, i8 0, i64 32, i1 false) +// CHECK-NEXT: call void @__tgt_register_lib(ptr %EmptyDesc) +// CHECK-NEXT: call void @__tgt_init_all_rtls() +// CHECK-NEXT: %3 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 +// CHECK-NEXT: store ptr %1, ptr %3, align 8 +// CHECK-NEXT: %4 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0 +// CHECK-NEXT: store ptr %2, ptr %4, align 8 +// CHECK-NEXT: %5 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0 +// CHECK-NEXT: store i64 1024, ptr %5, align 8 +// CHECK-NEXT: %6 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 +// CHECK-NEXT: %7 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0 +// CHECK-NEXT: %8 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0 +// CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 1, ptr %6, ptr %7, ptr %8, ptr @.offload_maptypes.1, ptr null, ptr null) +// CHECK-NEXT: call void @kernel_1(ptr noalias noundef nonnull align 4 dereferenceable(1024) %x) +// CHECK-NEXT: %9 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0 +// CHECK-NEXT: %10 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0 +// CHECK-NEXT: %11 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0 +// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 1, ptr %9, ptr %10, ptr %11, ptr @.offload_maptypes.1, ptr null, ptr null) +// CHECK-NEXT: call void @__tgt_unregister_lib(ptr %EmptyDesc) +// CHECK: store ptr %x, ptr %0, align 8 +// CHECK-NEXT: call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0) +// CHECK: ret void +// CHECK-NEXT: } + +#[unsafe(no_mangle)] +#[inline(never)] +pub fn kernel_1(x: &mut [f32; 256]) { + for i in 0..256 { + x[i] = 21.0; + } +} diff --git a/tests/ui/print-request/supported-crate-types.rs b/tests/ui/print-request/supported-crate-types.rs index c8b4c0c1a4164..50185a231eeb0 100644 --- a/tests/ui/print-request/supported-crate-types.rs +++ b/tests/ui/print-request/supported-crate-types.rs @@ -8,6 +8,11 @@ //@ check-pass +// FIXME: musl targets are currently statically linked, but running on a musl host +// requires dynamic linkage, which in turn changes the supported crate types for +// x86_64-unknown-linux-musl. +//@ ignore-musl + //@ revisions: wasm musl linux //@[wasm] compile-flags: --target=wasm32-unknown-unknown --print=supported-crate-types -Zunstable-options diff --git a/tests/ui/resolve/underscore-bindings-disambiguators.rs b/tests/ui/resolve/underscore-bindings-disambiguators.rs new file mode 100644 index 0000000000000..8c89b39f8590c --- /dev/null +++ b/tests/ui/resolve/underscore-bindings-disambiguators.rs @@ -0,0 +1,27 @@ +// Regression test for issue #144168 where some `_` bindings were incorrectly only allowed once per +// module, failing with "error[E0428]: the name `_` is defined multiple times". + +// This weird/complex setup is reduced from `zerocopy-0.8.25` where the issue was encountered. + +#![crate_type = "lib"] + +macro_rules! impl_for_transmute_from { + () => { + const _: () = {}; + }; +} + +mod impls { + use super::*; + impl_for_transmute_from!(); + impl_for_transmute_from!(); + const _: () = todo!(); //~ ERROR: evaluation panicked + const _: () = todo!(); //~ ERROR: evaluation panicked + const _: () = todo!(); //~ ERROR: evaluation panicked + const _: () = todo!(); //~ ERROR: evaluation panicked + const _: () = todo!(); //~ ERROR: evaluation panicked +} +use X as Y; //~ ERROR: unresolved import +use Z as W; //~ ERROR: unresolved import + +const _: () = todo!(); //~ ERROR: evaluation panicked diff --git a/tests/ui/resolve/underscore-bindings-disambiguators.stderr b/tests/ui/resolve/underscore-bindings-disambiguators.stderr new file mode 100644 index 0000000000000..0c8081a0c5647 --- /dev/null +++ b/tests/ui/resolve/underscore-bindings-disambiguators.stderr @@ -0,0 +1,70 @@ +error[E0432]: unresolved import `X` + --> $DIR/underscore-bindings-disambiguators.rs:24:5 + | +LL | use X as Y; + | -^^^^^ + | | + | no `X` in the root + | help: a similar name exists in the module: `_` + +error[E0432]: unresolved import `Z` + --> $DIR/underscore-bindings-disambiguators.rs:25:5 + | +LL | use Z as W; + | -^^^^^ + | | + | no `Z` in the root + | help: a similar name exists in the module: `_` + +error[E0080]: evaluation panicked: not yet implemented + --> $DIR/underscore-bindings-disambiguators.rs:18:19 + | +LL | const _: () = todo!(); + | ^^^^^^^ evaluation of `impls::_` failed here + | + = note: this error originates in the macro `todo` (in Nightly builds, run with -Z macro-backtrace for more info) + +error[E0080]: evaluation panicked: not yet implemented + --> $DIR/underscore-bindings-disambiguators.rs:19:19 + | +LL | const _: () = todo!(); + | ^^^^^^^ evaluation of `impls::_` failed here + | + = note: this error originates in the macro `todo` (in Nightly builds, run with -Z macro-backtrace for more info) + +error[E0080]: evaluation panicked: not yet implemented + --> $DIR/underscore-bindings-disambiguators.rs:20:19 + | +LL | const _: () = todo!(); + | ^^^^^^^ evaluation of `impls::_` failed here + | + = note: this error originates in the macro `todo` (in Nightly builds, run with -Z macro-backtrace for more info) + +error[E0080]: evaluation panicked: not yet implemented + --> $DIR/underscore-bindings-disambiguators.rs:21:19 + | +LL | const _: () = todo!(); + | ^^^^^^^ evaluation of `impls::_` failed here + | + = note: this error originates in the macro `todo` (in Nightly builds, run with -Z macro-backtrace for more info) + +error[E0080]: evaluation panicked: not yet implemented + --> $DIR/underscore-bindings-disambiguators.rs:22:19 + | +LL | const _: () = todo!(); + | ^^^^^^^ evaluation of `impls::_` failed here + | + = note: this error originates in the macro `todo` (in Nightly builds, run with -Z macro-backtrace for more info) + +error[E0080]: evaluation panicked: not yet implemented + --> $DIR/underscore-bindings-disambiguators.rs:27:15 + | +LL | const _: () = todo!(); + | ^^^^^^^ evaluation of `_` failed here + | + = note: this error originates in the macro `todo` (in Nightly builds, run with -Z macro-backtrace for more info) + +error: aborting due to 8 previous errors + +Some errors have detailed explanations: E0080, E0432. +For more information about an error, try `rustc --explain E0080`. diff --git a/tests/ui/uninhabited/auxiliary/staged-api.rs b/tests/ui/uninhabited/auxiliary/staged-api.rs index 342ecf020ea5e..925bb1e0c65a0 100644 --- a/tests/ui/uninhabited/auxiliary/staged-api.rs +++ b/tests/ui/uninhabited/auxiliary/staged-api.rs @@ -6,3 +6,9 @@ pub struct Foo { #[unstable(feature = "unstable", issue = "none")] pub field: T, } + +#[unstable(feature = "my_coro_state", issue = "none")] +pub enum MyCoroutineState { + Yielded(Y), + Complete(R), +} diff --git a/tests/ui/uninhabited/uninhabited-unstable-field.current.stderr b/tests/ui/uninhabited/uninhabited-unstable-field.current.stderr deleted file mode 100644 index 9e0feb4c473f8..0000000000000 --- a/tests/ui/uninhabited/uninhabited-unstable-field.current.stderr +++ /dev/null @@ -1,22 +0,0 @@ -error[E0004]: non-exhaustive patterns: type `Foo` is non-empty - --> $DIR/uninhabited-unstable-field.rs:13:11 - | -LL | match x {} - | ^ - | -note: `Foo` defined here - --> $DIR/auxiliary/staged-api.rs:5:1 - | -LL | pub struct Foo { - | ^^^^^^^^^^^^^^^^^ - = note: the matched value is of type `Foo` -help: ensure that all possible cases are being handled by adding a match arm with a wildcard pattern as shown - | -LL ~ match x { -LL + _ => todo!(), -LL ~ } - | - -error: aborting due to 1 previous error - -For more information about this error, try `rustc --explain E0004`. diff --git a/tests/ui/uninhabited/uninhabited-unstable-field.exhaustive.stderr b/tests/ui/uninhabited/uninhabited-unstable-field.exhaustive.stderr deleted file mode 100644 index 9e0feb4c473f8..0000000000000 --- a/tests/ui/uninhabited/uninhabited-unstable-field.exhaustive.stderr +++ /dev/null @@ -1,22 +0,0 @@ -error[E0004]: non-exhaustive patterns: type `Foo` is non-empty - --> $DIR/uninhabited-unstable-field.rs:13:11 - | -LL | match x {} - | ^ - | -note: `Foo` defined here - --> $DIR/auxiliary/staged-api.rs:5:1 - | -LL | pub struct Foo { - | ^^^^^^^^^^^^^^^^^ - = note: the matched value is of type `Foo` -help: ensure that all possible cases are being handled by adding a match arm with a wildcard pattern as shown - | -LL ~ match x { -LL + _ => todo!(), -LL ~ } - | - -error: aborting due to 1 previous error - -For more information about this error, try `rustc --explain E0004`. diff --git a/tests/ui/uninhabited/uninhabited-unstable-field.rs b/tests/ui/uninhabited/uninhabited-unstable-field.rs index 9b507c518ab64..321b864aa27de 100644 --- a/tests/ui/uninhabited/uninhabited-unstable-field.rs +++ b/tests/ui/uninhabited/uninhabited-unstable-field.rs @@ -1,29 +1,45 @@ //@ aux-build: staged-api.rs -//@ revisions: current exhaustive - +//! The field of `Pin` used to be public, which would cause `Pin` to be uninhabited. To remedy +//! this, we temporarily made it so unstable fields are always considered inhabited. This has now +//! been reverted, and this file ensures that we don't special-case unstable fields wrt +//! inhabitedness anymore. #![feature(exhaustive_patterns)] +#![feature(never_type)] +#![feature(my_coro_state)] // Custom feature from `staged-api.rs` +#![deny(unreachable_patterns)] extern crate staged_api; -use staged_api::Foo; +use staged_api::{Foo, MyCoroutineState}; enum Void {} fn demo(x: Foo) { match x {} - //~^ ERROR non-exhaustive patterns } -// Ensure that the pattern is not considered unreachable. +// Ensure that the pattern is considered unreachable. fn demo2(x: Foo) { match x { - Foo { .. } => {} + Foo { .. } => {} //~ ERROR unreachable } } // Same as above, but for wildcard. fn demo3(x: Foo) { - match x { _ => {} } + match x { + _ => {} //~ ERROR unreachable + } +} + +fn unstable_enum(x: MyCoroutineState) { + match x { + MyCoroutineState::Yielded(_) => {} + } + match x { + MyCoroutineState::Yielded(_) => {} + MyCoroutineState::Complete(_) => {} //~ ERROR unreachable + } } fn main() {} diff --git a/tests/ui/uninhabited/uninhabited-unstable-field.stderr b/tests/ui/uninhabited/uninhabited-unstable-field.stderr new file mode 100644 index 0000000000000..a0c9f9366a6b5 --- /dev/null +++ b/tests/ui/uninhabited/uninhabited-unstable-field.stderr @@ -0,0 +1,40 @@ +error: unreachable pattern + --> $DIR/uninhabited-unstable-field.rs:24:9 + | +LL | Foo { .. } => {} + | ^^^^^^^^^^------ + | | + | matches no values because `Foo` is uninhabited + | help: remove the match arm + | + = note: to learn more about uninhabited types, see https://doc.rust-lang.org/nomicon/exotic-sizes.html#empty-types +note: the lint level is defined here + --> $DIR/uninhabited-unstable-field.rs:9:9 + | +LL | #![deny(unreachable_patterns)] + | ^^^^^^^^^^^^^^^^^^^^ + +error: unreachable pattern + --> $DIR/uninhabited-unstable-field.rs:31:9 + | +LL | _ => {} + | ^------ + | | + | matches no values because `Foo` is uninhabited + | help: remove the match arm + | + = note: to learn more about uninhabited types, see https://doc.rust-lang.org/nomicon/exotic-sizes.html#empty-types + +error: unreachable pattern + --> $DIR/uninhabited-unstable-field.rs:41:9 + | +LL | MyCoroutineState::Complete(_) => {} + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^------ + | | + | matches no values because `!` is uninhabited + | help: remove the match arm + | + = note: to learn more about uninhabited types, see https://doc.rust-lang.org/nomicon/exotic-sizes.html#empty-types + +error: aborting due to 3 previous errors +