-
Notifications
You must be signed in to change notification settings - Fork 797
[SYCL][NVPTX] Set default fdiv and sqrt for llvm.fpbuiltin #16714
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
ca71b1f
5826911
ee64287
b16492f
01b3032
5ae0e94
6720ed0
5b6411a
32d8f6b
36f8688
cc7333f
9ea17f8
700ae48
2df0a72
e86be33
6113cc3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,6 +18,7 @@ | |
#include "llvm/IR/IRBuilder.h" | ||
#include "llvm/IR/InstIterator.h" | ||
#include "llvm/IR/IntrinsicInst.h" | ||
#include "llvm/IR/IntrinsicsNVPTX.h" | ||
#include "llvm/InitializePasses.h" | ||
#include "llvm/Support/FormatVariadic.h" | ||
|
||
|
@@ -106,6 +107,48 @@ static bool replaceWithLLVMIR(FPBuiltinIntrinsic &BuiltinCall) { | |
return true; | ||
} | ||
|
||
static bool replaceWithNVPTXCalls(FPBuiltinIntrinsic &BuiltinCall) { | ||
IRBuilder<> IRBuilder(&BuiltinCall); | ||
SmallVector<Value *> Args(BuiltinCall.args()); | ||
Value *Replacement = nullptr; | ||
// To choose between the ftz and non-ftz intrinsics. | ||
FastMathFlags FMF = BuiltinCall.getFastMathFlags(); | ||
auto *Type = BuiltinCall.getType(); | ||
// For now only add lowering for fdiv and sqrt. Yet nvvm intrinsics have | ||
// approximate variants for sin, cos, exp2 and log2. | ||
// For vector fpbuiltins on the NVPTX target we don't have nvvm intrinsics, so use | ||
// standard LLVM math operations. Also nvvm fdiv and sqrt intrinsics | ||
MrSidims marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// support only float type. | ||
switch (BuiltinCall.getIntrinsicID()) { | ||
case Intrinsic::fpbuiltin_fdiv: | ||
if (Type->isVectorTy() || !Type->getScalarType()->isFloatTy()) | ||
jchlanda marked this conversation as resolved.
Show resolved
Hide resolved
|
||
return replaceWithLLVMIR(BuiltinCall); | ||
Replacement = | ||
IRBuilder.CreateIntrinsic(Type, | ||
FMF.isFast() | ||
MrSidims marked this conversation as resolved.
Show resolved
Hide resolved
|
||
? Intrinsic::nvvm_div_approx_ftz_f | ||
: Intrinsic::nvvm_div_approx_f, Args); | ||
break; | ||
case Intrinsic::fpbuiltin_sqrt: | ||
if (Type->isVectorTy() || !Type->getScalarType()->isFloatTy()) | ||
return replaceWithLLVMIR(BuiltinCall); | ||
Replacement = | ||
IRBuilder.CreateIntrinsic(BuiltinCall.getType(), | ||
FMF.isFast() | ||
? Intrinsic::nvvm_sqrt_approx_ftz_f | ||
: Intrinsic::nvvm_sqrt_approx_f, Args); | ||
break; | ||
default: | ||
return false; | ||
} | ||
BuiltinCall.replaceAllUsesWith(Replacement); | ||
cast<Instruction>(Replacement)->copyFastMathFlags(&BuiltinCall); | ||
LLVM_DEBUG(dbgs() << DEBUG_TYPE << ": Replaced call to `" | ||
<< BuiltinCall.getCalledFunction()->getName() | ||
<< "` with equivalent IR. \n `"); | ||
return true; | ||
} | ||
|
||
static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI, | ||
const TargetTransformInfo &TTI, | ||
FPBuiltinIntrinsic &BuiltinCall) { | ||
|
@@ -154,6 +197,14 @@ static bool selectFnForFPBuiltinCalls(const TargetLibraryInfo &TLI, | |
} | ||
} | ||
|
||
// We don't have an implementation for CUDA approximate precision builtins. | ||
maksimsab marked this conversation as resolved.
Show resolved
Hide resolved
|
||
// Let's map them to NVPTX intrinsics. If no appropriate intrinsics are known | ||
// - skip to replaceWithAltMathFunction. | ||
if (T.isNVPTX() && BuiltinCall.getRequiredAccuracy().value() == 3.0) { | ||
MrSidims marked this conversation as resolved.
Show resolved
Hide resolved
|
||
if (replaceWithNVPTXCalls(BuiltinCall)) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should we somehow encode "skip"/"fallback" to alt math functions part into the name? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure, from my perspective the code here is not complex and speaks for itself :) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. My point was not in the complexity, but in the expectations. Let's say you read this specific function, i.e. you start top-down to understand high-level structure first. Just by looking at the name, you may expect that this specific step will introduce some NVPTX-specific intrinsics, but in fact, it will do some other form of lowering which could be surprising for you unless you also went through the function itself. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Sure, renamed and clarified in the comment |
||
return true; | ||
} | ||
|
||
/// Call TLI to select a function implementation to call | ||
StringRef ImplName = TLI.selectFPBuiltinImplementation(&BuiltinCall); | ||
if (ImplName.empty()) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,81 @@ | ||
; RUN: opt -fpbuiltin-fn-selection -S < %s | FileCheck %s | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I put this test in CodeGen as Andy was adding his tests for fpbuiltin there (including those which were testing just transformations without invoking llc) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would prefer those tests to be in There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Totally agree, let's keep the tests in one place for now. If necessary, we will move some of them to |
||
|
||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64" | ||
target triple = "nvptx64-nvidia-cuda" | ||
|
||
; CHECK-LABEL: @test_fdiv | ||
; CHECK: %{{.*}} = call float @llvm.nvvm.div.approx.f(float %{{.*}}, float %{{.*}}) | ||
; CHECK: %{{.*}} = fdiv <2 x float> %{{.*}}, %{{.*}} | ||
define void @test_fdiv(float %d1, <2 x float> %v2d1, | ||
float %d2, <2 x float> %v2d2) { | ||
entry: | ||
%t0 = call float @llvm.fpbuiltin.fdiv.f32(float %d1, float %d2) #0 | ||
%t1 = call <2 x float> @llvm.fpbuiltin.fdiv.v2f32(<2 x float> %v2d1, <2 x float> %v2d2) #0 | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: @test_fdiv_fast | ||
; CHECK: %{{.*}} = call fast float @llvm.nvvm.div.approx.ftz.f(float %{{.*}}, float %{{.*}}) | ||
; CHECK: %{{.*}} = fdiv fast <2 x float> %{{.*}}, %{{.*}} | ||
define void @test_fdiv_fast(float %d1, <2 x float> %v2d1, | ||
float %d2, <2 x float> %v2d2) { | ||
entry: | ||
%t0 = call fast float @llvm.fpbuiltin.fdiv.f32(float %d1, float %d2) #0 | ||
%t1 = call fast <2 x float> @llvm.fpbuiltin.fdiv.v2f32(<2 x float> %v2d1, <2 x float> %v2d2) #0 | ||
ret void | ||
} | ||
|
||
declare float @llvm.fpbuiltin.fdiv.f32(float, float) | ||
declare <2 x float> @llvm.fpbuiltin.fdiv.v2f32(<2 x float>, <2 x float>) | ||
|
||
; CHECK-LABEL: @test_fdiv_double | ||
; CHECK: %{{.*}} = fdiv double %{{.*}}, %{{.*}} | ||
; CHECK: %{{.*}} = fdiv <2 x double> %{{.*}}, %{{.*}} | ||
define void @test_fdiv_double(double %d1, <2 x double> %v2d1, | ||
double %d2, <2 x double> %v2d2) { | ||
entry: | ||
%t0 = call double @llvm.fpbuiltin.fdiv.f64(double %d1, double %d2) #0 | ||
%t1 = call <2 x double> @llvm.fpbuiltin.fdiv.v2f64(<2 x double> %v2d1, <2 x double> %v2d2) #0 | ||
ret void | ||
} | ||
|
||
declare double @llvm.fpbuiltin.fdiv.f64(double, double) | ||
declare <2 x double> @llvm.fpbuiltin.fdiv.v2f64(<2 x double>, <2 x double>) | ||
|
||
; CHECK-LABEL: @test_sqrt | ||
; CHECK: %{{.*}} = call float @llvm.nvvm.sqrt.approx.f(float %{{.*}}) | ||
; CHECK: %{{.*}} = call <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.*}}) | ||
define void @test_sqrt(float %d, <2 x float> %v2d, <4 x float> %v4d) { | ||
entry: | ||
%t0 = call float @llvm.fpbuiltin.sqrt.f32(float %d) #0 | ||
%t1 = call <2 x float> @llvm.fpbuiltin.sqrt.v2f32(<2 x float> %v2d) #0 | ||
ret void | ||
} | ||
|
||
; CHECK-LABEL: @test_sqrt_fast | ||
; CHECK: %{{.*}} = call fast float @llvm.nvvm.sqrt.approx.ftz.f(float %{{.*}}) | ||
; CHECK: %{{.*}} = call fast <2 x float> @llvm.sqrt.v2f32(<2 x float> %{{.*}}) | ||
define void @test_sqrt_fast(float %d, <2 x float> %v2d, <4 x float> %v4d) { | ||
entry: | ||
%t0 = call fast float @llvm.fpbuiltin.sqrt.f32(float %d) #0 | ||
%t1 = call fast <2 x float> @llvm.fpbuiltin.sqrt.v2f32(<2 x float> %v2d) #0 | ||
ret void | ||
} | ||
|
||
declare float @llvm.fpbuiltin.sqrt.f32(float) | ||
declare <2 x float> @llvm.fpbuiltin.sqrt.v2f32(<2 x float>) | ||
|
||
; CHECK-LABEL: @test_sqrt_double | ||
; CHECK: %{{.*}} = call double @llvm.sqrt.f64(double %{{.*}}) | ||
; CHECK: %{{.*}} = call <2 x double> @llvm.sqrt.v2f64(<2 x double> %{{.*}}) | ||
define void @test_sqrt_double(double %d, <2 x double> %v2d) { | ||
entry: | ||
%t0 = call double @llvm.fpbuiltin.sqrt.f64(double %d) #0 | ||
%t1 = call <2 x double> @llvm.fpbuiltin.sqrt.v2f64(<2 x double> %v2d) #0 | ||
ret void | ||
} | ||
|
||
declare double @llvm.fpbuiltin.sqrt.f64(double) | ||
declare <2 x double> @llvm.fpbuiltin.sqrt.v2f64(<2 x double>) | ||
|
||
attributes #0 = { "fpbuiltin-max-error"="3.0" } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
We only call this function if the requested precision is exactly 3.0, but the name is quite generic. I think that it is worth adding a comment right before the function to better specify its intent/expected use case, in case it is extended in the future.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Renamed the function and added the comment