Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clang/include/clang/Basic/BuiltinsNVPTX.def
Original file line number Diff line number Diff line change
Expand Up @@ -205,6 +205,8 @@ BUILTIN(__nvvm_saturate_d, "dd", "")
BUILTIN(__nvvm_ex2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_f, "ff", "")
BUILTIN(__nvvm_ex2_approx_d, "dd", "")
TARGET_BUILTIN(__nvvm_ex2_approx_f16, "hh", "", AND(SM_75, PTX70))
TARGET_BUILTIN(__nvvm_ex2_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))

BUILTIN(__nvvm_lg2_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_lg2_approx_f, "ff", "")
Expand All @@ -218,6 +220,12 @@ BUILTIN(__nvvm_sin_approx_f, "ff", "")
BUILTIN(__nvvm_cos_approx_ftz_f, "ff", "")
BUILTIN(__nvvm_cos_approx_f, "ff", "")

// Tanh

TARGET_BUILTIN(__nvvm_tanh_approx_f, "ff", "", AND(SM_75,PTX70))
TARGET_BUILTIN(__nvvm_tanh_approx_f16, "hh", "", AND(SM_75, PTX70))
TARGET_BUILTIN(__nvvm_tanh_approx_f16x2, "V2hV2h", "", AND(SM_75, PTX70))

// Fma

BUILTIN(__nvvm_fma_rn_ftz_f, "ffff", "")
Expand Down
44 changes: 27 additions & 17 deletions libclc/generic/include/clcmacro.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@
#ifndef __CLC_MACRO_H
#define __CLC_MACRO_H

#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
} \
\
#define _CLC_UNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x) { \
return (RET_TYPE##3)(FUNCTION(x.x), FUNCTION(x.y), FUNCTION(x.z)); \
} \
Expand All @@ -30,12 +26,14 @@
return (RET_TYPE##16)(FUNCTION(x.lo), FUNCTION(x.hi)); \
}

#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
#define _CLC_UNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x) { \
return (RET_TYPE##2)(FUNCTION(x.x), FUNCTION(x.y)); \
} \
\
_CLC_UNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE)

#define _CLC_BINARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE) \
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y) { \
return (RET_TYPE##3)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y), \
FUNCTION(x.z, y.z)); \
Expand All @@ -53,6 +51,14 @@
return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
}

#define _CLC_BINARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y) { \
return (RET_TYPE##2)(FUNCTION(x.x, y.x), FUNCTION(x.y, y.y)); \
} \
_CLC_BINARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE)

#define _CLC_V_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE##2 y) { \
Expand All @@ -76,13 +82,8 @@
return (RET_TYPE##16)(FUNCTION(x, y.lo), FUNCTION(x, y.hi)); \
}

#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE, ARG3_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, \
ARG3_TYPE##2 z) { \
return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
} \
\
#define _CLC_TERNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE, ARG3_TYPE) \
DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, \
ARG3_TYPE##3 z) { \
return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
Expand All @@ -107,6 +108,15 @@
FUNCTION(x.hi, y.hi, z.hi)); \
}

#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE, ARG3_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, \
ARG3_TYPE##2 z) { \
return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
} \
_CLC_TERNARY_VECTORIZE_HAVE2(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE, ARG3_TYPE)

#define _CLC_V_S_S_V_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, \
ARG2_TYPE, ARG3_TYPE) \
DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE x, ARG2_TYPE y, ARG3_TYPE##2 z) { \
Expand Down
43 changes: 43 additions & 0 deletions libclc/generic/include/spirv/spirv_builtins.h
Original file line number Diff line number Diff line change
Expand Up @@ -15776,6 +15776,21 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp32_t
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp32_t
__spirv_ocl_native_exp2(__clc_vec16_fp32_t);

#ifdef cl_khr_fp16
_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp16_t __clc_native_exp2(__clc_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp16_t
__clc_native_exp2(__clc_vec2_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp16_t
__clc_native_exp2(__clc_vec3_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp16_t
__clc_native_exp2(__clc_vec4_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp16_t
__clc_native_exp2(__clc_vec8_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
__clc_native_exp2(__clc_vec16_fp16_t);
#endif

_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_native_log(__clc_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
Expand Down Expand Up @@ -19077,6 +19092,34 @@ _CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
__spirv_ocl_tanh(__clc_vec16_fp16_t);
#endif

_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp32_t __clc_native_tanh(__clc_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
__clc_native_tanh(__clc_vec2_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp32_t
__clc_native_tanh(__clc_vec3_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp32_t
__clc_native_tanh(__clc_vec4_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp32_t
__clc_native_tanh(__clc_vec8_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp32_t
__clc_native_tanh(__clc_vec16_fp32_t);

#ifdef cl_khr_fp16
_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp16_t __clc_native_tanh(__clc_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp16_t
__clc_native_tanh(__clc_vec2_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec3_fp16_t
__clc_native_tanh(__clc_vec3_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec4_fp16_t
__clc_native_tanh(__clc_vec4_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec8_fp16_t
__clc_native_tanh(__clc_vec8_fp16_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec16_fp16_t
__clc_native_tanh(__clc_vec16_fp16_t);
#endif

_CLC_OVERLOAD
_CLC_DECL _CLC_CONSTFN __clc_fp32_t __spirv_ocl_tanpi(__clc_fp32_t);
_CLC_OVERLOAD _CLC_DECL _CLC_CONSTFN __clc_vec2_fp32_t
Expand Down
60 changes: 60 additions & 0 deletions libclc/generic/libspirv/float16.cl
Original file line number Diff line number Diff line change
Expand Up @@ -4344,6 +4344,36 @@ __spirv_ocl_exp2(__clc_vec16_float16_t args_0) {
return __spirv_ocl_exp2(as_half16(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
__clc_native_exp2(__clc_float16_t args_0) {
return __clc_native_exp2(as_half(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec2_fp16_t
__clc_native_exp2(__clc_vec2_float16_t args_0) {
return __clc_native_exp2(as_half2(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec3_fp16_t
__clc_native_exp2(__clc_vec3_float16_t args_0) {
return __clc_native_exp2(as_half3(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec4_fp16_t
__clc_native_exp2(__clc_vec4_float16_t args_0) {
return __clc_native_exp2(as_half4(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec8_fp16_t
__clc_native_exp2(__clc_vec8_float16_t args_0) {
return __clc_native_exp2(as_half8(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec16_fp16_t
__clc_native_exp2(__clc_vec16_float16_t args_0) {
return __clc_native_exp2(as_half16(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
__spirv_ocl_expm1(__clc_float16_t args_0) {
return __spirv_ocl_expm1(as_half(args_0));
Expand Down Expand Up @@ -6613,6 +6643,36 @@ __spirv_ocl_tanh(__clc_vec16_float16_t args_0) {
return __spirv_ocl_tanh(as_half16(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
__clc_native_tanh(__clc_float16_t args_0) {
return __clc_native_tanh(as_half(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec2_fp16_t
__clc_native_tanh(__clc_vec2_float16_t args_0) {
return __clc_native_tanh(as_half2(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec3_fp16_t
__clc_native_tanh(__clc_vec3_float16_t args_0) {
return __clc_native_tanh(as_half3(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec4_fp16_t
__clc_native_tanh(__clc_vec4_float16_t args_0) {
return __clc_native_tanh(as_half4(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec8_fp16_t
__clc_native_tanh(__clc_vec8_float16_t args_0) {
return __clc_native_tanh(as_half8(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_vec16_fp16_t
__clc_native_tanh(__clc_vec16_float16_t args_0) {
return __clc_native_tanh(as_half16(args_0));
}

_CLC_OVERLOAD _CLC_DEF _CLC_CONSTFN __clc_fp16_t
__spirv_ocl_tanpi(__clc_float16_t args_0) {
return __spirv_ocl_tanpi(as_half(args_0));
Expand Down
1 change: 1 addition & 0 deletions libclc/ptx-nvidiacl/libspirv/SOURCES
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ math/native_rsqrt.cl
math/native_sin.cl
math/native_sqrt.cl
math/native_tan.cl
math/native_tanh.cl
math/nextafter.cl
math/pow.cl
math/remainder.cl
Expand Down
37 changes: 37 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/math/native_exp2.cl
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,41 @@
#define __CLC_FUNCTION __spirv_ocl_native_exp2
#define __CLC_BUILTIN __nv_exp2
#define __CLC_BUILTIN_F __CLC_XCONCAT(__CLC_BUILTIN, f)

#ifdef cl_khr_fp16
#pragma OPENCL EXTENSION cl_khr_fp16 : enable

int __clc_nvvm_reflect_arch();

_CLC_DEF _CLC_OVERLOAD half __clc_native_exp2(half x) {
if (__clc_nvvm_reflect_arch() >= 750) {
return __nvvm_ex2_approx_f16(x);
} else {
float upcast = x;
return __spirv_ocl_native_exp2(upcast);
}
}

_CLC_DEF _CLC_OVERLOAD half2 __clc_native_exp2(half2 x) {
if (__clc_nvvm_reflect_arch() >= 750) {
return __nvvm_ex2_approx_f16x2(x);
} else {
float upcast0 = x[0];
float upcast1 = x[1];
half2 res;
res.s0 = __spirv_ocl_native_exp2(upcast0);
res.s1 = __spirv_ocl_native_exp2(upcast1);
return res;
}
}

_CLC_UNARY_VECTORIZE_HAVE2(_CLC_OVERLOAD _CLC_DEF, half,
__clc_native_exp2, half)

#endif // cl_khr_fp16

// Undef halfs before uncluding unary builtins, as they are handled above.
#ifdef cl_khr_fp16
#undef cl_khr_fp16
#endif // cl_khr_fp16
#include <math/unary_builtin.inc>
41 changes: 41 additions & 0 deletions libclc/ptx-nvidiacl/libspirv/math/native_tanh.cl
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//===----------------------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include <spirv/spirv.h>

#include "../../include/libdevice.h"
#include <clcmacro.h>

extern int __clc_nvvm_reflect_arch();

#define __USE_TANH_APPROX (__clc_nvvm_reflect_arch() >= 750)

_CLC_DEF _CLC_OVERLOAD float __clc_native_tanh(float x) {
return (__USE_TANH_APPROX) ? __nvvm_tanh_approx_f(x) : __nv_tanhf(x);
}

_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, __clc_native_tanh, float)

#ifdef cl_khr_fp16

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

_CLC_DEF _CLC_OVERLOAD half __clc_native_tanh(half x) {
return (__USE_TANH_APPROX) ? __nvvm_tanh_approx_f16(x) : __nv_tanhf(x);
}

_CLC_DEF _CLC_OVERLOAD half2 __clc_native_tanh(half2 x) {
return (__USE_TANH_APPROX) ? __nvvm_tanh_approx_f16x2(x)
: (half2)(__nv_tanhf(x.x), __nv_tanhf(x.y));
}

_CLC_UNARY_VECTORIZE_HAVE2(_CLC_OVERLOAD _CLC_DEF, half, __clc_native_tanh, half)

#endif

#undef __USE_TANH_APPROX

11 changes: 11 additions & 0 deletions llvm/include/llvm/IR/IntrinsicsNVVM.td
Original file line number Diff line number Diff line change
Expand Up @@ -854,6 +854,17 @@ let TargetPrefix = "nvvm" in {
def int_nvvm_cos_approx_f : GCCBuiltin<"__nvvm_cos_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;

//
// Tanh
//

def int_nvvm_tanh_approx_f : GCCBuiltin<"__nvvm_tanh_approx_f">,
DefaultAttrsIntrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
def int_nvvm_tanh_approx_f16 : GCCBuiltin<"__nvvm_tanh_approx_f16">,
DefaultAttrsIntrinsic<[llvm_half_ty], [llvm_half_ty], [IntrNoMem]>;
def int_nvvm_tanh_approx_f16x2 : GCCBuiltin<"__nvvm_tanh_approx_f16x2">,
DefaultAttrsIntrinsic<[llvm_v2f16_ty], [llvm_v2f16_ty], [IntrNoMem]>;

//
// Fma
//
Expand Down
11 changes: 11 additions & 0 deletions llvm/lib/Target/NVPTX/NVPTXIntrinsics.td
Original file line number Diff line number Diff line change
Expand Up @@ -933,6 +933,17 @@ def INT_NVVM_COS_APPROX_FTZ_F : F_MATH_1<"cos.approx.ftz.f32 \t$dst, $src0;",
def INT_NVVM_COS_APPROX_F : F_MATH_1<"cos.approx.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_cos_approx_f>;

//
// Tanh
//

def INT_NVVM_TANH_APPROX_F : F_MATH_1<"tanh.approx.f32 \t$dst, $src0;",
Float32Regs, Float32Regs, int_nvvm_tanh_approx_f>;
def INT_NVVM_TANH_APPROX_F16 : F_MATH_1<"tanh.approx.f16 \t$dst, $src0;",
Float16Regs, Float16Regs, int_nvvm_tanh_approx_f16>;
def INT_NVVM_TANH_APPROX_F16X2 : F_MATH_1<"tanh.approx.f16x2 \t$dst, $src0;",
Float16x2Regs, Float16x2Regs, int_nvvm_tanh_approx_f16x2>;

//
// Fma
//
Expand Down
Loading