Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[NFC] clang-format nativecpu_utils.cpp. #17570

Merged
merged 3 commits into from
Mar 24, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
388 changes: 202 additions & 186 deletions libdevice/nativecpu_utils.cpp
Original file line number Diff line number Diff line change
@@ -50,25 +50,22 @@ __spirv_MemoryBarrier(int32_t Memory, int32_t Semantics) noexcept {
__mux_mem_barrier(Memory, Semantics);
}

// Turning clang format off here because it reorders macro invocations
// making the following code very difficult to read.
// clang-format off

#define DefGenericCastToPtrExplImpl(sfx, asp, cv)\
DEVICE_EXTERNAL cv asp void *\
__spirv_GenericCastToPtrExplicit_##sfx(cv void *p ,int) noexcept {\
return (cv asp void *)p;\
}
#define DefGenericCastToPtrExplImpl(sfx, asp, cv) \
DEVICE_EXTERNAL cv asp void *__spirv_GenericCastToPtrExplicit_##sfx( \
cv void *p, int) noexcept { \
return (cv asp void *)p; \
} \
static_assert(true)

#define DefGenericCastToPtrExpl(sfx, asp)\
DefGenericCastToPtrExplImpl(sfx, asp, )\
DefGenericCastToPtrExplImpl(sfx, asp, const)\
DefGenericCastToPtrExplImpl(sfx, asp, volatile)\
#define DefGenericCastToPtrExpl(sfx, asp) \
DefGenericCastToPtrExplImpl(sfx, asp, ); \
DefGenericCastToPtrExplImpl(sfx, asp, const); \
DefGenericCastToPtrExplImpl(sfx, asp, volatile); \
DefGenericCastToPtrExplImpl(sfx, asp, const volatile)

DefGenericCastToPtrExpl(ToPrivate, OCL_PRIVATE)
DefGenericCastToPtrExpl(ToLocal, OCL_LOCAL)
DefGenericCastToPtrExpl(ToGlobal, OCL_GLOBAL)
DefGenericCastToPtrExpl(ToPrivate, OCL_PRIVATE);
DefGenericCastToPtrExpl(ToLocal, OCL_LOCAL);
DefGenericCastToPtrExpl(ToGlobal, OCL_GLOBAL);

#define DefSubgroupBlockINTEL1(Type, PType) \
template <> \
@@ -80,15 +77,18 @@ DefGenericCastToPtrExpl(ToGlobal, OCL_GLOBAL)
__SYCL_CONVERGENT__ DEVICE_EXTERNAL void \
__spirv_SubgroupBlockWriteINTEL<Type>(PType OCL_GLOBAL * ptr, \
Type v) noexcept { \
((Type*)ptr)[__spirv_SubgroupLocalInvocationId()] = v; \
}
((Type *)ptr)[__spirv_SubgroupLocalInvocationId()] = v; \
} \
static_assert(true)

#define DefSubgroupBlockINTEL_vt(Type, VT_name) \
DefSubgroupBlockINTEL1(ncpu_types::vtypes<Type>::VT_name, Type)

#define DefSubgroupBlockINTEL(Type) \
DefSubgroupBlockINTEL1(Type, Type) DefSubgroupBlockINTEL_vt(Type, v2) \
DefSubgroupBlockINTEL_vt(Type, v4) DefSubgroupBlockINTEL_vt(Type, v8)
DefSubgroupBlockINTEL1(Type, Type); \
DefSubgroupBlockINTEL_vt(Type, v2); \
DefSubgroupBlockINTEL_vt(Type, v4); \
DefSubgroupBlockINTEL_vt(Type, v8)

namespace ncpu_types {
template <class T> struct vtypes {
@@ -98,33 +98,39 @@ template <class T> struct vtypes {
};
} // namespace ncpu_types

DefSubgroupBlockINTEL(uint32_t) DefSubgroupBlockINTEL(uint64_t)
DefSubgroupBlockINTEL(uint8_t) DefSubgroupBlockINTEL(uint16_t)

#define DefineGOp1(spir_sfx, name)\
DEVICE_EXTERN_C bool __mux_sub_group_##name##_i1(bool) noexcept;\
DEVICE_EXTERN_C bool __mux_work_group_##name##_i1(uint32_t id, bool val) noexcept;\
DEVICE_EXTERNAL bool __spirv_Group ## spir_sfx(int32_t g, bool val) noexcept {\
if (__spv::Scope::Flag::Subgroup == g)\
return __mux_sub_group_##name##_i1(val);\
else if (__spv::Scope::Flag::Workgroup == g)\
return __mux_work_group_##name##_i1(0, val);\
return false;\
}

DefineGOp1(Any, any)
DefineGOp1(All, all)
DefSubgroupBlockINTEL(uint32_t);
DefSubgroupBlockINTEL(uint64_t);
DefSubgroupBlockINTEL(uint8_t);
DefSubgroupBlockINTEL(uint16_t);

#define DefineGOp1(spir_sfx, name) \
DEVICE_EXTERN_C bool __mux_sub_group_##name##_i1(bool) noexcept; \
DEVICE_EXTERN_C bool __mux_work_group_##name##_i1(uint32_t id, \
bool val) noexcept; \
DEVICE_EXTERNAL bool __spirv_Group##spir_sfx(int32_t g, bool val) noexcept { \
if (__spv::Scope::Flag::Subgroup == g) \
return __mux_sub_group_##name##_i1(val); \
else if (__spv::Scope::Flag::Workgroup == g) \
return __mux_work_group_##name##_i1(0, val); \
return false; \
} \
static_assert(true)

DefineGOp1(Any, any);
DefineGOp1(All, all);

#define DefineGOp(Type, MuxType, spir_sfx, mux_sfx) \
DEVICE_EXTERN_C MuxType __mux_sub_group_scan_inclusive_##mux_sfx(MuxType) noexcept; \
DEVICE_EXTERN_C MuxType __mux_sub_group_scan_exclusive_##mux_sfx(MuxType) noexcept; \
DEVICE_EXTERN_C MuxType __mux_sub_group_scan_inclusive_##mux_sfx( \
MuxType) noexcept; \
DEVICE_EXTERN_C MuxType __mux_sub_group_scan_exclusive_##mux_sfx( \
MuxType) noexcept; \
DEVICE_EXTERN_C MuxType __mux_sub_group_reduce_##mux_sfx(MuxType) noexcept; \
DEVICE_EXTERN_C MuxType __mux_work_group_scan_exclusive_##mux_sfx(uint32_t, \
MuxType) noexcept; \
DEVICE_EXTERN_C MuxType __mux_work_group_scan_inclusive_##mux_sfx(uint32_t, \
MuxType) noexcept; \
DEVICE_EXTERN_C MuxType __mux_work_group_reduce_##mux_sfx(uint32_t, MuxType) noexcept;\
DEVICE_EXTERN_C MuxType __mux_work_group_scan_exclusive_##mux_sfx( \
uint32_t, MuxType) noexcept; \
DEVICE_EXTERN_C MuxType __mux_work_group_scan_inclusive_##mux_sfx( \
uint32_t, MuxType) noexcept; \
DEVICE_EXTERN_C MuxType __mux_work_group_reduce_##mux_sfx(uint32_t, \
MuxType) noexcept; \
DEVICE_EXTERNAL Type __spirv_Group##spir_sfx(int32_t g, int32_t id, \
Type v) noexcept { \
if (__spv::Scope::Flag::Subgroup == g) { \
@@ -144,102 +150,104 @@ DefineGOp1(All, all)
return __mux_work_group_reduce_##mux_sfx(bid, v); \
} \
return Type(); /*todo: add support for other flags as they are tested*/ \
}
} \
static_assert(true)

#define DefineSignedGOp(Name, MuxName, Bits)\
#define DefineSignedGOp(Name, MuxName, Bits) \
DefineGOp(int##Bits##_t, int##Bits##_t, Name, MuxName##Bits)

#define DefineUnsignedGOp(Name, MuxName, Bits)\
#define DefineUnsignedGOp(Name, MuxName, Bits) \
DefineGOp(uint##Bits##_t, int##Bits##_t, Name, MuxName##Bits)

#define Define_32_64(Define, Name, MuxName) \
Define(Name, MuxName, 32) \
Define(Name, MuxName, 32); \
Define(Name, MuxName, 64)

// todo: add support for other integer and float types once there are tests
#define DefineIntGOps(Name, MuxName) \
Define_32_64(DefineSignedGOp, Name, MuxName) \
Define_32_64(DefineSignedGOp, Name, MuxName); \
Define_32_64(DefineUnsignedGOp, Name, MuxName)

#define DefineFPGOps(Name, MuxName) \
DefineGOp(float, float, Name, MuxName##32) \
DefineGOp(_Float16 , _Float16 , Name, MuxName##16) \
DefineGOp(float, float, Name, MuxName##32); \
DefineGOp(_Float16, _Float16, Name, MuxName##16); \
DefineGOp(double, double, Name, MuxName##64)

DefineIntGOps(IAdd, add_i)
DefineIntGOps(IMulKHR, mul_i)
DefineIntGOps(IAdd, add_i);
DefineIntGOps(IMulKHR, mul_i);

Define_32_64(DefineUnsignedGOp, UMin, umin_i)
Define_32_64(DefineUnsignedGOp, UMax, umax_i)
Define_32_64(DefineSignedGOp, SMin, smin_i)
Define_32_64(DefineSignedGOp, SMax, smax_i)
Define_32_64(DefineUnsignedGOp, UMin, umin_i);
Define_32_64(DefineUnsignedGOp, UMax, umax_i);
Define_32_64(DefineSignedGOp, SMin, smin_i);
Define_32_64(DefineSignedGOp, SMax, smax_i);

DefineFPGOps(FMulKHR, fmul_f)
DefineFPGOps(FAdd, fadd_f)
DefineFPGOps(FMin, fmin_f)
DefineFPGOps(FMax, fmax_f)
DefineFPGOps(FMulKHR, fmul_f);
DefineFPGOps(FAdd, fadd_f);
DefineFPGOps(FMin, fmin_f);
DefineFPGOps(FMax, fmax_f);

#define DefineBitwiseGroupOp(Type, MuxType, mux_sfx) \
DefineGOp(Type, MuxType, BitwiseOrKHR, or_##mux_sfx) \
DefineGOp(Type, MuxType, BitwiseXorKHR, xor_##mux_sfx) \
#define DefineBitwiseGroupOp(Type, MuxType, mux_sfx) \
DefineGOp(Type, MuxType, BitwiseOrKHR, or_##mux_sfx); \
DefineGOp(Type, MuxType, BitwiseXorKHR, xor_##mux_sfx); \
DefineGOp(Type, MuxType, BitwiseAndKHR, and_##mux_sfx)

DefineBitwiseGroupOp(int32_t, int32_t, i32)
DefineBitwiseGroupOp(uint32_t, int32_t, i32)
DefineBitwiseGroupOp(int64_t, int64_t, i64)
DefineBitwiseGroupOp(uint64_t, int64_t, i64)
DefineBitwiseGroupOp(int32_t, int32_t, i32);
DefineBitwiseGroupOp(uint32_t, int32_t, i32);
DefineBitwiseGroupOp(int64_t, int64_t, i64);
DefineBitwiseGroupOp(uint64_t, int64_t, i64);

#define DefineLogicalGroupOp(Type, MuxType, mux_sfx) \
DefineGOp(Type, MuxType, LogicalOrKHR, logical_or_##mux_sfx) \
DefineGOp(Type, MuxType, LogicalXorKHR, logical_xor_##mux_sfx) \
#define DefineLogicalGroupOp(Type, MuxType, mux_sfx) \
DefineGOp(Type, MuxType, LogicalOrKHR, logical_or_##mux_sfx); \
DefineGOp(Type, MuxType, LogicalXorKHR, logical_xor_##mux_sfx); \
DefineGOp(Type, MuxType, LogicalAndKHR, logical_and_##mux_sfx)

DefineLogicalGroupOp(bool, bool, i1)

#define DefineBroadcastMuxType(Type, Sfx, MuxType, IDType) \
DEVICE_EXTERN_C MuxType __mux_work_group_broadcast_##Sfx( \
int32_t id, MuxType val, uint64_t lidx, uint64_t lidy, uint64_t lidz) noexcept; \
DEVICE_EXTERN_C MuxType __mux_sub_group_broadcast_##Sfx(MuxType val, \
int32_t sg_lid) noexcept;

#define DefineBroadCastImpl(Type, Sfx, MuxType, IDType) \
DEVICE_EXTERNAL Type __spirv_GroupBroadcast(int32_t g, Type v, \
IDType l) noexcept { \
if (__spv::Scope::Flag::Subgroup == g) \
return __mux_sub_group_broadcast_##Sfx(v, l); \
else \
return __mux_work_group_broadcast_##Sfx(0, v, l, 0, 0); \
} \
\
DEVICE_EXTERNAL Type __spirv_GroupBroadcast(int32_t g, Type v, \
sycl::vec<IDType, 2>::vector_t l) noexcept { \
if (__spv::Scope::Flag::Subgroup == g) \
return __mux_sub_group_broadcast_##Sfx(v, l[0]); \
else \
return __mux_work_group_broadcast_##Sfx(0, v, l[0], l[1], 0); \
} \
\
DEVICE_EXTERNAL Type __spirv_GroupBroadcast(int32_t g, Type v, \
sycl::vec<IDType, 3>::vector_t l) noexcept { \
if (__spv::Scope::Flag::Subgroup == g) \
return __mux_sub_group_broadcast_##Sfx(v, l[0]); \
else \
return __mux_work_group_broadcast_##Sfx(0, v, l[0], l[1], l[2]); \
} \

#define DefineBroadCast(Type, Sfx, MuxType) \
DefineBroadcastMuxType(Type, Sfx, MuxType, uint32_t) \
DefineBroadcastMuxType(Type, Sfx, MuxType, uint64_t) \
DefineBroadCastImpl(Type, Sfx, MuxType, uint32_t) \
DefineBroadCastImpl(Type, Sfx, MuxType, uint64_t) \

DefineBroadCast(uint32_t, i32, int32_t)
DefineBroadCast(int32_t, i32, int32_t)
DefineBroadCast(float, f32, float)
DefineBroadCast(double, f64, double)
DefineBroadCast(uint64_t, i64, int64_t)
DefineBroadCast(int64_t, i64, int64_t)
DefineLogicalGroupOp(bool, bool, i1);

#define DefineBroadcastMuxType(Type, Sfx, MuxType, IDType) \
DEVICE_EXTERN_C MuxType __mux_work_group_broadcast_##Sfx( \
int32_t id, MuxType val, uint64_t lidx, uint64_t lidy, \
uint64_t lidz) noexcept; \
DEVICE_EXTERN_C MuxType __mux_sub_group_broadcast_##Sfx( \
MuxType val, int32_t sg_lid) noexcept

#define DefineBroadCastImpl(Type, Sfx, MuxType, IDType) \
DEVICE_EXTERNAL Type __spirv_GroupBroadcast(int32_t g, Type v, \
IDType l) noexcept { \
if (__spv::Scope::Flag::Subgroup == g) \
return __mux_sub_group_broadcast_##Sfx(v, l); \
else \
return __mux_work_group_broadcast_##Sfx(0, v, l, 0, 0); \
} \
\
DEVICE_EXTERNAL Type __spirv_GroupBroadcast( \
int32_t g, Type v, sycl::vec<IDType, 2>::vector_t l) noexcept { \
if (__spv::Scope::Flag::Subgroup == g) \
return __mux_sub_group_broadcast_##Sfx(v, l[0]); \
else \
return __mux_work_group_broadcast_##Sfx(0, v, l[0], l[1], 0); \
} \
\
DEVICE_EXTERNAL Type __spirv_GroupBroadcast( \
int32_t g, Type v, sycl::vec<IDType, 3>::vector_t l) noexcept { \
if (__spv::Scope::Flag::Subgroup == g) \
return __mux_sub_group_broadcast_##Sfx(v, l[0]); \
else \
return __mux_work_group_broadcast_##Sfx(0, v, l[0], l[1], l[2]); \
} \
static_assert(true)

#define DefineBroadCast(Type, Sfx, MuxType) \
DefineBroadcastMuxType(Type, Sfx, MuxType, uint32_t); \
DefineBroadcastMuxType(Type, Sfx, MuxType, uint64_t); \
DefineBroadCastImpl(Type, Sfx, MuxType, uint32_t); \
DefineBroadCastImpl(Type, Sfx, MuxType, uint64_t)

DefineBroadCast(uint32_t, i32, int32_t);
DefineBroadCast(int32_t, i32, int32_t);
DefineBroadCast(float, f32, float);
DefineBroadCast(double, f64, double);
DefineBroadCast(uint64_t, i64, int64_t);
DefineBroadCast(int64_t, i64, int64_t);

#define DefShuffleINTEL(Type, Sfx, MuxType) \
DEVICE_EXTERN_C MuxType __mux_sub_group_shuffle_##Sfx(MuxType val, \
@@ -248,7 +256,8 @@ DefineBroadCast(int64_t, i64, int64_t)
DEVICE_EXTERNAL Type __spirv_SubgroupShuffleINTEL<Type>( \
Type val, unsigned id) noexcept { \
return (Type)__mux_sub_group_shuffle_##Sfx((MuxType)val, id); \
}
} \
static_assert(true)

#define DefShuffleUpINTEL(Type, Sfx, MuxType) \
DEVICE_EXTERN_C MuxType __mux_sub_group_shuffle_up_##Sfx( \
@@ -258,7 +267,8 @@ DefineBroadCast(int64_t, i64, int64_t)
Type prev, Type curr, unsigned delta) noexcept { \
return (Type)__mux_sub_group_shuffle_up_##Sfx((MuxType)prev, \
(MuxType)curr, delta); \
}
} \
static_assert(true)

#define DefShuffleDownINTEL(Type, Sfx, MuxType) \
DEVICE_EXTERN_C MuxType __mux_sub_group_shuffle_down_##Sfx( \
@@ -268,7 +278,8 @@ DefineBroadCast(int64_t, i64, int64_t)
Type curr, Type next, unsigned delta) noexcept { \
return (Type)__mux_sub_group_shuffle_down_##Sfx((MuxType)curr, \
(MuxType)next, delta); \
}
} \
static_assert(true)

#define DefShuffleXorINTEL(Type, Sfx, MuxType) \
DEVICE_EXTERN_C MuxType __mux_sub_group_shuffle_xor_##Sfx(MuxType val, \
@@ -277,115 +288,120 @@ DefineBroadCast(int64_t, i64, int64_t)
DEVICE_EXTERNAL Type __spirv_SubgroupShuffleXorINTEL<Type>( \
Type data, unsigned value) noexcept { \
return (Type)__mux_sub_group_shuffle_xor_##Sfx((MuxType)data, value); \
}
} \
static_assert(true)

#define DefShuffleINTEL_All(Type, Sfx, MuxType) \
DefShuffleINTEL(Type, Sfx, MuxType) \
DefShuffleUpINTEL(Type, Sfx, MuxType) \
DefShuffleDownINTEL(Type, Sfx, MuxType) \
DefShuffleINTEL(Type, Sfx, MuxType); \
DefShuffleUpINTEL(Type, Sfx, MuxType); \
DefShuffleDownINTEL(Type, Sfx, MuxType); \
DefShuffleXorINTEL(Type, Sfx, MuxType)

DefShuffleINTEL_All(uint64_t, i64, int64_t)
DefShuffleINTEL_All(int64_t, i64, int64_t)
DefShuffleINTEL_All(int32_t, i32, int32_t)
DefShuffleINTEL_All(uint32_t, i32, int32_t)
DefShuffleINTEL_All(int16_t, i16, int16_t)
DefShuffleINTEL_All(uint16_t, i16, int16_t)
DefShuffleINTEL_All(int8_t, i8, int8_t)
DefShuffleINTEL_All(uint8_t, i8, int8_t)
DefShuffleINTEL_All(double, f64, double)
DefShuffleINTEL_All(float, f32, float)
DefShuffleINTEL_All(_Float16, f16, _Float16)

// Vector versions of shuffle are generated by the FixABIBuiltinsSYCLNativeCPU pass
DefShuffleINTEL_All(uint64_t, i64, int64_t);
DefShuffleINTEL_All(int64_t, i64, int64_t);
DefShuffleINTEL_All(int32_t, i32, int32_t);
DefShuffleINTEL_All(uint32_t, i32, int32_t);
DefShuffleINTEL_All(int16_t, i16, int16_t);
DefShuffleINTEL_All(uint16_t, i16, int16_t);
DefShuffleINTEL_All(int8_t, i8, int8_t);
DefShuffleINTEL_All(uint8_t, i8, int8_t);
DefShuffleINTEL_All(double, f64, double);
DefShuffleINTEL_All(float, f32, float);
DefShuffleINTEL_All(_Float16, f16, _Float16);

#define Define2ArgForward(Type, Name, Callee)\
DEVICE_EXTERNAL Type Name(Type a, Type b) noexcept { return Callee(a,b);}
// Vector versions of shuffle are generated by the FixABIBuiltinsSYCLNativeCPU
// pass

Define2ArgForward(uint64_t, __spirv_ocl_u_min, std::min)
#define Define2ArgForward(Type, Name, Callee) \
DEVICE_EXTERNAL Type Name(Type a, Type b) noexcept { return Callee(a, b); } \
static_assert(true)

Define2ArgForward(uint64_t, __spirv_ocl_u_min, std::min);

#define GEN_u32(bname, muxname) \
DEVICE_EXTERN_C uint32_t muxname(); \
DEVICE_EXTERNAL uint32_t bname() { return muxname(); }
DEVICE_EXTERNAL uint32_t bname() { return muxname(); } \
static_assert(true)
// subgroup
GEN_u32(__spirv_SubgroupLocalInvocationId, __mux_get_sub_group_local_id)
GEN_u32(__spirv_SubgroupMaxSize, __mux_get_max_sub_group_size)
GEN_u32(__spirv_SubgroupId, __mux_get_sub_group_id)
GEN_u32(__spirv_NumSubgroups, __mux_get_num_sub_groups)
GEN_u32(__spirv_SubgroupSize, __mux_get_sub_group_size)
GEN_u32(__spirv_SubgroupLocalInvocationId, __mux_get_sub_group_local_id);
GEN_u32(__spirv_SubgroupMaxSize, __mux_get_max_sub_group_size);
GEN_u32(__spirv_SubgroupId, __mux_get_sub_group_id);
GEN_u32(__spirv_NumSubgroups, __mux_get_num_sub_groups);
GEN_u32(__spirv_SubgroupSize, __mux_get_sub_group_size);

// I64_I32
#define GEN_p(bname, muxname, arg) \
DEVICE_EXTERN_C uint64_t muxname(uint32_t); \
DEVICE_EXTERNAL uint64_t bname() { return muxname(arg); }
DEVICE_EXTERNAL uint64_t bname() { return muxname(arg); } \
static_assert(true)

#define GEN_xyz(bname, ncpu_name) \
GEN_p(bname##_x, ncpu_name, 0) \
GEN_p(bname##_y, ncpu_name, 1) \
GEN_p(bname##_x, ncpu_name, 0); \
GEN_p(bname##_y, ncpu_name, 1); \
GEN_p(bname##_z, ncpu_name, 2)

GEN_xyz(__spirv_GlobalInvocationId, __mux_get_global_id)
GEN_xyz(__spirv_GlobalSize, __mux_get_global_size)
GEN_xyz(__spirv_GlobalOffset, __mux_get_global_offset)
GEN_xyz(__spirv_LocalInvocationId, __mux_get_local_id)
GEN_xyz(__spirv_NumWorkgroups, __mux_get_num_groups)
GEN_xyz(__spirv_WorkgroupSize, __mux_get_local_size)
GEN_xyz(__spirv_WorkgroupId, __mux_get_group_id)
GEN_xyz(__spirv_GlobalInvocationId, __mux_get_global_id);
GEN_xyz(__spirv_GlobalSize, __mux_get_global_size);
GEN_xyz(__spirv_GlobalOffset, __mux_get_global_offset);
GEN_xyz(__spirv_LocalInvocationId, __mux_get_local_id);
GEN_xyz(__spirv_NumWorkgroups, __mux_get_num_groups);
GEN_xyz(__spirv_WorkgroupSize, __mux_get_local_size);
GEN_xyz(__spirv_WorkgroupId, __mux_get_group_id);

#define NCPUPREFIX(name) __dpcpp_nativecpu##name

template <class T> using MakeGlobalType =
typename sycl::detail::DecoratedType < T, sycl::access::address_space::
global_space>::type;
template <class T>
using MakeGlobalType = typename sycl::detail::DecoratedType<
T, sycl::access::address_space::global_space>::type;

#define DefStateSetWithType(name, field, type) \
DEVICE_EXTERNAL_C void NCPUPREFIX(name)( \
DEVICE_EXTERNAL_C void __dpcpp_nativecpu_##name( \
type value, MakeGlobalType<__nativecpu_state> *s) { \
s->field = value; \
}
} \
static_assert(true)

// Subgroup setters
DefStateSetWithType(_set_num_sub_groups, NumSubGroups, uint32_t)
DefStateSetWithType(_set_sub_group_id, SubGroup_id, uint32_t)
DefStateSetWithType(_set_max_sub_group_size, SubGroup_size, uint32_t)
DefStateSetWithType(set_num_sub_groups, NumSubGroups, uint32_t);
DefStateSetWithType(set_sub_group_id, SubGroup_id, uint32_t);
DefStateSetWithType(set_max_sub_group_size, SubGroup_size, uint32_t);

#define DefineStateGetWithType(name, field, type)\
DEVICE_EXTERNAL_C type NCPUPREFIX(name)( \
#define DefineStateGetWithType(name, field, type) \
DEVICE_EXTERNAL_C type __dpcpp_nativecpu_##name( \
MakeGlobalType<__nativecpu_state> *s) { \
return s->field; \
}
} \
static_assert(true)
#define DefineStateGet_U32(name, field) \
DefineStateGetWithType(name, field, uint32_t)

// Subgroup getters
DefineStateGet_U32(_get_sub_group_id, SubGroup_id)
DefineStateGet_U32(_get_sub_group_local_id, SubGroup_local_id)
DefineStateGet_U32(_get_sub_group_size, SubGroup_size)
DefineStateGet_U32(_get_max_sub_group_size, SubGroup_size)
DefineStateGet_U32(_get_num_sub_groups, NumSubGroups)
DefineStateGet_U32(get_sub_group_id, SubGroup_id);
DefineStateGet_U32(get_sub_group_local_id, SubGroup_local_id);
DefineStateGet_U32(get_sub_group_size, SubGroup_size);
DefineStateGet_U32(get_max_sub_group_size, SubGroup_size);
DefineStateGet_U32(get_num_sub_groups, NumSubGroups);

#define DefineStateGetWithType2(name, field, rtype, ptype) \
DEVICE_EXTERNAL_C rtype NCPUPREFIX(name)(ptype dim, \
MakeGlobalType<__nativecpu_state> *s) { \
DEVICE_EXTERNAL_C rtype __dpcpp_nativecpu_##name( \
ptype dim, MakeGlobalType<__nativecpu_state> *s) { \
return s->field[dim]; \
}
} \
static_assert(true)

#define DefineStateGet_U64(name, field) \
DefineStateGetWithType2(name, field, uint64_t, uint32_t)

// Workgroup getters
DefineStateGet_U64(_get_global_id, MGlobal_id)
DefineStateGet_U64(_get_global_range, MGlobal_range)
DefineStateGet_U64(_get_global_offset, MGlobalOffset)
DefineStateGet_U64(_get_local_id, MLocal_id)
DefineStateGet_U64(_get_num_groups, MNumGroups)
DefineStateGet_U64(_get_wg_size, MWorkGroup_size)
DefineStateGet_U64(_get_wg_id, MWorkGroup_id)

DEVICE_EXTERNAL_C void
__dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value,
MakeGlobalType<__nativecpu_state> *s) {
DefineStateGet_U64(get_global_id, MGlobal_id);
DefineStateGet_U64(get_global_range, MGlobal_range);
DefineStateGet_U64(get_global_offset, MGlobalOffset);
DefineStateGet_U64(get_local_id, MLocal_id);
DefineStateGet_U64(get_num_groups, MNumGroups);
DefineStateGet_U64(get_wg_size, MWorkGroup_size);
DefineStateGet_U64(get_wg_id, MWorkGroup_id);

DEVICE_EXTERNAL_C
void __dpcpp_nativecpu_set_local_id(uint32_t dim, uint64_t value,
MakeGlobalType<__nativecpu_state> *s) {
s->MLocal_id[dim] = value;
s->MGlobal_id[dim] = s->MWorkGroup_size[dim] * s->MWorkGroup_id[dim] +
s->MLocal_id[dim] + s->MGlobalOffset[dim];