|
| 1 | +// Tests using an OpenCL-C defined kernel with multiple local accessors |
| 2 | + |
| 3 | +#include "../graph_common.hpp" |
| 4 | + |
// Kernel bundle in the `ext_oneapi_source` state, i.e. the type returned by
// create_kernel_bundle_from_source() in main() before the source is built.
| 5 | +using source_kb = sycl::kernel_bundle<sycl::bundle_state::ext_oneapi_source>; |
// Kernel bundle in the `executable` state, i.e. the type returned by build()
// in main(), from which the kernel object is retrieved.
| 6 | +using exe_kb = sycl::kernel_bundle<sycl::bundle_state::executable>; |
| 7 | + |
// OpenCL C source of the `test_la` kernel, compiled at runtime by main().
//
// Kernel arguments, in order:
//   0: out        - global output buffer, one int per work-item
//   1: local_ptr1 - local memory, one int per work-item of the group
//   2: local_ptr2 - local memory, one int2 per work-item of the group
//   3: n          - scalar multiplier
//
// Besides the two local-memory arguments the kernel also declares its own
// `__local` array, so three distinct local allocations are in use. Each
// work-item writes
//   out[gid] = lid * n + num_groups * (0xA + 0xB + 0xC + 0xD)
// where `num_groups` is the number of work-groups in dimension 0.
auto constexpr LocalAccCLSource = R"===(
    kernel void test_la(__global int *out, __local int* local_ptr1,
                        __local int2* local_ptr2, int n) {
      __local int4 local_data[1];

      size_t gid = get_global_id(0);
      size_t lid = get_local_id(0);
      size_t num_groups = get_num_groups(0);

      local_ptr1[lid] = lid;
      local_ptr2[lid].x = n;
      local_ptr2[lid].y = num_groups;

      if (lid == 0) {
        local_data[lid] = (int4)(0xA, 0xB, 0xC, 0xD);
      }

      barrier(CLK_LOCAL_MEM_FENCE);

      int acc = local_data[0].x + local_data[0].y + local_data[0].z +
                local_data[0].w;
      out[gid] = (local_ptr1[lid] * local_ptr2[lid].x) +
                 (local_ptr2[lid].y * acc);
    }
)===";
| 33 | + |
| 34 | +int main() { |
| 35 | + queue Queue; |
| 36 | + |
| 37 | + source_kb kbSrc = exp_ext::create_kernel_bundle_from_source( |
| 38 | + Queue.get_context(), exp_ext::source_language::opencl, LocalAccCLSource); |
| 39 | + exe_kb kbExe1 = exp_ext::build(kbSrc); |
| 40 | + sycl::kernel test_kernel = kbExe1.ext_oneapi_get_kernel("test_la"); |
| 41 | + |
| 42 | + exp_ext::command_graph Graph{Queue}; |
| 43 | + |
| 44 | + int32_t *Ptr = malloc_device<int32_t>(Size, Queue); |
| 45 | + |
| 46 | + int32_t N = 42; |
| 47 | + constexpr size_t LocalSize = 256; |
| 48 | + auto Node = add_node(Graph, Queue, [&](handler &cgh) { |
| 49 | + local_accessor<int32_t, 1> acc_local1(LocalSize, cgh); |
| 50 | + local_accessor<sycl::int2, 1> acc_local2(LocalSize, cgh); |
| 51 | + |
| 52 | + cgh.set_arg(0, Ptr); |
| 53 | + cgh.set_arg(1, acc_local1); |
| 54 | + cgh.set_arg(2, acc_local2); |
| 55 | + cgh.set_arg(3, N); |
| 56 | + |
| 57 | + cgh.parallel_for(nd_range<1>(Size, LocalSize), test_kernel); |
| 58 | + }); |
| 59 | + |
| 60 | + auto ExecGraph = Graph.finalize(); |
| 61 | + Queue.ext_oneapi_graph(ExecGraph).wait(); |
| 62 | + |
| 63 | + std::vector<int32_t> HostData(Size); |
| 64 | + Queue.copy(Ptr, HostData.data(), Size).wait(); |
| 65 | + |
| 66 | + constexpr int32_t Acc = 0xA + 0xB + 0xC + 0xD; |
| 67 | + constexpr int32_t WorkGroups = Size / LocalSize; |
| 68 | + constexpr int32_t Tmp = Acc * WorkGroups; |
| 69 | + for (size_t i = 0; i < Size; i++) { |
| 70 | + int32_t local_id = i % LocalSize; |
| 71 | + int32_t Ref = (local_id * N) + Tmp; |
| 72 | + assert(HostData[i] == Ref); |
| 73 | + } |
| 74 | + |
| 75 | + sycl::free(Ptr, Queue); |
| 76 | + |
| 77 | + return 0; |
| 78 | +} |
0 commit comments