diff --git a/.github/workflows/sycl-rel-nightly.yml b/.github/workflows/sycl-rel-nightly.yml index aa751bcf41583..42137b8de70a2 100644 --- a/.github/workflows/sycl-rel-nightly.yml +++ b/.github/workflows/sycl-rel-nightly.yml @@ -32,6 +32,11 @@ jobs: image_options: -u 1001 --device=/dev/dri --device=/dev/kfd target_devices: hip:gpu + - name: NVIDIA/CUDA + runner: '["Linux", "cuda"]' + image_options: -u 1001 --gpus all --cap-add SYS_ADMIN + target_devices: cuda:gpu + - name: Intel L0 Gen12 GPU runner: '["Linux", "gen12"]' image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN @@ -43,6 +48,11 @@ jobs: image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN target_devices: level_zero:gpu + - name: Intel L0 Arc A-Series GPU + runner: '["Linux", "arc"]' + image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN + target_devices: level_zero:gpu + - name: Intel OCL Gen12 GPU runner: '["Linux", "gen12"]' image_options: -u 1001 --device=/dev/dri -v /dev/dri/by-path:/dev/dri/by-path --privileged --cap-add SYS_ADMIN @@ -102,40 +112,6 @@ jobs: extra_lit_opts: ${{ matrix.extra_lit_opts }} repo_ref: ${{ github.sha }} - cuda-aws-start: - needs: [ubuntu2204_build] - if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }} - uses: ./.github/workflows/sycl-aws.yml - secrets: inherit - with: - mode: start - ref: ${{ github.sha }} - - cuda-run-tests: - needs: [ubuntu2204_build, cuda-aws-start] - if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }} - uses: ./.github/workflows/sycl-linux-run-tests.yml - with: - name: CUDA E2E - runner: '["aws_cuda-${{ github.run_id }}-${{ github.run_attempt }}"]' - image: ghcr.io/intel/llvm/ubuntu2204_build:latest-133fee559371ce0e6ff867e378c21cde2bdf6c90 - image_options: -u 1001 --gpus all --cap-add 
SYS_ADMIN --env NVIDIA_DISABLE_REQUIRE=1 - target_devices: cuda:gpu - repo_ref: ${{ github.sha }} - - sycl_toolchain_artifact: sycl_linux_default - sycl_toolchain_archive: ${{ needs.ubuntu2204_build.outputs.artifact_archive_name }} - sycl_toolchain_decompress_command: ${{ needs.ubuntu2204_build.outputs.artifact_decompress_command }} - - cuda-aws-stop: - needs: [cuda-aws-start, cuda-run-tests] - if: always() && ${{ needs.cuda-aws-start.result != 'skipped' }} - uses: ./.github/workflows/sycl-aws.yml - secrets: inherit - with: - mode: stop - ref: ${{ github.sha }} - build-sycl-cts: needs: ubuntu2204_build if: ${{ always() && !cancelled() && needs.ubuntu2204_build.outputs.build_conclusion == 'success' }} diff --git a/sycl/test-e2e/ESIMD/group_barrier.cpp b/sycl/test-e2e/ESIMD/group_barrier.cpp index 7f91644032298..6a4613ef9f533 100644 --- a/sycl/test-e2e/ESIMD/group_barrier.cpp +++ b/sycl/test-e2e/ESIMD/group_barrier.cpp @@ -14,40 +14,56 @@ #include "esimd_test_utils.hpp" #include #include +#include -static constexpr int WorkGroupSize = 16; +namespace syclex = sycl::ext::oneapi::experimental; + +static constexpr int WorkGroupSize = 32; static constexpr int VL = 16; + +template class MyKernel; + template bool test(sycl::queue &q) { bool Pass = true; - const auto MaxWGs = 8; - size_t WorkItemCount = MaxWGs * WorkGroupSize * VL; std::cout << "Test case UseThisWorkItemAPI=" << std::to_string(UseThisWorkItemAPI) << std::endl; const auto Props = sycl::ext::oneapi::experimental::properties{ sycl::ext::oneapi::experimental::use_root_sync}; - sycl::buffer DataBuf{sycl::range{WorkItemCount}}; - const auto Range = sycl::nd_range<1>{MaxWGs * WorkGroupSize, WorkGroupSize}; + auto Bundle = + sycl::get_kernel_bundle(q.get_context()); + auto Kernel = Bundle.template get_kernel>(); + sycl::range<3> LocalRange{WorkGroupSize, 1, 1}; + auto MaxWGs = Kernel.template ext_oneapi_get_info< + syclex::info::kernel_queue_specific::max_num_work_groups>(q, LocalRange, + 0); + auto GlobalRange = 
LocalRange; + // Launch MaxWGs work-groups so the root-group barrier spans several groups. + GlobalRange[0] *= MaxWGs; + size_t WorkItemCount = GlobalRange.size() * VL; + sycl::buffer DataBuf{WorkItemCount}; + const auto Range = sycl::nd_range<3>{GlobalRange, LocalRange}; q.submit([&](sycl::handler &h) { sycl::accessor Data{DataBuf, h}; - h.parallel_for(Range, Props, [=](sycl::nd_item<1> it) SYCL_ESIMD_KERNEL { - int ID = it.get_global_linear_id(); - __ESIMD_NS::simd V(ID, 1); - // Write data to another kernel's data to verify the barrier works. - __ESIMD_NS::block_store( - Data, (WorkItemCount * sizeof(int)) - (ID * sizeof(int) * VL), V); - if constexpr (UseThisWorkItemAPI) { - auto Root = - sycl::ext::oneapi::experimental::this_work_item::get_root_group< - 1>(); - sycl::group_barrier(Root); - } else { - auto Root = it.ext_oneapi_get_root_group(); - sycl::group_barrier(Root); - } - __ESIMD_NS::simd VOther(ID * VL, 1); - __ESIMD_NS::block_store(Data, ID * sizeof(int) * VL, VOther); - }); + h.parallel_for>( + Range, Props, [=](sycl::nd_item<3> it) SYCL_ESIMD_KERNEL { + int ID = it.get_global_linear_id(); + __ESIMD_NS::simd V(ID, 1); + // Write data to another kernel's data to verify the barrier works. 
+ __ESIMD_NS::block_store( + Data, (WorkItemCount * sizeof(int)) - (ID * sizeof(int) * VL), + V); + if constexpr (UseThisWorkItemAPI) { + auto Root = sycl::ext::oneapi::experimental::this_work_item:: + get_root_group<3>(); + sycl::group_barrier(Root); + } else { + auto Root = it.ext_oneapi_get_root_group(); + sycl::group_barrier(Root); + } + __ESIMD_NS::simd VOther(ID * VL, 1); + __ESIMD_NS::block_store(Data, ID * sizeof(int) * VL, VOther); + }); }).wait(); sycl::host_accessor Data{DataBuf}; int ErrCnt = 0; diff --git a/sycl/test-e2e/Graph/Explicit/buffer_ordering.cpp b/sycl/test-e2e/Graph/Explicit/buffer_ordering.cpp index 6cdbb29690fd7..f0775c7d698f0 100644 --- a/sycl/test-e2e/Graph/Explicit/buffer_ordering.cpp +++ b/sycl/test-e2e/Graph/Explicit/buffer_ordering.cpp @@ -1,5 +1,3 @@ -// XFAIL: run-mode && gpu-intel-dg2 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/18579 // RUN: %{build} -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG diff --git a/sycl/test-e2e/Graph/RecordReplay/buffer_ordering.cpp b/sycl/test-e2e/Graph/RecordReplay/buffer_ordering.cpp index 91beb23ea01be..fb6d9ea0e0e78 100644 --- a/sycl/test-e2e/Graph/RecordReplay/buffer_ordering.cpp +++ b/sycl/test-e2e/Graph/RecordReplay/buffer_ordering.cpp @@ -1,5 +1,3 @@ -// XFAIL: run-mode && gpu-intel-dg2 -// XFAIL-TRACKER: https://github.com/intel/llvm/issues/18579 // RUN: %{build} -o %t.out // RUN: %{run} %t.out // Extra run to check for leaks in Level Zero using UR_L0_LEAKS_DEBUG diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp index 996f8c98a3ac2..38ae6bf29bf1a 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_SLM.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: target-nvidia, target-amd // UNSUPPORTED-INTENDED: aspect-ext_intel_matrix isn't currently supported for // other triples -// 
XFAIL: run-mode && gpu-intel-dg2 +// XFAIL: run-mode && igc-dev // XFAIL-TRACKER: CMPLRLLVM-66371 // REQUIRES: aspect-ext_intel_matrix, gpu diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp index 0f3b2b93dcd91..11c00d3842ed5 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_arg_dim.cpp @@ -17,7 +17,7 @@ // Waiting for the commit in IGC to be pulled into the driver to resolve the // test. -// XFAIL: gpu-intel-dg2 && run-mode +// XFAIL: run-mode && igc-dev // XFAIL-TRACKER: GSD-10510 #include "common.hpp" diff --git a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp index a290a8cb00a6f..4fd2463cbd047 100644 --- a/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp +++ b/sycl/test-e2e/Matrix/joint_matrix_bf16_fill_k_cache_runtime_dim.cpp @@ -18,7 +18,7 @@ // Waiting for the commit in IGC to be pulled into the driver to resolve the // test. -// XFAIL: gpu-intel-dg2 && run-mode +// XFAIL: run-mode && igc-dev // XFAIL-TRACKER: GSD-10510 #include "common.hpp"