Skip to content

[SYCL] Using ballot_group with group_barrier #8783

Closed
@abagusetty

Description

@abagusetty

Describe the bug
A ballot_group returned from get_ballot_group() hits SFINAE issues when used with group_barrier(): the is_group_v constraint on group_barrier rejects it. Thanks to @nbeams for the reproducer.

Error:

sycl_group_ballot.cpp:48:14: error: no matching function for call to 'group_barrier'
             sycl::group_barrier(active_threads, sycl::memory_scope_sub_group);
             ^~~~~~~~~~~~~~~~~~~
/soft/testing/dpcpp/bin/../include/sycl/group_barrier.hpp:35:1: note: candidate template ignored: requirement 'is_group_v<sycl::ext::oneapi::experimental::ballot_group<sycl::ext::oneapi::sub_group>>' was not satisfied [with Group = ballot_group<sub_group>]
group_barrier(Group, memory_scope FenceScope = Group::fence_scope) {
^

To Reproduce

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>

#include <sycl/sycl.hpp>

// Compares every element of `array` against `value` and reports the outcome
// on stdout.
//
// array       Host array to inspect; must hold at least `array_size` elements.
// array_size  Number of elements to check (nothing is checked when <= 0).
// value       Expected value of every element.
//
// Each element that is not within 1e-11 of `value` is printed as
// "A[k] = <element>". Afterwards "OK" is printed when no element was
// rejected, "FAIL" otherwise.
void check_vals(double *array, int array_size, double value) {
  const double tolerance = 1.e-11;
  bool pass = true;
  for (int k = 0; k < array_size; k++) {
    // fabs always selects the floating-point overload. The comparison is
    // written as !(x <= tol) so that a NaN element, for which every ordered
    // comparison is false, is rejected instead of silently accepted.
    if (!(fabs(array[k] - value) <= tolerance)) {
      pass = false;
      printf("A[%d] = %e\n", k, array[k]);
    }
  }
  if (pass)
    printf("OK\n\n");
  else
    printf("FAIL\n\n");
}

// Reproducer entry point.
//
// Launches a 1-D nd_range kernel over enough work-groups of `block_size`
// items to cover `array_size` elements. Work-items whose index is in bounds
// form a ballot_group, store 2.0 into the device buffer and then call
// sycl::group_barrier on that ballot_group. The buffer is copied back to the
// host and validated with check_vals.
//
// Returns 0 after the check has run, 1 if a buffer could not be allocated.
int main() {

  std::vector<sycl::platform> platforms = sycl::platform::get_platforms();
  std::vector<sycl::device> devices = platforms[0].get_devices();
  sycl::queue queue(devices[0], {sycl::property::queue::in_order()});

  const int M = 200;
  const int N = 1;
  const int block_size = 64;
  const int array_size = M * N;
  // Ceiling division: the smallest number of blocks that covers array_size.
  // (The previous expression tested the quotient instead of the remainder,
  // giving 0 blocks for array_size < block_size and one block too many when
  // array_size is an exact multiple of block_size.)
  const int num_blocks = (array_size + block_size - 1) / block_size;

  double *A = static_cast<double *>(malloc(array_size * sizeof(double)));
  double *dA = static_cast<double *>(
      sycl::malloc_device(array_size * sizeof(double), queue));
  if (A == nullptr || dA == nullptr) {
    fprintf(stderr, "buffer allocation failed\n");
    free(A);
    if (dA != nullptr)
      sycl::free(dA, queue);
    return 1;
  }

  queue.submit([&](sycl::handler &cgh) {
    cgh.parallel_for(
        sycl::nd_range<1>(sycl::range<1>(block_size * num_blocks),
                          sycl::range<1>(block_size)),
        [=](sycl::nd_item<1> item_ct1) {
          const int idx =
              item_ct1.get_local_id(0) + block_size * item_ct1.get_group(0);
          auto sg = item_ct1.get_sub_group();
          // Group made of the sub-group members whose index is in bounds.
          auto active_threads =
              sycl::ext::oneapi::experimental::get_ballot_group(
                  sg, idx < array_size);
          if (idx < array_size) {
            dA[idx] = 2.0;
            // This is the call the compiler rejects in the reported error.
            sycl::group_barrier(active_threads, sycl::memory_scope_sub_group);
          }
        });
  }).wait();

  // Copy from device to host
  queue.memcpy(A, dA, array_size * sizeof(double));
  queue.wait();
  check_vals(A, array_size, 2.0);

  sycl::free(dA, queue);
  free(A); // the host buffer was previously leaked

  return 0;
}

Metadata

Metadata

Assignees

No one assigned

    Labels

    bugSomething isn't working

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions