Description
Is there a SYCL function for cudaOccupancyMaxActiveBlocksPerMultiprocessor? Some use cases are listed below. Thanks.
AITemplate/3rdparty/cutlass/include/cutlass/gemm/device/gemm_universal_adapter.h: result = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
AITemplate/3rdparty/cutlass/include/cutlass/gemm/device/gemm_universal_base.h: cudart_result = cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags(
AITemplate/3rdparty/cutlass/include/cutlass/gemm/device/gemm_universal_base.h: CUTLASS_TRACE_HOST(" cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags() returned error " << cudaGetErrorString(cudart_result));
AITemplate/3rdparty/cutlass/include/cutlass/gemm/device/base_grouped.h: result =
AITemplate/3rdparty/cub/cub/device/dispatch/dispatch_radix_sort.cuh: if (CubDebug(error = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
AITemplate/3rdparty/cub/cub/util_device.cuh: return CubDebug(cudaOccupancyMaxActiveBlocksPerMultiprocessor(
AITemplate/python/aitemplate/backend/cuda/groupnorm/layer_norm.cuh: cudaError_t err = cudaOccupancyMaxActiveBlocksPerMultiprocessor(
AITemplate/python/aitemplate/backend/cuda/layernorm_sigmoid_mul/layer_norm.cuh: cudaError_t err =
AITemplate/python/aitemplate/backend/cuda/softmax/softmax.cuh: cudaOccupancyMaxActiveBlocksPerMultiprocessor(