@@ -838,12 +838,9 @@ void runTest(int version, size_t M, size_t K, size_t N,
838
838
839
839
// Initialize Kernel and bind GPU buffers
840
840
// pre-allocate for async dispatch
841
- std::array<std::promise<void >, nIter> promises;
842
- std::array<std::future<void >, nIter> futures;
843
841
std::array<Kernel, nIter> kernels;
844
842
std::array<Tensor, nIter> outputs;
845
843
for (int i = 0 ; i < nIter; i++) {
846
- futures[i] = promises[i].get_future ();
847
844
outputs[i] = createTensor (ctx, Shape{M, N}, numtype);
848
845
kernels[i] = selectMatmul (ctx, version, {input, weights, outputs[i]}, M, K, N, numtype);
849
846
}
@@ -854,10 +851,7 @@ void runTest(int version, size_t M, size_t K, size_t N,
854
851
// Dispatch kernel nIter times
855
852
auto start = std::chrono::high_resolution_clock::now ();
856
853
for (int i = 0 ; i < nIter; i++) {
857
- dispatchKernel (ctx, kernels[i], promises[i]);
858
- }
859
- for (int i = 0 ; i < nIter; i++) {
860
- wait (ctx, futures[i]);
854
+ dispatchKernel (ctx, kernels[i]);
861
855
}
862
856
auto end = std::chrono::high_resolution_clock::now ();
863
857
0 commit comments