Skip to content

Commit 2b1767d

Browse files
Fix dispatchKernel arguments in the examples
1 parent: c5f7a00 · commit: 2b1767d

File tree

6 files changed: 6 additions (+6) and 33 deletions (-33)

examples/float16/run.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -46,12 +46,9 @@ int main(int argc, char **argv) {
4646
}
4747
Tensor input = createTensor(ctx, Shape{N}, kf16, inputArr.data());
4848
Tensor output = createTensor(ctx, Shape{N}, kf16);
49-
std::promise<void> promise;
50-
std::future<void> future = promise.get_future();
5149
Kernel op = createKernel(ctx, {kGelu, 256, kf16}, Bindings{input, output},
5250
{cdiv(N, 256), 1, 1});
53-
dispatchKernel(ctx, op, promise);
54-
wait(ctx, future);
51+
dispatchKernel(ctx, op);
5552
toCPU(ctx, output, outputArr.data(), sizeof(outputArr));
5653

5754
for (int i = 0; i < 12; ++i) {

examples/gpu_puzzles/run.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -23,11 +23,8 @@ template <size_t N> std::array<float, N> makeData() {
2323

2424
template <size_t N, size_t R = N, size_t C = 1> void showResult(Context &ctx, Kernel &op, Tensor &output) {
2525

26-
std::promise<void> promise;
27-
std::future<void> future = promise.get_future();
28-
dispatchKernel(ctx, op, promise);
26+
dispatchKernel(ctx, op);
2927
std::array<float, R * C> outputArr;
30-
wait(ctx, future);
3128
toCPU(ctx, output, outputArr.data(), sizeof(outputArr));
3229
printf("%s", show<float, R, C>(outputArr, "output").c_str());
3330
}

examples/matmul/run.cpp

Lines changed: 1 addition & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -838,12 +838,9 @@ void runTest(int version, size_t M, size_t K, size_t N,
838838

839839
// Initialize Kernel and bind GPU buffers
840840
// pre-allocate for async dispatch
841-
std::array<std::promise<void>, nIter> promises;
842-
std::array<std::future<void>, nIter> futures;
843841
std::array<Kernel, nIter> kernels;
844842
std::array<Tensor, nIter> outputs;
845843
for (int i = 0; i < nIter; i++) {
846-
futures[i] = promises[i].get_future();
847844
outputs[i] = createTensor(ctx, Shape{M, N}, numtype);
848845
kernels[i] = selectMatmul(ctx, version, {input, weights, outputs[i]}, M, K, N, numtype);
849846
}
@@ -854,10 +851,7 @@ void runTest(int version, size_t M, size_t K, size_t N,
854851
// Dispatch kernel nIter times
855852
auto start = std::chrono::high_resolution_clock::now();
856853
for (int i = 0; i < nIter; i++) {
857-
dispatchKernel(ctx, kernels[i], promises[i]);
858-
}
859-
for (int i = 0; i < nIter; i++) {
860-
wait(ctx, futures[i]);
854+
dispatchKernel(ctx, kernels[i]);
861855
}
862856
auto end = std::chrono::high_resolution_clock::now();
863857

examples/physics/run.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -84,10 +84,7 @@ int main() {
8484
printf("\033[2J\033[H");
8585
while (true) {
8686
auto start = std::chrono::high_resolution_clock::now();
87-
std::promise<void> promise;
88-
std::future<void> future = promise.get_future();
89-
dispatchKernel(ctx, update, promise);
90-
wait(ctx, future);
87+
dispatchKernel(ctx, update);
9188
toCPU(ctx, pos, posArr.data(), sizeof(posArr));
9289
auto end = std::chrono::high_resolution_clock::now();
9390
std::chrono::duration<double> elapsed = end - start;

examples/shadertui/run.cpp

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -126,10 +126,7 @@ int main() {
126126
params.time = getCurrentTimeInMilliseconds(start);
127127
toGPU(ctx, params, renderKernel);
128128
auto frameStart = std::chrono::high_resolution_clock::now();
129-
std::promise<void> promise;
130-
std::future<void> future = promise.get_future();
131-
dispatchKernel(ctx, renderKernel, promise);
132-
wait(ctx, future);
129+
dispatchKernel(ctx, renderKernel);
133130
resetCommandBuffer(ctx.device, renderKernel);
134131
toCPU(ctx, screen, screenArr);
135132
rasterize<kRows, kCols>(screenArr, raster);

examples/transpose/run.cpp

Lines changed: 1 addition & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -162,20 +162,11 @@ void runTest(int version, size_t M, size_t N,
162162
LOG(kDefLog, kInfo, "Dispatching Kernel version %d, %d iterations ...",
163163
version, nIter);
164164

165-
// pre-allocate promises and futures for async dispatch
166-
// TODO(avh): implement a pooling mechanism for promises/futures in gpu.h
167-
std::array<std::promise<void>, nIter> promises;
168-
std::array<std::future<void>, nIter> futures;
169-
for (int i = 0; i < nIter; i++) {
170-
futures[i] = promises[i].get_future();
171-
}
172-
173165
// Dispatch kernel nIter times
174166
auto start = std::chrono::high_resolution_clock::now();
175167
for (int i = 0; i < nIter; i++) {
176168
if (!isCPU) {
177-
dispatchKernel(ctx, kernel, promises[i]);
178-
wait(ctx, futures[i]);
169+
dispatchKernel(ctx, kernel);
179170
resetCommandBuffer(ctx.device, kernel);
180171
} else {
181172
transpose(inputPtr.get(), outputPtr.get(), M, N);

0 commit comments

Comments
 (0)