Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid dynamic memory allocation in kernel launch #3861

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/include/migraphx/program.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -79,6 +79,8 @@ struct MIGRAPHX_EXPORT program

std::unordered_map<std::string, shape> get_parameter_shapes() const;

std::size_t total_instructions() const;

std::vector<argument> eval(parameter_map params,
execution_environment exec_env = execution_environment{}) const;

Expand Down
39 changes: 29 additions & 10 deletions src/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#include <unordered_set>
#include <map>
#include <cassert>
#include <memory_resource>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
Expand Down Expand Up @@ -462,24 +463,27 @@
std::vector<argument> generic_eval(const module* mod,
std::vector<context>& ctx,
std::unordered_map<std::string, argument> params,
std::unordered_map<instruction_ref, argument> results,
std::pmr::unordered_map<instruction_ref, argument>& results,
F trace)
{
assert(mod->validate() == mod->end());
results.reserve(mod->size() * 2);
std::vector<argument> values;
values.reserve(16);
for(auto ins : iterator_for(*mod))
{
assert(results.find(ins) == results.end());
assert(mod->name() != "main" or results.find(ins) == results.end());
#ifndef NDEBUG
results.emplace(ins, argument{});
#endif
const auto& name = ins->name();
if(name == "@literal")
{
results.emplace(ins, trace(ins, [&] { return ins->get_literal().get_argument(); }));
results.insert_or_assign(ins,
trace(ins, [&] { return ins->get_literal().get_argument(); }));
}
else if(name == "@param")
{
results.emplace(
results.insert_or_assign(
ins, trace(ins, [&] {
auto param_name = any_cast<builtin::param>(ins->get_operator()).parameter;
if(not contains(params, param_name))
Expand All @@ -498,7 +502,8 @@
}
else if(name == "@outline")
{
results.emplace(ins, trace(ins, [&] { return argument{ins->get_shape(), nullptr}; }));
results.insert_or_assign(
ins, trace(ins, [&] { return argument{ins->get_shape(), nullptr}; }));
}
else if(name == "@return")
{
Expand Down Expand Up @@ -527,7 +532,7 @@
return generic_eval(smod, ctx, inputs, results, trace);
};

results.emplace(
results.insert_or_assign(
ins, trace(ins, [&] {
auto op = ins->normalized_operator();
if(op.is_context_free())
Expand All @@ -551,14 +556,28 @@
F trace)
{
const module* mm = p.get_main_module();
return generic_eval(mm, ctx, params, {}, trace);
std::size_t n = p.total_instructions();
std::vector<char> buffer(n * (sizeof(instruction_ref) + sizeof(argument)) * 4);
std::pmr::monotonic_buffer_resource bres(
buffer.data(), buffer.size(), std::pmr::null_memory_resource());
std::pmr::unordered_map<instruction_ref, argument> results(&bres);
results.reserve(n);
return generic_eval(mm, ctx, params, results, trace);
}

std::size_t program::total_instructions() const
{
    // Sum the instruction counts of every module owned by this program.
    std::size_t count = 0;
    for(const auto& mod : impl->modules)
        count += mod.second.size();
    return count;
}

std::vector<argument> program::eval_with_context(std::vector<context>& ctx,
parameter_map params) const
{
const module* mm = this->get_main_module();
return generic_eval(mm, ctx, std::move(params), {}, [](auto&&, auto f) { return f(); });
return generic_eval(*this, ctx, std::move(params), [](auto&&, auto f) { return f(); });

Check warning on line 580 in src/program.cpp

View check run for this annotation

Codecov / codecov/patch

src/program.cpp#L580

Added line #L580 was not covered by tests
}

std::vector<argument> program::eval(parameter_map params, execution_environment exec_env) const
Expand Down
36 changes: 30 additions & 6 deletions src/targets/gpu/code_object_op.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -24,6 +24,7 @@
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/register_op.hpp>
#include <memory_resource>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
Expand All @@ -45,15 +46,38 @@ shape code_object_op::compute_shape(std::vector<shape> inputs) const
to_string_range(inputs) + "]");
return output;
}

// Returns true when any argument is a tuple, i.e. the argument list must be
// flattened before per-argument data pointers can be collected.
static bool needs_flatten(const std::vector<argument>& args)
{
    // Capture-less lambda: the predicate reads nothing from the enclosing
    // scope, so the original [&] capture was unnecessary.
    return std::any_of(args.begin(), args.end(), [](const argument& arg) {
        return arg.get_shape().type() == shape::tuple_type;
    });
}

// Invokes f with the argument list, flattening it first only when it
// contains tuple arguments; the common tuple-free case passes args through
// untouched.
template <class F>
static void visit_flatten_args(const std::vector<argument>& args, F f)
{
    if(not needs_flatten(args))
    {
        f(args);
        return;
    }
    f(flatten(args));
}

argument
code_object_op::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    // Stack-backed arena for the kernel-argument pointer array: up to 32
    // pointers (256 bytes) are carved out of arg_storage without touching the
    // heap; larger argument lists fall back to the resource's upstream
    // allocator.
    std::array<char, 256> arg_storage;
    std::pmr::monotonic_buffer_resource arena{arg_storage.data(), arg_storage.size()};
    std::pmr::vector<void*> kernel_args(&arena);
    visit_flatten_args(args, [&](const auto& flat_args) {
        kernel_args.reserve(flat_args.size());
        for(const argument& arg : flat_args)
            kernel_args.push_back(arg.data());
    });
    auto [start, stop] = ctx.get_perf_events();
    k.launch(ctx.get_stream().get(), global, local, kernel_args, start, stop);
    return args[get_output_arg(args.size())];
}
void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)
Expand Down
26 changes: 23 additions & 3 deletions src/targets/gpu/include/migraphx/gpu/kernel.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -28,6 +28,7 @@
#include <migraphx/gpu/pack_args.hpp>
#include <hip/hip_runtime_api.h>
#include <memory>
#include <memory_resource>
#include <string>
#include <vector>

Expand All @@ -39,6 +40,25 @@ struct kernel_impl;

struct MIGRAPHX_GPU_EXPORT kernel
{
// Non-owning view over a contiguous array of kernel-argument pointers.
// Allows launch() to accept either a std::vector or a std::pmr::vector (or a
// raw pointer/length pair) without copying or allocating. The viewed storage
// must outlive the call that uses this view.
struct pointers
{
    pointers() = default;

    pointers(void** pp, std::size_t pn) : p(pp), n(pn) {}

    // Implicit conversions are intentional so callers can pass a vector
    // straight to launch(). The references must be non-const: data() on a
    // const vector yields void* const*, which cannot convert to void**.
    pointers(std::vector<void*>& v) : p(v.data()), n(v.size()) {}
    pointers(std::pmr::vector<void*>& v) : p(v.data()), n(v.size()) {}

    // Start of the viewed pointer array (nullptr when default-constructed).
    void** data() const { return p; }

    // Number of pointers in the view.
    std::size_t size() const { return n; }

    // Total byte size of the viewed array.
    std::size_t bytes() const { return n * sizeof(void*); }

    private:
    void** p = nullptr;
    std::size_t n = 0;
};
kernel() = default;
kernel(const char* image, const std::string& name);
template <class T, MIGRAPHX_REQUIRES(sizeof(T) == 1)>
Expand All @@ -57,11 +77,11 @@ struct MIGRAPHX_GPU_EXPORT kernel
void launch(hipStream_t stream,
std::size_t global,
std::size_t local,
std::vector<void*> args,
pointers args,
hipEvent_t start = nullptr,
hipEvent_t stop = nullptr) const;

template <class... Ts>
template <class... Ts, MIGRAPHX_REQUIRES(std::is_convertible<Ts, hipEvent_t>{}...)>
auto launch(hipStream_t stream, std::size_t global, std::size_t local, Ts... zs) const
{
return [=](auto&&... xs) {
Expand Down
6 changes: 3 additions & 3 deletions src/targets/gpu/kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -129,13 +129,13 @@ void launch_kernel(hipFunction_t fun,
// Launches the compiled kernel on the given stream, forwarding the packed
// argument-pointer view and optional start/stop events to launch_kernel.
void kernel::launch(hipStream_t stream,
                    std::size_t global,
                    std::size_t local,
                    pointers args,
                    hipEvent_t start,
                    hipEvent_t stop) const
{
    assert(impl != nullptr);
    // Any object pointer converts implicitly to void*, so no cast is needed
    // here (the previous reinterpret_cast was overkill for void** -> void*).
    void* kernargs  = args.data();
    std::size_t size = args.bytes();

    launch_kernel(impl->fun, stream, global, local, kernargs, size, start, stop);
}
Expand Down
Loading