Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Avoid dynamic memory allocation in kernel launch #3861

Open
wants to merge 10 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/include/migraphx/program.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -79,6 +79,8 @@ struct MIGRAPHX_EXPORT program

std::unordered_map<std::string, shape> get_parameter_shapes() const;

std::size_t total_instructions() const;

std::vector<argument> eval(parameter_map params,
execution_environment exec_env = execution_environment{}) const;

Expand Down
39 changes: 29 additions & 10 deletions src/program.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
#include <unordered_set>
#include <map>
#include <cassert>
#include <memory_resource>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
Expand Down Expand Up @@ -462,24 +463,27 @@
std::vector<argument> generic_eval(const module* mod,
std::vector<context>& ctx,
std::unordered_map<std::string, argument> params,
std::unordered_map<instruction_ref, argument> results,
std::pmr::unordered_map<instruction_ref, argument>& results,
F trace)
{
assert(mod->validate() == mod->end());
results.reserve(mod->size() * 2);
std::vector<argument> values;
values.reserve(16);
for(auto ins : iterator_for(*mod))
{
assert(results.find(ins) == results.end());
assert(mod->name() != "main" or results.find(ins) == results.end());
#ifndef NDEBUG
results.emplace(ins, argument{});
#endif
const auto& name = ins->name();
if(name == "@literal")
{
results.emplace(ins, trace(ins, [&] { return ins->get_literal().get_argument(); }));
results.insert_or_assign(ins,
trace(ins, [&] { return ins->get_literal().get_argument(); }));
}
else if(name == "@param")
{
results.emplace(
results.insert_or_assign(
ins, trace(ins, [&] {
auto param_name = any_cast<builtin::param>(ins->get_operator()).parameter;
if(not contains(params, param_name))
Expand All @@ -498,7 +502,8 @@
}
else if(name == "@outline")
{
results.emplace(ins, trace(ins, [&] { return argument{ins->get_shape(), nullptr}; }));
results.insert_or_assign(
ins, trace(ins, [&] { return argument{ins->get_shape(), nullptr}; }));
}
else if(name == "@return")
{
Expand Down Expand Up @@ -527,7 +532,7 @@
return generic_eval(smod, ctx, inputs, results, trace);
};

results.emplace(
results.insert_or_assign(
ins, trace(ins, [&] {
auto op = ins->normalized_operator();
if(op.is_context_free())
Expand All @@ -551,14 +556,28 @@
F trace)
{
const module* mm = p.get_main_module();
return generic_eval(mm, ctx, params, {}, trace);
std::size_t n = p.total_instructions();
std::vector<char> buffer(n * (sizeof(instruction_ref) + sizeof(argument)) * 4);
std::pmr::monotonic_buffer_resource bres(
buffer.data(), buffer.size(), std::pmr::null_memory_resource());
std::pmr::unordered_map<instruction_ref, argument> results(&bres);
results.reserve(n);
return generic_eval(mm, ctx, params, results, trace);
}

std::size_t program::total_instructions() const
{
    // Sum the instruction counts of every module owned by this program.
    std::size_t count = 0;
    for(const auto& mod : impl->modules)
        count += mod.second.size();
    return count;
}

std::vector<argument> program::eval_with_context(std::vector<context>& ctx,
parameter_map params) const
{
const module* mm = this->get_main_module();
return generic_eval(mm, ctx, std::move(params), {}, [](auto&&, auto f) { return f(); });
return generic_eval(*this, ctx, std::move(params), [](auto&&, auto f) { return f(); });

Check warning on line 580 in src/program.cpp

View check run for this annotation

Codecov / codecov/patch

src/program.cpp#L580

Added line #L580 was not covered by tests
}

std::vector<argument> program::eval(parameter_map params, execution_environment exec_env) const
Expand Down
36 changes: 30 additions & 6 deletions src/targets/gpu/code_object_op.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -24,6 +24,7 @@
#include <migraphx/gpu/code_object_op.hpp>
#include <migraphx/gpu/context.hpp>
#include <migraphx/register_op.hpp>
#include <memory_resource>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
Expand All @@ -45,15 +46,38 @@ shape code_object_op::compute_shape(std::vector<shape> inputs) const
to_string_range(inputs) + "]");
return output;
}

// Returns true when any argument is a tuple, i.e. the argument list must be
// flattened before per-argument data pointers can be collected.
static bool needs_flatten(const std::vector<argument>& args)
{
    // Capture-less lambda: the predicate reads nothing from the enclosing
    // scope, so the original [&] capture was unnecessary.
    return std::any_of(args.begin(), args.end(), [](const argument& arg) {
        return arg.get_shape().type() == shape::tuple_type;
    });
}

// Invokes f with the argument list, flattening it first only when it
// contains tuple arguments; the common tuple-free case passes args through
// untouched.
template <class F>
static void visit_flatten_args(const std::vector<argument>& args, F f)
{
    if(not needs_flatten(args))
    {
        f(args);
        return;
    }
    f(flatten(args));
}

argument
code_object_op::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    // Stack-backed arena for the kernel-argument pointer array: up to 32
    // pointers (256 bytes) are carved out of arg_storage without touching the
    // heap; larger argument lists fall back to the resource's upstream
    // allocator.
    std::array<char, 256> arg_storage;
    std::pmr::monotonic_buffer_resource arena{arg_storage.data(), arg_storage.size()};
    std::pmr::vector<void*> kernel_args(&arena);
    visit_flatten_args(args, [&](const auto& flat_args) {
        kernel_args.reserve(flat_args.size());
        for(const argument& arg : flat_args)
            kernel_args.push_back(arg.data());
    });
    auto [start, stop] = ctx.get_perf_events();
    k.launch(ctx.get_stream().get(), global, local, kernel_args, start, stop);
    return args[get_output_arg(args.size())];
}
void code_object_op::finalize(context&, const shape&, const std::vector<shape>&)
Expand Down
26 changes: 23 additions & 3 deletions src/targets/gpu/include/migraphx/gpu/kernel.hpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand All @@ -28,6 +28,7 @@
#include <migraphx/gpu/pack_args.hpp>
#include <hip/hip_runtime_api.h>
#include <memory>
#include <memory_resource>
#include <string>
#include <vector>

Expand All @@ -39,6 +40,25 @@ struct kernel_impl;

struct MIGRAPHX_GPU_EXPORT kernel
{
// Non-owning view over a contiguous array of kernel-argument pointers.
// Allows launch() to accept either a std::vector or a std::pmr::vector (or a
// raw pointer/length pair) without copying or allocating. The viewed storage
// must outlive the call that uses this view.
struct pointers
{
    pointers() = default;

    pointers(void** pp, std::size_t pn) : p(pp), n(pn) {}

    // Implicit conversions are intentional so callers can pass a vector
    // straight to launch(). The references must be non-const: data() on a
    // const vector yields void* const*, which cannot convert to void**.
    pointers(std::vector<void*>& v) : p(v.data()), n(v.size()) {}
    pointers(std::pmr::vector<void*>& v) : p(v.data()), n(v.size()) {}

    // Start of the viewed pointer array (nullptr when default-constructed).
    void** data() const { return p; }

    // Number of pointers in the view.
    std::size_t size() const { return n; }

    // Total byte size of the viewed array.
    std::size_t bytes() const { return n * sizeof(void*); }

    private:
    void** p = nullptr;
    std::size_t n = 0;
};
kernel() = default;
kernel(const char* image, const std::string& name);
template <class T, MIGRAPHX_REQUIRES(sizeof(T) == 1)>
Expand All @@ -57,11 +77,11 @@ struct MIGRAPHX_GPU_EXPORT kernel
void launch(hipStream_t stream,
std::size_t global,
std::size_t local,
std::vector<void*> args,
pointers args,
hipEvent_t start = nullptr,
hipEvent_t stop = nullptr) const;

template <class... Ts>
template <class... Ts, MIGRAPHX_REQUIRES(std::is_convertible<Ts, hipEvent_t>{}...)>
auto launch(hipStream_t stream, std::size_t global, std::size_t local, Ts... zs) const
{
return [=](auto&&... xs) {
Expand Down
6 changes: 3 additions & 3 deletions src/targets/gpu/kernel.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2024 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2025 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -129,13 +129,13 @@ void launch_kernel(hipFunction_t fun,
// Launches the compiled kernel on the given stream, forwarding the packed
// argument-pointer view and optional start/stop events to launch_kernel.
void kernel::launch(hipStream_t stream,
                    std::size_t global,
                    std::size_t local,
                    pointers args,
                    hipEvent_t start,
                    hipEvent_t stop) const
{
    assert(impl != nullptr);
    // Any object pointer converts implicitly to void*, so no cast is needed
    // here (the previous reinterpret_cast was overkill for void** -> void*).
    void* kernargs  = args.data();
    std::size_t size = args.bytes();

    launch_kernel(impl->fun, stream, global, local, kernargs, size, start, stop);
}
Expand Down
Loading