ROCm · ahsan-ca · Jan 31, 2025 · Feb 3, 2025 · ahsan-ca · Jan 31, 2025
@@ -551,7 +551,8 @@ struct hip_gemm_impl
     int tune(context& ctx, const std::vector<shape>& input_shapes) // const
     {
         // tuning meta parameters
-        const int hot_calls = 40;
+        const int hot_calls = 1000;
+        const int cold_calls = 1000;
 
         std::vector<argument> input_args;
         std::transform(input_shapes.begin(),
@@ -603,32 +604,41 @@ struct hip_gemm_impl
         {
             auto algo = solution.get_result(ctx, *this, 0)[0].algo;
             solution_indices.push_back(hipblaslt_ext::getIndexFromAlgo(algo));
+            best_sol = hipblaslt_ext::getIndexFromAlgo(algo);
+            first_time = 1;
+            best_time = 1;
         }
-        for(auto sol : solution_indices)
+        else
         {
-            // Warmup: the first call to an op. may not be representative since there is
-            // more time taken initializing caches, etc. so we won't time it.
-            run(ctx, input_args, sol);
-            double host_time = time<milliseconds>([&] {
-                for([[maybe_unused]] int hc : range(hot_calls))
+            for(auto sol : solution_indices)
+            {
+                // Warmup: the first call to an op. may not be representative since there is
+                // more time taken initializing caches, etc. so we won't time it.
+                for([[maybe_unused]] int cc : range(cold_calls)) {
                     run(ctx, input_args, sol);
-                ctx.finish();
-            });
+                }
+
+                double host_time = time<milliseconds>([&] {
+                    for([[maybe_unused]] int hc : range(hot_calls)) {
+                        run(ctx, input_args, sol);
+                    }
+                    ctx.finish();
+                });
 
-            host_time /= hot_calls;
+                host_time /= hot_calls;
 
-            // dev/evaluation only: track time for first solution.
-            if(first_time < 0)
-                first_time = host_time;
+                // dev/evaluation only: track time for first solution.
+                if(first_time < 0)
+                    first_time = host_time;
 
-            // track current best
-            if(host_time < best_time)
-            {
-                best_sol  = sol;
-                best_time = host_time;
+                // track current best
+                if(host_time < best_time)
+                {
+                    best_sol  = sol;
+                    best_time = host_time;
+                }
             }
         }
-
         std::cout << "Winning GEMM solution: " << best_sol << " in " << best_time << " ms, beats "
                   << first_time << "ms" << std::endl;
         return best_sol;