diff --git a/tmva/sofie/CMakeLists.txt b/tmva/sofie/CMakeLists.txt
index c807d1b7b8c27..f56d2350ecadd 100644
--- a/tmva/sofie/CMakeLists.txt
+++ b/tmva/sofie/CMakeLists.txt
@@ -22,6 +22,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
   TMVA/OperatorList.hxx
   TMVA/RModel_Base.hxx
   TMVA/RModel.hxx
+  TMVA/RModelProfiler.hxx
   TMVA/ROperator.hxx
   TMVA/ROperator_BasicUnary.hxx
   TMVA/ROperator_BasicBinary.hxx
@@ -77,6 +78,7 @@ ROOT_STANDARD_LIBRARY_PACKAGE(ROOTTMVASofie
   SOURCES
   src/RModel_Base.cxx
   src/RModel.cxx
+  src/RModelProfiler.cxx
   src/RModel_GNN.cxx
   src/RModel_GraphIndependent.cxx
   src/RFunction.cxx
diff --git a/tmva/sofie/inc/TMVA/RModel.hxx b/tmva/sofie/inc/TMVA/RModel.hxx
index 996c51020270f..afe15cba72871 100644
--- a/tmva/sofie/inc/TMVA/RModel.hxx
+++ b/tmva/sofie/inc/TMVA/RModel.hxx
@@ -11,6 +11,8 @@ namespace SOFIE {
 
 class RModel final : public RModel_Base {
 
+   friend class RModelProfiler;
+
 private:
    bool fIsInitialized = false;
    bool fIsSubGraph = false;
@@ -20,6 +22,8 @@ private:
    size_t fConstantTensorSize = 0; // size (in Bytes) of the allocated constant tensors
    size_t fWeightsTensorSize = 0;  // size (in Bytes) of the allocated weight tensors
    size_t fOtherTensorSize = 0;    // size (in Bytes) of intermediate tensors which are not managed by the memory pool
+   std::string fProfilerGC = "";
+   bool fProfile = false;
 
    OptimizationLevel fOptimizationLevel = OptimizationLevel::kExtended;
 
@@ -151,8 +155,8 @@ public:
    void Initialize(int batchSize = -1, bool verbose = false);
    void Initialize(const std::map<std::string, size_t> & inputParams, bool verbose = false);
-
-   void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
+
+   void Generate(std::underlying_type_t<Options> options, int batchSize = -1, long pos = 0, bool verbose = false);
 
    void Generate(Options options = Options::kDefault, int batchSize = -1, int pos = 0, bool verbose = false)
    {
       Generate(static_cast<std::underlying_type_t<Options>>(options), batchSize, pos, verbose);
diff --git a/tmva/sofie/inc/TMVA/RModelProfiler.hxx b/tmva/sofie/inc/TMVA/RModelProfiler.hxx
new file mode 100644
index 0000000000000..fd9c8c7d0267d
--- /dev/null
+++ b/tmva/sofie/inc/TMVA/RModelProfiler.hxx
@@ -0,0 +1,42 @@
+#ifndef TMVA_SOFIE_RMODELPROFILER
+#define TMVA_SOFIE_RMODELPROFILER
+
+#include "TMVA/RModel.hxx"
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+/// \class RModelProfiler
+/// \brief A helper class to generate profiled inference code for an RModel.
+///
+/// This class instruments the generated C++ code to measure the execution
+/// time of each operator. It is invoked when the RModel::Generate is called
+/// with the Options::kProfile flag.
+class RModelProfiler {
+private:
+   RModel &fModel;
+
+   void GenerateUtilityFunctions();
+
+public:
+   // The profiler must be constructed with a model to work on.
+   RModelProfiler() = delete;
+   RModelProfiler(RModel &model);
+   ~RModelProfiler() = default;
+
+   // There is no point in copying or moving an RModelProfiler
+   RModelProfiler(const RModelProfiler &other) = delete;
+   RModelProfiler(RModelProfiler &&other) = delete;
+   RModelProfiler &operator=(const RModelProfiler &other) = delete;
+   RModelProfiler &operator=(RModelProfiler &&other) = delete;
+
+   // Main function to generate the profiled code.
+   void Generate();
+};
+
+} // namespace SOFIE
+} // namespace Experimental
+} // namespace TMVA
+
+#endif // TMVA_SOFIE_RMODELPROFILER
diff --git a/tmva/sofie/inc/TMVA/RModel_Base.hxx b/tmva/sofie/inc/TMVA/RModel_Base.hxx
index 2cbcc6cc8ea41..2ab5dacaac57f 100644
--- a/tmva/sofie/inc/TMVA/RModel_Base.hxx
+++ b/tmva/sofie/inc/TMVA/RModel_Base.hxx
@@ -26,6 +26,7 @@ enum class Options {
    kRootBinaryWeightFile = 0x4,
    kGNN = 0x8,
    kGNNComponent = 0x10,
+   kProfile = 0x20,
 };
 
 // Optimization levels inspired by ONNXRuntime.
diff --git a/tmva/sofie/inc/TMVA/ROperator.hxx b/tmva/sofie/inc/TMVA/ROperator.hxx
index f0afd9c4374c1..200cd3f2976fe 100644
--- a/tmva/sofie/inc/TMVA/ROperator.hxx
+++ b/tmva/sofie/inc/TMVA/ROperator.hxx
@@ -37,6 +37,9 @@ public:
    //virtual void Forward_blas() = 0;
    virtual ~ROperator(){}
 
+   std::string name = "UnnamedOperator";
+   const std::string &GetOperatorName() { return name; };
+
 protected:
 
    const std::string SP = "   ";   ///< space used to correctly indent the generated C++ code
diff --git a/tmva/sofie/src/RModel.cxx b/tmva/sofie/src/RModel.cxx
index 3e2c2d6ed332f..0fc1927d413cf 100644
--- a/tmva/sofie/src/RModel.cxx
+++ b/tmva/sofie/src/RModel.cxx
@@ -9,6 +9,7 @@
 #endif
 
 #include "TMVA/RModel.hxx"
+#include "TMVA/RModelProfiler.hxx"
 #include "TMVA/SOFIE_common.hxx"
 
 namespace TMVA {
@@ -941,7 +942,7 @@ void RModel::GenerateSessionCode()
       CheckAndFlushIntermediateMemory(fOperators[op_idx]->GetOpInputTensors(), op_idx);
    }
 
-   // to check remaining unused fragments after memory allocation (lesser the better)
+   // to check remaining unused fragments after memory allocation (lesser the better)
    // for (const auto &it: fIntermediateMemoryInfo.available_stack){
    //    std::cout<<"chunk_idx: "<<it.first<<" size: "<<it.second<<std::endl;
    // }
@@ ... @@ void RModel::GenerateSessionCode()
-      fGC += fOperators[id]->GenerateSessionMembersCode(opName);
+      fGC += fOperators[id]->GenerateSessionMembersCode(opName);
    }
    fGC += "\n";
    // here add initialization and reading of weight tensors
@@ -1021,23 +1022,28 @@ void RModel::GenerateSessionCode()
       fGC += "}\n\n";
    }
 
-   fGC += doInferSignature + "{\n";
-   fGC += "\n";
+   if (fProfile) {
+      RModelProfiler profiler(*this);
+      profiler.Generate();
+      fGC += fProfilerGC;
+   } else {
+      fGC += doInferSignature + "{\n";
+      fGC += "\n";
 
-   // generate the inference code
-   if (fVerbose)
-      std::cout << "Generating main inference code for " << fName << std::endl;
+      // generate the inference code
+      if (fVerbose)
+         std::cout << "Generating main inference code for " << fName << std::endl;
 
-   if (fOutputTensorNames.size() == 0)
-      throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
+      if (fOutputTensorNames.size() == 0)
+         throw std::runtime_error("TMVA-SOFIE: output size=0 are not supported");
 
-   for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
-      if (fVerbose)
-         std::cout << "Generating code for operator .... " << op_idx << std::endl;
-      fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
-   }
+      for (size_t op_idx = 0; op_idx < fOperators.size(); ++op_idx) {
+         if (fVerbose)
+            std::cout << "Generating code for operator .... " << op_idx << std::endl;
+         fGC += (fOperators[op_idx]->Generate(std::to_string(op_idx)));
+      }
 
-   fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
+      fGC += SP + "using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
 
    for (std::string const &name : fOutputTensorNames) {
       // need to check is size is the same (don't want to return a vector with
@@ -1048,7 +1054,8 @@ void RModel::GenerateSessionCode()
       fGC += SP + "FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
    }
 
-   fGC += "}\n\n";
+      fGC += "}\n\n";
+   }
 
    // generate the inference overload that returns an output struct
    GenerateOutput();
@@ -1061,9 +1068,11 @@
 
 void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, long pos, bool verbose)
 {
+   bool profile = (options & static_cast<std::underlying_type_t<Options>>(Options::kProfile));
    fVerbose = verbose;
    fBatchSize = batchSize;
    fReadPos = pos;
+   fProfile = profile;
 
    // session flag is used in operator initialize
    if (static_cast<std::underlying_type_t<Options>>(Options::kNoSession) & options) {
@@ -1083,9 +1092,9 @@
          "TMVA-SOFIE: RModel::Generate: cannot use a separate weight file without generating a Session class");
    }
 
-   if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
+   if (static_cast<std::underlying_type_t<Options>>(Options::kGNN) & options)
       fIsGNN = true;
-   if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
+   if (static_cast<std::underlying_type_t<Options>>(Options::kGNNComponent) & options)
      fIsGNNComponent = true;
 
    // initialize the model including all operators and sub-graphs
@@ -1099,13 +1108,13 @@ void RModel::Generate(std::underlying_type_t<Options> options, int batchSize, lo
 
    // generate first code for the subgraphs
    for (auto &graph : fSubGraphs) {
-      if (fVerbose)
+      if (fVerbose)
         std::cout << "generate session code for subgraph " << graph->fName << std::endl;
      graph->GenerateSessionCode();
      fGC += graph->fGC;
    }
 
-   if (fVerbose)
+   if (fVerbose)
      std::cout << "generate Main session code - model " << fName << std::endl;
 
    // generate main session code
diff --git a/tmva/sofie/src/RModelProfiler.cxx b/tmva/sofie/src/RModelProfiler.cxx
new file mode 100644
index 0000000000000..203daaf1c66dc
--- /dev/null
+++ b/tmva/sofie/src/RModelProfiler.cxx
@@ -0,0 +1,193 @@
+#include "TMVA/RModelProfiler.hxx"
+#include "TMVA/SOFIE_common.hxx"
+#include <string>
+
+namespace TMVA {
+namespace Experimental {
+namespace SOFIE {
+
+// The constructor now just registers the necessary C++ libraries.
+RModelProfiler::RModelProfiler(RModel &model) : fModel(model)
+{
+   fModel.AddNeededStdLib("chrono");   // for timing operators
+   fModel.AddNeededStdLib("vector");   // for storing profiling results
+   fModel.AddNeededStdLib("string");   // for operator names
+   fModel.AddNeededStdLib("map");      // for the results map
+   fModel.AddNeededStdLib("iostream"); // for printing results
+   fModel.AddNeededStdLib("iomanip");  // for printing results
+   // Additions for supervisor's requests
+   fModel.AddNeededStdLib("utility");
+   fModel.AddNeededStdLib("algorithm");
+   fModel.AddNeededStdLib("cmath");
+   fModel.AddNeededStdLib("sstream");
+}
+
+// This function generates the helper functions inside the Session struct.
+void RModelProfiler::GenerateUtilityFunctions()
+{
+   auto &gc = fModel.fProfilerGC;
+
+   // Generate PrintProfilingResults function
+   gc += "   void PrintProfilingResults(bool ordered = true) const {\n";
+   gc += "      if (fProfilingResults.empty()) {\n";
+   gc += "         std::cout << \"No profiling results to display.\" << std::endl;\n";
+   gc += "         return;\n";
+   gc += "      }\n";
+   gc += "\n";
+   gc += "      // Helper struct to store full profiling info\n";
+   gc += "      struct ProfileData { std::string name; double average; double error; size_t runs; };\n";
+   gc += "      std::vector<ProfileData> results;\n\n";
+
+   gc += "      if (ordered) {\n";
+   gc += "         // For ordered view, iterate through the map (which is alphabetical)\n";
+   gc += "         for (const auto& op : fProfilingResults) {\n";
+   gc += "            double sum = 0.0, sum2 = 0.0;\n";
+   gc += "            for (double time : op.second) { sum += time; sum2 += time * time; }\n";
+   gc += "            const size_t n_runs = op.second.size();\n";
+   gc += "            const double average = (n_runs > 0) ? sum / n_runs : 0.0;\n";
+   gc += "            const double variance = (n_runs > 1) ? (sum2 / n_runs - average * average) : 0.0;\n";
+   gc += "            const double error_on_mean = (n_runs > 0) ? std::sqrt(variance / n_runs) : 0.0;\n";
+   gc += "            results.push_back({op.first, average, error_on_mean, n_runs});\n";
+   gc += "         }\n";
+   gc += "         // Then sort the results by average time\n";
+   gc += "         std::sort(results.begin(), results.end(), [](const ProfileData& a, const ProfileData& b) { return a.average > b.average; });\n";
+   gc += "      } else {\n";
+   gc += "         // For execution order view, iterate through our explicitly stored execution order vector\n";
+   gc += "         results.reserve(fExecutionOrder.size()); // Pre-allocate memory for efficiency\n";
+   gc += "         for (const auto& op_name : fExecutionOrder) {\n";
+   gc += "            const auto& op_timings = fProfilingResults.at(op_name);\n";
+   gc += "            double sum = 0.0, sum2 = 0.0;\n";
+   gc += "            for (double time : op_timings) { sum += time; sum2 += time * time; }\n";
+   gc += "            const size_t n_runs = op_timings.size();\n";
+   gc += "            const double average = (n_runs > 0) ? sum / n_runs : 0.0;\n";
+   gc += "            const double variance = (n_runs > 1) ? (sum2 / n_runs - average * average) : 0.0;\n";
+   gc += "            const double error_on_mean = (n_runs > 0) ? std::sqrt(variance / n_runs) : 0.0;\n";
+   gc += "            results.push_back({op_name, average, error_on_mean, n_runs});\n";
+   gc += "         }\n";
+   gc += "      }\n";
+   gc += "\n";
+
+   // --- PRINTING LOGIC (remains the same, but now uses the correctly ordered 'results' vector) ---
+   gc += "      if (ordered) {\n";
+   gc += "         std::cout << \"\\n\" << std::string(80, '=') << std::endl;\n";
+   gc += "         std::cout << \"   PROFILING RESULTS (ORDERED BY TIME)\" << std::endl;\n";
+   gc += "      } else {\n";
+   gc += "         std::cout << \"\\n\" << std::string(80, '=') << std::endl;\n";
+   gc += "         std::cout << \"   PROFILING RESULTS (EXECUTION ORDER)\" << std::endl;\n";
+   gc += "      }\n";
+   gc += "      std::cout << std::string(80, '=') << std::endl;\n";
+   gc += "      for (const auto & op : results) {\n";
+   gc += "         std::stringstream ss;\n";
+   gc += "         ss << std::fixed << std::setprecision(4) << op.average << \" +/- \" << op.error;\n";
+   gc += "         std::cout << \"   \" << std::left << std::setw(25) << op.name\n";
+   gc += "                   << \": \" << std::left << std::setw(25) << ss.str()\n";
+   gc += "                   << \"(over \" << op.runs << \" runs)\" << std::endl;\n";
+   gc += "      }\n";
+   gc += "      std::cout << std::string(80, '=') << \"\\n\" << std::endl;\n";
+   gc += "   }\n";
+   gc += "\n";
+
+   // Generate ResetProfilingResults function
+   gc += "   void ResetProfilingResults() {\n";
+   gc += "      fProfilingResults.clear();\n";
+   gc += "      fExecutionOrder.clear(); // Also clear the execution order vector\n";
+   gc += "   }\n";
+   gc += "\n";
+
+   // Generate GetOpAvgTime function
+   gc += "   std::map<std::string, double> GetOpAvgTime() const {\n";
+   gc += "      if (fProfilingResults.empty()) {\n";
+   gc += "         return {};\n";
+   gc += "      }\n";
+   gc += "      std::map<std::string, double> avg;\n";
+   gc += "      for (const auto& op : fProfilingResults) {\n";
+   gc += "         double mean = 0.0; for (double time : op.second) { mean += time; } mean /= op.second.size();\n";
+   gc += "         avg[op.first] = mean;\n";
+   gc += "      }\n";
+   gc += "      return avg;\n";
+   gc += "   }\n";
+   gc += "\n";
+
+   // Generate GetOpVariance function
+   gc += "   std::map<std::string, double> GetOpVariance() const {\n";
+   gc += "      if (fProfilingResults.empty()) { return {}; }\n";
+   gc += "      std::map<std::string, double> variance;\n";
+   gc += "      for (const auto& op : fProfilingResults) {\n";
+   gc += "         double mean = 0.0, mean2 = 0.0; for (double time : op.second) { mean += time; mean2 += time * time; }\n";
+   gc += "         mean /= op.second.size(); mean2 /= op.second.size();\n";
+   gc += "         variance[op.first] = mean2 - mean * mean;\n";
+   gc += "      }\n";
+   gc += "      return variance;\n";
+   gc += "   }\n";
+}
+
+// Main generation function for the profiler.
+void RModelProfiler::Generate()
+{
+   // Clear the profiler's code string to start fresh.
+   fModel.fProfilerGC.clear();
+   auto &gc = fModel.fProfilerGC;
+
+   // 1. Add the data member to the Session struct to store results.
+   gc += "public:\n";
+   gc += "   // Maps an operator name to a vector of its execution times (in microseconds).\n";
+   gc += "   std::map<std::string, std::vector<double>> fProfilingResults;\n";
+   gc += "   // Stores operator names to preserve the original execution order.\n";
+   gc += "   std::vector<std::string> fExecutionOrder;\n\n";
+
+   // 2. Generate and add the utility functions like PrintProfilingResults.
+   GenerateUtilityFunctions();
+
+   // 3. Generate the signature for the profiled doInfer method.
+   std::string doInferSignature = fModel.GenerateInferSignature();
+   if (!doInferSignature.empty()) doInferSignature += ", ";
+   for (auto const &name : fModel.GetOutputTensorNames()) {
+      doInferSignature += " std::vector<" + ConvertTypeToString(fModel.GetTensorType(name)) + "> &output_tensor_" + name + ",";
+   }
+   if (!fModel.GetOutputTensorNames().empty()) {
+      doInferSignature.back() = ' ';
+   }
+   gc += "void doInfer(" + doInferSignature + ") {\n";
+
+   // 4. Generate the body of the doInfer method with timing instrumentation.
+   gc += "   // Timer variable for profiling\n";
+   gc += "   std::chrono::steady_clock::time_point tp_start, tp_overall_start;\n\n";
+   gc += "   tp_overall_start = std::chrono::steady_clock::now();\n\n";
+
+   for (size_t op_idx = 0; op_idx < fModel.fOperators.size(); ++op_idx) {
+      const auto& op = fModel.fOperators[op_idx];
+      gc += "   // -- Profiling for operator " + op->name + " --\n";
+      gc += "   tp_start = std::chrono::steady_clock::now();\n\n";
+      gc += op->Generate(std::to_string(op_idx));
+      gc += "\n   fProfilingResults[\"" + op->name + "\"].push_back(\n";
+      gc += "      std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
+      gc += "         std::chrono::steady_clock::now() - tp_start).count());\n";
+      gc += "   if (fProfilingResults.at(\"" + op->name + "\").size() == 1) {\n";
+      gc += "      fExecutionOrder.push_back(\"" + op->name + "\");\n";
+      gc += "   }\n\n";
+   }
+
+   // 5. Generate the code to fill the output tensors.
+   gc += "   using TMVA::Experimental::SOFIE::UTILITY::FillOutput;\n\n";
+   for (std::string const &name : fModel.GetOutputTensorNames()) {
+      bool isIntermediate = fModel.fIntermediateTensorInfos.count(name) > 0;
+      std::string n = isIntermediate ? std::to_string(ConvertShapeToLength(fModel.GetTensorShape(name)))
+                                     : ConvertDynamicShapeToLength(fModel.GetDynamicTensorShape(name));
+      gc += "   FillOutput(tensor_" + name + ", output_tensor_" + name + ", " + n + ");\n";
+   }
+
+   gc += "\n   // -- Record overall inference time --\n";
+   gc += "   fProfilingResults[\"Overall_Time\"].push_back(\n";
+   gc += "      std::chrono::duration_cast<std::chrono::duration<double, std::micro>>(\n";
+   gc += "         std::chrono::steady_clock::now() - tp_overall_start).count());\n";
+   gc += "   if (fProfilingResults.at(\"Overall_Time\").size() == 1) {\n";
+   gc += "      fExecutionOrder.push_back(\"Overall_Time\");\n";
+   gc += "   }\n";
+
+
+   gc += "}\n\n"; // End of doInfer function
+}
+
+} // namespace SOFIE
+} // namespace Experimental
+} // namespace TMVA
diff --git a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx
index 7b4ade2b6bc09..4903c8d1c6511 100644
--- a/tmva/sofie_parsers/src/RModelParser_ONNX.cxx
+++ b/tmva/sofie_parsers/src/RModelParser_ONNX.cxx
@@ -731,7 +731,8 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto &
         std::cout << "\t" << i << " " << nodesOrder[i] << " parsing operator " << op_type << std::endl;
      }
 
-      std::unique_ptr<ROperator> op = ParseOperator(i, graph, nodesOrder, nodesChildren[i]);
+      std::unique_ptr<ROperator> op = ParseOperator(i, graph, nodesOrder, nodesChildren[nodesOrder[i]]);
+
      if (!op) {
         if (verbose) {
            std::cout << "\t\tskipping operator since it is fused with previous one" << std::endl;
@@ -739,6 +740,12 @@ void RModelParser_ONNX::ParseONNXGraph(RModel & rmodel, const onnx::GraphProto &
         // for skipping the fused nodes like Add after MatMul
         continue;
      }
+      const auto &nodeproto = graph.node(nodesOrder[i]);
+      op->name = nodeproto.name();
+      if (op->name.empty()) {
+         op->name = op_type + "_" + std::to_string(i);
+      }
+
      rmodel.AddOperator(std::move(op), node_order_exec++);
   }
diff --git a/tutorials/machine_learning/TMVA_SOFIE_ONNX.C b/tutorials/machine_learning/TMVA_SOFIE_ONNX.C
index 8c192789e1210..878167db8c791 100644
--- a/tutorials/machine_learning/TMVA_SOFIE_ONNX.C
+++ b/tutorials/machine_learning/TMVA_SOFIE_ONNX.C
@@ -19,7 +19,7 @@ void TMVA_SOFIE_ONNX(std::string inputFile = ""){
    SOFIE::RModel model = parser.Parse(inputFile, true);
 
    //Generating inference code
-   model.Generate();
+   model.Generate(SOFIE::Options::kProfile);
 
    // write the code in a file (by default Linear_16.hxx and Linear_16.dat
    model.OutputGenerated();
diff --git a/tutorials/machine_learning/TMVA_SOFIE_Profiler.C b/tutorials/machine_learning/TMVA_SOFIE_Profiler.C
new file mode 100644
index 0000000000000..72c065a7aa586
--- /dev/null
+++ b/tutorials/machine_learning/TMVA_SOFIE_Profiler.C
@@ -0,0 +1,72 @@
+/// \file
+/// \ingroup tutorial_ml
+/// \notebook -nodraw
+/// This macro shows how to use the SOFIE profiler.
+/// It parses a model, generates profiled C++ code, runs inference,
+/// and prints the timing results for each operation.
+///
+/// \macro_code
+/// \macro_output
+/// \author Olha Sirikova, Lorenzo Moneta, Sanjiban Sengupta
+
+using namespace TMVA::Experimental;
+
+void TMVA_SOFIE_Profiler(const std::string& modelName = "Linear_16") {
+   // Use a standard ONNX model from the ROOT tutorials directory.
+   std::string inputFile = std::string(gROOT->GetTutorialsDir()) + "/machine_learning/" + modelName + ".onnx";
+   if (gSystem->AccessPathName(inputFile.c_str())) {
+      std::cout << "Error: Could not find input file: " << inputFile << std::endl;
+      return;
+   }
+
+   // Parse the ONNX file into a SOFIE RModel object.
+   SOFIE::RModelParser_ONNX parser;
+   SOFIE::RModel model = parser.Parse(inputFile);
+
+   // Generate inference code with profiling enabled using the kProfile option.
+   std::cout << "Generating profiled inference code..." << std::endl;
+   model.Generate(SOFIE::Options::kProfile);
+
+   // Write the generated code to .hxx and .dat files.
+   model.OutputGenerated();
+   std::cout << "Generated files: " << modelName << ".hxx and " << modelName << ".dat" << std::endl;
+
+   // Load and compile the generated model's header file.
+   std::cout << "\nCompiling the generated code..." << std::endl;
+   gROOT->ProcessLine(TString::Format(".L %s.hxx+", modelName.c_str()));
+
+   // Construct the name of the generated Session class.
+   TString sessionTypeName = TString::Format("TMVA_SOFIE_%s::Session", modelName.c_str());
+
+   // Create a new Session object via the interpreter and get its address.
+   Long_t sessionAddr = gROOT->ProcessLine(TString::Format("new %s();", sessionTypeName.Data()));
+
+   // Prepare input and output data vectors for the model.
+   std::vector<float> input_tensor(1, 1.0f);
+   std::vector<float> output_tensor(16, 0.0f);
+
+   // Run inference many times to collect timing statistics.
+   int n_inferences = 1000;
+   std::cout << "\nRunning inference " << n_inferences << " times..." << std::endl;
+   for (int i = 0; i < n_inferences; ++i) {
+      // Call the doInfer method via the interpreter, passing pointers to the data.
+      gROOT->ProcessLine(TString::Format("((%s*)%ld)->doInfer((float*)%p, *(std::vector<float>*)%p);",
+                                         sessionTypeName.Data(), sessionAddr, input_tensor.data(), &output_tensor));
+   }
+   std::cout << "Inference complete." << std::endl;
+
+   // Display the profiling results.
+   // Print results ordered by time (slowest first).
+   gROOT->ProcessLine(TString::Format("((%s*)%ld)->PrintProfilingResults(true);", sessionTypeName.Data(), sessionAddr));
+
+   // Print results in their original execution order.
+   gROOT->ProcessLine(TString::Format("((%s*)%ld)->PrintProfilingResults(false);", sessionTypeName.Data(), sessionAddr));
+
+   // Reset the profiler data.
+   std::cout << "Resetting profiling data..." << std::endl;
+   gROOT->ProcessLine(TString::Format("((%s*)%ld)->ResetProfilingResults();", sessionTypeName.Data(), sessionAddr));
+   gROOT->ProcessLine(TString::Format("((%s*)%ld)->PrintProfilingResults(true);", sessionTypeName.Data(), sessionAddr));
+
+   // Clean up the Session object to avoid memory leaks.
+   gROOT->ProcessLine(TString::Format("delete ((%s*)%ld);", sessionTypeName.Data(), sessionAddr));
+}