From d8e902ee5dbfd6a6888de7b48b1792882e178a60 Mon Sep 17 00:00:00 2001
From: Ed Addario <eaddario@hotmail.com>
Date: Tue, 1 Apr 2025 21:54:52 +0100
Subject: [PATCH 1/9] Add --show-statistics option

---
 common/arg.cpp  | 7 +++++++
 common/common.h | 5 +++--
 2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/common/arg.cpp b/common/arg.cpp
index 8292adaac655d..851130762c5c0 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -1925,6 +1925,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.i_chunk = value;
         }
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+    add_opt(common_arg(
+        {"--show-statistics"},
+        string_format("show imatrix statistics and then exit (default: %s)", params.show_statistics ? "true" : "false"),
+        [](common_params & params) {
+            params.show_statistics = true;
+        }
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(common_arg(
         {"-pps"},
         string_format("is the prompt shared across parallel sequences (default: %s)", params.is_pp_shared ? "true" : "false"),
diff --git a/common/common.h b/common/common.h
index 1c0f199774976..7db669c88b924 100644
--- a/common/common.h
+++ b/common/common.h
@@ -411,8 +411,9 @@ struct common_params {
     int32_t n_save_freq =  0; // save the imatrix every n_save_freq iterations
     int32_t i_chunk     =  0; // start processing from this chunk
 
-    bool process_output = false; // collect data for the output tensor
-    bool compute_ppl    = true;  // whether to compute perplexity
+    bool process_output  = false; // collect data for the output tensor
+    bool compute_ppl     = true;  // whether to compute perplexity
+    bool show_statistics = false; // show imatrix statistics per tensor
 
     // cvector-generator params
     int n_pca_batch = 100;

From f46693bc69a851b9693a723a2cf2c96f7ab9304f Mon Sep 17 00:00:00 2001
From: Ed Addario <eaddario@hotmail.com>
Date: Tue, 1 Apr 2025 21:55:41 +0100
Subject: [PATCH 2/9] Add --show-statistics logic

---
 examples/imatrix/imatrix.cpp | 65 +++++++++++++++++++++++++++++++-----
 1 file changed, 57 insertions(+), 8 deletions(-)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 31b675e8f90b9..49d1d395039d5 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -21,10 +21,9 @@
 
 static void print_usage(int, char ** argv) {
     LOG("\nexample usage:\n");
-    LOG("\n    %s \\\n"
-            "       -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n"
-            "       [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n"
-            "       [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
+    LOG("\n    %s -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output]\n"
+            "       [--chunk 123] [--output-frequency 10] [--save-frequency 0] [--show-statistics]\n"
+            "       [--no-ppl] [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]);
     LOG("\n");
 }
 
@@ -34,13 +33,19 @@ struct Stats {
     int ncall = 0;
 };
 
+struct Tally {
+    std::string tensor;
+    float value = 0;
+    int count = 0;
+};
+
 class IMatrixCollector {
 public:
     IMatrixCollector() = default;
     void set_params(common_params params) { m_params = std::move(params); }
     bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data);
     void save_imatrix(int ncall = -1) const;
-    bool load_imatrix(const char * fname);
+    bool load_imatrix(const char * fname, std::vector<Tally> * tally = nullptr);
 private:
     std::unordered_map<std::string, Stats> m_stats;
     common_params                          m_params;
@@ -289,7 +294,7 @@ void IMatrixCollector::save_imatrix(int ncall) const {
     LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str());
 }
 
-bool IMatrixCollector::load_imatrix(const char * fname) {
+bool IMatrixCollector::load_imatrix(const char * fname, std::vector<Tally> * tally) {
     std::ifstream in(fname, std::ios::binary);
     if (!in) {
         LOG_ERR("%s: failed to open %s\n",__func__, fname);
@@ -335,13 +340,22 @@ bool IMatrixCollector::load_imatrix(const char * fname) {
             return false;
         }
 
-        // Recreate the state as expected by save_imatrix(), and corerct for weighted sum.
+        // Recreate the state as expected by save_imatrix(), and correct for weighted sum.
+        float total = 0;
         for (int i = 0; i < nval; i++) {
             e.values[i] += tmp[i];
+            total += tmp[i];
             e.counts[i] += ncall;
         }
         e.ncall += ncall;
 
+        if (tally) {
+            tally->emplace_back();
+            auto & [tensor, value, count] = (*tally)[i];
+            tensor = name_as_vec.data();
+            value = total;
+            count = nval;
+        }
     }
     return true;
 }
@@ -352,7 +366,6 @@ static bool ik_collect_imatrix(struct ggml_tensor * t, bool ask, void * user_dat
     return g_collector.collect_imatrix(t, ask, user_data);
 }
 
-
 struct results_log_softmax {
     double log_softmax;
     float  logit;
@@ -590,6 +603,42 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    std::vector<Tally> tallies;
+
+    if (params.show_statistics) {
+        if (params.in_files.empty() || params.in_files.size() > 1) {
+            LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n");
+            return 1;
+        }
+        if (!g_collector.load_imatrix(params.in_files[0].c_str(), & tallies)) {
+            LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str());
+            return 1;
+        }
+        if (tallies.empty()) {
+            LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str());
+            return 1;
+        }
+        float total = 0;
+        for (const auto & tallie : tallies) {
+            total += tallie.value / static_cast<float>(tallie.count);
+        }
+
+        struct tally_sort {
+            bool operator()(const Tally& x, const Tally & y) const {
+                return x.value / static_cast<float>(x.count) > y.value / static_cast<float>(y.count);
+            }
+        };
+        std::sort(tallies.begin(), tallies.end(), tally_sort());
+        LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(tallies.size()));
+        LOG_INF("\n                    Tensor                       Σ(weights)   Contribution\n");
+        LOG_INF("==========================================================================\n");
+        for (const auto & [tensor, value, count] : tallies) {
+            LOG_INF("%40s\t%10.2f\t%7.4f %%\n", tensor.c_str(), value / count, 100.0f * (value / count / total));
+        }
+        LOG_INF("\n");
+        return 0;
+    }
+
     common_init();
 
     params.n_batch = std::min(params.n_batch, params.n_ctx);

From dc3373e5bb66febcc4e9d0061758c041cefee4d6 Mon Sep 17 00:00:00 2001
From: Ed Addario <eaddario@hotmail.com>
Date: Wed, 2 Apr 2025 11:43:38 +0100
Subject: [PATCH 3/9] Add tensor name parsing

---
 examples/imatrix/imatrix.cpp | 29 +++++++++++++++++++++++++++++
 1 file changed, 29 insertions(+)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 49d1d395039d5..3467f8f937c2e 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -74,6 +74,35 @@ static std::string filter_tensor_name(const char * name) {
     return wname;
 }
 
+// Split a '.'-separated tensor name: layer <- the part after the first "blk" ("-" if none),
+// tensor <- the part before the first non-leading "weight" (the whole input if none).
+static void process_tensor_name(const std::string & input, std::string & layer, std::string & tensor) {
+    std::vector<std::string> parts;
+    std::istringstream stream(input);
+    for (std::string item; std::getline(stream, item, '.');) {
+        parts.push_back(item);
+    }
+
+    // Results are built in locals so that whatever the caller's strings held before the
+    // call (e.g. a comparator reusing one scratch string) can never leak into the output.
+    std::string found_layer;
+    std::string found_tensor;
+
+    const auto blk = std::find(parts.begin(), parts.end(), "blk");
+    if (blk != parts.end() && blk + 1 != parts.end()) {
+        found_layer = *(blk + 1);
+    }
+    // A leading "weight" has nothing in front of it, so the search starts at the second part.
+    const auto weight = std::find(parts.empty() ? parts.end() : parts.begin() + 1, parts.end(), "weight");
+    if (weight != parts.end()) {
+        found_tensor = *(weight - 1);
+    }
+
+    // An empty part (e.g. "blk..x") counts as "not found", matching the fallbacks below.
+    tensor = found_tensor.empty() ? input : found_tensor;
+    layer  = found_layer.empty() ? std::string("-") : found_layer;
+}
+
 bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) {
     GGML_UNUSED(user_data);
 

From 0589c3ee9ffe936663d4119fd647ba9a785fda9f Mon Sep 17 00:00:00 2001
From: Ed Addario <eaddario@hotmail.com>
Date: Wed, 2 Apr 2025 11:44:03 +0100
Subject: [PATCH 4/9] Tidy output format

---
 examples/imatrix/imatrix.cpp | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 3467f8f937c2e..30b9a9a295da5 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -658,11 +658,14 @@ int main(int argc, char ** argv) {
             }
         };
         std::sort(tallies.begin(), tallies.end(), tally_sort());
+
         LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(tallies.size()));
-        LOG_INF("\n                    Tensor                       Σ(weights)   Contribution\n");
-        LOG_INF("==========================================================================\n");
+        LOG_INF("\n Layer\t               Tensor\t          Σ(Importance Scores)\t   Contribution\n");
+        LOG_INF("================================================================================\n");
         for (const auto & [tensor, value, count] : tallies) {
-            LOG_INF("%40s\t%10.2f\t%7.4f %%\n", tensor.c_str(), value / count, 100.0f * (value / count / total));
+            std::string layer, name;
+            process_tensor_name(tensor, layer, name);
+            LOG_INF("%5s\t%30s\t%15.2f\t%20.4f %%\n", layer.c_str(), name.c_str(), value / count, 100.0f * (value / count / total));
         }
         LOG_INF("\n");
         return 0;

From e1fd1af77e9750e4cca7accc5efc20f2a16deecb Mon Sep 17 00:00:00 2001
From: Ed Addario <eaddario@hotmail.com>
Date: Wed, 2 Apr 2025 14:13:42 +0100
Subject: [PATCH 5/9] Fix typo in title

---
 examples/imatrix/imatrix.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 30b9a9a295da5..f1d2febfda621 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -660,7 +660,7 @@ int main(int argc, char ** argv) {
         std::sort(tallies.begin(), tallies.end(), tally_sort());
 
         LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(tallies.size()));
-        LOG_INF("\n Layer\t               Tensor\t          Σ(Importance Scores)\t   Contribution\n");
+        LOG_INF("\n Layer\t               Tensor\t          μ(Importance Scores)\t   Contribution\n");
         LOG_INF("================================================================================\n");
         for (const auto & [tensor, value, count] : tallies) {
             std::string layer, name;

From 62ac26833a4b91866d9c93dabb88c625429f2580 Mon Sep 17 00:00:00 2001
From: Ed Addario <eaddario@hotmail.com>
Date: Tue, 8 Apr 2025 17:07:16 +0100
Subject: [PATCH 6/9] Improve tensor influence ranking

---
 examples/imatrix/imatrix.cpp | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index f1d2febfda621..ab31fd7b2f2e6 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -35,7 +35,7 @@ struct Stats {
 
 struct Tally {
     std::string tensor;
-    float value = 0;
+    double bias = 0;
     int count = 0;
 };
 
@@ -370,19 +370,20 @@ bool IMatrixCollector::load_imatrix(const char * fname, std::vector<Tally> * tal
         }
 
         // Recreate the state as expected by save_imatrix(), and correct for weighted sum.
-        float total = 0;
+        double total = 0;
         for (int i = 0; i < nval; i++) {
             e.values[i] += tmp[i];
-            total += tmp[i];
             e.counts[i] += ncall;
+            const double avg_sq = (1.0 * e.values[i]) / e.counts[i];
+            total += avg_sq;
         }
         e.ncall += ncall;
 
         if (tally) {
             tally->emplace_back();
-            auto & [tensor, value, count] = (*tally)[i];
+            auto & [tensor, bias, count] = (*tally)[i];
             tensor = name_as_vec.data();
-            value = total;
+            bias = total;
             count = nval;
         }
     }
@@ -647,25 +648,25 @@ int main(int argc, char ** argv) {
             LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str());
             return 1;
         }
-        float total = 0;
+        double total = 0;
         for (const auto & tallie : tallies) {
-            total += tallie.value / static_cast<float>(tallie.count);
+            total += tallie.bias;
         }
 
         struct tally_sort {
             bool operator()(const Tally& x, const Tally & y) const {
-                return x.value / static_cast<float>(x.count) > y.value / static_cast<float>(y.count);
+                return x.bias > y.bias;
             }
         };
         std::sort(tallies.begin(), tallies.end(), tally_sort());
 
         LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(tallies.size()));
-        LOG_INF("\n Layer\t               Tensor\t          μ(Importance Scores)\t   Contribution\n");
-        LOG_INF("================================================================================\n");
-        for (const auto & [tensor, value, count] : tallies) {
+        LOG_INF("\n Layer\t               Tensor\t              Total Bias\tAvg Bias\t  Contribution\n");
+        LOG_INF("===============================================================================================\n");
+        for (const auto & [tensor, bias, count] : tallies) {
             std::string layer, name;
             process_tensor_name(tensor, layer, name);
-            LOG_INF("%5s\t%30s\t%15.2f\t%20.4f %%\n", layer.c_str(), name.c_str(), value / count, 100.0f * (value / count / total));
+            LOG_INF("%5s\t%30s\t%15.2f\t%15.4f\t%19.4f%%\n", layer.c_str(), name.c_str(), bias, bias / count, 100.0 * bias / total);
         }
         LOG_INF("\n");
         return 0;

From 73d8ecbc42bb0d032a9b63d959db9fff16e02243 Mon Sep 17 00:00:00 2001
From: Ed Addario <eaddario@hotmail.com>
Date: Sun, 13 Apr 2025 20:30:36 +0100
Subject: [PATCH 7/9] Add better statistics

---
 examples/imatrix/imatrix.cpp | 121 +++++++++++++++++++++++------------
 1 file changed, 80 insertions(+), 41 deletions(-)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index ab31fd7b2f2e6..21980635b9e6a 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -1,19 +1,20 @@
-#include "arg.h"
-#include "common.h"
-#include "log.h"
-#include "llama.h"
-
+#include <algorithm>
 #include <chrono>
 #include <cmath>
 #include <cstdio>
 #include <cstring>
 #include <ctime>
-#include <thread>
-#include <mutex>
-#include <vector>
 #include <fstream>
+#include <mutex>
+#include <numeric>
+#include <thread>
 #include <unordered_map>
-#include <algorithm>
+#include <vector>
+
+#include "arg.h"
+#include "common.h"
+#include "llama.h"
+#include "log.h"
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -33,10 +34,18 @@ struct Stats {
     int ncall = 0;
 };
 
-struct Tally {
+struct tensor_statistics {
     std::string tensor;
-    double bias = 0;
-    int count = 0;
+    float total = 0;
+    float mean = 0;
+    float max = 0;
+    float min = 0;
+    float stddev = 0;
+    float cv = 0;
+    float zd = 0;
+    float active = 0;
+    float entropy = 0;
+    int elements = 0;
 };
 
 class IMatrixCollector {
@@ -45,7 +54,7 @@ class IMatrixCollector {
     void set_params(common_params params) { m_params = std::move(params); }
     bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data);
     void save_imatrix(int ncall = -1) const;
-    bool load_imatrix(const char * fname, std::vector<Tally> * tally = nullptr);
+    bool load_imatrix(const char * fname, std::vector<tensor_statistics> * tstats = nullptr);
 private:
     std::unordered_map<std::string, Stats> m_stats;
     common_params                          m_params;
@@ -323,7 +332,7 @@ void IMatrixCollector::save_imatrix(int ncall) const {
     LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str());
 }
 
-bool IMatrixCollector::load_imatrix(const char * fname, std::vector<Tally> * tally) {
+bool IMatrixCollector::load_imatrix(const char * fname, std::vector<tensor_statistics> * ts) {
     std::ifstream in(fname, std::ios::binary);
     if (!in) {
         LOG_ERR("%s: failed to open %s\n",__func__, fname);
@@ -370,21 +379,58 @@ bool IMatrixCollector::load_imatrix(const char * fname, std::vector<Tally> * tal
         }
 
         // Recreate the state as expected by save_imatrix(), and correct for weighted sum.
-        double total = 0;
+        std::vector<float> activations;
+        activations.reserve(nval);
+
         for (int i = 0; i < nval; i++) {
             e.values[i] += tmp[i];
             e.counts[i] += ncall;
-            const double avg_sq = (1.0 * e.values[i]) / e.counts[i];
-            total += avg_sq;
+            activations.push_back(e.values[i] / static_cast<float>(e.counts[i]));
         }
         e.ncall += ncall;
 
-        if (tally) {
-            tally->emplace_back();
-            auto & [tensor, bias, count] = (*tally)[i];
+        if (ts) {
+            float total_bias = std::accumulate(activations.begin(), activations.end(), 0.0f);
+            float max_bias = * std::max_element(activations.begin(), activations.end());
+            float min_bias = * std::min_element(activations.begin(), activations.end());
+            float mean_bias = total_bias / activations.size();
+            float sq_total_bias = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
+            float dev = std::sqrt((sq_total_bias / activations.size()) - (mean_bias * mean_bias));
+            float rmsd = mean_bias > 0.0f ? dev / mean_bias : 0.0f;
+
+            float threshold = 1e-6f;
+            int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabs(v) < threshold; });
+            float active_ratio = 1 -  (static_cast<float>(inactive_count) / activations.size());
+
+            float ent = 0.0f;
+            if (total_bias > 0) {
+                for (auto act : activations) {
+                    if (float p = act / total_bias; p > 0) {
+                        ent -= p* std::log2(p);
+                    }
+                }
+            }
+
+            int z_score = 0;
+            for (auto act : activations) {
+                if (float p = (act - mean_bias) / dev; p > 1) {
+                    z_score++;
+                }
+            }
+
+            ts->emplace_back();
+            auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] = (*ts)[i];
             tensor = name_as_vec.data();
-            bias = total;
-            count = nval;
+            total = total_bias;
+            mean = mean_bias;
+            max = max_bias;
+            min = min_bias;
+            stddev = dev;
+            cv = rmsd;
+            active = active_ratio;
+            entropy = ent;
+            elements = static_cast<int>(activations.size());
+            zd = static_cast<float>(z_score) / static_cast<float>(elements);
         }
     }
     return true;
@@ -633,42 +679,35 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
-    std::vector<Tally> tallies;
+    std::vector<tensor_statistics> ts;
 
     if (params.show_statistics) {
         if (params.in_files.empty() || params.in_files.size() > 1) {
             LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n");
             return 1;
         }
-        if (!g_collector.load_imatrix(params.in_files[0].c_str(), & tallies)) {
+        if (!g_collector.load_imatrix(params.in_files[0].c_str(), & ts)) {
             LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str());
             return 1;
         }
-        if (tallies.empty()) {
+        if (ts.empty()) {
             LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str());
             return 1;
         }
-        double total = 0;
-        for (const auto & tallie : tallies) {
-            total += tallie.bias;
-        }
 
-        struct tally_sort {
-            bool operator()(const Tally& x, const Tally & y) const {
-                return x.bias > y.bias;
-            }
-        };
-        std::sort(tallies.begin(), tallies.end(), tally_sort());
-
-        LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(tallies.size()));
-        LOG_INF("\n Layer\t               Tensor\t              Total Bias\tAvg Bias\t  Contribution\n");
-        LOG_INF("===============================================================================================\n");
-        for (const auto & [tensor, bias, count] : tallies) {
+        std::sort(ts.begin(), ts.end(), [](const tensor_statistics &a, const tensor_statistics &b) { return a.total > b.total; });
+        LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
+        LOG_INF("\n%5s\t%-20s\t%10s\t%7s\t%12s\t%9s\t%10s\t%9s\t%6s\t%12s\t%7s\t%10s\n",
+            "Layer", "Tensor", "Σ(Bias)", "Min", "Max", "μ", "σ", "% Active", "N", "Entropy", "E (norm)", "ZD Score");
+        LOG_INF("==========================================================================================================================================================================\n");
+        for (const auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] : ts) {
             std::string layer, name;
             process_tensor_name(tensor, layer, name);
-            LOG_INF("%5s\t%30s\t%15.2f\t%15.4f\t%19.4f%%\n", layer.c_str(), name.c_str(), bias, bias / count, 100.0 * bias / total);
+            LOG_INF("%5s\t%-20s\t%10.2f\t%7.4f\t%12.4f\t%8.4f\t%9.4f\t%8.2f%%\t%6d\t%12.4f\t%7.2f%%\t%10.4f\n",
+                layer.c_str(), name.c_str(), total, min, max, mean, stddev, active * 100.0f, elements, entropy, 100.0f * (entropy / std::log2(elements)), 1000.0f * zd);
         }
         LOG_INF("\n");
+
         return 0;
     }
 

From 0b7f9c40c8cbe1bc52c8152805baf9794b439cf5 Mon Sep 17 00:00:00 2001
From: Ed Addario <eaddario@hotmail.com>
Date: Tue, 15 Apr 2025 08:16:19 +0100
Subject: [PATCH 8/9] Change statistics' sort order

---
 examples/imatrix/imatrix.cpp | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index 21980635b9e6a..bc9cf0108b492 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -695,7 +695,16 @@ int main(int argc, char ** argv) {
             return 1;
         }
 
-        std::sort(ts.begin(), ts.end(), [](const tensor_statistics &a, const tensor_statistics &b) { return a.total > b.total; });
+        struct tensor_comparer {
+            bool operator()(const tensor_statistics & a, const tensor_statistics & b) const {
+                std::string layer, name_a, name_b;;
+                process_tensor_name(a.tensor, layer, name_a);
+                process_tensor_name(b.tensor, layer, name_b);
+                return name_a < name_b || (name_a == name_b && a.total > b.total);
+            }
+        };
+        std::sort(ts.begin(), ts.end(), tensor_comparer());
+
         LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
         LOG_INF("\n%5s\t%-20s\t%10s\t%7s\t%12s\t%9s\t%10s\t%9s\t%6s\t%12s\t%7s\t%10s\n",
             "Layer", "Tensor", "Σ(Bias)", "Min", "Max", "μ", "σ", "% Active", "N", "Entropy", "E (norm)", "ZD Score");
@@ -703,8 +712,8 @@ int main(int argc, char ** argv) {
         for (const auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] : ts) {
             std::string layer, name;
             process_tensor_name(tensor, layer, name);
-            LOG_INF("%5s\t%-20s\t%10.2f\t%7.4f\t%12.4f\t%8.4f\t%9.4f\t%8.2f%%\t%6d\t%12.4f\t%7.2f%%\t%10.4f\n",
-                layer.c_str(), name.c_str(), total, min, max, mean, stddev, active * 100.0f, elements, entropy, 100.0f * (entropy / std::log2(elements)), 1000.0f * zd);
+            LOG_INF("%5s\t%-20s\t%10.2f\t%7.4f\t%12.4f\t%8.4f\t%9.4f\t%8.2f%%\t%6d\t%12.4f\t%7.2f%%\t%9.2f%%\n",
+                layer.c_str(), name.c_str(), total, min, max, mean, stddev, active * 100.0f, elements, entropy, 100.0f * (entropy / std::log2(elements)), 100.0f * zd);
         }
         LOG_INF("\n");
 

From 755c1efbc15e94d5e6be47852c52ef329eb122b4 Mon Sep 17 00:00:00 2001
From: Ed Addario <eaddario@hotmail.com>
Date: Tue, 22 Apr 2025 18:42:31 +0100
Subject: [PATCH 9/9] Add Cosine Similarity

---
 examples/imatrix/imatrix.cpp | 131 +++++++++++++++++++++--------------
 1 file changed, 78 insertions(+), 53 deletions(-)

diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp
index bc9cf0108b492..2c6a06cdf236f 100644
--- a/examples/imatrix/imatrix.cpp
+++ b/examples/imatrix/imatrix.cpp
@@ -1,3 +1,9 @@
+#include "arg.h"
+#include "common.h"
+#include "llama-impl.h"
+#include "llama.h"
+#include "log.h"
+
 #include <algorithm>
 #include <chrono>
 #include <cmath>
@@ -10,11 +16,7 @@
 #include <thread>
 #include <unordered_map>
 #include <vector>
-
-#include "arg.h"
-#include "common.h"
-#include "llama.h"
-#include "log.h"
+#include <regex>
 
 #if defined(_MSC_VER)
 #pragma warning(disable: 4244 4267) // possible loss of data
@@ -36,16 +38,17 @@ struct Stats {
 
 struct tensor_statistics {
     std::string tensor;
-    float total = 0;
-    float mean = 0;
-    float max = 0;
-    float min = 0;
+    Stats stats;
+    float total_bias = 0;
+    float mean_bias = 0;
+    float max_bias = 0;
+    float min_bias = 0;
+    int elements = 0;
     float stddev = 0;
-    float cv = 0;
-    float zd = 0;
     float active = 0;
     float entropy = 0;
-    int elements = 0;
+    float zd = 0;
+    float cossim = 0;
 };
 
 class IMatrixCollector {
@@ -332,7 +335,7 @@ void IMatrixCollector::save_imatrix(int ncall) const {
     LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str());
 }
 
-bool IMatrixCollector::load_imatrix(const char * fname, std::vector<tensor_statistics> * ts) {
+bool IMatrixCollector::load_imatrix(const char * fname, std::vector<tensor_statistics> * tstats) {
     std::ifstream in(fname, std::ios::binary);
     if (!in) {
         LOG_ERR("%s: failed to open %s\n",__func__, fname);
@@ -381,31 +384,29 @@ bool IMatrixCollector::load_imatrix(const char * fname, std::vector<tensor_stati
         // Recreate the state as expected by save_imatrix(), and correct for weighted sum.
         std::vector<float> activations;
         activations.reserve(nval);
-
         for (int i = 0; i < nval; i++) {
             e.values[i] += tmp[i];
             e.counts[i] += ncall;
-            activations.push_back(e.values[i] / static_cast<float>(e.counts[i]));
+            activations.push_back(e.values[i] / e.counts[i]);
         }
         e.ncall += ncall;
 
-        if (ts) {
-            float total_bias = std::accumulate(activations.begin(), activations.end(), 0.0f);
-            float max_bias = * std::max_element(activations.begin(), activations.end());
-            float min_bias = * std::min_element(activations.begin(), activations.end());
-            float mean_bias = total_bias / activations.size();
-            float sq_total_bias = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
-            float dev = std::sqrt((sq_total_bias / activations.size()) - (mean_bias * mean_bias));
-            float rmsd = mean_bias > 0.0f ? dev / mean_bias : 0.0f;
-
-            float threshold = 1e-6f;
-            int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabs(v) < threshold; });
-            float active_ratio = 1 -  (static_cast<float>(inactive_count) / activations.size());
-
-            float ent = 0.0f;
-            if (total_bias > 0) {
+        if (tstats) {
+            float total = std::accumulate(activations.begin(), activations.end(), 0.0f);
+            float max = * std::max_element(activations.begin(), activations.end());
+            float min = * std::min_element(activations.begin(), activations.end());
+            float mean = total / activations.size();
+            float sq_total = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f);
+            float dev = std::sqrt((sq_total / activations.size()) - (mean * mean));
+
+            float threshold = min + min * 0.5f;
+            int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabs(v) <= threshold; });
+            float active_ratio = 1 -  static_cast<float>(inactive_count) / activations.size();
+
+            float ent = 0;
+            if (total > 0) {
                 for (auto act : activations) {
-                    if (float p = act / total_bias; p > 0) {
+                    if (float p = act / total; p > 0) {
                         ent -= p* std::log2(p);
                     }
                 }
@@ -413,26 +414,48 @@ bool IMatrixCollector::load_imatrix(const char * fname, std::vector<tensor_stati
 
             int z_score = 0;
             for (auto act : activations) {
-                if (float p = (act - mean_bias) / dev; p > 1) {
+                if (float p = (act - mean) / dev; p > 1) {
                     z_score++;
                 }
             }
 
-            ts->emplace_back();
-            auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] = (*ts)[i];
-            tensor = name_as_vec.data();
-            total = total_bias;
-            mean = mean_bias;
-            max = max_bias;
-            min = min_bias;
-            stddev = dev;
-            cv = rmsd;
-            active = active_ratio;
-            entropy = ent;
-            elements = static_cast<int>(activations.size());
-            zd = static_cast<float>(z_score) / static_cast<float>(elements);
+            tstats->emplace_back();
+            auto & ts     = (*tstats)[i];
+            ts.tensor     = name_as_vec.data();
+            ts.stats      = e;
+            ts.total_bias = total;
+            ts.mean_bias  = mean;
+            ts.max_bias   = max;
+            ts.min_bias   = min;
+            ts.elements   = static_cast<int>(activations.size());
+            ts.stddev     = dev;
+            ts.active     = active_ratio;
+            ts.entropy    = ent;
+            ts.zd = static_cast<float>(z_score) / ts.elements;
+        }
+    }
+
+    // Cosine similarity between each blk.N tensor and the same tensor in blk.N-1 (0 when undefined).
+    if (tstats) {
+        static const std::regex pattern(R"(blk\.(\d+)\.)");
+        for (auto & ts : *tstats) {
+            ts.cossim = 0;
+            if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) {
+                std::string tname(ts.tensor);
+                tname.replace(match.position(1), match.length(1), std::to_string(std::stoi(match[1]) - 1));
+                const auto prev = std::find_if(tstats->begin(), tstats->end(), [&tname](const tensor_statistics & t) { return t.tensor == tname; });
+                const auto & cur = ts.stats.values;
+                // std::inner_product reads cur.size() elements from both ranges, so the sizes must match.
+                if (prev != tstats->end() && prev->stats.values.size() == cur.size()) {
+                    const auto & ref = prev->stats.values;
+                    const float dp  = std::inner_product(cur.begin(), cur.end(), ref.begin(), 0.0f);
+                    const float mag = std::sqrt(std::inner_product(cur.begin(), cur.end(), cur.begin(), 0.0f)) * std::sqrt(std::inner_product(ref.begin(), ref.end(), ref.begin(), 0.0f));
+                    ts.cossim = mag > 0.0f ? dp / mag : 0.0f; // all-zero vectors have no direction
+                }
+            }
+        }
+    }
+
     return true;
 }
 
@@ -700,20 +723,22 @@ int main(int argc, char ** argv) {
                 std::string layer, name_a, name_b;;
                 process_tensor_name(a.tensor, layer, name_a);
                 process_tensor_name(b.tensor, layer, name_b);
-                return name_a < name_b || (name_a == name_b && a.total > b.total);
+                return name_a < name_b || (name_a == name_b && a.total_bias > b.total_bias);
             }
         };
         std::sort(ts.begin(), ts.end(), tensor_comparer());
 
         LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast<int>(ts.size()));
-        LOG_INF("\n%5s\t%-20s\t%10s\t%7s\t%12s\t%9s\t%10s\t%9s\t%6s\t%12s\t%7s\t%10s\n",
-            "Layer", "Tensor", "Σ(Bias)", "Min", "Max", "μ", "σ", "% Active", "N", "Entropy", "E (norm)", "ZD Score");
-        LOG_INF("==========================================================================================================================================================================\n");
-        for (const auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] : ts) {
+        LOG_INF("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n",
+            " Layer", "       Tensor", "          Σ(Bias)", "  Min", "            Max", "           μ", "   σ", " % Active", "N", "   Entropy", "E (norm)", "ZD", "  CosSim");
+        LOG_INF("=========================================================================================================================================================================\n");
+        for (const auto & tstat : ts) {
             std::string layer, name;
-            process_tensor_name(tensor, layer, name);
-            LOG_INF("%5s\t%-20s\t%10.2f\t%7.4f\t%12.4f\t%8.4f\t%9.4f\t%8.2f%%\t%6d\t%12.4f\t%7.2f%%\t%9.2f%%\n",
-                layer.c_str(), name.c_str(), total, min, max, mean, stddev, active * 100.0f, elements, entropy, 100.0f * (entropy / std::log2(elements)), 100.0f * zd);
+            process_tensor_name(tstat.tensor, layer, name);
+            LOG_INF("%5s\t%-20s\t%10.2f\t%8.4f\t%11.4f\t%6.2f\t%6.2f\t%8.2f%%\t%6d\t%10.4f\t%6.2f%%\t%10.2f%%\t%8.4f\n",
+                layer.c_str(), name.c_str(), tstat.total_bias, tstat.min_bias, tstat.max_bias, tstat.mean_bias, tstat.stddev,
+                tstat.active * 100.0f, tstat.elements, tstat.entropy, 100.0f * (tstat.entropy / std::log2(tstat.elements)),
+                100.0f * tstat.zd, tstat.cossim);
         }
         LOG_INF("\n");