From d8e902ee5dbfd6a6888de7b48b1792882e178a60 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 1 Apr 2025 21:54:52 +0100 Subject: [PATCH 1/9] Add --show-statistics option --- common/arg.cpp | 7 +++++++ common/common.h | 5 +++-- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/common/arg.cpp b/common/arg.cpp index 8292adaac655d..851130762c5c0 100644 --- a/common/arg.cpp +++ b/common/arg.cpp @@ -1925,6 +1925,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex params.i_chunk = value; } ).set_examples({LLAMA_EXAMPLE_IMATRIX})); + add_opt(common_arg( + {"--show-statistics"}, + string_format("show imatrix statistics and then exit (default: %s)", params.show_statistics ? "true" : "false"), + [](common_params & params) { + params.show_statistics = true; + } + ).set_examples({LLAMA_EXAMPLE_IMATRIX})); add_opt(common_arg( {"-pps"}, string_format("is the prompt shared across parallel sequences (default: %s)", params.is_pp_shared ? "true" : "false"), diff --git a/common/common.h b/common/common.h index 1c0f199774976..7db669c88b924 100644 --- a/common/common.h +++ b/common/common.h @@ -411,8 +411,9 @@ struct common_params { int32_t n_save_freq = 0; // save the imatrix every n_save_freq iterations int32_t i_chunk = 0; // start processing from this chunk - bool process_output = false; // collect data for the output tensor - bool compute_ppl = true; // whether to compute perplexity + bool process_output = false; // collect data for the output tensor + bool compute_ppl = true; // whether to compute perplexity + bool show_statistics = false; // show imatrix statistics per tensor // cvector-generator params int n_pca_batch = 100; From f46693bc69a851b9693a723a2cf2c96f7ab9304f Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 1 Apr 2025 21:55:41 +0100 Subject: [PATCH 2/9] Add --show-statistics logic --- examples/imatrix/imatrix.cpp | 65 +++++++++++++++++++++++++++++++----- 1 file changed, 57 insertions(+), 8 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 31b675e8f90b9..49d1d395039d5 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -21,10 +21,9 @@ static void print_usage(int, char ** argv) { LOG("\nexample usage:\n"); - LOG("\n %s \\\n" - " -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output] \\\n" - " [--no-ppl] [--chunk 123] [--output-frequency 10] [--save-frequency 0] \\\n" - " [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]); + LOG("\n %s -m model.gguf -f some-text.txt [-o imatrix.dat] [--process-output]\n" + " [--chunk 123] [--output-frequency 10] [--save-frequency 0] [--show-statistics]\n" + " [--no-ppl] [--in-file imatrix-prev-0.dat --in-file imatrix-prev-1.dat ...]\n" , argv[0]); LOG("\n"); } @@ -34,13 +33,19 @@ struct Stats { int ncall = 0; }; +struct Tally { + std::string tensor; + float value = 0; + int count = 0; +}; + class IMatrixCollector { public: IMatrixCollector() = default; void set_params(common_params params) { m_params = std::move(params); } bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data); void save_imatrix(int ncall = -1) const; - bool load_imatrix(const char * fname); + bool load_imatrix(const char * fname, std::vector * tally = nullptr); private: std::unordered_map m_stats; common_params m_params; @@ -289,7 +294,7 @@ void IMatrixCollector::save_imatrix(int ncall) const { LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str()); } -bool IMatrixCollector::load_imatrix(const char * fname) { +bool IMatrixCollector::load_imatrix(const char * fname, std::vector * tally) { std::ifstream in(fname, std::ios::binary); if (!in) { LOG_ERR("%s: failed to open %s\n",__func__, fname); @@ -335,13 +340,22 @@ bool IMatrixCollector::load_imatrix(const char * fname) { return false; } - // Recreate the state as expected by save_imatrix(), and corerct for weighted sum. + // Recreate the state as expected by save_imatrix(), and correct for weighted sum. + float total = 0; for (int i = 0; i < nval; i++) { e.values[i] += tmp[i]; + total += tmp[i]; e.counts[i] += ncall; } e.ncall += ncall; + if (tally) { + tally->emplace_back(); + auto & [tensor, value, count] = (*tally)[i]; + tensor = name_as_vec.data(); + value = total; + count = nval; + } } return true; } @@ -352,7 +366,6 @@ static bool ik_collect_imatrix(struct ggml_tensor * t, bool ask, void * user_dat return g_collector.collect_imatrix(t, ask, user_data); } - struct results_log_softmax { double log_softmax; float logit; @@ -590,6 +603,42 @@ int main(int argc, char ** argv) { return 1; } + std::vector tallies; + + if (params.show_statistics) { + if (params.in_files.empty() || params.in_files.size() > 1) { + LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n"); + return 1; + } + if (!g_collector.load_imatrix(params.in_files[0].c_str(), & tallies)) { + LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str()); + return 1; + } + if (tallies.empty()) { + LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str()); + return 1; + } + float total = 0; + for (const auto & tallie : tallies) { + total += tallie.value / static_cast(tallie.count); + } + + struct tally_sort { + bool operator()(const Tally& x, const Tally & y) const { + return x.value / static_cast(x.count) > y.value / static_cast(y.count); + } + }; + std::sort(tallies.begin(), tallies.end(), tally_sort()); + LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(tallies.size())); + LOG_INF("\n Tensor Σ(weights) Contribution\n"); + LOG_INF("==========================================================================\n"); + for (const auto & [tensor, value, count] : tallies) { + LOG_INF("%40s\t%10.2f\t%7.4f %%\n", tensor.c_str(), value / count, 100.0f * (value / count / total)); + } + LOG_INF("\n"); + return 0; + } + common_init(); params.n_batch = std::min(params.n_batch, params.n_ctx); From dc3373e5bb66febcc4e9d0061758c041cefee4d6 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Wed, 2 Apr 2025 11:43:38 +0100 Subject: [PATCH 3/9] Add tensor name parsing --- examples/imatrix/imatrix.cpp | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 49d1d395039d5..3467f8f937c2e 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -74,6 +74,35 @@ static std::string filter_tensor_name(const char * name) { return wname; } +static void process_tensor_name(const std::string & input, std::string & layer, std::string & tensor) { + std::vector name; + std::istringstream stream(input); + std::string item; + + while (std::getline(stream, item, '.')) { + name.push_back(item); + } + for (size_t i = 0; i < name.size(); ++i) { + if (name[i] == "blk" && i + 1 < name.size()) { + layer = name[i + 1]; + break; + } + } + for (size_t i = 0; i < name.size(); ++i) { + if (name[i] == "weight" && i > 0) { + tensor = name[i - 1]; + break; + } + } + + if (tensor.empty()) { + tensor = input; + } + if (layer.empty()) { + layer = "-"; + } +} + bool IMatrixCollector::collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data) { GGML_UNUSED(user_data); From 0589c3ee9ffe936663d4119fd647ba9a785fda9f Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Wed, 2 Apr 2025 11:44:03 +0100 Subject: [PATCH 4/9] Tidy output format --- examples/imatrix/imatrix.cpp | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 3467f8f937c2e..30b9a9a295da5 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -658,11 +658,14 @@ int main(int argc, char ** argv) { } }; std::sort(tallies.begin(), tallies.end(), tally_sort()); + LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(tallies.size())); - LOG_INF("\n Tensor Σ(weights) Contribution\n"); - LOG_INF("==========================================================================\n"); + LOG_INF("\n Layer\t Tensor\t Σ(Importance Scores)\t Contribution\n"); + LOG_INF("================================================================================\n"); for (const auto & [tensor, value, count] : tallies) { - LOG_INF("%40s\t%10.2f\t%7.4f %%\n", tensor.c_str(), value / count, 100.0f * (value / count / total)); + std::string layer, name; + process_tensor_name(tensor, layer, name); + LOG_INF("%5s\t%30s\t%15.2f\t%20.4f %%\n", layer.c_str(), name.c_str(), value / count, 100.0f * (value / count / total)); } LOG_INF("\n"); return 0; From e1fd1af77e9750e4cca7accc5efc20f2a16deecb Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Wed, 2 Apr 2025 14:13:42 +0100 Subject: [PATCH 5/9] Fix typo in title --- examples/imatrix/imatrix.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 30b9a9a295da5..f1d2febfda621 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -660,7 +660,7 @@ int main(int argc, char ** argv) { std::sort(tallies.begin(), tallies.end(), tally_sort()); LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(tallies.size())); - LOG_INF("\n Layer\t Tensor\t Σ(Importance Scores)\t Contribution\n"); + LOG_INF("\n Layer\t Tensor\t μ(Importance Scores)\t Contribution\n"); LOG_INF("================================================================================\n"); for (const auto & [tensor, value, count] : tallies) { std::string layer, name; From 62ac26833a4b91866d9c93dabb88c625429f2580 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 8 Apr 2025 17:07:16 +0100 Subject: [PATCH 6/9] Improve tensor influence ranking --- examples/imatrix/imatrix.cpp | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index f1d2febfda621..ab31fd7b2f2e6 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -35,7 +35,7 @@ struct Stats { struct Tally { std::string tensor; - float value = 0; + double bias = 0; int count = 0; }; @@ -370,19 +370,20 @@ bool IMatrixCollector::load_imatrix(const char * fname, std::vector * tal } // Recreate the state as expected by save_imatrix(), and correct for weighted sum. - float total = 0; + double total = 0; for (int i = 0; i < nval; i++) { e.values[i] += tmp[i]; - total += tmp[i]; e.counts[i] += ncall; + const double avg_sq = (1.0 * e.values[i]) / e.counts[i]; + total += avg_sq; } e.ncall += ncall; if (tally) { tally->emplace_back(); - auto & [tensor, value, count] = (*tally)[i]; + auto & [tensor, bias, count] = (*tally)[i]; tensor = name_as_vec.data(); - value = total; + bias = total; count = nval; } } @@ -647,25 +648,25 @@ int main(int argc, char ** argv) { LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str()); return 1; } - float total = 0; + double total = 0; for (const auto & tallie : tallies) { - total += tallie.value / static_cast(tallie.count); + total += tallie.bias; } struct tally_sort { bool operator()(const Tally& x, const Tally & y) const { - return x.value / static_cast(x.count) > y.value / static_cast(y.count); + return x.bias > y.bias; } }; std::sort(tallies.begin(), tallies.end(), tally_sort()); LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(tallies.size())); - LOG_INF("\n Layer\t Tensor\t μ(Importance Scores)\t Contribution\n"); - LOG_INF("================================================================================\n"); - for (const auto & [tensor, value, count] : tallies) { + LOG_INF("\n Layer\t Tensor\t Total Bias\tAvg Bias\t Contribution\n"); + LOG_INF("===============================================================================================\n"); + for (const auto & [tensor, bias, count] : tallies) { std::string layer, name; process_tensor_name(tensor, layer, name); - LOG_INF("%5s\t%30s\t%15.2f\t%20.4f %%\n", layer.c_str(), name.c_str(), value / count, 100.0f * (value / count / total)); + LOG_INF("%5s\t%30s\t%15.2f\t%15.4f\t%19.4f%%\n", layer.c_str(), name.c_str(), bias, bias / count, 100.0 * bias / total); } LOG_INF("\n"); return 0; From 73d8ecbc42bb0d032a9b63d959db9fff16e02243 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Sun, 13 Apr 2025 20:30:36 +0100 Subject: [PATCH 7/9] Add better statistics --- examples/imatrix/imatrix.cpp | 121 +++++++++++++++++++++++------------ 1 file changed, 80 insertions(+), 41 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index ab31fd7b2f2e6..21980635b9e6a 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -1,19 +1,20 @@ -#include "arg.h" -#include "common.h" -#include "log.h" -#include "llama.h" - +#include #include #include #include #include #include -#include -#include -#include #include +#include +#include +#include #include -#include +#include + +#include "arg.h" +#include "common.h" +#include "llama.h" +#include "log.h" #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data @@ -33,10 +34,18 @@ struct Stats { int ncall = 0; }; -struct Tally { +struct tensor_statistics { std::string tensor; - double bias = 0; - int count = 0; + float total = 0; + float mean = 0; + float max = 0; + float min = 0; + float stddev = 0; + float cv = 0; + float zd = 0; + float active = 0; + float entropy = 0; + int elements = 0; }; class IMatrixCollector { @@ -45,7 +54,7 @@ class IMatrixCollector { void set_params(common_params params) { m_params = std::move(params); } bool collect_imatrix(struct ggml_tensor * t, bool ask, void * user_data); void save_imatrix(int ncall = -1) const; - bool load_imatrix(const char * fname, std::vector * tally = nullptr); + bool load_imatrix(const char * fname, std::vector * tstats = nullptr); private: std::unordered_map m_stats; common_params m_params; @@ -323,7 +332,7 @@ void IMatrixCollector::save_imatrix(int ncall) const { LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str()); } -bool IMatrixCollector::load_imatrix(const char * fname, std::vector * tally) { +bool IMatrixCollector::load_imatrix(const char * fname, std::vector * ts) { std::ifstream in(fname, std::ios::binary); if (!in) { LOG_ERR("%s: failed to open %s\n",__func__, fname); @@ -370,21 +379,58 @@ bool IMatrixCollector::load_imatrix(const char * fname, std::vector * tal } // Recreate the state as expected by save_imatrix(), and correct for weighted sum. - double total = 0; + std::vector activations; + activations.reserve(nval); + for (int i = 0; i < nval; i++) { e.values[i] += tmp[i]; e.counts[i] += ncall; - const double avg_sq = (1.0 * e.values[i]) / e.counts[i]; - total += avg_sq; + activations.push_back(e.values[i] / static_cast(e.counts[i])); } e.ncall += ncall; - if (tally) { - tally->emplace_back(); - auto & [tensor, bias, count] = (*tally)[i]; + if (ts) { + float total_bias = std::accumulate(activations.begin(), activations.end(), 0.0f); + float max_bias = * std::max_element(activations.begin(), activations.end()); + float min_bias = * std::min_element(activations.begin(), activations.end()); + float mean_bias = total_bias / activations.size(); + float sq_total_bias = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f); + float dev = std::sqrt((sq_total_bias / activations.size()) - (mean_bias * mean_bias)); + float rmsd = mean_bias > 0.0f ? dev / mean_bias : 0.0f; + + float threshold = 1e-6f; + int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabs(v) < threshold; }); + float active_ratio = 1 - (static_cast(inactive_count) / activations.size()); + + float ent = 0.0f; + if (total_bias > 0) { + for (auto act : activations) { + if (float p = act / total_bias; p > 0) { + ent -= p* std::log2(p); + } + } + } + + int z_score = 0; + for (auto act : activations) { + if (float p = (act - mean_bias) / dev; p > 1) { + z_score++; + } + } + + ts->emplace_back(); + auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] = (*ts)[i]; tensor = name_as_vec.data(); - bias = total; - count = nval; + total = total_bias; + mean = mean_bias; + max = max_bias; + min = min_bias; + stddev = dev; + cv = rmsd; + active = active_ratio; + entropy = ent; + elements = static_cast(activations.size()); + zd = static_cast(z_score) / static_cast(elements); } } return true; @@ -633,42 +679,35 @@ int main(int argc, char ** argv) { return 1; } - std::vector tallies; + std::vector ts; if (params.show_statistics) { if (params.in_files.empty() || params.in_files.size() > 1) { LOG_ERR("\nError: a single imatrix file is required to compute tensor statistics\n\n"); return 1; } - if (!g_collector.load_imatrix(params.in_files[0].c_str(), & tallies)) { + if (!g_collector.load_imatrix(params.in_files[0].c_str(), & ts)) { LOG_ERR("\nError: %s is not a valid imatrix file\n\n", params.in_files[0].c_str()); return 1; } - if (tallies.empty()) { + if (ts.empty()) { LOG_ERR("Error: cannot compute statistics for %s\n\n", params.in_files[0].c_str()); return 1; } - double total = 0; - for (const auto & tallie : tallies) { - total += tallie.bias; - } - struct tally_sort { - bool operator()(const Tally& x, const Tally & y) const { - return x.bias > y.bias; - } - }; - std::sort(tallies.begin(), tallies.end(), tally_sort()); - - LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(tallies.size())); - LOG_INF("\n Layer\t Tensor\t Total Bias\tAvg Bias\t Contribution\n"); - LOG_INF("===============================================================================================\n"); - for (const auto & [tensor, bias, count] : tallies) { + std::sort(ts.begin(), ts.end(), [](const tensor_statistics &a, const tensor_statistics &b) { return a.total > b.total; }); + LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(ts.size())); + LOG_INF("\n%5s\t%-20s\t%10s\t%7s\t%12s\t%9s\t%10s\t%9s\t%6s\t%12s\t%7s\t%10s\n", + "Layer", "Tensor", "Σ(Bias)", "Min", "Max", "μ", "σ", "% Active", "N", "Entropy", "E (norm)", "ZD Score"); + LOG_INF("==========================================================================================================================================================================\n"); + for (const auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] : ts) { std::string layer, name; process_tensor_name(tensor, layer, name); - LOG_INF("%5s\t%30s\t%15.2f\t%15.4f\t%19.4f%%\n", layer.c_str(), name.c_str(), bias, bias / count, 100.0 * bias / total); + LOG_INF("%5s\t%-20s\t%10.2f\t%7.4f\t%12.4f\t%8.4f\t%9.4f\t%8.2f%%\t%6d\t%12.4f\t%7.2f%%\t%10.4f\n", + layer.c_str(), name.c_str(), total, min, max, mean, stddev, active * 100.0f, elements, entropy, 100.0f * (entropy / std::log2(elements)), 1000.0f * zd); } LOG_INF("\n"); + return 0; } From 0b7f9c40c8cbe1bc52c8152805baf9794b439cf5 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 15 Apr 2025 08:16:19 +0100 Subject: [PATCH 8/9] Change statistics' sort order --- examples/imatrix/imatrix.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index 21980635b9e6a..bc9cf0108b492 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -695,7 +695,16 @@ int main(int argc, char ** argv) { return 1; } - std::sort(ts.begin(), ts.end(), [](const tensor_statistics &a, const tensor_statistics &b) { return a.total > b.total; }); + struct tensor_comparer { + bool operator()(const tensor_statistics & a, const tensor_statistics & b) const { + std::string layer, name_a, name_b;; + process_tensor_name(a.tensor, layer, name_a); + process_tensor_name(b.tensor, layer, name_b); + return name_a < name_b || (name_a == name_b && a.total > b.total); + } + }; + std::sort(ts.begin(), ts.end(), tensor_comparer()); + LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(ts.size())); LOG_INF("\n%5s\t%-20s\t%10s\t%7s\t%12s\t%9s\t%10s\t%9s\t%6s\t%12s\t%7s\t%10s\n", "Layer", "Tensor", "Σ(Bias)", "Min", "Max", "μ", "σ", "% Active", "N", "Entropy", "E (norm)", "ZD Score"); @@ -703,8 +712,8 @@ int main(int argc, char ** argv) { for (const auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] : ts) { std::string layer, name; process_tensor_name(tensor, layer, name); - LOG_INF("%5s\t%-20s\t%10.2f\t%7.4f\t%12.4f\t%8.4f\t%9.4f\t%8.2f%%\t%6d\t%12.4f\t%7.2f%%\t%10.4f\n", - layer.c_str(), name.c_str(), total, min, max, mean, stddev, active * 100.0f, elements, entropy, 100.0f * (entropy / std::log2(elements)), 1000.0f * zd); + LOG_INF("%5s\t%-20s\t%10.2f\t%7.4f\t%12.4f\t%8.4f\t%9.4f\t%8.2f%%\t%6d\t%12.4f\t%7.2f%%\t%9.2f%%\n", + layer.c_str(), name.c_str(), total, min, max, mean, stddev, active * 100.0f, elements, entropy, 100.0f * (entropy / std::log2(elements)), 100.0f * zd); } LOG_INF("\n"); From 755c1efbc15e94d5e6be47852c52ef329eb122b4 Mon Sep 17 00:00:00 2001 From: Ed Addario Date: Tue, 22 Apr 2025 18:42:31 +0100 Subject: [PATCH 9/9] Add Cosine Similarity --- examples/imatrix/imatrix.cpp | 131 +++++++++++++++++++++-------------- 1 file changed, 78 insertions(+), 53 deletions(-) diff --git a/examples/imatrix/imatrix.cpp b/examples/imatrix/imatrix.cpp index bc9cf0108b492..2c6a06cdf236f 100644 --- a/examples/imatrix/imatrix.cpp +++ b/examples/imatrix/imatrix.cpp @@ -1,3 +1,9 @@ +#include "arg.h" +#include "common.h" +#include "llama-impl.h" +#include "llama.h" +#include "log.h" + #include #include #include @@ -10,11 +16,7 @@ #include #include #include - -#include "arg.h" -#include "common.h" -#include "llama.h" -#include "log.h" +#include #if defined(_MSC_VER) #pragma warning(disable: 4244 4267) // possible loss of data @@ -36,16 +38,17 @@ struct Stats { struct tensor_statistics { std::string tensor; - float total = 0; - float mean = 0; - float max = 0; - float min = 0; + Stats stats; + float total_bias = 0; + float mean_bias = 0; + float max_bias = 0; + float min_bias = 0; + int elements = 0; float stddev = 0; - float cv = 0; - float zd = 0; float active = 0; float entropy = 0; - int elements = 0; + float zd = 0; + float cossim = 0; }; class IMatrixCollector { @@ -332,7 +335,7 @@ void IMatrixCollector::save_imatrix(int ncall) const { LOG_DBGV(1, "%s: stored collected data after %d chunks in %s\n", __func__, m_last_call, fname.c_str()); } -bool IMatrixCollector::load_imatrix(const char * fname, std::vector * ts) { +bool IMatrixCollector::load_imatrix(const char * fname, std::vector * tstats) { std::ifstream in(fname, std::ios::binary); if (!in) { LOG_ERR("%s: failed to open %s\n",__func__, fname); @@ -381,31 +384,29 @@ bool IMatrixCollector::load_imatrix(const char * fname, std::vector activations; activations.reserve(nval); - for (int i = 0; i < nval; i++) { e.values[i] += tmp[i]; e.counts[i] += ncall; - activations.push_back(e.values[i] / static_cast(e.counts[i])); + activations.push_back(e.values[i] / e.counts[i]); } e.ncall += ncall; - if (ts) { - float total_bias = std::accumulate(activations.begin(), activations.end(), 0.0f); - float max_bias = * std::max_element(activations.begin(), activations.end()); - float min_bias = * std::min_element(activations.begin(), activations.end()); - float mean_bias = total_bias / activations.size(); - float sq_total_bias = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f); - float dev = std::sqrt((sq_total_bias / activations.size()) - (mean_bias * mean_bias)); - float rmsd = mean_bias > 0.0f ? dev / mean_bias : 0.0f; - - float threshold = 1e-6f; - int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabs(v) < threshold; }); - float active_ratio = 1 - (static_cast(inactive_count) / activations.size()); - - float ent = 0.0f; - if (total_bias > 0) { + if (tstats) { + float total = std::accumulate(activations.begin(), activations.end(), 0.0f); + float max = * std::max_element(activations.begin(), activations.end()); + float min = * std::min_element(activations.begin(), activations.end()); + float mean = total / activations.size(); + float sq_total = std::inner_product(activations.begin(), activations.end(), activations.begin(), 0.0f); + float dev = std::sqrt((sq_total / activations.size()) - (mean * mean)); + + float threshold = min + min * 0.5f; + int inactive_count = std::count_if(activations.begin(), activations.end(), [threshold](const float v) { return fabs(v) <= threshold; }); + float active_ratio = 1 - static_cast(inactive_count) / activations.size(); + + float ent = 0; + if (total > 0) { for (auto act : activations) { - if (float p = act / total_bias; p > 0) { + if (float p = act / total; p > 0) { ent -= p* std::log2(p); } } @@ -413,26 +414,48 @@ bool IMatrixCollector::load_imatrix(const char * fname, std::vector 1) { + if (float p = (act - mean) / dev; p > 1) { z_score++; } } - ts->emplace_back(); - auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] = (*ts)[i]; - tensor = name_as_vec.data(); - total = total_bias; - mean = mean_bias; - max = max_bias; - min = min_bias; - stddev = dev; - cv = rmsd; - active = active_ratio; - entropy = ent; - elements = static_cast(activations.size()); - zd = static_cast(z_score) / static_cast(elements); + tstats->emplace_back(); + auto & ts = (*tstats)[i]; + ts.tensor = name_as_vec.data(); + ts.stats = e; + ts.total_bias = total; + ts.mean_bias = mean; + ts.max_bias = max; + ts.min_bias = min; + ts.elements = static_cast(activations.size()); + ts.stddev = dev; + ts.active = active_ratio; + ts.entropy = ent; + ts.zd = static_cast(z_score) / ts.elements; + } + } + + if (tstats) { + static const std::regex pattern(R"(blk\.(\d+)\.)"); + for (auto & ts : *tstats) { + if (std::smatch match; std::regex_search(ts.tensor, match, pattern)) { + const int blk = std::stoi(match[1]); + std::string tname(ts.tensor); + tname.replace(match.position(1), match.length(1), std::to_string(blk-1)); + auto prev = std::find_if(tstats->begin(), tstats->end(), [tname](const tensor_statistics & t) { return t.tensor == tname; }); + if (prev != tstats->end()) { + const float dp = std::inner_product(ts.stats.values.begin(), ts.stats.values.end(), prev->stats.values.begin(), 0.0f); + const float curr_mag = std::sqrt(std::inner_product(ts.stats.values.begin(), ts.stats.values.end(), ts.stats.values.begin(), 0.0f)); + const float prev_mag = std::sqrt(std::inner_product(prev->stats.values.begin(), prev->stats.values.end(), prev->stats.values.begin(), 0.0f)); + const float cs = dp / (curr_mag * prev_mag); + ts.cossim = cs; + } + } else { + ts.cossim = 0; + } } } + return true; } @@ -700,20 +723,22 @@ int main(int argc, char ** argv) { std::string layer, name_a, name_b;; process_tensor_name(a.tensor, layer, name_a); process_tensor_name(b.tensor, layer, name_b); - return name_a < name_b || (name_a == name_b && a.total > b.total); + return name_a < name_b || (name_a == name_b && a.total_bias > b.total_bias); } }; std::sort(ts.begin(), ts.end(), tensor_comparer()); LOG_INF("\nComputing statistics for %s (%d tensors)\n", params.in_files[0].c_str(), static_cast(ts.size())); - LOG_INF("\n%5s\t%-20s\t%10s\t%7s\t%12s\t%9s\t%10s\t%9s\t%6s\t%12s\t%7s\t%10s\n", - "Layer", "Tensor", "Σ(Bias)", "Min", "Max", "μ", "σ", "% Active", "N", "Entropy", "E (norm)", "ZD Score"); - LOG_INF("==========================================================================================================================================================================\n"); - for (const auto & [tensor, total, mean, max, min, stddev, cv, zd, active, entropy, elements] : ts) { + LOG_INF("\n%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n", + " Layer", " Tensor", " Σ(Bias)", " Min", " Max", " μ", " σ", " % Active", "N", " Entropy", "E (norm)", "ZD", " CosSim"); + LOG_INF("=========================================================================================================================================================================\n"); + for (const auto & tstat : ts) { std::string layer, name; - process_tensor_name(tensor, layer, name); - LOG_INF("%5s\t%-20s\t%10.2f\t%7.4f\t%12.4f\t%8.4f\t%9.4f\t%8.2f%%\t%6d\t%12.4f\t%7.2f%%\t%9.2f%%\n", - layer.c_str(), name.c_str(), total, min, max, mean, stddev, active * 100.0f, elements, entropy, 100.0f * (entropy / std::log2(elements)), 100.0f * zd); + process_tensor_name(tstat.tensor, layer, name); + LOG_INF("%5s\t%-20s\t%10.2f\t%8.4f\t%11.4f\t%6.2f\t%6.2f\t%8.2f%%\t%6d\t%10.4f\t%6.2f%%\t%10.2f%%\t%8.4f\n", + layer.c_str(), name.c_str(), tstat.total_bias, tstat.min_bias, tstat.max_bias, tstat.mean_bias, tstat.stddev, + tstat.active * 100.0f, tstat.elements, tstat.entropy, 100.0f * (tstat.entropy / std::log2(tstat.elements)), + 100.0f * tstat.zd, tstat.cossim); } LOG_INF("\n");