From 04943624a4c9e4866d16bfa5ffa0cdc16d8433c5 Mon Sep 17 00:00:00 2001 From: Bill Chen Date: Fri, 5 Jul 2024 18:14:53 +0800 Subject: [PATCH 1/4] Add collect_scope_start/end for the perf collector. Signed-off-by: Bill Chen --- collectors/perf.cpp | 82 ++++++++++++++++++++++++++++++++++++++++++--- collectors/perf.hpp | 31 ++++++++++++++++- interface.cpp | 22 ++++++++++++ interface.hpp | 10 ++++++ 4 files changed, 139 insertions(+), 6 deletions(-) diff --git a/collectors/perf.cpp b/collectors/perf.cpp index aaef865..048ab6d 100644 --- a/collectors/perf.cpp +++ b/collectors/perf.cpp @@ -84,8 +84,8 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name) struct event leader = {"CPUCycleCount", PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES}; mSet = mConfig.get("set", -1).asInt(); - mInherit = mConfig.get("inherit", 1).asInt(); - + mInherit = mConfig.get("inherit", 1).asInt(); + leader.inherited = mInherit; mEvents.push_back(leader); @@ -145,7 +145,7 @@ PerfCollector::PerfCollector(const Json::Value& config, const std::string& name) } } else if(e.device!="") - {//for d9000, CPU cores on different PMU + {//for d9000, CPU cores on different PMU e.config = item.get("config", 0).asUInt64(); auto type_string = e.device; @@ -405,7 +405,6 @@ bool PerfCollector::collect(int64_t now) { if (!mCollecting) return false; - struct snapshot snap; for (perf_thread& t : mReplayThreads) { @@ -444,6 +443,63 @@ bool PerfCollector::collect(int64_t now) return true; } +bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id) { + if (!mCollecting) return false; + struct snapshot snap; + for (perf_thread& t : mReplayThreads) + { + t.eventCtx.collect_scope(now, func_id, false); + } + for (perf_thread& t : mBgThreads) + { + t.eventCtx.collect_scope(now, func_id, false); + } + for (perf_thread& t : mMultiPMUThreads) + { + t.eventCtx.collect_scope(now, func_id, false); + } + for (perf_thread& t : mBookerThread) + { + t.eventCtx.collect_scope(now, func_id, 
false); + } + for (perf_thread& t : mCSPMUThreads) + { + t.eventCtx.collect_scope(now, func_id, false); + } + return true; +} + +bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id) { + if (!mCollecting) return false; + struct snapshot snap_start, snap_stop; + for (perf_thread &t : mReplayThreads) { + snap_start = t.eventCtx.last_snap; + snap_stop = t.eventCtx.collect_scope(now, func_id, true); + t.eventCtx.update_data_perapi(func_id, snap_start, snap_stop, t.mResultsPerThread); + } + for (perf_thread &t : mBgThreads) { + snap_start = t.eventCtx.last_snap; + snap_stop = t.eventCtx.collect_scope(now, func_id, true); + t.eventCtx.update_data_perapi(func_id, snap_start, snap_stop, t.mResultsPerThread); + } + for (perf_thread &t : mMultiPMUThreads) { + snap_start = t.eventCtx.last_snap; + snap_stop = t.eventCtx.collect_scope(now, func_id, true); + t.eventCtx.update_data_perapi(func_id, snap_start, snap_stop, t.mResultsPerThread); + } + for (perf_thread &t : mBookerThread) { + snap_start = t.eventCtx.last_snap; + snap_stop = t.eventCtx.collect_scope(now, func_id, true); + t.eventCtx.update_data_perapi(func_id, snap_start, snap_stop, t.mResultsPerThread); + } + for (perf_thread &t : mCSPMUThreads) { + snap_start = t.eventCtx.last_snap; + snap_stop = t.eventCtx.collect_scope(now, func_id, true); + t.eventCtx.update_data_perapi(func_id, snap_start, snap_stop, t.mResultsPerThread); + } + return false; +} + bool PerfCollector::postprocess(const std::vector& timing) { Json::Value v; @@ -521,7 +577,7 @@ bool PerfCollector::postprocess(const std::vector& timing) } mCustomResult["thread_data"].append(perf_threadValue); } - + mCustomResult["thread_data"].append(bgValue); mCustomResult["thread_data"].append(allValue); } @@ -617,6 +673,7 @@ bool event_context::stop() return true; } +// Collect and reset the perf counters to 0. 
struct snapshot event_context::collect(int64_t now) { struct snapshot snap; @@ -626,6 +683,21 @@ struct snapshot event_context::collect(int64_t now) return snap; } +struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool stopping) { + if (stopping && last_snap_func_id != func_id) { + DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id); + } + struct snapshot snap; + if (read(group, &snap, sizeof(snap)) == -1) perror("read"); + if (stopping) { + last_snap_func_id = -1; + } else { + last_snap_func_id = func_id; + last_snap = snap; + } + return snap; +} + static std::string getThreadName(int tid) { std::stringstream comm_path; diff --git a/collectors/perf.hpp b/collectors/perf.hpp index 121548e..b6c6a49 100644 --- a/collectors/perf.hpp +++ b/collectors/perf.hpp @@ -59,6 +59,7 @@ class event_context event_context() { group = -1; + last_snap_func_id = -1; } ~event_context() {} @@ -66,6 +67,13 @@ class event_context bool init(std::vector &events, int tid, int cpu); bool start(); struct snapshot collect(int64_t now); + + struct snapshot collect_scope(int64_t now, uint16_t func_id, bool stopping); + + // If not -1, then we are in the middle of collect_scope_start/stop. 
+ uint16_t last_snap_func_id; + struct snapshot last_snap; + bool stop(); bool deinit(); @@ -75,6 +83,14 @@ class event_context result[mCounters[i].name].push_back(snap.values[i]); } + inline void update_data_perapi(uint16_t func_id, struct snapshot &snap_start, struct snapshot &snap_end, CollectorValueResults &result) + { + for (unsigned int i = 0; i < mCounters.size(); i++) { + std::string name = mCounters[i].name + ":" + std::to_string(func_id); + result[name].push_back(snap_end.values[i] - snap_start.values[i]); + } + } + private: struct counter { @@ -84,6 +100,7 @@ class event_context int group; std::vector mCounters; + }; class PerfCollector : public Collector @@ -102,7 +119,14 @@ class PerfCollector : public Collector virtual bool postprocess(const std::vector& timing) override; virtual void summarize() override; -private: + /// @brief Collector functions for perapi perf instrumentations. + /// @param now Current time in milliseconds. + /// @param func_id The function id. + /// @return + virtual bool collect_scope_start(int64_t now, uint16_t func_id); + virtual bool collect_scope_stop(int64_t now, uint16_t func_id); + + private: void create_perf_thread(); void saveResultsFile(); @@ -125,6 +149,11 @@ class PerfCollector : public Collector eventCtx.update_data(snap, mResultsPerThread); } + void update_data_perapi(uint16_t func_id, struct snapshot& snap_start, struct snapshot& snap_end) + { + eventCtx.update_data_perapi(func_id, snap_start, snap_end, mResultsPerThread); + } + void clear() { for (auto& pair : mResultsPerThread) diff --git a/interface.cpp b/interface.cpp index 838ebf1..f18a906 100644 --- a/interface.cpp +++ b/interface.cpp @@ -450,6 +450,28 @@ void Collection::collect(std::vector custom) } } +void Collection::collect_scope_start(uint16_t func_id) { + const int64_t now = getTime(); + for (Collector* c : mRunning) + { + if (!c->isThreaded()) + { + c->collect_scope_start(now, func_id); + } + } +} + +void Collection::collect_scope_stop(uint16_t 
func_id) { + const int64_t now = getTime(); + for (Collector* c : mRunning) + { + if (!c->isThreaded()) + { + c->collect_scope_stop(now, func_id); + } + } +} + Json::Value Collection::results() { Json::Value results; diff --git a/interface.hpp b/interface.hpp index 0ee383e..96e3ea2 100644 --- a/interface.hpp +++ b/interface.hpp @@ -90,6 +90,8 @@ class Collector virtual bool stop() { mCollecting = false; return true; } virtual bool postprocess(const std::vector& timing); virtual bool collect( int64_t ) = 0; + virtual bool collect_scope_start( int64_t now, uint16_t func_id) {return true; }; + virtual bool collect_scope_stop( int64_t now, uint16_t func_id) { return true; }; virtual bool collecting() const { return mCollecting; } virtual const std::string& name() const { return mName; } virtual bool available() = 0; @@ -252,6 +254,14 @@ class Collection /// result value. void collect(std::vector custom = std::vector()); + /// Sample periodical data for per function instrumentation. Call start before the actual code + /// to be tested. Currently only used for perf collector. + void collect_scope_start(uint16_t func_id); + + /// Sample periodical data for per function instrumentation. Call after before the actual code + /// to be tested. Currently only used for perf collector. + void collect_scope_stop(uint16_t func_id); + /// Get the results as JSON Json::Value results(); From b32a0c10a6013a82089313e99d48376f0778b38f Mon Sep 17 00:00:00 2001 From: Bill Chen Date: Tue, 9 Jul 2024 15:52:50 +0800 Subject: [PATCH 2/4] Add tests for collect_scope. 
Signed-off-by: Bill Chen --- collectors/perf.cpp | 10 +++--- collectors/perf.hpp | 11 +++---- interface.cpp | 18 ++++++++--- interface.hpp | 14 ++++---- test.cpp | 79 ++++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 109 insertions(+), 23 deletions(-) diff --git a/collectors/perf.cpp b/collectors/perf.cpp index 048ab6d..a4675a6 100644 --- a/collectors/perf.cpp +++ b/collectors/perf.cpp @@ -475,27 +475,27 @@ bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id) { for (perf_thread &t : mReplayThreads) { snap_start = t.eventCtx.last_snap; snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.eventCtx.update_data_perapi(func_id, snap_start, snap_stop, t.mResultsPerThread); + t.eventCtx.update_data_scope(func_id, snap_start, snap_stop, t.mResultsPerThread); } for (perf_thread &t : mBgThreads) { snap_start = t.eventCtx.last_snap; snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.eventCtx.update_data_perapi(func_id, snap_start, snap_stop, t.mResultsPerThread); + t.eventCtx.update_data_scope(func_id, snap_start, snap_stop, t.mResultsPerThread); } for (perf_thread &t : mMultiPMUThreads) { snap_start = t.eventCtx.last_snap; snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.eventCtx.update_data_perapi(func_id, snap_start, snap_stop, t.mResultsPerThread); + t.eventCtx.update_data_scope(func_id, snap_start, snap_stop, t.mResultsPerThread); } for (perf_thread &t : mBookerThread) { snap_start = t.eventCtx.last_snap; snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.eventCtx.update_data_perapi(func_id, snap_start, snap_stop, t.mResultsPerThread); + t.eventCtx.update_data_scope(func_id, snap_start, snap_stop, t.mResultsPerThread); } for (perf_thread &t : mCSPMUThreads) { snap_start = t.eventCtx.last_snap; snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.eventCtx.update_data_perapi(func_id, snap_start, snap_stop, t.mResultsPerThread); + t.eventCtx.update_data_scope(func_id, snap_start, snap_stop, 
t.mResultsPerThread); } return false; } diff --git a/collectors/perf.hpp b/collectors/perf.hpp index b6c6a49..ca38c71 100644 --- a/collectors/perf.hpp +++ b/collectors/perf.hpp @@ -83,7 +83,7 @@ class event_context result[mCounters[i].name].push_back(snap.values[i]); } - inline void update_data_perapi(uint16_t func_id, struct snapshot &snap_start, struct snapshot &snap_end, CollectorValueResults &result) + inline void update_data_scope(uint16_t func_id, struct snapshot &snap_start, struct snapshot &snap_end, CollectorValueResults &result) { for (unsigned int i = 0; i < mCounters.size(); i++) { std::string name = mCounters[i].name + ":" + std::to_string(func_id); @@ -119,10 +119,7 @@ class PerfCollector : public Collector virtual bool postprocess(const std::vector& timing) override; virtual void summarize() override; - /// @brief Collector functions for perapi perf instrumentations. - /// @param now Current time in milliseconds. - /// @param func_id The function id. - /// @return + /// Collector functions for perapi perf instrumentations. 
virtual bool collect_scope_start(int64_t now, uint16_t func_id); virtual bool collect_scope_stop(int64_t now, uint16_t func_id); @@ -149,9 +146,9 @@ class PerfCollector : public Collector eventCtx.update_data(snap, mResultsPerThread); } - void update_data_perapi(uint16_t func_id, struct snapshot& snap_start, struct snapshot& snap_end) + void update_data_scope(uint16_t func_id, struct snapshot& snap_start, struct snapshot& snap_end) { - eventCtx.update_data_perapi(func_id, snap_start, snap_end, mResultsPerThread); + eventCtx.update_data_scope(func_id, snap_start, snap_end, mResultsPerThread); } void clear() diff --git a/interface.cpp b/interface.cpp index f18a906..898ce64 100644 --- a/interface.cpp +++ b/interface.cpp @@ -450,26 +450,36 @@ void Collection::collect(std::vector custom) } } -void Collection::collect_scope_start(uint16_t func_id) { +void Collection::collect_scope_start(uint16_t label) { const int64_t now = getTime(); + mScopeStartTime = now; for (Collector* c : mRunning) { if (!c->isThreaded()) { - c->collect_scope_start(now, func_id); + c->collect_scope_start(now, label); } } + mScopeStarted = true; } -void Collection::collect_scope_stop(uint16_t func_id) { +void Collection::collect_scope_stop(uint16_t label) { + // A collect_scope_start and collect_scope_end pair is considered as one sample. + // Timing is calculated from the start of the scope to the end of the scope. + if (!mScopeStarted) { + DBG_LOG("WARNING: collect_scope_stop called without a corresponding collect_scope_start.\n"); + return; + } const int64_t now = getTime(); + mTiming.push_back(now - mScopeStartTime); for (Collector* c : mRunning) { if (!c->isThreaded()) { - c->collect_scope_stop(now, func_id); + c->collect_scope_stop(now, label); } } + mScopeStarted = false; } Json::Value Collection::results() diff --git a/interface.hpp b/interface.hpp index 96e3ea2..573bf4d 100644 --- a/interface.hpp +++ b/interface.hpp @@ -254,13 +254,13 @@ class Collection /// result value. 
void collect(std::vector custom = std::vector()); - /// Sample periodical data for per function instrumentation. Call start before the actual code - /// to be tested. Currently only used for perf collector. - void collect_scope_start(uint16_t func_id); + /// Sample periodical data for per API instrumentation. Call this method before the payload + /// execution. Currently only used for perf collector. + void collect_scope_start(uint16_t label); - /// Sample periodical data for per function instrumentation. Call after before the actual code - /// to be tested. Currently only used for perf collector. - void collect_scope_stop(uint16_t func_id); + /// Sample periodical data for per API instrumentation. Call this method after the payload + /// execution. Currently only used for perf collector. + void collect_scope_stop(uint16_t label); /// Get the results as JSON Json::Value results(); @@ -282,5 +282,7 @@ class Collection std::vector mCustomHeaders; int64_t mStartTime = 0; int64_t mPreviousTime = 0; + bool mScopeStarted = false; + int64_t mScopeStartTime = 0; bool mDebug = false; }; diff --git a/test.cpp b/test.cpp index 7485023..3ce6389 100644 --- a/test.cpp +++ b/test.cpp @@ -1,8 +1,9 @@ #include "interface.hpp" -#include #include #include +#include +#include #include #include "json/writer.h" @@ -259,6 +260,81 @@ static void test7() c.writeCSV("excel.csv"); } +void test8() +{ + printf("[test 8]: Testing collect_scope for the perf collector...\n"); + + // Specification: + // https://github.com/ARM-software/patrace/blob/master/patrace/doc/manual.md#generating-cpu-load-with-perf-collector + std::string collectorConfig = R"( + { + "perf": { + "set": 4, + "event": [ + { + "name": "CPUCyclesUser", + "type": 4, + "config": 17, + "excludeKernel": true + }, + { + "name": "CPUCyclesKernel", + "type": 4, + "config": 17, + "excludeUser": true + }, + { + "name": "CPUInstructionUser", + "type": 4, + "config": 8, + "excludeKernel": true + }, + { + "name": "CPUInstructionKernel", + 
"type": 4, + "config": 8, + "excludeUser": true + } + ], + } + })"; + Json::Value config; + std::stringstream(collectorConfig) >> config; + Collection c(config); + auto payload = [](int ops) { + int tmp = 1; + for (int i = 0; i < ops; i++) tmp *= rand(); + }; + + + char *cur_thread_name = (char *)malloc(16); + prctl(PR_GET_NAME, (unsigned long)cur_thread_name, 0, 0, 0); + + std::string thread_name = "patrace-1"; + prctl(PR_SET_NAME, (unsigned long)thread_name.c_str(), 0, 0, 0); + c.initialize(); + + c.start(); + for (int i = 0; i < 10; i++) { + c.collect_scope_start(1); + payload(10); + c.collect_scope_stop(1); + c.collect_scope_start(2); + payload(100); + c.collect_scope_stop(2); + } + c.stop(); + + Json::Value results = c.results(); + Json::StyledWriter writer; + std::string data = writer.write(results); + printf("Results:\n%s", data.c_str()); + c.writeJSON("results_collect_scope.json"); + + // restore thread name + prctl(PR_SET_NAME, (unsigned long)cur_thread_name, 0, 0, 0); +} + int main() { srandom(time(NULL)); @@ -270,6 +346,7 @@ int main() test5(); test6(); test7(); // summarized results + test8(); // collect_scope printf("ALL DONE!\n"); return 0; } From 5acc385023332c24b30c12229f558b1ee28b6ac2 Mon Sep 17 00:00:00 2001 From: Bill Chen Date: Fri, 12 Jul 2024 17:34:11 +0800 Subject: [PATCH 3/4] Modify collect_scope output format to avoid huge json. 
Signed-off-by: Bill Chen --- collectors/perf.cpp | 34 +++++++++++++--- collectors/perf.hpp | 38 +++++++++++++++--- interface.cpp | 4 +- test.cpp | 97 ++++++++++++++++++++++++++++----------------- 4 files changed, 124 insertions(+), 49 deletions(-) diff --git a/collectors/perf.cpp b/collectors/perf.cpp index a4675a6..b9a4351 100644 --- a/collectors/perf.cpp +++ b/collectors/perf.cpp @@ -475,27 +475,27 @@ bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id) { for (perf_thread &t : mReplayThreads) { snap_start = t.eventCtx.last_snap; snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.eventCtx.update_data_scope(func_id, snap_start, snap_stop, t.mResultsPerThread); + t.update_data_scope(func_id, snap_start, snap_stop); } for (perf_thread &t : mBgThreads) { snap_start = t.eventCtx.last_snap; snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.eventCtx.update_data_scope(func_id, snap_start, snap_stop, t.mResultsPerThread); + t.update_data_scope(func_id, snap_start, snap_stop); } for (perf_thread &t : mMultiPMUThreads) { snap_start = t.eventCtx.last_snap; snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.eventCtx.update_data_scope(func_id, snap_start, snap_stop, t.mResultsPerThread); + t.update_data_scope(func_id, snap_start, snap_stop); } for (perf_thread &t : mBookerThread) { snap_start = t.eventCtx.last_snap; snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.eventCtx.update_data_scope(func_id, snap_start, snap_stop, t.mResultsPerThread); + t.update_data_scope(func_id, snap_start, snap_stop); } for (perf_thread &t : mCSPMUThreads) { snap_start = t.eventCtx.last_snap; snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.eventCtx.update_data_scope(func_id, snap_start, snap_stop, t.mResultsPerThread); + t.update_data_scope(func_id, snap_start, snap_stop); } return false; } @@ -670,6 +670,30 @@ bool event_context::stop() return false; } + for (struct counter& c : mCounters) + { + if (c.scope_values.size() 
> 0 && mValueResults != nullptr) + { + std::string name = c.name + ":ScopeSum"; + for (unsigned int i = 0; i < c.scope_values.size(); i++) + { + (*mValueResults)[name].push_back(c.scope_values[i]); + } + } + } + + std::string name_num_func_calls = "CCthread:ScopeNumCalls"; + for (unsigned int i = 0; i < scope_num_calls.size(); i++) + { + (*mValueResults)[name_num_func_calls].push_back(scope_num_calls[i]); + } + + std::string name_num_calls = "CCthread:ScopeNumWithPerf"; + for (unsigned int i = 0; i < scope_num_with_perf.size(); i++) + { + (*mValueResults)[name_num_calls].push_back(scope_num_with_perf[i]); + } + return true; } diff --git a/collectors/perf.hpp b/collectors/perf.hpp index ca38c71..dc06d0e 100644 --- a/collectors/perf.hpp +++ b/collectors/perf.hpp @@ -3,6 +3,9 @@ #include "collector_utility.hpp" #include "interface.hpp" #include +#include +#include +#include enum hw_cnt_length { @@ -83,11 +86,29 @@ class event_context result[mCounters[i].name].push_back(snap.values[i]); } - inline void update_data_scope(uint16_t func_id, struct snapshot &snap_start, struct snapshot &snap_end, CollectorValueResults &result) + inline void update_data_scope(uint16_t func_id, bool is_calling, struct snapshot &snap_start, struct snapshot &snap_end, CollectorValueResults &result) { + if (!mValueResults) mValueResults = &result; + long long diff_acc = 0; for (unsigned int i = 0; i < mCounters.size(); i++) { - std::string name = mCounters[i].name + ":" + std::to_string(func_id); - result[name].push_back(snap_end.values[i] - snap_start.values[i]); + long long diff = snap_end.values[i] - snap_start.values[i]; + if (mCounters[i].scope_values.size() <= func_id) { + mCounters[i].scope_values.resize(std::min(func_id * 2 + 1, UINT16_MAX - 1), 0); + } + mCounters[i].scope_values[func_id] += diff; + diff_acc += diff; + } + if (diff_acc > 0 && is_calling) { + if (scope_num_calls.size() <= func_id) { + scope_num_calls.resize(std::min(func_id * 2 + 1, UINT16_MAX - 1), 0); + } + 
scope_num_calls[func_id]++; + } + if (diff_acc > 0) { + if (scope_num_with_perf.size() <= func_id) { + scope_num_with_perf.resize(std::min(func_id * 2 + 1, UINT16_MAX - 1), 0); + } + scope_num_with_perf[func_id]++; } } @@ -96,11 +117,17 @@ class event_context { std::string name; int fd; + // Record accumulated values for update_data_scope, where the index of the vector is the uint16_t func_id. + std::vector scope_values; }; int group; std::vector mCounters; - + // Record number of scope calls with perf counter incremental greater than 0 (can happen in multiple bg threads) + std::vector scope_num_with_perf; + // Record number of scope calls that actually triggered the collect_scope (happen in 1 thread that calls the collection method) + std::vector scope_num_calls; + CollectorValueResults *mValueResults = nullptr; }; class PerfCollector : public Collector @@ -148,7 +175,8 @@ class PerfCollector : public Collector void update_data_scope(uint16_t func_id, struct snapshot& snap_start, struct snapshot& snap_end) { - eventCtx.update_data_scope(func_id, snap_start, snap_end, mResultsPerThread); + pid_t cur_tid = syscall(SYS_gettid); + eventCtx.update_data_scope(func_id, cur_tid == tid, snap_start, snap_end, mResultsPerThread); } void clear() diff --git a/interface.cpp b/interface.cpp index 898ce64..3fcbdb2 100644 --- a/interface.cpp +++ b/interface.cpp @@ -465,13 +465,13 @@ void Collection::collect_scope_start(uint16_t label) { void Collection::collect_scope_stop(uint16_t label) { // A collect_scope_start and collect_scope_end pair is considered as one sample. - // Timing is calculated from the start of the scope to the end of the scope. if (!mScopeStarted) { DBG_LOG("WARNING: collect_scope_stop called without a corresponding collect_scope_start.\n"); return; } const int64_t now = getTime(); - mTiming.push_back(now - mScopeStartTime); + // Timing is ignored to avoid extreme large json outputs. 
+ // mTiming.push_back(now - mScopeStartTime); for (Collector* c : mRunning) { if (!c->isThreaded()) diff --git a/test.cpp b/test.cpp index 3ce6389..f4e86b2 100644 --- a/test.cpp +++ b/test.cpp @@ -5,6 +5,8 @@ #include #include #include +#include +#include #include "json/writer.h" @@ -260,13 +262,18 @@ static void test7() c.writeCSV("excel.csv"); } -void test8() -{ - printf("[test 8]: Testing collect_scope for the perf collector...\n"); +class Test8 { +public: + + Test8() : test8_ready(false) {} + + void run() { + printf("[test 8]: Testing collect_scope for the perf collector...\n"); + std::vector threads; - // Specification: - // https://github.com/ARM-software/patrace/blob/master/patrace/doc/manual.md#generating-cpu-load-with-perf-collector - std::string collectorConfig = R"( + // Specification: + // https://github.com/ARM-software/patrace/blob/master/patrace/doc/manual.md#generating-cpu-load-with-perf-collector + std::string collectorConfig = R"( { "perf": { "set": 4, @@ -298,42 +305,58 @@ void test8() ], } })"; - Json::Value config; - std::stringstream(collectorConfig) >> config; - Collection c(config); - auto payload = [](int ops) { - int tmp = 1; - for (int i = 0; i < ops; i++) tmp *= rand(); - }; + Json::Value config; + std::stringstream(collectorConfig) >> config; + threads.emplace_back(&Test8::test8_worker, this, "patrace-1", 1000, 0); + threads.emplace_back(&Test8::test8_worker, this, "patrace-2", 1000, 1); + threads.emplace_back(&Test8::test8_worker, this, "mali-1", 100, 2); + threads.emplace_back(&Test8::test8_worker, this, "mali-2", 100, 3); - char *cur_thread_name = (char *)malloc(16); - prctl(PR_GET_NAME, (unsigned long)cur_thread_name, 0, 0, 0); + c = new Collection(config); + c->initialize(); + c->start(); + test8_ready.store(true); + test8_cv.notify_all(); + for (auto &t : threads) + t.join(); + c->stop(); - std::string thread_name = "patrace-1"; - prctl(PR_SET_NAME, (unsigned long)thread_name.c_str(), 0, 0, 0); - c.initialize(); + Json::Value 
results = c->results(); + Json::StyledWriter writer; + std::string data = writer.write(results); + printf("Results:\n%s", data.c_str()); + c->writeJSON("results_collect_scope.json"); + } - c.start(); - for (int i = 0; i < 10; i++) { - c.collect_scope_start(1); - payload(10); - c.collect_scope_stop(1); - c.collect_scope_start(2); - payload(100); - c.collect_scope_stop(2); - } - c.stop(); +private: + void test8_worker(std::string const &thread_name, int ops, int scope_label_offset) { + prctl(PR_SET_NAME, (unsigned long)thread_name.c_str(), 0, 0, 0); + std::unique_lock lk(test8_mtx); + test8_cv.wait(lk, [this] { return test8_ready.load(); }); + printf("Thread %s started.\n", thread_name.c_str()); - Json::Value results = c.results(); - Json::StyledWriter writer; - std::string data = writer.write(results); - printf("Results:\n%s", data.c_str()); - c.writeJSON("results_collect_scope.json"); + auto payload = [](int ops) { + int tmp = 1; + for (int i = 0; i < ops; i++) + tmp *= rand(); + }; - // restore thread name - prctl(PR_SET_NAME, (unsigned long)cur_thread_name, 0, 0, 0); -} + c->collect_scope_start(0 + scope_label_offset); + payload(10); + c->collect_scope_stop(0 + scope_label_offset); + c->collect_scope_start(5 + scope_label_offset); + payload(1000); + c->collect_scope_stop(5 + scope_label_offset); + printf("Thread %s finished.\n", thread_name.c_str()); + // usleep(1e5); + } + + Collection *c; + std::atomic test8_ready; + std::condition_variable test8_cv; + std::mutex test8_mtx; +}; int main() { @@ -346,7 +369,7 @@ int main() test5(); test6(); test7(); // summarized results - test8(); // collect_scope + (new Test8())->run(); printf("ALL DONE!\n"); return 0; } From 60b3aa78fbe280b54ee1a38142c8e90c6d50bef7 Mon Sep 17 00:00:00 2001 From: Bill Chen Date: Wed, 31 Jul 2024 17:19:24 +0800 Subject: [PATCH 4/4] Use inline asm for more accurate perf collection. Added flag parameter in collect_scope_start/stop to control what threads to collect. 
Signed-off-by: Bill Chen --- collectors/perf.cpp | 141 ++++++++++++++++++++++++++++++++------------ collectors/perf.hpp | 16 ++++- interface.cpp | 8 +-- interface.hpp | 8 +-- test.cpp | 27 ++++++--- 5 files changed, 145 insertions(+), 55 deletions(-) diff --git a/collectors/perf.cpp b/collectors/perf.cpp index b9a4351..487bc36 100644 --- a/collectors/perf.cpp +++ b/collectors/perf.cpp @@ -443,59 +443,99 @@ bool PerfCollector::collect(int64_t now) return true; } -bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id) { +bool PerfCollector::collect_scope_start(int64_t now, uint16_t func_id, int32_t flags) { if (!mCollecting) return false; struct snapshot snap; - for (perf_thread& t : mReplayThreads) + if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS) { - t.eventCtx.collect_scope(now, func_id, false); + for (perf_thread &t : mReplayThreads) + { + t.eventCtx.collect_scope(now, func_id, false); + } } - for (perf_thread& t : mBgThreads) + if (flags & COLLECT_BG_THREADS || flags & COLLECT_ALL_THREADS) { - t.eventCtx.collect_scope(now, func_id, false); + for (perf_thread &t : mBgThreads) + { + t.eventCtx.collect_scope(now, func_id, false); + } } - for (perf_thread& t : mMultiPMUThreads) + if (flags & COLLECT_MULTI_PMU_THREADS || flags & COLLECT_ALL_THREADS) { - t.eventCtx.collect_scope(now, func_id, false); + for (perf_thread &t : mMultiPMUThreads) + { + t.eventCtx.collect_scope(now, func_id, false); + } } - for (perf_thread& t : mBookerThread) + if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS) { - t.eventCtx.collect_scope(now, func_id, false); + for (perf_thread &t : mBookerThread) + { + t.eventCtx.collect_scope(now, func_id, false); + } } - for (perf_thread& t : mCSPMUThreads) + if (flags & COLLECT_CSPMU_THREADS || flags & COLLECT_ALL_THREADS) { - t.eventCtx.collect_scope(now, func_id, false); + for (perf_thread &t : mCSPMUThreads) + { + t.eventCtx.collect_scope(now, func_id, false); + } } + last_collect_scope_flags = 
flags; return true; } -bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id) { +bool PerfCollector::collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags) { if (!mCollecting) return false; + if (last_collect_scope_flags != flags) { + DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id); + return false; + } struct snapshot snap_start, snap_stop; - for (perf_thread &t : mReplayThreads) { - snap_start = t.eventCtx.last_snap; - snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.update_data_scope(func_id, snap_start, snap_stop); - } - for (perf_thread &t : mBgThreads) { - snap_start = t.eventCtx.last_snap; - snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.update_data_scope(func_id, snap_start, snap_stop); - } - for (perf_thread &t : mMultiPMUThreads) { - snap_start = t.eventCtx.last_snap; - snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.update_data_scope(func_id, snap_start, snap_stop); - } - for (perf_thread &t : mBookerThread) { - snap_start = t.eventCtx.last_snap; - snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.update_data_scope(func_id, snap_start, snap_stop); - } - for (perf_thread &t : mCSPMUThreads) { - snap_start = t.eventCtx.last_snap; - snap_stop = t.eventCtx.collect_scope(now, func_id, true); - t.update_data_scope(func_id, snap_start, snap_stop); + if (flags & COLLECT_REPLAY_THREADS || flags & COLLECT_ALL_THREADS) + { + for (perf_thread &t : mReplayThreads) + { + snap_start = t.eventCtx.last_snap; + snap_stop = t.eventCtx.collect_scope(now, func_id, true); + t.update_data_scope(func_id, snap_start, snap_stop); + } + } + if (flags & COLLECT_BG_THREADS || flags & COLLECT_ALL_THREADS) + { + for (perf_thread &t : mBgThreads) + { + snap_start = t.eventCtx.last_snap; + snap_stop = t.eventCtx.collect_scope(now, func_id, true); + t.update_data_scope(func_id, snap_start, snap_stop); + } + } + if (flags & COLLECT_MULTI_PMU_THREADS || 
flags & COLLECT_ALL_THREADS) + { + for (perf_thread &t : mMultiPMUThreads) + { + snap_start = t.eventCtx.last_snap; + snap_stop = t.eventCtx.collect_scope(now, func_id, true); + t.update_data_scope(func_id, snap_start, snap_stop); + } + } + if (flags & COLLECT_BOOKER_THREADS || flags & COLLECT_ALL_THREADS) + { + for (perf_thread &t : mBookerThread) + { + snap_start = t.eventCtx.last_snap; + snap_stop = t.eventCtx.collect_scope(now, func_id, true); + t.update_data_scope(func_id, snap_start, snap_stop); + } + } + if (flags & COLLECT_CSPMU_THREADS || flags & COLLECT_ALL_THREADS) + { + for (perf_thread &t : mCSPMUThreads) + { + snap_start = t.eventCtx.last_snap; + snap_stop = t.eventCtx.collect_scope(now, func_id, true); + t.update_data_scope(func_id, snap_start, snap_stop); + } } return false; } @@ -707,7 +747,23 @@ struct snapshot event_context::collect(int64_t now) return snap; } -struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool stopping) { +struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool stopping) +{ + +#if defined(__aarch64__) + // stop counters for arm64 + uint64_t PMCNTENSET_EL0_safe; + uint64_t PMCR_EL0_safe; + asm volatile("mrs %0, PMCR_EL0" : "=r" (PMCR_EL0_safe)); + asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe & 0xFFFFFFFFFFFFFFFE)); +#elif defined(__arm__) + // stop counters for arm32 + uint64_t PMCNTENSET_EL0_safe; + uint64_t PMCR_EL0_safe; + asm volatile("mrc p15, 0, %0, c9, c12, 0" : "=r"(PMCR_EL0_safe)); + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe & 0xFFFFFFFE)); +#endif + if (stopping && last_snap_func_id != func_id) { DBG_LOG("Error: Could not find the corresponding collect_scope_start call for func_id %ud.\n", func_id); } @@ -719,6 +775,17 @@ struct snapshot event_context::collect_scope(int64_t now, uint16_t func_id, bool last_snap_func_id = func_id; last_snap = snap; } + +#if defined(__aarch64__) + // start counters for arm64 + asm volatile("msr 
PMCNTENSET_EL0, %0" : : "r" (PMCNTENSET_EL0_safe)); + asm volatile("msr PMCR_EL0, %0" : : "r" (PMCR_EL0_safe)); +#elif defined(__arm__) + // start counters for arm32 + asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(PMCNTENSET_EL0_safe)); + asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(PMCR_EL0_safe)); +#endif + return snap; } diff --git a/collectors/perf.hpp b/collectors/perf.hpp index dc06d0e..df29639 100644 --- a/collectors/perf.hpp +++ b/collectors/perf.hpp @@ -36,6 +36,17 @@ enum cmn_node_type CMN_TYPE_WP = 0x7770, }; +enum collect_scope_flags: int32_t +{ + COLLECT_NOOP = 0x00, + COLLECT_ALL_THREADS = 0x01, + COLLECT_REPLAY_THREADS = 0x01 << 1, + COLLECT_BG_THREADS = 0x01 << 2, + COLLECT_MULTI_PMU_THREADS = 0x01 << 3, + COLLECT_BOOKER_THREADS = 0x01 << 4, + COLLECT_CSPMU_THREADS = 0x01 << 5, +}; + struct snapshot { snapshot() : size(0) {} @@ -147,8 +158,8 @@ class PerfCollector : public Collector virtual void summarize() override; /// Collector functions for perapi perf instrumentations. 
- virtual bool collect_scope_start(int64_t now, uint16_t func_id); - virtual bool collect_scope_stop(int64_t now, uint16_t func_id); + virtual bool collect_scope_start(int64_t now, uint16_t func_id, int32_t flags); + virtual bool collect_scope_stop(int64_t now, uint16_t func_id, int32_t flags); private: void create_perf_thread(); @@ -163,6 +174,7 @@ class PerfCollector : public Collector std::map> mMultiPMUEvents; std::map> mCSPMUEvents; std::map> mClocks; // device_name -> clock_vector + int last_collect_scope_flags = 0; struct perf_thread { diff --git a/interface.cpp b/interface.cpp index 3fcbdb2..6151e13 100644 --- a/interface.cpp +++ b/interface.cpp @@ -450,20 +450,20 @@ void Collection::collect(std::vector custom) } } -void Collection::collect_scope_start(uint16_t label) { +void Collection::collect_scope_start(uint16_t label, int32_t flags) { const int64_t now = getTime(); mScopeStartTime = now; for (Collector* c : mRunning) { if (!c->isThreaded()) { - c->collect_scope_start(now, label); + c->collect_scope_start(now, label, flags); } } mScopeStarted = true; } -void Collection::collect_scope_stop(uint16_t label) { +void Collection::collect_scope_stop(uint16_t label, int32_t flags) { // A collect_scope_start and collect_scope_end pair is considered as one sample. 
if (!mScopeStarted) { DBG_LOG("WARNING: collect_scope_stop called without a corresponding collect_scope_start.\n"); @@ -476,7 +476,7 @@ void Collection::collect_scope_stop(uint16_t label) { { if (!c->isThreaded()) { - c->collect_scope_stop(now, label); + c->collect_scope_stop(now, label, flags); } } mScopeStarted = false; diff --git a/interface.hpp b/interface.hpp index 573bf4d..dce31cb 100644 --- a/interface.hpp +++ b/interface.hpp @@ -90,8 +90,8 @@ class Collector virtual bool stop() { mCollecting = false; return true; } virtual bool postprocess(const std::vector& timing); virtual bool collect( int64_t ) = 0; - virtual bool collect_scope_start( int64_t now, uint16_t func_id) {return true; }; - virtual bool collect_scope_stop( int64_t now, uint16_t func_id) { return true; }; + virtual bool collect_scope_start( int64_t now, uint16_t func_id, int32_t flags ) {return true; }; + virtual bool collect_scope_stop( int64_t now, uint16_t func_id, int32_t flags ) { return true; }; virtual bool collecting() const { return mCollecting; } virtual const std::string& name() const { return mName; } virtual bool available() = 0; @@ -256,11 +256,11 @@ class Collection /// Sample periodical data for per API instrumentation. Call this method before the payload /// execution. Currently only used for perf collector. - void collect_scope_start(uint16_t label); + void collect_scope_start(uint16_t label, int32_t flags); /// Sample periodical data for per API instrumentation. Call this method after the payload /// execution. Currently only used for perf collector. 
- void collect_scope_stop(uint16_t label); + void collect_scope_stop(uint16_t label, int32_t flags); /// Get the results as JSON Json::Value results(); diff --git a/test.cpp b/test.cpp index f4e86b2..dc4220d 100644 --- a/test.cpp +++ b/test.cpp @@ -1,4 +1,5 @@ #include "interface.hpp" +#include "collectors/perf.hpp" #include #include @@ -6,6 +7,7 @@ #include #include #include +#include #include #include "json/writer.h" @@ -267,6 +269,10 @@ class Test8 { Test8() : test8_ready(false) {} + ~Test8() { + delete c; + } + void run() { printf("[test 8]: Testing collect_scope for the perf collector...\n"); std::vector threads; @@ -342,14 +348,18 @@ class Test8 { tmp *= rand(); }; - c->collect_scope_start(0 + scope_label_offset); - payload(10); - c->collect_scope_stop(0 + scope_label_offset); - c->collect_scope_start(5 + scope_label_offset); - payload(1000); - c->collect_scope_stop(5 + scope_label_offset); + if (strncmp(thread_name.c_str(), "patrace", 7) == 0) { + c->collect_scope_start(0 + scope_label_offset, COLLECT_REPLAY_THREADS); + payload(1000); + c->collect_scope_stop(0 + scope_label_offset, COLLECT_REPLAY_THREADS); + } + + if (strncmp(thread_name.c_str(), "mali", 4) == 0) { + c->collect_scope_start(1 + scope_label_offset, COLLECT_BG_THREADS); + payload(1000); + c->collect_scope_stop(1 + scope_label_offset, COLLECT_BG_THREADS); + } printf("Thread %s finished.\n", thread_name.c_str()); - // usleep(1e5); } Collection *c; @@ -369,7 +379,8 @@ int main() test5(); test6(); test7(); // summarized results - (new Test8())->run(); + auto test8 = std::unique_ptr(new Test8()); + test8->run(); printf("ALL DONE!\n"); return 0; }