Skip to content

Commit 838638e

Browse files
committed
fix(EventDescription): Turn EventDescription into real class
1 parent 2462b9f commit 838638e

File tree

11 files changed

+246
-279
lines changed

11 files changed

+246
-279
lines changed

include/lo2s/perf/counter/counter_collection.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,11 +39,11 @@ struct CounterCollection
3939
{
4040
if (index == 0)
4141
{
42-
return leader.scale;
42+
return leader.scale();
4343
}
4444
else
4545
{
46-
return counters[index - 1].scale;
46+
return counters[index - 1].scale();
4747
}
4848
}
4949
friend bool operator==(const CounterCollection& lhs, const CounterCollection& rhs)

include/lo2s/perf/event_description.hpp

Lines changed: 136 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,10 @@
2121

2222
#pragma once
2323

24+
#include <lo2s/config.hpp>
25+
#include <lo2s/error.hpp>
2426
#include <lo2s/execution_scope.hpp>
27+
#include <lo2s/perf/util.hpp>
2528
#include <lo2s/topology.hpp>
2629

2730
#include <set>
@@ -48,16 +51,54 @@ struct EventDescription
4851
{
4952
EventDescription(const std::string& name, perf_type_id type, std::uint64_t config,
5053
std::uint64_t config1 = 0, std::set<Cpu> cpus = std::set<Cpu>(),
51-
double scale = 1, std::string unit = "#",
52-
Availability availability = Availability::UNAVAILABLE)
53-
: name(name), type(type), config(config), config1(config1), scale(scale), unit(unit),
54-
availability(availability), cpus_(cpus)
54+
double scale = 1, std::string unit = "#")
55+
: name_(name), type_(type), config_(config), config1_(config1), scale_(scale), unit_(unit),
56+
cpus_(cpus)
5557
{
58+
struct perf_event_attr attr = perf_event_attr();
59+
60+
int proc_fd = perf_event_open(&attr, ExecutionScope(Thread(0)), -1, 0);
61+
int sys_fd = perf_event_open(&attr, ExecutionScope(*supported_cpus().begin()), -1, 0);
62+
63+
if (sys_fd == -1 && proc_fd == -1)
64+
{
65+
attr.exclude_kernel = 1;
66+
proc_fd = perf_event_open(&attr, ExecutionScope(Thread(0)), -1, 0);
67+
sys_fd = perf_event_open(&attr, ExecutionScope(*supported_cpus().begin()), -1, 0);
68+
}
69+
70+
if (sys_fd == -1 && proc_fd == -1)
71+
{
72+
switch (errno)
73+
{
74+
case ENOTSUP:
75+
Log::debug() << "perf event not supported by the running kernel: " << name_;
76+
break;
77+
default:
78+
Log::debug() << "perf event " << name_
79+
<< " not available: " << std::string(std::strerror(errno));
80+
break;
81+
}
82+
83+
availability_ = Availability::UNAVAILABLE;
84+
}
85+
else if (sys_fd == -1)
86+
{
87+
availability_ = Availability::PROCESS_MODE;
88+
}
89+
else if (proc_fd == -1)
90+
{
91+
availability_ = Availability::SYSTEM_MODE;
92+
}
93+
else
94+
{
95+
availability_ = Availability::UNIVERSAL;
96+
}
5697
}
5798

5899
EventDescription()
59-
: name(""), type(static_cast<perf_type_id>(-1)), config(0), config1(0), scale(1), unit("#"),
60-
availability(Availability::UNAVAILABLE)
100+
: name_(""), type_(static_cast<perf_type_id>(-1)), config_(0), config1_(0), scale_(1),
101+
unit_("#"), availability_(Availability::UNAVAILABLE)
61102
{
62103
}
63104

@@ -79,30 +120,106 @@ struct EventDescription
79120

80121
friend bool operator==(const EventDescription& lhs, const EventDescription& rhs)
81122
{
82-
return (lhs.type == rhs.type) && (lhs.config == rhs.config) && (lhs.config1 == rhs.config1);
123+
return (lhs.type_ == rhs.type_) && (lhs.config_ == rhs.config_) &&
124+
(lhs.config1_ == rhs.config1_);
83125
}
84126

85127
friend bool operator<(const EventDescription& lhs, const EventDescription& rhs)
86128
{
87-
if (lhs.type == rhs.type)
129+
if (lhs.type_ == rhs.type_)
88130
{
89-
if (lhs.config == rhs.config)
131+
if (lhs.config_ == rhs.config_)
90132
{
91-
return lhs.config1 < rhs.config1;
133+
return lhs.config1_ < rhs.config1_;
92134
}
93-
return lhs.config < rhs.config;
135+
return lhs.config_ < rhs.config_;
136+
}
137+
return lhs.type_ < rhs.type_;
138+
}
139+
140+
struct perf_event_attr perf_event_attr() const
141+
{
142+
struct perf_event_attr attr;
143+
memset(&attr, 0, sizeof(struct perf_event_attr));
144+
145+
attr.size = sizeof(struct perf_event_attr);
146+
147+
attr.type = type_;
148+
attr.config = config_;
149+
attr.config1 = config1_;
150+
151+
return attr;
152+
}
153+
154+
std::string name() const
155+
{
156+
return name_;
157+
}
158+
159+
std::string description() const
160+
{
161+
if (availability_ == Availability::UNIVERSAL)
162+
{
163+
return name_;
164+
}
165+
else if (availability_ == Availability::SYSTEM_MODE)
166+
{
167+
return fmt::format("{} [SYS]", name_);
168+
}
169+
else if (availability_ == Availability::PROCESS_MODE)
170+
{
171+
return fmt::format("{} [PROC]", name_);
172+
}
173+
174+
return "";
175+
}
176+
177+
bool is_valid() const
178+
{
179+
return availability_ != Availability::UNAVAILABLE;
180+
}
181+
182+
double scale() const
183+
{
184+
return scale_;
185+
}
186+
187+
std::string unit() const
188+
{
189+
return unit_;
190+
}
191+
192+
int open_counter(ExecutionScope scope, int group_fd)
193+
{
194+
struct perf_event_attr perf_attr = perf_event_attr();
195+
perf_attr.sample_period = 0;
196+
perf_attr.exclude_kernel = config().exclude_kernel;
197+
// Needed to scale multiplexed events and to recognize activation phases
198+
perf_attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
199+
200+
#if !defined(USE_HW_BREAKPOINT_COMPAT) && defined(USE_PERF_CLOCKID)
201+
perf_attr.use_clockid = config().use_clockid;
202+
perf_attr.clockid = config().clockid;
203+
#endif
204+
205+
int fd = perf_try_event_open(&perf_attr, scope, group_fd, 0, config().cgroup_fd);
206+
if (fd < 0)
207+
{
208+
Log::error() << "perf_event_open for counter failed";
209+
throw_errno();
94210
}
95-
return lhs.type < rhs.type;
211+
return fd;
96212
}
97-
std::string name;
98-
perf_type_id type;
99-
std::uint64_t config;
100-
std::uint64_t config1;
101-
double scale;
102-
std::string unit;
103-
Availability availability;
104213

105214
private:
215+
std::string name_;
216+
perf_type_id type_;
217+
std::uint64_t config_;
218+
std::uint64_t config1_;
219+
double scale_;
220+
std::string unit_;
221+
Availability availability_;
222+
106223
std::set<Cpu> cpus_;
107224
};
108225
} // namespace perf

include/lo2s/perf/event_provider.hpp

Lines changed: 1 addition & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -36,42 +36,7 @@ namespace perf
3636
class EventProvider
3737
{
3838
public:
39-
struct DescriptionCache
40-
{
41-
private:
42-
DescriptionCache()
43-
: description(std::string(), static_cast<perf_type_id>(-1), 0, 0), valid_(false)
44-
{
45-
}
46-
47-
public:
48-
DescriptionCache(const EventDescription& description)
49-
: description(description), valid_(true)
50-
{
51-
}
52-
53-
DescriptionCache(EventDescription&& description)
54-
: description(std::move(description)), valid_(true)
55-
{
56-
}
57-
58-
static DescriptionCache make_invalid()
59-
{
60-
return DescriptionCache();
61-
}
62-
63-
bool is_valid() const
64-
{
65-
return valid_;
66-
}
67-
68-
EventDescription description;
69-
70-
private:
71-
bool valid_;
72-
};
73-
74-
using EventMap = std::unordered_map<std::string, DescriptionCache>;
39+
using EventMap = std::unordered_map<std::string, EventDescription>;
7540

7641
EventProvider();
7742
EventProvider(const EventProvider&) = delete;

include/lo2s/perf/sample/reader.hpp

Lines changed: 24 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -84,16 +84,7 @@ class Reader : public EventReader<T>
8484
Log::debug() << "initializing event_reader for:" << scope.name()
8585
<< ", enable_on_exec: " << enable_on_exec;
8686

87-
struct perf_event_attr perf_attr = common_perf_event_attrs();
88-
#ifdef USE_PERF_CLOCKID
89-
if (config().use_pebs)
90-
{
91-
perf_attr.use_clockid = 0;
92-
}
93-
#endif
94-
95-
perf_attr.exclude_kernel = config().exclude_kernel;
96-
perf_attr.sample_period = config().sampling_period;
87+
struct perf_event_attr perf_attr;
9788

9889
if (config().sampling)
9990
{
@@ -104,19 +95,38 @@ class Reader : public EventReader<T>
10495

10596
Log::debug() << "using sampling event \'" << config().sampling_event
10697
<< "\', period: " << config().sampling_period;
107-
108-
perf_attr.type = sampling_event.type;
109-
perf_attr.config = sampling_event.config;
110-
perf_attr.config1 = sampling_event.config1;
98+
perf_attr = sampling_event.perf_event_attr();
11199

112100
perf_attr.mmap = 1;
101+
perf_attr.disabled = 1;
102+
103+
#if !defined(USE_HW_BREAKPOINT_COMPAT) && defined(USE_PERF_CLOCKID)
104+
perf_attr.use_clockid = config().use_clockid;
105+
perf_attr.clockid = config().clockid;
106+
#endif
107+
// When polling on the fd returned by perf_event_open, wake up only once our buffer is 80% full.
108+
// The default behaviour is to wake up on every event, which is horrible performance-wise.
109+
perf_attr.watermark = 1;
110+
perf_attr.wakeup_watermark =
111+
static_cast<uint32_t>(0.8 * config().mmap_pages * get_page_size());
113112
}
114113
else
115114
{
115+
perf_attr = common_perf_event_attrs();
116+
116117
// Set up a dummy event for recording calling context enter/leaves only
117118
perf_attr.type = PERF_TYPE_SOFTWARE;
118119
perf_attr.config = PERF_COUNT_SW_DUMMY;
119120
}
121+
#ifdef USE_PERF_CLOCKID
122+
if (config().use_pebs)
123+
{
124+
perf_attr.use_clockid = 0;
125+
}
126+
#endif
127+
128+
perf_attr.exclude_kernel = config().exclude_kernel;
129+
perf_attr.sample_period = config().sampling_period;
120130

121131
perf_attr.sample_id_all = 1;
122132
// Generate PERF_RECORD_COMM events to trace changes to the command

include/lo2s/perf/util.hpp

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,6 @@
11
#pragma once
22

3-
#include <lo2s/perf/event_description.hpp>
3+
#include <lo2s/execution_scope.hpp>
44

55
extern "C"
66
{
@@ -19,7 +19,6 @@ struct perf_event_attr common_perf_event_attrs();
1919
void perf_warn_paranoid();
2020
void perf_check_disabled();
2121

22-
int perf_event_description_open(ExecutionScope scope, const EventDescription& desc, int group_fd);
2322
int perf_try_event_open(struct perf_event_attr* perf_attr, ExecutionScope scope, int group_fd,
2423
unsigned long flags, int cgroup_fd = -1);
2524

include/lo2s/trace/trace.hpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -171,9 +171,9 @@ class Trace
171171
otf2::definition::metric_member& get_event_metric_member(perf::EventDescription event)
172172
{
173173
return registry_.emplace<otf2::definition::metric_member>(
174-
ByEventDescription(event), intern(event.name), intern(event.name),
174+
ByEventDescription(event), intern(event.name()), intern(event.name()),
175175
otf2::common::metric_type::other, otf2::common::metric_mode::accumulated_start,
176-
otf2::common::type::Double, otf2::common::base_type::decimal, 0, intern(event.unit));
176+
otf2::common::type::Double, otf2::common::base_type::decimal, 0, intern(event.unit()));
177177
}
178178
otf2::definition::metric_class& perf_metric_class(MeasurementScope scope)
179179
{

src/config.cpp

Lines changed: 3 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -74,30 +74,10 @@ static inline void print_availability(std::ostream& os, const std::string& descr
7474
std::vector<std::string> event_names;
7575
for (const auto& ev : events)
7676
{
77-
if (ev.availability == perf::Availability::UNAVAILABLE)
77+
if (ev.is_valid())
7878
{
79-
continue;
79+
event_names.push_back(ev.description());
8080
}
81-
82-
std::string availability = "";
83-
std::string cpu = "";
84-
if (ev.availability == perf::Availability::PROCESS_MODE)
85-
{
86-
availability = " *";
87-
}
88-
else if (ev.availability == perf::Availability::SYSTEM_MODE)
89-
{
90-
availability = " #";
91-
}
92-
if (ev.supported_cpus() != Topology::instance().cpus())
93-
{
94-
const auto& cpus = ev.supported_cpus();
95-
cpu =
96-
fmt::format(" [ CPUs {}-{} ]", std::min_element(cpus.begin(), cpus.end())->as_int(),
97-
std::max_element(cpus.begin(), cpus.end())->as_int());
98-
}
99-
100-
event_names.push_back(ev.name + availability + cpu);
10181
}
10282
list_arguments_sorted(os, description, event_names);
10383
}
@@ -670,7 +650,7 @@ void parse_program_options(int argc, const char** argv)
670650
{
671651
for (const auto& mem_event : platform::get_mem_events())
672652
{
673-
perf_group_events.emplace_back(mem_event.name);
653+
perf_group_events.emplace_back(mem_event.name());
674654
}
675655
perf_group_events.emplace_back("instructions");
676656
perf_group_events.emplace_back("cpu-cycles");

0 commit comments

Comments
 (0)