diff --git a/CMakeLists.txt b/CMakeLists.txt index b43fecd8..2961cef8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -133,6 +133,7 @@ set(SOURCE_FILES src/monitor/scope_monitor.cpp src/monitor/threaded_monitor.cpp src/monitor/tracepoint_monitor.cpp + src/monitor/bio_monitor.cpp src/process_controller.cpp src/perf/event_provider.cpp @@ -148,6 +149,7 @@ set(SOURCE_FILES src/perf/time/converter.cpp src/perf/time/reader.cpp src/perf/tracepoint/format.cpp src/perf/tracepoint/writer.cpp + src/perf/bio/event_cacher.cpp src/time/time.cpp diff --git a/include/lo2s/config.hpp b/include/lo2s/config.hpp index cbc2d053..7ab8ff1d 100644 --- a/include/lo2s/config.hpp +++ b/include/lo2s/config.hpp @@ -90,6 +90,9 @@ struct Config clockid_t clockid; // x86_energy bool use_x86_energy; + // block I/O + bool use_block_io; + size_t block_io_cache_size; }; const Config& config(); diff --git a/include/lo2s/measurement_scope.hpp b/include/lo2s/measurement_scope.hpp index dedd757a..4add1ef6 100644 --- a/include/lo2s/measurement_scope.hpp +++ b/include/lo2s/measurement_scope.hpp @@ -30,6 +30,7 @@ enum class MeasurementScopeType GROUP_METRIC, USERSPACE_METRIC, SWITCH, + BIO, UNKNOWN }; @@ -66,6 +67,11 @@ struct MeasurementScope return { MeasurementScopeType::SWITCH, s }; } + static MeasurementScope bio(ExecutionScope s) + { + return { MeasurementScopeType::BIO, s }; + } + friend bool operator==(const MeasurementScope& lhs, const MeasurementScope& rhs) { return (lhs.scope == rhs.scope) && lhs.type == rhs.type; @@ -94,6 +100,8 @@ struct MeasurementScope return fmt::format("samples for {}", scope.name()); case MeasurementScopeType::SWITCH: return fmt::format("context switches for {}", scope.name()); + case MeasurementScopeType::BIO: + return fmt::format("block layer I/O events for {}", scope.name()); default: throw new std::runtime_error("Unknown ExecutionScopeType!"); } diff --git a/include/lo2s/monitor/bio_monitor.hpp b/include/lo2s/monitor/bio_monitor.hpp new file mode 100644 index 
00000000..081f4973 --- /dev/null +++ b/include/lo2s/monitor/bio_monitor.hpp @@ -0,0 +1,61 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2017, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . + */ + +#pragma once + +#include +#include + +#include +#include + +#include +#include + +namespace lo2s +{ +namespace monitor +{ + +class BioMonitor : public PollMonitor +{ +public: + BioMonitor(trace::Trace& trace, Cpu cpu, + std::map>& writers_); + +private: + void monitor(int fd) override; + void initialize_thread() override; + void finalize_thread() override; + + std::string group() const override + { + return "lo2s::BioMonitor"; + } + +private: + Cpu cpu_; + perf::bio::EventCacher bio_insert_cacher_; + perf::bio::EventCacher bio_issue_cacher_; + perf::bio::EventCacher bio_complete_cacher_; +}; +} // namespace monitor +} // namespace lo2s diff --git a/include/lo2s/monitor/main_monitor.hpp b/include/lo2s/monitor/main_monitor.hpp index 0279b38c..62a90346 100644 --- a/include/lo2s/monitor/main_monitor.hpp +++ b/include/lo2s/monitor/main_monitor.hpp @@ -29,6 +29,7 @@ #include #endif #include +#include #include #include #include @@ -63,10 +64,12 @@ class MainMonitor protected: trace::Trace trace_; - std::map process_infos_; metric::plugin::Metrics metrics_; std::vector> tracepoint_monitors_; + + std::map> writers_; + std::vector> 
bio_monitors_; #ifdef HAVE_X86_ADAPT std::unique_ptr x86_adapt_metrics_; #endif diff --git a/include/lo2s/perf/bio/block_device.hpp b/include/lo2s/perf/bio/block_device.hpp new file mode 100644 index 00000000..c85c5ffb --- /dev/null +++ b/include/lo2s/perf/bio/block_device.hpp @@ -0,0 +1,66 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2022, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . 
// Distinguishes whole disks from the partitions that live on them.
enum class BlockDeviceType
{
    PARTITION,
    DISK
};

// Value type describing one block device.
//
//   id     - device number of this device
//   name   - human readable name
//   type   - disk or partition
//   parent - device number of the containing disk; 0 for disks
//
// A default-constructed BlockDevice is a nameless partition with id 0 and
// parent 0.
struct BlockDevice
{
    BlockDevice() = default;

    BlockDevice(dev_t id, const std::string& name, BlockDeviceType type, dev_t parent)
    : id(id), name(name), type(type), parent(parent)
    {
    }

    // Describes a partition `id` that belongs to the disk `parent`.
    static BlockDevice partition(dev_t id, const std::string& name, dev_t parent)
    {
        return { id, name, BlockDeviceType::PARTITION, parent };
    }

    // Describes a whole disk; disks have no parent, so parent is 0.
    static BlockDevice disk(dev_t id, const std::string& name)
    {
        return { id, name, BlockDeviceType::DISK, 0 };
    }

    dev_t id = 0;
    std::string name;
    BlockDeviceType type = BlockDeviceType::PARTITION;
    dev_t parent = 0;
};
+ +class EventCacher : public Reader +{ +public: + struct __attribute((__packed__)) RecordBlock + { + uint16_t common_type; // 2 + uint8_t common_flag; // 3 + uint8_t common_preempt_count; // 4 + int32_t common_pid; // 8 + + uint32_t dev; // 12 + char padding[4]; // 16 + uint64_t sector; // 24 + + uint32_t nr_sector; // 28 + int32_t error_or_bytes; // 32 + + char rwbs[8]; // 40 + }; + + EventCacher(Cpu cpu, std::map>& writers, BioEventType type); + + EventCacher(const EventCacher& other) = delete; + + EventCacher(EventCacher&& other) = default; + +public: + using Reader::handle; + + bool handle(const Reader::RecordSampleType* sample); + + void finalize() + { + for (auto& events : events_) + { + writers_[events.first]->submit_events(events.second); + } + } + +private: + std::map>& writers_; + std::unordered_map> events_; +}; + +} // namespace bio +} // namespace perf +} // namespace lo2s diff --git a/include/lo2s/perf/bio/reader.hpp b/include/lo2s/perf/bio/reader.hpp new file mode 100644 index 00000000..c5bcd7a6 --- /dev/null +++ b/include/lo2s/perf/bio/reader.hpp @@ -0,0 +1,157 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2017, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include + +#include + +extern "C" +{ +#include +#include +#include +#include +#include +} + +namespace lo2s +{ +namespace perf +{ +namespace bio +{ + +template +class Reader : public EventReader +{ +public: + struct RecordSampleType + { + struct perf_event_header header; + uint64_t time; + uint32_t tp_data_size; + char tp_data[1]; + }; + + Reader(Cpu cpu, BioEventType type) : type_(type), cpu_(cpu) + { + struct perf_event_attr attr = common_perf_event_attrs(); + attr.type = PERF_TYPE_TRACEPOINT; + if (type == BioEventType::INSERT) + { + attr.config = tracepoint::EventFormat("block:block_rq_insert").id(); + } + else if (type == BioEventType::ISSUE) + { + attr.config = tracepoint::EventFormat("block:block_rq_issue").id(); + } + else + { + attr.config = tracepoint::EventFormat("block:block_rq_complete").id(); + } + + attr.sample_period = 1; + attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_TIME; + + fd_ = perf_event_open(&attr, cpu_.as_scope(), -1, 0); + if (fd_ < 0) + { + Log::error() << "perf_event_open for raw tracepoint failed."; + throw_errno(); + } + + Log::debug() << "Opened block_rq_insert_tracing"; + + try + { + if (fcntl(fd_, F_SETFL, O_NONBLOCK)) + { + throw_errno(); + } + + init_mmap(fd_); + Log::debug() << "perf_tracepoint_reader mmap initialized"; + + auto ret = ioctl(fd_, PERF_EVENT_IOC_ENABLE); + Log::debug() << "perf_tracepoint_reader ioctl(fd, PERF_EVENT_IOC_ENABLE) = " << ret; + if (ret == -1) + { + throw_errno(); + } + } + catch (...) 
+ { + Log::error() << "Couldn't initialize block:rq_insert reading"; + close(fd_); + throw; + } + } + + Reader(Reader&& other) + : EventReader(std::forward>(other)), cpu_(other.cpu_) + { + std::swap(fd_, other.fd_); + } + + ~Reader() + { + if (fd_ != -1) + { + close(fd_); + } + } + + void stop() + { + auto ret = ioctl(fd_, PERF_EVENT_IOC_DISABLE); + Log::debug() << "perf_tracepoint_reader ioctl(fd, PERF_EVENT_IOC_DISABLE) = " << ret; + if (ret == -1) + { + throw_errno(); + } + this->read(); + } + +protected: + using EventReader::init_mmap; + BioEventType type_; + +private: + Cpu cpu_; + int fd_ = -1; +}; +} // namespace bio +} // namespace perf +} // namespace lo2s diff --git a/include/lo2s/perf/bio/writer.hpp b/include/lo2s/perf/bio/writer.hpp new file mode 100644 index 00000000..41d31234 --- /dev/null +++ b/include/lo2s/perf/bio/writer.hpp @@ -0,0 +1,140 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2017, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . 
+ */ + +#pragma once + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace lo2s +{ +namespace perf +{ +namespace bio +{ + +enum class BioEventType +{ + INSERT, + ISSUE, + COMPLETE +}; + +struct BioEvent +{ + BioEvent(dev_t device, uint64_t sector, uint64_t time, BioEventType type, + otf2::common::io_operation_mode_type mode, uint32_t nr_sector) + : device(device), sector(sector), time(time), type(type), mode(mode), nr_sector(nr_sector) + { + } + + dev_t device; + uint64_t sector; + uint64_t time; + BioEventType type; + otf2::common::io_operation_mode_type mode; + uint32_t nr_sector; +}; + +class BioComperator +{ +public: + bool operator()(const BioEvent& t1, const BioEvent& t2) + { + return t1.time > t2.time; + } +}; + +class Writer +{ +public: + Writer(trace::Trace& trace, BlockDevice& device) + : trace_(trace), time_converter_(time::Converter::instance()), device_(device) + { + } + + void submit_events(const std::vector& new_events) + { + std::lock_guard guard(lock_); + for (auto& event : new_events) + { + events_.emplace(event); + } + } + + void write_events() + { + if (events_.empty()) + { + // early exit, so we don't create unnecessary definitions + return; + } + + otf2::writer::local& writer = trace_.bio_writer(device_); + otf2::definition::io_handle& handle = trace_.block_io_handle(device_); + + while (!events_.empty()) + { + const BioEvent& event = events_.top(); + + if (event.type == BioEventType::INSERT) + { + writer << otf2::event::io_operation_begin( + time_converter_(event.time), handle, event.mode, + otf2::common::io_operation_flag_type::non_blocking, event.nr_sector, + event.sector); + } + else if (event.type == BioEventType::ISSUE) + { + writer << otf2::event::io_operation_issued(time_converter_(event.time), handle, + event.sector); + } + else + { + writer << otf2::event::io_operation_complete(time_converter_(event.time), handle, + event.nr_sector, event.sector); + } + + events_.pop(); 
+ } + } + +private: + trace::Trace& trace_; + const time::Converter& time_converter_; + BlockDevice device_; + std::priority_queue, BioComperator> events_; + std::mutex lock_; +}; + +} // namespace bio +} // namespace perf +} // namespace lo2s diff --git a/include/lo2s/trace/reg_keys.hpp b/include/lo2s/trace/reg_keys.hpp index 7be3315e..9e41b412 100644 --- a/include/lo2s/trace/reg_keys.hpp +++ b/include/lo2s/trace/reg_keys.hpp @@ -82,6 +82,11 @@ struct ByProcessTag }; using ByProcess = SimpleKeyType; +struct ByDevTag +{ +}; +using ByDev = SimpleKeyType; + struct ByStringTag { }; @@ -112,12 +117,11 @@ struct Holder { using type = typename otf2::get_default_holder::type; }; - template <> struct Holder { using type = otf2::lookup_definition_holder; + ByProcess, ByDev, ByCpu, ByPackage>; }; template <> struct Holder @@ -131,6 +135,16 @@ struct Holder using type = otf2::lookup_definition_holder; }; template <> +struct Holder +{ + using type = otf2::lookup_definition_holder; +}; +template <> +struct Holder +{ + using type = otf2::lookup_definition_holder; +}; +template <> struct Holder { using type = otf2::lookup_definition_holder; @@ -138,13 +152,14 @@ struct Holder template <> struct Holder { - using type = otf2::lookup_definition_holder; + using type = + otf2::lookup_definition_holder; }; template <> struct Holder { using type = otf2::lookup_definition_holder; + ByMeasurementScope, ByDev>; }; template <> struct Holder @@ -164,7 +179,7 @@ struct Holder template <> struct Holder { - using type = otf2::lookup_definition_holder; + using type = otf2::lookup_definition_holder; }; template <> struct Holder diff --git a/include/lo2s/trace/trace.hpp b/include/lo2s/trace/trace.hpp index b6ccffb8..1b899b48 100644 --- a/include/lo2s/trace/trace.hpp +++ b/include/lo2s/trace/trace.hpp @@ -107,8 +107,11 @@ class Trace otf2::writer::local& sample_writer(const ExecutionScope& scope); otf2::writer::local& switch_writer(const ExecutionScope& scope); otf2::writer::local& 
metric_writer(const MeasurementScope& scope); + otf2::writer::local& bio_writer(BlockDevice& device); otf2::writer::local& create_metric_writer(const std::string& name); + otf2::definition::io_handle& block_io_handle(BlockDevice& device); + otf2::definition::metric_member metric_member(const std::string& name, const std::string& description, otf2::common::metric_mode mode, otf2::common::type value_type, @@ -256,6 +259,18 @@ class Trace } } + const otf2::definition::system_tree_node bio_parent_node(BlockDevice& device) + { + if (device.type == BlockDeviceType::PARTITION) + { + if (registry_.has(ByDev(device.parent))) + { + return registry_.get(ByDev(device.parent)); + } + } + return bio_system_tree_node_; + } + void create_userspace_metric_class() { const auto& counter_collection_ = perf::counter::requested_userspace_counters(); @@ -303,12 +318,17 @@ class Trace std::map calling_context_tree_; otf2::definition::comm_locations_group& comm_locations_group_; + otf2::definition::comm_locations_group& hardware_comm_locations_group_; otf2::definition::regions_group& lo2s_regions_group_; otf2::definition::detail::weak_ref cpuid_metric_class_; otf2::definition::detail::weak_ref perf_group_metric_class_; otf2::definition::detail::weak_ref perf_userspace_metric_class_; + otf2::definition::detail::weak_ref bio_system_tree_node_; + otf2::definition::detail::weak_ref bio_paradigm_; + otf2::definition::detail::weak_ref bio_comm_group_; + const otf2::definition::system_tree_node& system_tree_root_node_; ExecutionScopeGroup& groups_; diff --git a/include/lo2s/util.hpp b/include/lo2s/util.hpp index c000efe8..138d6a6e 100644 --- a/include/lo2s/util.hpp +++ b/include/lo2s/util.hpp @@ -22,6 +22,7 @@ #pragma once #include +#include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include #include @@ -59,12 +61,7 @@ class StringCache T& operator[](const std::string& name) { std::lock_guard guard(mutex_); - if (elements_.count(name) == 0) - { - 
elements_.emplace(std::piecewise_construct, std::forward_as_tuple(name), - std::forward_as_tuple(name)); - } - return elements_.at(name); + return elements_.try_emplace(name, name).first->second; } private: @@ -102,4 +99,6 @@ std::unordered_map get_comms_for_running_threads(); void try_pin_to_scope(ExecutionScope scope); Thread gettid(); + +std::vector get_block_devices(); } // namespace lo2s diff --git a/man/lo2s.1.pod b/man/lo2s.1.pod index b6e9159b..40598d0c 100644 --- a/man/lo2s.1.pod +++ b/man/lo2s.1.pod @@ -335,6 +335,20 @@ Record L values. =back +=head2 B options + +=over + +=item B<--block-io> + +Record block I/O events using the block:block_rq_insert tracepoint for begin events and block:block_rq_complete tracepoint for end events specifically. + +=item B<--block-io-cache-size> I + +Size of the per-CPU cache in number-of-events. A larger cache size might increase performance but comes at the cost of a higher memory footprint. + +=back + =head2 Arguments to options =over diff --git a/src/config.cpp b/src/config.cpp index 3390ad97..42ac7be3 100644 --- a/src/config.cpp +++ b/src/config.cpp @@ -120,6 +120,7 @@ void parse_program_options(int argc, const char** argv) auto& kernel_tracepoint_options = parser.group("Kernel tracepoint options"); auto& x86_adapt_options = parser.group("x86_adapt options"); auto& x86_energy_options = parser.group("x86_energy options"); + auto& io_options = parser.group("I/O recording options"); lo2s::Config config; @@ -274,6 +275,12 @@ void parse_program_options(int argc, const char** argv) x86_energy_options.toggle("x86-energy", "Add x86_energy recordings.").short_name("X"); + io_options.toggle("block-io", + "Enable recording of block I/O events (requires access to debugfs)"); + io_options.option("block-io-cache-size", "Size (in events) of the block I/O event cache") + .optional() + .metavar("NUM") + .default_value("1000"); nitro::options::arguments arguments; try { @@ -300,6 +307,8 @@ void parse_program_options(int argc, const 
char** argv) config.perf_userspace_events = arguments.get_all("userspace-metric-event"); config.standard_metrics = arguments.given("standard-metrics"); config.use_x86_energy = arguments.given("x86-energy"); + config.use_block_io = arguments.given("block-io"); + config.block_io_cache_size = arguments.as("block-io-cache-size"); config.command = arguments.positionals(); if (arguments.given("help")) diff --git a/src/monitor/bio_monitor.cpp b/src/monitor/bio_monitor.cpp new file mode 100644 index 00000000..5b0714de --- /dev/null +++ b/src/monitor/bio_monitor.cpp @@ -0,0 +1,83 @@ +/* + * This file is part of the lo2s software. + * Linux OTF2 sampling + * + * Copyright (c) 2017, + * Technische Universitaet Dresden, Germany + * + * lo2s is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * lo2s is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with lo2s. If not, see . 
+ */ + +#include + +#include +#include +#include + +namespace lo2s +{ +namespace monitor +{ + +BioMonitor::BioMonitor(trace::Trace& trace, Cpu cpu, + std::map>& writers) +: monitor::PollMonitor(trace, "", config().perf_read_interval), cpu_(cpu), + bio_insert_cacher_(cpu, writers, perf::bio::BioEventType::INSERT), + bio_issue_cacher_(cpu, writers, perf::bio::BioEventType::ISSUE), + bio_complete_cacher_(cpu, writers, perf::bio::BioEventType::COMPLETE) +{ + add_fd(bio_insert_cacher_.fd()); + add_fd(bio_issue_cacher_.fd()); + add_fd(bio_complete_cacher_.fd()); +} + +void BioMonitor::initialize_thread() +{ + try_pin_to_scope(cpu_.as_scope()); +} + +void BioMonitor::finalize_thread() +{ + bio_insert_cacher_.finalize(); + bio_issue_cacher_.finalize(); + bio_complete_cacher_.finalize(); +} + +void BioMonitor::monitor(int fd) +{ + if (fd == timer_pfd().fd) + { + return; + } + else if (fd == bio_insert_cacher_.fd()) + { + bio_insert_cacher_.read(); + } + else if (fd == bio_issue_cacher_.fd()) + { + bio_issue_cacher_.read(); + } + else if (fd == bio_complete_cacher_.fd()) + { + bio_complete_cacher_.read(); + } + else + { + bio_insert_cacher_.read(); + bio_issue_cacher_.read(); + bio_complete_cacher_.read(); + } +} +} // namespace monitor +} // namespace lo2s diff --git a/src/monitor/main_monitor.cpp b/src/monitor/main_monitor.cpp index 89663fb1..07134f20 100644 --- a/src/monitor/main_monitor.cpp +++ b/src/monitor/main_monitor.cpp @@ -27,6 +27,8 @@ #include #include +#include + namespace lo2s { namespace monitor @@ -64,6 +66,21 @@ MainMonitor::MainMonitor() : trace_(), metrics_(trace_) } } + if (config().use_block_io) + { + for (auto& entry : get_block_devices()) + { + writers_.emplace( + std::piecewise_construct, std::forward_as_tuple(entry.id), + std::forward_as_tuple(std::make_unique(trace_, entry))); + } + for (const auto& cpu : Topology::instance().cpus()) + { + bio_monitors_.emplace_back(std::make_unique(trace_, Cpu(cpu.id), writers_)) + ->start(); + } + } + #ifdef 
HAVE_X86_ADAPT if (!config().x86_adapt_knobs.empty()) { @@ -130,6 +147,22 @@ MainMonitor::~MainMonitor() tracepoint_monitor->stop(); } } + if (config().use_block_io) + { + for (auto& bio_monitor : bio_monitors_) + { + bio_monitor->stop(); + } + std::vector bio_workers; + for (auto& writer_ : writers_) + { + bio_workers.emplace_back(&perf::bio::Writer::write_events, std::ref(*writer_.second)); + } + for (auto& worker : bio_workers) + { + worker.join(); + } + } // Notify trace, that we will end recording now. That means, get_time() of this call will be // the last possible timestamp in the trace diff --git a/src/perf/bio/event_cacher.cpp b/src/perf/bio/event_cacher.cpp new file mode 100644 index 00000000..92e5bf29 --- /dev/null +++ b/src/perf/bio/event_cacher.cpp @@ -0,0 +1,69 @@ +#include +#include +#include +#include + +#include + +#include + +extern "C" +{ +#include +#include +} + +namespace lo2s +{ +namespace perf +{ +namespace bio +{ + +EventCacher::EventCacher(Cpu cpu, std::map>& writers, + BioEventType type) +: Reader(cpu, type), writers_(writers) +{ + for (auto& entry : get_block_devices()) + { + events_.emplace(std::piecewise_construct, std::forward_as_tuple(entry.id), std::tuple()); + + events_[entry.id].reserve(config().block_io_cache_size); + } +} + +bool EventCacher::handle(const Reader::RecordSampleType* sample) +{ + struct RecordBlock* record = (struct RecordBlock*)&sample->tp_data; + otf2::common::io_operation_mode_type mode = otf2::common::io_operation_mode_type::flush; + // TODO: Handle the few io operations that arent either reads or write + if (record->rwbs[0] == 'R') + { + mode = otf2::common::io_operation_mode_type::read; + } + else if (record->rwbs[0] == 'W') + { + mode = otf2::common::io_operation_mode_type::write; + } + else + { + return false; + } + + dev_t dev = makedev(record->dev >> 20, record->dev & ((1U << 20) - 1)); + // Linux reports the size of I/O operations in number of sectors, which are always 512 byte + // lare, regardless of 
what the real sector size of the block device is + events_[dev].push_back( + BioEvent(dev, record->sector, sample->time, type_, mode, record->nr_sector * 512)); + + if (events_[dev].size() == config().block_io_cache_size) + { + writers_[dev]->submit_events(events_[dev]); + events_[dev].clear(); + } + return false; +} + +} // namespace bio +} // namespace perf +} // namespace lo2s diff --git a/src/trace/trace.cpp b/src/trace/trace.cpp index 49f13e2d..844f5ed5 100644 --- a/src/trace/trace.cpp +++ b/src/trace/trace.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -82,9 +83,13 @@ Trace::Trace() interrupt_generator_(registry_.create( intern("perf HW_INSTRUCTIONS"), otf2::common::interrupt_generator_mode_type::count, otf2::common::base_type::decimal, 0, config().sampling_period)), + comm_locations_group_(registry_.create( intern("All pthread locations"), otf2::common::paradigm_type::pthread, otf2::common::group_flag_type::none)), + hardware_comm_locations_group_(registry_.create( + intern("All hardware locations"), otf2::common::paradigm_type::hardware, + otf2::common::group_flag_type::none)), lo2s_regions_group_(registry_.create( intern("lo2s"), otf2::common::paradigm_type::user, otf2::common::group_flag_type::none)), system_tree_root_node_(registry_.create( @@ -154,6 +159,32 @@ Trace::Trace() } groups_.add_process(NO_PARENT_PROCESS); + + if (config().use_block_io) + { + bio_system_tree_node_ = registry_.create( + intern("block devices"), intern("hardware"), system_tree_root_node_); + + const std::vector properties; + const std::vector values; + bio_paradigm_ = registry_.create( + intern("block_io"), intern("block layer I/O"), + otf2::common::io_paradigm_class_type::parallel, otf2::common::io_paradigm_flag_type::os, + properties, values); + + bio_comm_group_ = registry_.create( + intern("block devices"), otf2::common::paradigm_type::hardware, + otf2::common::group_flag_type::none); + + for (auto& device : get_block_devices()) + { 
+ if (device.type == BlockDeviceType::DISK) + { + block_io_handle(device); + bio_writer(device); + } + } + } } void Trace::begin_record() @@ -208,6 +239,11 @@ Trace::~Trace() } } + if (starting_time_ > stopping_time_) + { + stopping_time_ = starting_time_; + } + archive_ << otf2::definition::clock_properties(starting_time_, stopping_time_); std::filesystem::path symlink_path = nitro::env::get("LO2S_OUTPUT_LINK"); @@ -362,6 +398,26 @@ otf2::writer::local& Trace::metric_writer(const MeasurementScope& writer_scope) return archive_(intern_location); } +otf2::writer::local& Trace::bio_writer(BlockDevice& device) +{ + std::lock_guard guard(mutex_); + + const auto& name = intern(fmt::format("block I/O events for {}", device.name)); + + const auto& node = registry_.emplace( + ByDev(device.id), intern(device.name), intern("block device"), bio_system_tree_node_); + + const auto& bio_location_group = registry_.emplace( + ByDev(device.id), name, otf2::common::location_group_type::process, node); + + const auto& intern_location = registry_.emplace( + ByDev(device.id), name, bio_location_group, + otf2::definition::location::location_type::cpu_thread); + + hardware_comm_locations_group_.add_member(intern_location); + return archive_(intern_location); +} + otf2::writer::local& Trace::switch_writer(const ExecutionScope& writer_scope) { MeasurementScope scope = MeasurementScope::context_switch(writer_scope); @@ -387,6 +443,47 @@ otf2::writer::local& Trace::create_metric_writer(const std::string& name) return archive_(location); } +otf2::definition::io_handle& Trace::block_io_handle(BlockDevice& device) +{ + + std::lock_guard guard(mutex_); + + // io_pre_created_handle can not be emplaced because it has no ref. 
+ // So we have to check if we already created everything + if (registry_.has(ByDev(device.id))) + { + return registry_.get(ByDev(device.id)); + } + + const auto& device_name = intern(device.name); + + const otf2::definition::system_tree_node& parent = bio_parent_node(device); + + std::string device_class = (device.type == BlockDeviceType::PARTITION) ? "partition" : "disk"; + + const auto& node = registry_.emplace( + ByDev(device.id), device_name, intern(device_class), parent); + + const auto& file = + registry_.emplace(ByDev(device.id), device_name, node); + + const auto& block_comm = + registry_.emplace(ByDev(device.id), device_name, bio_comm_group_, + otf2::definition::comm::comm_flag_type::none); + + // we could have io handle parents and childs here (block device being the parent (sda), + // partition being the child (sda1)) but that seems like it would be overkill. + auto& handle = registry_.emplace( + ByDev(device.id), device_name, file, bio_paradigm_, + otf2::common::io_handle_flag_type::pre_created, block_comm); + + // todo: set status flags accordingly + registry_.create( + handle, otf2::common::io_access_mode_type::read_write, + otf2::common::io_status_flag_type::none); + return handle; +} + otf2::definition::metric_member Trace::metric_member(const std::string& name, const std::string& description, otf2::common::metric_mode mode, otf2::common::type value_type, diff --git a/src/util.cpp b/src/util.cpp index 9ef636f6..1dff5878 100644 --- a/src/util.cpp +++ b/src/util.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include @@ -21,6 +22,7 @@ extern "C" #include #include #include +#include #include #include #include @@ -244,4 +246,96 @@ Thread gettid() { return Thread(syscall(SYS_gettid)); } + +std::vector get_block_devices() +{ + std::vector result; + std::filesystem::path sys_dev_path("/sys/dev/block"); + + const std::regex devname_regex("DEVNAME=(\\S+)"); + const std::regex devtype_regex("DEVTYPE=(\\S+)"); + const std::regex 
major_regex("MAJOR=(\\S+)"); + const std::regex minor_regex("MINOR=(\\S+)"); + + std::smatch devname_match; + std::smatch devtype_match; + std::smatch major_match; + std::smatch minor_match; + + for (const std::filesystem::directory_entry& dir_entry : + std::filesystem::directory_iterator(sys_dev_path)) + { + std::string path_str = dir_entry.path().string(); + std::string devname = "unknown device"; + uint32_t major = 0; + uint32_t minor = 0; + std::string devtype = "partition"; + + std::filesystem::path uevent_path = dir_entry.path() / "uevent"; + std::ifstream uevent_file(uevent_path); + + while (uevent_file.good()) + { + std::string line; + uevent_file >> line; + if (std::regex_match(line, devname_match, devname_regex)) + { + devname = fmt::format("/dev/{}", devname_match[1].str()); + } + else if (std::regex_match(line, devtype_match, devtype_regex)) + { + devtype = devtype_match[1].str(); + } + else if (std::regex_match(line, major_match, major_regex)) + { + major = std::stoi(major_match[1].str()); + } + else if (std::regex_match(line, minor_match, minor_regex)) + { + minor = std::stoi(minor_match[1].str()); + } + } + + uint32_t parent_major = 0; + uint32_t parent_minor = 0; + if (devtype == "partition") + { + std::filesystem::path parent_dev("/sys"); + + // Because someone at Linux has a serious glue-sniffing problem these symlinks are + // relative paths and not absolute. 
Solution: delete the relative part "../../" from the + // beginning and make it absolute + parent_dev = + parent_dev / + (std::filesystem::read_symlink(dir_entry.path()).parent_path().string().substr(6)); + + std::ifstream parent_uevent_file(parent_dev / "uevent"); + while (parent_uevent_file.good()) + { + std::string line; + parent_uevent_file >> line; + + if (std::regex_match(line, major_match, major_regex)) + { + parent_major = std::stoi(major_match[1].str()); + } + else if (std::regex_match(line, minor_match, minor_regex)) + { + parent_minor = std::stoi(minor_match[1].str()); + } + } + } + + if (devtype == "partition") + { + result.push_back(BlockDevice::partition(makedev(major, minor), devname, + makedev(parent_major, parent_minor))); + } + else + { + result.push_back(BlockDevice::disk(makedev(major, minor), devname)); + } + } + return result; +} } // namespace lo2s