From 051f68c2d3a069543dce4d560facff6aa22284bd Mon Sep 17 00:00:00 2001
From: Thiago Macieira
Date: Mon, 9 Apr 2012 17:16:31 -0300
Subject: [PATCH] Add support for more performance counters in QBenchlib

Added support for most hardware and software counters available on
Linux 3.3. The cache-based counters are missing because they need
special handling.

Also added an option that lists available counters. Note that the list
is of counters the library knows about, not the counters that the
hardware can measure.

Change-Id: I9f6fb09b5460bf4ac6082081611c1d6ff806a3fa
Reviewed-by: Jason McDonald
---
 src/testlib/qbenchmarkperfevents.cpp | 206 +++++++++++++++++++++++++--
 src/testlib/qbenchmarkperfevents_p.h |   3 +
 src/testlib/qtestcase.cpp            |  12 ++
 3 files changed, 213 insertions(+), 8 deletions(-)

diff --git a/src/testlib/qbenchmarkperfevents.cpp b/src/testlib/qbenchmarkperfevents.cpp
index 5ed76f9aa7..85a0875a88 100644
--- a/src/testlib/qbenchmarkperfevents.cpp
+++ b/src/testlib/qbenchmarkperfevents.cpp
@@ -40,6 +40,7 @@
 ****************************************************************************/
 
 #include "qbenchmarkperfevents_p.h"
+#include "qbenchmarkmetric.h"
 #include "qbenchmark_p.h"
 
 #ifdef QTESTLIB_USE_PERF_EVENTS
@@ -60,6 +61,9 @@
 
 QT_BEGIN_NAMESPACE
 
+static quint32 event_type = PERF_TYPE_HARDWARE;
+static quint64 event_id = PERF_COUNT_HW_CPU_CYCLES;
+
 /*!
     \class QBenchmarkPerfEvents
     \brief The Linux perf events benchmark backend
@@ -92,6 +96,182 @@ bool QBenchmarkPerfEventsMeasurer::isAvailable()
     return perf_event_open(0, 0, 0, 0, 0) == -1 && errno != ENOSYS;
 }
 
+/* Event list structure
+    The following table provides the list of supported events
+
+    Event type  Event counter            Unit                    Name and aliases
+    HARDWARE    CPU_CYCLES               CPUCycles               cycles cpu-cycles
+    HARDWARE    INSTRUCTIONS             Instructions            instructions
+    HARDWARE    CACHE_REFERENCES         CacheReferences         cache-references
+    HARDWARE    CACHE_MISSES             CacheMisses             cache-misses
+    HARDWARE    BRANCH_INSTRUCTIONS      BranchInstructions      branch-instructions branches
+    HARDWARE    BRANCH_MISSES            BranchMisses            branch-misses
+    HARDWARE    BUS_CYCLES               BusCycles               bus-cycles
+    HARDWARE    STALLED_CYCLES_FRONTEND  StalledCycles           stalled-cycles-frontend idle-cycles-frontend
+    HARDWARE    STALLED_CYCLES_BACKEND   StalledCycles           stalled-cycles-backend idle-cycles-backend
+    SOFTWARE    CPU_CLOCK                WalltimeMilliseconds    cpu-clock
+    SOFTWARE    TASK_CLOCK               WalltimeMilliseconds    task-clock
+    SOFTWARE    PAGE_FAULTS              PageFaults              page-faults faults
+    SOFTWARE    PAGE_FAULTS_MAJ          MajorPageFaults         major-faults
+    SOFTWARE    PAGE_FAULTS_MIN          MinorPageFaults         minor-faults
+    SOFTWARE    CONTEXT_SWITCHES         ContextSwitches         context-switches cs
+    SOFTWARE    CPU_MIGRATIONS           CPUMigrations           cpu-migrations migrations
+    SOFTWARE    ALIGNMENT_FAULTS         AlignmentFaults         alignment-faults
+    SOFTWARE    EMULATION_FAULTS         EmulationFaults         emulation-faults
+
+    Use the following Perl script to re-generate the list
+=== cut perl ===
+#!/usr/bin/env perl
+# Load all entries into %map
+while (<STDIN>) {
+    m/^\s*(.*)\s*$/;
+    @_ = split /\s+/, $1;
+    $type = shift @_;
+    $id = ($type eq "HARDWARE" ? "PERF_COUNT_HW_" :
+       $type eq "SOFTWARE" ? "PERF_COUNT_SW_" :
+       $type eq "HW_CACHE" ? "CACHE_" : "") . shift @_;
+    $unit = shift @_;
+
+    for $string (@_) {
+        die "$string was already seen!" if defined($map{$string});
+        $map{$string} = [-1, $type, $id, $unit];
+        push @strings, $string;
+    }
+}
+
+# sort the map and print the string list
+@strings = sort @strings;
+print "static const char eventlist_strings[] = \n";
+$counter = 0;
+for $entry (@strings) {
+    print " \"$entry\\0\"\n";
+    $map{$entry}[0] = $counter;
+    $counter += 1 + length $entry;
+}
+
+# print the table
+print " \"\\0\";\n\nstatic const Events eventlist[] = {\n";
+for $entry (sort @strings) {
+    printf " { %3d, PERF_TYPE_%s, %s, QTest::%s },\n",
+        $map{$entry}[0],
+        $map{$entry}[1],
+        $map{$entry}[2],
+        $map{$entry}[3];
+}
+print " { 0, PERF_TYPE_MAX, 0, QTest::Events }\n};\n";
+=== cut perl ===
+*/
+
+struct Events {
+    unsigned offset;
+    quint32 type;
+    quint64 event_id;
+    QTest::QBenchmarkMetric metric;
+};
+
+/* -- BEGIN GENERATED CODE -- */
+static const char eventlist_strings[] =
+    "alignment-faults\0"
+    "branch-instructions\0"
+    "branch-misses\0"
+    "branches\0"
+    "bus-cycles\0"
+    "cache-misses\0"
+    "cache-references\0"
+    "context-switches\0"
+    "cpu-clock\0"
+    "cpu-cycles\0"
+    "cpu-migrations\0"
+    "cs\0"
+    "cycles\0"
+    "emulation-faults\0"
+    "faults\0"
+    "idle-cycles-backend\0"
+    "idle-cycles-frontend\0"
+    "instructions\0"
+    "major-faults\0"
+    "migrations\0"
+    "minor-faults\0"
+    "page-faults\0"
+    "stalled-cycles-backend\0"
+    "stalled-cycles-frontend\0"
+    "task-clock\0"
+    "\0";
+
+static const Events eventlist[] = {
+    { 0, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_ALIGNMENT_FAULTS, QTest::AlignmentFaults },
+    { 17, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
+    { 37, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_MISSES, QTest::BranchMisses },
+    { 51, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BRANCH_INSTRUCTIONS, QTest::BranchInstructions },
+    { 60, PERF_TYPE_HARDWARE, PERF_COUNT_HW_BUS_CYCLES, QTest::BusCycles },
+    { 71, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_MISSES, QTest::CacheMisses },
+    { 84, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CACHE_REFERENCES, QTest::CacheReferences },
+    { 101, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
+    { 118, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_CLOCK, QTest::WalltimeMilliseconds },
+    { 128, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
+    { 139, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
+    { 154, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CONTEXT_SWITCHES, QTest::ContextSwitches },
+    { 157, PERF_TYPE_HARDWARE, PERF_COUNT_HW_CPU_CYCLES, QTest::CPUCycles },
+    { 164, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_EMULATION_FAULTS, QTest::EmulationFaults },
+    { 181, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
+    { 188, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
+    { 208, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
+    { 229, PERF_TYPE_HARDWARE, PERF_COUNT_HW_INSTRUCTIONS, QTest::Instructions },
+    { 242, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MAJ, QTest::MajorPageFaults },
+    { 255, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_CPU_MIGRATIONS, QTest::CPUMigrations },
+    { 266, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS_MIN, QTest::MinorPageFaults },
+    { 279, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_PAGE_FAULTS, QTest::PageFaults },
+    { 291, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_BACKEND, QTest::StalledCycles },
+    { 314, PERF_TYPE_HARDWARE, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND, QTest::StalledCycles },
+    { 338, PERF_TYPE_SOFTWARE, PERF_COUNT_SW_TASK_CLOCK, QTest::WalltimeMilliseconds },
+    { 0, PERF_TYPE_MAX, 0, QTest::Events }
+};
+/* -- END GENERATED CODE -- */
+
+QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricForEvent(quint32 type, quint64 event_id)
+{
+    const Events *ptr = eventlist;
+    for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) {
+        if (ptr->type == type && ptr->event_id == event_id)
+            return ptr->metric;
+    }
+    return QTest::Events;
+}
+
+void QBenchmarkPerfEventsMeasurer::setCounter(const char *name)
+{
+    // The table ends with a PERF_TYPE_MAX sentinel; stopping on it means no match.
+    const Events *ptr = eventlist;
+    for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) {
+        if (strcmp(name, eventlist_strings + ptr->offset) == 0)
+            break;
+    }
+    if (ptr->type == PERF_TYPE_MAX) {
+        fprintf(stderr, "ERROR: Performance counter type '%s' is unknown\n", name);
+        exit(1);
+    }
+
+    ::event_type = ptr->type;
+    ::event_id = ptr->event_id;
+}
+
+void QBenchmarkPerfEventsMeasurer::listCounters()
+{
+    if (!isAvailable()) {
+        printf("Performance counters are not available on this system\n");
+        return;
+    }
+
+    printf("The following performance counters are available:\n");
+    const Events *ptr = eventlist;
+    for ( ; ptr->type != PERF_TYPE_MAX; ++ptr) {
+        printf(" %-30s [%s]\n", eventlist_strings + ptr->offset,
+               ptr->type == PERF_TYPE_HARDWARE ? "hardware" :
+               ptr->type == PERF_TYPE_SOFTWARE ? "software" :
+               ptr->type == PERF_TYPE_HW_CACHE ? "cache" : "other");
+    }
+}
+
 QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer()
     : fd(-1)
 {
@@ -103,6 +283,10 @@ QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer()
 }
 
 void QBenchmarkPerfEventsMeasurer::init()
+{
+}
+
+void QBenchmarkPerfEventsMeasurer::start()
 {
     perf_event_attr attr;
     memset(&attr, 0, sizeof attr);
@@ -119,9 +303,8 @@ void QBenchmarkPerfEventsMeasurer::init()
     attr.task = true; // trace fork and exit
 
     // our event type
-    // ### FIXME hardcoded for now
-    attr.type = PERF_TYPE_HARDWARE;
-    attr.config = PERF_COUNT_HW_CPU_CYCLES;
+    attr.type = ::event_type;
+    attr.config = ::event_id;
 
     // pid == 0 -> attach to the current process
     // cpu == -1 -> monitor on all CPUs
@@ -134,10 +317,7 @@ void QBenchmarkPerfEventsMeasurer::init()
     } else {
         ::fcntl(fd, F_SETFD, FD_CLOEXEC);
     }
-}
 
-void QBenchmarkPerfEventsMeasurer::start()
-{
     // enable the counter
     ::ioctl(fd, PERF_EVENT_IOC_RESET);
     ::ioctl(fd, PERF_EVENT_IOC_ENABLE);
@@ -175,10 +355,10 @@ int QBenchmarkPerfEventsMeasurer::adjustMedianCount(int)
 
 QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricType()
 {
-    return QTest::CPUCycles;
+    return metricForEvent(event_type, event_id);
 }
 
-qint64 QBenchmarkPerfEventsMeasurer::readValue()
+static quint64 rawReadValue(int fd)
 {
     /* from the kernel docs:
      * struct read_format {
@@ -213,6 +393,16 @@ qint64 QBenchmarkPerfEventsMeasurer::readValue()
     return results.value * (double(results.time_running) / double(results.time_enabled));
 }
 
+qint64 QBenchmarkPerfEventsMeasurer::readValue()
+{
+    quint64 raw = rawReadValue(fd);
+    if (metricType() == QTest::WalltimeMilliseconds) {
+        // perf returns nanoseconds
+        return raw / 1000000;
+    }
+    return raw;
+}
+
 QT_END_NAMESPACE
 
 #endif
diff --git a/src/testlib/qbenchmarkperfevents_p.h b/src/testlib/qbenchmarkperfevents_p.h
index 74966e1699..f73d140300 100644
--- a/src/testlib/qbenchmarkperfevents_p.h
+++ b/src/testlib/qbenchmarkperfevents_p.h
@@ -74,6 +74,9 @@ public:
     virtual QTest::QBenchmarkMetric metricType();
 
     static bool isAvailable();
+    static QTest::QBenchmarkMetric metricForEvent(quint32 type, quint64 event_id);
+    static void setCounter(const char *name);
+    static void listCounters();
 private:
     int fd;
 
diff --git a/src/testlib/qtestcase.cpp b/src/testlib/qtestcase.cpp
index 87d32da26a..c1ab574291 100644
--- a/src/testlib/qtestcase.cpp
+++ b/src/testlib/qtestcase.cpp
@@ -1343,6 +1343,8 @@ Q_TESTLIB_EXPORT void qtest_qParseArgs(int argc, char *argv[], bool qml)
 #endif
 #ifdef QTESTLIB_USE_PERF_EVENTS
          " -perf : Use Linux perf events to time benchmarks\n"
+         " -perfcounter name : Use the counter named 'name'\n"
+         " -perfcounterlist : Lists the counters available\n"
 #endif
 #ifdef HAVE_TICK_COUNTER
          " -tickcounter : Use CPU tick counters to time benchmarks\n"
@@ -1492,6 +1494,16 @@ Q_TESTLIB_EXPORT void qtest_qParseArgs(int argc, char *argv[], bool qml)
             } else {
                 fprintf(stderr, "WARNING: Linux perf events not available. Using the walltime measurer.\n");
             }
+        } else if (strcmp(argv[i], "-perfcounter") == 0) {
+            if (i + 1 >= argc) {
+                fprintf(stderr, "-perfcounter needs an extra parameter with the name of the counter\n");
+                exit(1);
+            } else {
+                QBenchmarkPerfEventsMeasurer::setCounter(argv[++i]);
+            }
+        } else if (strcmp(argv[i], "-perfcounterlist") == 0) {
+            QBenchmarkPerfEventsMeasurer::listCounters();
+            exit(0);
 #endif
 #ifdef HAVE_TICK_COUNTER
         } else if (strcmp(argv[i], "-tickcounter") == 0) {