Implement the Linux Perf Counter backend for benchlib

Currently, we only support one event type: counting CPU cycles with
hardware counters. There are no fallbacks if this hardware counter is
not available, and there is currently no way to specify other
counters.

Benchlib only supports reporting one event per benchmark, even though
the event counter interface allows specifying more than one. Still,
the hardware usually has limitations on how many events it can monitor
at a time, and we'd prefer to have the counter running 100% of the
time, so this will not change.

Change-Id: I79858a3ad1e696dc4b7b72c420e5a04b67cd55de
Reviewed-by: Jason McDonald <macadder1@gmail.com>
This commit is contained in:
Thiago Macieira 2012-04-09 14:11:47 -03:00 committed by The Qt Project
parent 9d72259f94
commit c63420a117
3 changed files with 109 additions and 2 deletions

View File

@ -44,15 +44,41 @@
#ifdef QTESTLIB_USE_PERF_EVENTS
// include the qcore_unix_p.h without core-private
// we only use inline functions anyway
#include "../corelib/kernel/qcore_unix_p.h"
#include <sys/types.h>
#include <errno.h>
#include <fcntl.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/ioctl.h>
#include "3rdparty/linux_perf_event_p.h"
QT_BEGIN_NAMESPACE
/*!
\class QBenchmarkPerfEvents
\brief The Linux perf events benchmark backend
This benchmark backend uses the Linux Performance Counters interface,
introduced with the Linux kernel v2.6.31. The interface consists of a
single system call (perf_event_open), which takes an attribute structure
and returns a file descriptor.
More information:
\li design docs: tools/perf/design.txt <http://lxr.linux.no/linux/tools/perf/design.txt>
\li sample tool: tools/perf/builtin-stat.c <http://lxr.linux.no/linux/tools/perf/builtin-stat.c>
(note: as of v3.3.1, the documentation is out-of-date with the kernel
interface, so reading the source code of existing tools is necessary)
This benchlib backend monitors the current process as well as any child
processes it launches. We do not try to benchmark in kernel or hypervisor
mode, as that usually requires elevated privileges.
*/
static int perf_event_open(perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
{
@ -67,27 +93,69 @@ bool QBenchmarkPerfEventsMeasurer::isAvailable()
}
/*!
    \internal
    Constructs the measurer without an open perf event descriptor: fd is set
    to -1 here, and init() is the function that assigns it the descriptor
    returned by perf_event_open().
*/
QBenchmarkPerfEventsMeasurer::QBenchmarkPerfEventsMeasurer()
: fd(-1)
{
}
/*!
    \internal
    Closes the perf event descriptor, if one was opened.
*/
QBenchmarkPerfEventsMeasurer::~QBenchmarkPerfEventsMeasurer()
{
    // fd is still -1 when no descriptor was ever opened (the constructor's
    // initial value). Skip the close in that case instead of issuing a
    // system call that can only fail and overwrite errno.
    if (fd != -1)
        qt_safe_close(fd);
}
/*!
    \internal
    Opens the perf event counter (hardware CPU cycles) for the current
    process and stores its descriptor in fd. The counter is created disabled;
    start() is what enables it.

    If the kernel refuses the request for lack of privileges, the open is
    retried with kernel and hypervisor mode excluded, so that only user-mode
    cycles are counted. If no counter can be opened at all, an error is
    printed and the process exits with status 1.
*/
void QBenchmarkPerfEventsMeasurer::init()
{
    perf_event_attr attr;
    memset(&attr, 0, sizeof attr);

    // common init
    attr.size = sizeof attr;
    attr.sample_period = 0;     // counting mode only, no sampling
    attr.sample_type = 0;
    // readValue() expects the value followed by these two time fields
    attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | PERF_FORMAT_TOTAL_TIME_RUNNING;
    attr.disabled = true;       // start disabled, we'll enable later
    attr.inherit = true;        // let children inherit, if the benchmark has child processes
    attr.pinned = true;         // keep it running on the PMU
    attr.inherit_stat = true;   // collapse all the info from child processes
    attr.task = true;           // trace fork and exit

    // our event type
    // ### FIXME hardcoded for now
    attr.type = PERF_TYPE_HARDWARE;
    attr.config = PERF_COUNT_HW_CPU_CYCLES;

    // pid == 0 -> attach to the current process
    // cpu == -1 -> monitor on all CPUs
    // group_fd == -1 -> this is the group leader
    // flags == 0 -> reserved, must be zero
    fd = perf_event_open(&attr, 0, -1, -1, 0);
    if (fd == -1 && (errno == EACCES || errno == EPERM)) {
        // Counting kernel or hypervisor cycles usually requires elevated
        // privileges (see /proc/sys/kernel/perf_event_paranoid). Retry
        // counting user-mode cycles only instead of giving up.
        attr.exclude_kernel = true;
        attr.exclude_hv = true;
        fd = perf_event_open(&attr, 0, -1, -1, 0);
    }

    if (fd == -1) {
        perror("QBenchmarkPerfEventsMeasurer::init: perf_event_open");
        exit(1);
    }

    // don't leak the counter descriptor into exec'ed programs
    ::fcntl(fd, F_SETFD, FD_CLOEXEC);
}
/*!
    \internal
    Zeroes the counter and then starts it counting.
*/
void QBenchmarkPerfEventsMeasurer::start()
{
    // The perf ioctls take a flags argument (PERF_IOC_FLAG_GROUP widens the
    // request to the whole event group). Pass 0 explicitly so the kernel
    // never reads an indeterminate value and the request applies to this
    // event rather than to its group.
    ::ioctl(fd, PERF_EVENT_IOC_RESET, 0);
    ::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
}
/*!
    \internal
    Returns the current counter value without ending the measurement: the
    counter is paused around the read and resumed afterwards.
*/
qint64 QBenchmarkPerfEventsMeasurer::checkpoint()
{
    // The perf ioctls take a flags argument (PERF_IOC_FLAG_GROUP widens the
    // request to the whole event group); pass 0 explicitly so the kernel
    // never reads an indeterminate value.
    ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
    const qint64 value = readValue();
    ::ioctl(fd, PERF_EVENT_IOC_ENABLE, 0);
    return value;
}
/*!
    \internal
    Stops the counter and returns the value it reached.
*/
qint64 QBenchmarkPerfEventsMeasurer::stop()
{
    // disable the counter; the explicit 0 is the ioctl's flags argument
    // (no PERF_IOC_FLAG_GROUP), so the kernel never reads an indeterminate
    // value
    ::ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
    return readValue();
}
bool QBenchmarkPerfEventsMeasurer::isMeasurementAccepted(qint64)
@ -110,6 +178,41 @@ QTest::QBenchmarkMetric QBenchmarkPerfEventsMeasurer::metricType()
return QTest::Events;
}
#endif
/*!
    \internal
    Reads the counter from the perf event descriptor and returns its value.

    When the counter was running for the whole time it was enabled, the raw
    count is returned. Otherwise the count is extrapolated to the full
    enabled time. If the results cannot be read in full (read error or
    end-of-file), an error is printed and the process exits with status 1.
*/
qint64 QBenchmarkPerfEventsMeasurer::readValue()
{
    /* from the kernel docs:
     * struct read_format {
     *  { u64           value;
     *    { u64         time_enabled; } && PERF_FORMAT_TOTAL_TIME_ENABLED
     *    { u64         time_running; } && PERF_FORMAT_TOTAL_TIME_RUNNING
     *    { u64         id;           } && PERF_FORMAT_ID
     *  } && !PERF_FORMAT_GROUP
     */
    struct read_format {
        quint64 value;
        quint64 time_enabled;
        quint64 time_running;
    } results;

    char *const buffer = reinterpret_cast<char *>(&results);
    size_t nread = 0;
    while (nread < sizeof results) {
        const qint64 r = qt_safe_read(fd, buffer + nread, sizeof results - nread);
        if (r == -1) {
            perror("QBenchmarkPerfEventsMeasurer::readValue: reading the results");
            exit(1);
        }
        if (r == 0) {
            // End-of-file makes no progress, so looping again would spin
            // forever. A pinned counter that could not be scheduled on the
            // PMU is reported this way by the kernel.
            fprintf(stderr, "QBenchmarkPerfEventsMeasurer::readValue: reading the results: unexpected end of file\n");
            exit(1);
        }
        nread += size_t(r);
    }

    // Nothing to extrapolate: the counter ran the whole time it was enabled,
    // or it never ran at all (which would make the scale factor undefined).
    if (results.time_running == results.time_enabled || results.time_running == 0)
        return results.value;

    // scale the results, though this shouldn't happen!
    // The counter only ran for time_running out of time_enabled, so the
    // estimate for the whole enabled period is value * enabled / running.
    return qint64(double(results.value)
                  * (double(results.time_enabled) / double(results.time_running)));
}
QT_END_NAMESPACE
#endif

View File

@ -75,6 +75,9 @@ public:
static bool isAvailable();
private:
int fd;
qint64 readValue();
};
QT_END_NAMESPACE

View File

@ -1487,7 +1487,8 @@ Q_TESTLIB_EXPORT void qtest_qParseArgs(int argc, char *argv[], bool qml)
#ifdef QTESTLIB_USE_PERF_EVENTS
} else if (strcmp(argv[i], "-perf") == 0) {
if (QBenchmarkPerfEventsMeasurer::isAvailable()) {
printf("perf available\n");
// perf available
QBenchmarkGlobalData::current->setMode(QBenchmarkGlobalData::PerfCounter);
} else {
fprintf(stderr, "WARNING: Linux perf events not available. Using the walltime measurer.\n");
}