glibc/sysdeps/unix/sysv/linux/getsysstats.c
Rasmus Villemoes 0ce657c576 linux/getsysstats.c: use sysinfo() instead of parsing /proc/meminfo
Profiling git's test suite, Linus noted [1] that a disproportionately
large amount of time was spent reading /proc/meminfo. This is done by
the glibc functions get_phys_pages and get_avphys_pages, but they only
need the MemTotal and MemFree fields, respectively. That same
information can be obtained with a single syscall, sysinfo, instead of
six: open, fstat, mmap, read, close, munmap. While sysinfo also
provides more than necessary, it does a lot less work than what the
kernel needs to do to provide the entire /proc/meminfo. Both strace -T
and in-app microbenchmarks show that the sysinfo() approach is
roughly an order of magnitude faster.

sysinfo() is much older than what glibc currently requires, so I don't
think there's any reason to keep the old parsing code. Moreover, this
makes get_[av]phys_pages work even in the absence of /proc.

Linus noted that something as simple as 'bash -c "echo"' would trigger
the reading of /proc/meminfo, but gdb says that many more applications
than just bash are affected:

Starting program: /bin/bash "-c" "echo"

Breakpoint 1, __get_phys_pages () at ../sysdeps/unix/sysv/linux/getsysstats.c:283
283     ../sysdeps/unix/sysv/linux/getsysstats.c: No such file or directory.
(gdb) bt

So it seems that any application that uses qsort on a moderately sized
array will incur this cost (once), which is obviously proportionately
more expensive for lots of short-lived processes (such as the git test
suite).

[1] http://thread.gmane.org/gmane.linux.kernel/2019285

Signed-off-by: Rasmus Villemoes <rv@rasmusvillemoes.dk>

	* sysdeps/unix/sysv/linux/getsysstats.c (__get_phys_pages):
	Use sysinfo system call instead of parsing /proc/meminfo.
	* sysdeps/unix/sysv/linux/getsysstats.c (__get_avphys_pages):
	Likewise.
2015-09-12 21:09:59 -04:00

331 lines
8.1 KiB
C

/* Determine various system internal values, Linux version.
Copyright (C) 1996-2015 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
The GNU C Library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with the GNU C Library; if not, see
<http://www.gnu.org/licenses/>. */
#include <alloca.h>
#include <assert.h>
#include <ctype.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <mntent.h>
#include <paths.h>
#include <stdio.h>
#include <stdio_ext.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/sysinfo.h>
#include <atomic.h>
#include <not-cancel.h>
/* How we can determine the number of available processors depends on
the configuration. There is currently (as of version 2.0.21) no
system call to determine the number. It is planned for the 2.1.x
series to add this, though.
One possibility to implement it for systems using Linux 2.0 is to
examine the pseudo file /proc/cpuinfo. Here we have one entry for
each processor.
But not all systems have support for the /proc filesystem. If it
is not available we simply return 1 since there is no other way to
determine the number. */
/* Hook for architecture-specific /proc/cpuinfo parsers.  Other
   architectures use different formats for /proc/cpuinfo; a port can
   supply its own GET_NPROCS_PARSER, in which case the generic
   definition below is skipped.

   The generic parser stores in RESULT the number of lines read from FD
   that begin with the string "processor".  Lines are fetched with
   next_line, using BUFFER .. BUFFER_END as scratch space and CP / RE as
   the parse position and end-of-data marker.  */
#ifndef GET_NPROCS_PARSER
# define GET_NPROCS_PARSER(FD, BUFFER, CP, RE, BUFFER_END, RESULT) \
  do \
    { \
      char *cpuinfo_line; \
      (RESULT) = 0; \
      /* Extremely long lines are not a concern since the kernel \
         will not generate them.  */ \
      for (;;) \
        { \
          cpuinfo_line = next_line (FD, BUFFER, &CP, &RE, BUFFER_END); \
          if (cpuinfo_line == NULL) \
            break; \
          if (strncmp (cpuinfo_line, "processor", 9) == 0) \
            ++(RESULT); \
        } \
    } \
  while (0)
#endif
/* Return a pointer to the start of the next line of the data read from
FD, or NULL when no data is left or a read fails.
BUFFER .. BUFFER_END is the caller-provided scratch buffer.  *CP is the
current parse position and *RE the end of the valid data in the buffer;
both are updated by this function.  A caller that starts with
*CP == *RE == BUFFER_END makes the first call read from FD.
The returned line is not NUL-terminated by this function; on return *CP
points just past the byte that is treated as the end of the line.  */
static char *
next_line (int fd, char *const buffer, char **cp, char **re,
char *const buffer_end)
{
char *res = *cp;
/* Look for a complete line in the data that is already buffered.  */
char *nl = memchr (*cp, '\n', *re - *cp);
if (nl == NULL)
{
/* No newline in the unread data.  A refill is only attempted when
some of the buffer has already been consumed.  */
if (*cp != buffer)
{
if (*re == buffer_end)
{
/* The valid data reaches the end of the buffer: move the unread
tail to the front and read more from FD into the space freed.  */
memmove (buffer, *cp, *re - *cp);
*re = buffer + (*re - *cp);
*cp = buffer;
ssize_t n = read_not_cancel (fd, *re, buffer_end - *re);
if (n < 0)
return NULL;
*re += n;
nl = memchr (*cp, '\n', *re - *cp);
/* Still no newline although the buffer is full: the line is longer
than the buffer.  */
while (nl == NULL && *re == buffer_end)
{
/* Truncate too long lines. */
/* Keep the first three quarters of the buffer and read the
continuation of the line into the last quarter.  */
*re = buffer + 3 * (buffer_end - buffer) / 4;
n = read_not_cancel (fd, *re, buffer_end - *re);
if (n < 0)
return NULL;
/* NL is searched for in the new chunk before the store below, which
puts a newline at the truncation point in place of the first byte
just read.  */
nl = memchr (*re, '\n', n);
**re = '\n';
*re += n;
}
}
else
nl = memchr (*cp, '\n', *re - *cp);
res = *cp;
}
/* No newline was found: treat the last byte of the valid data as the
end of the line.  */
if (nl == NULL)
nl = *re - 1;
}
/* Advance the parse position past the end of this line.  */
*cp = nl + 1;
assert (*cp <= *re);
/* A line that starts at the end of the valid data means nothing is
left to return.  */
return res == *re ? NULL : res;
}
/* Count the CPUs named by the sysfs CPU list stored in [L, RE), such
   as "0-3,8-11\n".  Every entry is either a single number or an
   inclusive range "first-last"; entries are separated by commas and the
   list ends at a newline or at RE.  Return 0 if the text cannot be
   parsed.  */
static int
count_cpu_list (char *l, const char *re)
{
  int result = 0;

  do
    {
      char *endp;
      unsigned long int n = strtoul (l, &endp, 10);
      if (l == endp)
        return 0;

      unsigned long int m = n;
      if (*endp == '-')
        {
          l = endp + 1;
          m = strtoul (l, &endp, 10);
          if (l == endp)
            return 0;
        }

      result += m - n + 1;

      /* Step over the separator so that the next iteration starts on
         the first digit of the following entry.  Without this a list
         with more than one entry was rejected as malformed.  */
      l = endp;
      if (l < re && *l == ',')
        ++l;
    }
  while (l < re && *l != '\n');

  return result;
}

/* Return the number of processors that are currently online.

   The sources are tried in this order:
     1. /sys/devices/system/cpu/online, a list of CPU numbers and ranges;
     2. /proc/stat, counting the leading "cpuN" lines;
     3. /proc/cpuinfo via GET_NPROCS_PARSER, only if /proc/stat cannot
        be opened;
   and 1 is returned if none of the files can be opened.

   The value is cached in static storage and reused by calls made
   within the same second as the one that computed it.  */
int
__get_nprocs (void)
{
  static int cached_result = -1;
  static time_t timestamp;

  /* TIMESTAMP is read before CACHED_RESULT, mirroring the order in
     which the two are written at the end of this function.  */
  time_t now = time (NULL);
  time_t prev = timestamp;
  atomic_read_barrier ();
  if (now == prev && cached_result > -1)
    return cached_result;

  /* XXX Here will come a test for the new system call.  */

  const size_t buffer_size = __libc_use_alloca (8192) ? 8192 : 512;
  char *buffer = alloca (buffer_size);
  char *buffer_end = buffer + buffer_size;
  /* An empty buffer positioned at its end makes next_line start by
     reading from the file.  */
  char *cp = buffer_end;
  char *re = buffer_end;

  const int flags = O_RDONLY | O_CLOEXEC;
  int fd = open_not_cancel_2 ("/sys/devices/system/cpu/online", flags);
  char *l;
  int result = 0;
  if (fd != -1)
    {
      l = next_line (fd, buffer, &cp, &re, buffer_end);
      if (l != NULL)
        result = count_cpu_list (l, re);

      close_not_cancel_no_status (fd);

      if (result > 0)
        goto out;
    }

  /* The sysfs file was unusable; reset the buffer state for the next
     file.  */
  cp = buffer_end;
  re = buffer_end;
  result = 1;

  /* The /proc/stat format is more uniform, use it by default.  */
  fd = open_not_cancel_2 ("/proc/stat", flags);
  if (fd != -1)
    {
      result = 0;

      while ((l = next_line (fd, buffer, &cp, &re, buffer_end)) != NULL)
        {
          /* The current format of /proc/stat has all the cpu* entries
             at the front.  We assume here that stays this way.  */
          if (strncmp (l, "cpu", 3) != 0)
            break;
          /* The bare "cpu" summary line is not a processor; only
             "cpu" followed by a digit is counted.  */
          if (isdigit ((unsigned char) l[3]))
            ++result;
        }

      close_not_cancel_no_status (fd);
    }
  else
    {
      fd = open_not_cancel_2 ("/proc/cpuinfo", flags);
      if (fd != -1)
        {
          GET_NPROCS_PARSER (fd, buffer, cp, re, buffer_end, result);
          close_not_cancel_no_status (fd);
        }
    }

 out:
  /* Publish the value before the timestamp that validates it.  */
  cached_result = result;
  atomic_write_barrier ();
  timestamp = now;

  return result;
}
weak_alias (__get_nprocs, get_nprocs)
/* Return the number of configured processors.  On some architectures
   it is possible to distinguish between configured and active cpus.

   When /sys/devices/system/cpu can be opened, the result is the number
   of its subdirectories named "cpu" followed by a decimal number.
   Otherwise the count comes from GET_NPROCS_CONF_PARSER applied to
   /proc/cpuinfo if the port defines that macro (1 if the file cannot be
   opened), and from __get_nprocs if it does not.  */
int
__get_nprocs_conf (void)
{
  /* XXX Here will come a test for the new system call.  */

  /* Try to use the sysfs filesystem first.  */
  DIR *cpu_dir = __opendir ("/sys/devices/system/cpu");
  if (cpu_dir == NULL)
    {
#ifdef GET_NPROCS_CONF_PARSER
      /* If we do not find an appropriate entry the answer is 1.  */
      int nconf = 1;
      FILE *cpuinfo = fopen ("/proc/cpuinfo", "rce");
      if (cpuinfo != NULL)
        {
          char buffer[8192];

          /* No threads use this stream.  */
          __fsetlocking (cpuinfo, FSETLOCKING_BYCALLER);
          GET_NPROCS_CONF_PARSER (cpuinfo, buffer, nconf);
          fclose (cpuinfo);
        }
      return nconf;
#else
      return __get_nprocs ();
#endif
    }

  int count = 0;
  for (;;)
    {
      struct dirent64 *entry = __readdir64 (cpu_dir);
      if (entry == NULL)
        break;

      /* NB: the sysfs has d_type support.  */
      if (entry->d_type != DT_DIR)
        continue;
      if (strncmp (entry->d_name, "cpu", 3) != 0)
        continue;

      /* Only names of the form "cpu<number>" with nothing after the
         number are counted.  */
      char *digits = entry->d_name + 3;
      char *tail;
      unsigned long int cpu_index = strtoul (digits, &tail, 10);
      if (cpu_index == ULONG_MAX || tail == digits || *tail != '\0')
        continue;

      ++count;
    }

  __closedir (cpu_dir);

  return count;
}
weak_alias (__get_nprocs_conf, get_nprocs_conf)
/* Convert NUM memory units of MEM_UNIT bytes each into a number of
   pages, i.e. compute (NUM * MEM_UNIT) / pagesize, but avoid
   overflowing long int.

   MEM_UNIT and the page size are first halved together for as long as
   both exceed 1, so the multiplication uses the smallest remaining
   factor; whatever is left of the page size is then divided out of the
   product by shifting.  In practice, MEM_UNIT is never bigger than the
   page size, so it is 1 after the first loop.  [In the kernel, it is
   initialized to PAGE_SIZE in mm/page_alloc.c:si_meminfo(), and then
   in kernel/sys.c:do_sysinfo() it is set to 1 if unsigned long can
   represent all the sizes measured in bytes].  */
static long int
sysinfo_mempages (unsigned long int num, unsigned int mem_unit)
{
  unsigned long int pagesize = __getpagesize ();

  /* Halve the unit and the page size in lockstep.  */
  for (; mem_unit > 1 && pagesize > 1; mem_unit >>= 1)
    pagesize >>= 1;

  num *= mem_unit;

  /* Divide by what remains of the page size, one bit at a time.  */
  for (; pagesize > 1; pagesize >>= 1)
    num >>= 1;

  return num;
}
/* Return the number of pages of total/available physical memory in
   the system.  This used to be done by parsing /proc/meminfo, but
   that's unnecessarily expensive (and /proc is not always available).
   The sysinfo syscall provides the same information, and has been
   available at least since kernel 2.3.48.  */

/* Return the number of pages of total physical memory, computed from
   the totalram and mem_unit fields reported by sysinfo, or -1 if the
   sysinfo call fails.  */
long int
__get_phys_pages (void)
{
  struct sysinfo info;

  /* INFO is only meaningful if the call succeeded; never derive a page
     count from an uninitialized structure.  */
  if (__sysinfo (&info) != 0)
    return -1;

  return sysinfo_mempages (info.totalram, info.mem_unit);
}
weak_alias (__get_phys_pages, get_phys_pages)
/* Return the number of pages of free physical memory, computed from
   the freeram and mem_unit fields reported by sysinfo, or -1 if the
   sysinfo call fails.  */
long int
__get_avphys_pages (void)
{
  struct sysinfo info;

  /* INFO is only meaningful if the call succeeded; never derive a page
     count from an uninitialized structure.  */
  if (__sysinfo (&info) != 0)
    return -1;

  return sysinfo_mempages (info.freeram, info.mem_unit);
}
weak_alias (__get_avphys_pages, get_avphys_pages)