From a95a08b4af38992cbcf3d1da97199ef19528fbde Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Thu, 26 Aug 2004 18:35:05 +0000 Subject: [PATCH] Update. 2004-08-26 Ulrich Drepper * nscd/cache.c: Major rewrite. The data is now optionally kept in an mmapped memory region which is automatically mirrored on disk. This implements persistent data storage. The memory handling needed to be completely revamped; it now uses a garbage collection mechanism instead of malloc. * nscd/connections.c: Likewise. * nscd/nscd.c: Likewise. * nscd/nscd.h: Likewise. * nscd/nscd_conf.c: Likewise. * nscd/nscd_stat.c: Likewise. * nscd/grpcache.c: Likewise. * nscd/hstcache.c: Likewise. * nscd/pwdcache.c: Likewise. * nscd/Makefile: Add rules to build mem.c. * nscd/mem.c: New file. * nscd/nscd.conf: Describe new configuration options. --- ChangeLog | 19 ++ nscd/Makefile | 5 +- nscd/cache.c | 315 +++++++++++++------ nscd/connections.c | 383 +++++++++++++++++++---- nscd/grpcache.c | 441 ++++++++++++++++++-------- nscd/hstcache.c | 740 ++++++++++++++++++++++++++------------------ nscd/mem.c | 515 ++++++++++++++++++++++++++++++ nscd/nscd.c | 16 +- nscd/nscd.conf | 6 + nscd/nscd.h | 210 ++++++++++--- nscd/nscd_conf.c | 43 ++- nscd/nscd_getgr_r.c | 30 +- nscd/nscd_stat.c | 77 +++-- nscd/pwdcache.c | 446 ++++++++++++++++++-------- 14 files changed, 2409 insertions(+), 837 deletions(-) create mode 100644 nscd/mem.c diff --git a/ChangeLog b/ChangeLog index d1b41045ba..4b86a8d2e9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,22 @@ +2004-08-26 Ulrich Drepper + + * nscd/cache.c: Major rewrite. The data is now optionally kept in + an mmapped memory region which is automatically mirrored on disk. + This implements persistent data storage. The memory handling + needed to be completely revamped; it now uses a garbage collection + mechanism instead of malloc. + * nscd/connections.c: Likewise. + * nscd/nscd.c: Likewise. + * nscd/nscd.h: Likewise. + * nscd/nscd_conf.c: Likewise. + * nscd/nscd_stat.c: Likewise. 
+ * nscd/grpcache.c: Likewise. + * nscd/hstcache.c:: Likewise. + * nscd/pwdcache.c:: Likewise. + * nscd/Makefile: Add rules to build mem.c. + * nscd/mem.c: New file. + * nscd/nscd.conf: Describe new configuration options. + 2004-08-26 Kaz Kojima * sysdeps/unix/sysv/linux/mips/pread.c: Include sgidefs.h only if diff --git a/nscd/Makefile b/nscd/Makefile index 7392e71509..cb82a2e06f 100644 --- a/nscd/Makefile +++ b/nscd/Makefile @@ -1,4 +1,4 @@ -# Copyright (C) 1998, 2000, 2002, 2003 Free Software Foundation, Inc. +# Copyright (C) 1998, 2000, 2002, 2003, 2004 Free Software Foundation, Inc. # This file is part of the GNU C Library. # The GNU C Library is free software; you can redistribute it and/or @@ -30,7 +30,7 @@ vpath %.c ../locale/programs nscd-modules := nscd connections pwdcache getpwnam_r getpwuid_r grpcache \ getgrnam_r getgrgid_r hstcache gethstbyad_r gethstbynm2_r \ - dbg_log nscd_conf nscd_stat cache xmalloc xstrdup + dbg_log nscd_conf nscd_stat cache mem xmalloc xstrdup ifeq ($(have-thread-library),yes) @@ -78,6 +78,7 @@ CFLAGS-nscd_stat.c = -fpie CFLAGS-cache.c = -fpie CFLAGS-xmalloc.c = -fpie CFLAGS-xstrdup.c = -fpie +CFLAGS-mem.c = -fpie $(objpfx)nscd: $(addprefix $(objpfx),$(nscd-modules:=.o)) $(LINK.o) -pie -Wl,-O1 \ diff --git a/nscd/cache.c b/nscd/cache.c index 446154880a..2d50d7794c 100644 --- a/nscd/cache.c +++ b/nscd/cache.c @@ -17,6 +17,7 @@ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. */ +#include #include #include #include @@ -26,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -33,44 +35,69 @@ #include "nscd.h" #include "dbg_log.h" + +/* Number of times a value is reloaded without being used. UINT_MAX + means unlimited. */ +unsigned int reload_count = DEFAULT_RELOAD_LIMIT; + + /* Search the cache for a matching entry and return it when found. If this fails search the negative cache and return (void *) -1 if this search was successful. Otherwise return NULL. 
This function must be called with the read-lock held. */ -struct hashentry * -cache_search (request_type type, void *key, size_t len, struct database *table, - uid_t owner) +struct datahead * +cache_search (request_type type, void *key, size_t len, + struct database_dyn *table, uid_t owner) { - unsigned long int hash = __nis_hash (key, len) % table->module; - struct hashentry *work; + unsigned long int hash = __nis_hash (key, len) % table->head->module; + unsigned long int nsearched = 0; + struct datahead *result = NULL; - work = table->array[hash]; - - while (work != NULL) + ref_t work = table->head->array[hash]; + while (work != ENDREF) { ++nsearched; - if (type == work->type && len == work->len - && memcmp (key, work->key, len) == 0 && work->owner == owner) + struct hashentry *here = (struct hashentry *) (table->data + work); + + if (type == here->type && len == here->len + && memcmp (key, table->data + here->key, len) == 0 + && here->owner == owner) { /* We found the entry. Increment the appropriate counter. */ - if (work->data == (void *) -1) - ++table->neghit; - else - ++table->poshit; + struct datahead *dh + = (struct datahead *) (table->data + here->packet); - break; + /* See whether we must ignore the entry. */ + if (dh->usable) + { + /* We do not synchronize the memory here. The statistics + data is not crucial, we synchronize only once in a while + in the cleanup threads. */ + if (dh->notfound) + ++table->head->neghit; + else + { + ++table->head->poshit; + + if (dh->nreloads != 0) + dh->nreloads = 0; + } + + result = dh; + break; + } } - work = work->next; + work = here->next; } - if (nsearched > table->maxnsearched) - table->maxnsearched = nsearched; + if (nsearched > table->head->maxnsearched) + table->head->maxnsearched = nsearched; - return work; + return result; } /* Add a new entry to the cache. 
The return value is zero if the function @@ -82,45 +109,57 @@ cache_search (request_type type, void *key, size_t len, struct database *table, This is ok since we use operations which would be safe even without locking, given that the `prune_cache' function never runs. Using the readlock reduces the chance of conflicts. */ -void -cache_add (int type, void *key, size_t len, const void *packet, size_t total, - void *data, int last, time_t t, struct database *table, uid_t owner) +int +cache_add (int type, const void *key, size_t len, struct datahead *packet, + bool first, struct database_dyn *table, + uid_t owner) { - unsigned long int hash = __nis_hash (key, len) % table->module; + if (__builtin_expect (debug_level >= 2, 0)) + dbg_log (_("add new entry \"%s\" of type %s for %s to cache%s"), + (const char *) key, serv2str[type], dbnames[table - dbs], + first ? " (first)" : ""); + + unsigned long int hash = __nis_hash (key, len) % table->head->module; struct hashentry *newp; - newp = malloc (sizeof (struct hashentry)); + newp = mempool_alloc (table, sizeof (struct hashentry)); + /* If we cannot allocate memory, just do not do anything. */ if (newp == NULL) - error (EXIT_FAILURE, errno, _("while allocating hash table entry")); + return -1; newp->type = type; + newp->first = first; newp->len = len; - newp->key = key; + newp->key = (char *) key - table->data; + assert (newp->key + newp->len <= table->head->first_free); newp->owner = owner; - newp->data = data; - newp->timeout = t; - newp->packet = packet; - newp->total = total; - - newp->last = last; + newp->packet = (char *) packet - table->data; /* Put the new entry in the first position. */ do - newp->next = table->array[hash]; - while (atomic_compare_and_exchange_bool_acq (&table->array[hash], newp, - newp->next)); + newp->next = table->head->array[hash]; + while (atomic_compare_and_exchange_bool_acq (&table->head->array[hash], + (ref_t) ((char *) newp + - table->data), + (ref_t) newp->next)); /* Update the statistics. 
*/ - if (data == (void *) -1) - ++table->negmiss; - else if (last) - ++table->posmiss; + if (packet->notfound) + ++table->head->negmiss; + else if (first) + ++table->head->posmiss; - /* Instead of slowing down the normal process for statistics - collection we accept living with some incorrect data. */ - unsigned long int nentries = ++table->nentries; - if (nentries > table->maxnentries) - table->maxnentries = nentries; + /* We depend on this value being correct and at least as high as the + real number of entries. */ + atomic_increment (&table->head->nentries); + + /* It does not matter that we are not loading the just increment + value, this is just for statistics. */ + unsigned long int nentries = table->head->nentries; + if (nentries > table->head->maxnentries) + table->head->maxnentries = nentries; + + return 0; } /* Walk through the table and remove all entries which lifetime ended. @@ -136,13 +175,9 @@ cache_add (int type, void *key, size_t len, const void *packet, size_t total, free the data structures since some hash table entries share the same data. */ void -prune_cache (struct database *table, time_t now) +prune_cache (struct database_dyn *table, time_t now) { - size_t cnt = table->module; - int mark[cnt]; - int anything = 0; - size_t first = cnt + 1; - size_t last = 0; + size_t cnt = table->head->module; /* If this table is not actually used don't do anything. */ if (cnt == 0) @@ -181,27 +216,112 @@ prune_cache (struct database *table, time_t now) we don't need to get any lock. It is at all timed assured that the linked lists are set up correctly and that no second thread prunes the cache. 
*/ + bool mark[cnt]; + size_t first = cnt + 1; + size_t last = 0; + char *const data = table->data; + bool any = false; + do { - struct hashentry *runp = table->array[--cnt]; + ref_t run = table->head->array[--cnt]; - mark[cnt] = 0; - - while (runp != NULL) + while (run != ENDREF) { - if (runp->timeout < now) + struct hashentry *runp = (struct hashentry *) (data + run); + struct datahead *dh = (struct datahead *) (data + runp->packet); + + /* Check whether the entry timed out. */ + if (dh->timeout < now) { - ++mark[cnt]; - anything = 1; + /* This hash bucket could contain entries which need to + be looked at. */ + mark[cnt] = true; + first = MIN (first, cnt); last = MAX (last, cnt); + + /* We only have to look at the data of the first entries + since the count information is kept in the data part + which is shared. */ + if (runp->first) + { + + /* At this point there are two choices: we reload the + value or we discard it. Do not change NRELOADS if + we never not reload the record. */ + if ((reload_count != UINT_MAX + && __builtin_expect (dh->nreloads >= reload_count, 0)) + /* We always remove negative entries. */ + || dh->notfound + /* Discard everything if the user explicitly + requests it. */ + || now == LONG_MAX) + { + /* Remove the value. */ + dh->usable = false; + + /* We definitely have some garbage entries now. */ + any = true; + } + else + { + /* Reload the value. We do this only for the + initially used key, not the additionally + added derived value. 
*/ + switch (runp->type) + { + case GETPWBYNAME: + readdpwbyname (table, runp, dh); + break; + + case GETPWBYUID: + readdpwbyuid (table, runp, dh); + break; + + case GETGRBYNAME: + readdgrbyname (table, runp, dh); + break; + + case GETGRBYGID: + readdgrbygid (table, runp, dh); + break; + + case GETHOSTBYNAME: + readdhstbyname (table, runp, dh); + break; + + case GETHOSTBYNAMEv6: + readdhstbynamev6 (table, runp, dh); + break; + + case GETHOSTBYADDR: + readdhstbyaddr (table, runp, dh); + break; + + case GETHOSTBYADDRv6: + readdhstbyaddrv6 (table, runp, dh); + break; + + default: + assert (! "should never happen"); + } + + /* If the entry has been replaced, we might need + cleanup. */ + any |= !dh->usable; + } + } } - runp = runp->next; + else + assert (dh->usable); + + run = runp->next; } } while (cnt > 0); - if (anything) + if (first <= last) { struct hashentry *head = NULL; @@ -209,47 +329,57 @@ prune_cache (struct database *table, time_t now) the table. */ if (__builtin_expect (pthread_rwlock_trywrlock (&table->lock) != 0, 0)) { - ++table->wrlockdelayed; + ++table->head->wrlockdelayed; pthread_rwlock_wrlock (&table->lock); } while (first <= last) { - if (mark[first] > 0) + if (mark[first]) { - struct hashentry *runp; + ref_t *old = &table->head->array[first]; + ref_t run = table->head->array[first]; - while (table->array[first]->timeout < now) + while (run != ENDREF) { - table->array[first]->dellist = head; - head = table->array[first]; - table->array[first] = head->next; - --table->nentries; - if (--mark[first] == 0) - break; - } + struct hashentry *runp = (struct hashentry *) (data + run); + struct datahead *dh + = (struct datahead *) (data + runp->packet); - runp = table->array[first]; - while (mark[first] > 0) - { - if (runp->next->timeout < now) + if (! 
dh->usable) { - runp->next->dellist = head; - head = runp->next; - runp->next = head->next; - --mark[first]; - --table->nentries; + /* We need the list only for debugging but it is + more costly to avoid creating the list than + doing it. */ + runp->dellist = head; + head = runp; + + /* No need for an atomic operation, we have the + write lock. */ + --table->head->nentries; + + run = *old = runp->next; } else - runp = runp->next; + { + old = &runp->next; + run = runp->next; + } } } + ++first; } /* It's all done. */ pthread_rwlock_unlock (&table->lock); + /* Make sure the data is saved to disk. */ + if (table->persistent) + msync (table->head, + table->data + table->head->first_free - (char *) table->head, + MS_ASYNC); + /* One extra pass if we do debugging. */ if (__builtin_expect (debug_level > 0, 0)) { @@ -263,33 +393,20 @@ prune_cache (struct database *table, time_t now) if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6) { inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6, - runp->key, buf, sizeof (buf)); + table->data + runp->key, buf, sizeof (buf)); str = buf; } else - str = runp->key; + str = table->data + runp->key; dbg_log ("remove %s entry \"%s\"", serv2str[runp->type], str); runp = runp->dellist; } } - - /* And another run to free the data. */ - do - { - struct hashentry *old = head; - - /* Free the data structures. */ - if (old->data == (void *) -1) - free (old->key); - else if (old->last) - free (old->data); - - head = head->dellist; - - free (old); - } - while (head != NULL); } + + /* Run garbage collection if any entry has been removed or replaced. 
*/ + if (any) + gc (table); } diff --git a/nscd/connections.c b/nscd/connections.c index 313ca0dc45..c3100816df 100644 --- a/nscd/connections.c +++ b/nscd/connections.c @@ -24,14 +24,15 @@ #include #include #include +#include #include #include #include #include #include #include -#include #include +#include #include #include #include @@ -41,6 +42,11 @@ #include "nscd.h" #include "dbg_log.h" + +/* Number of bytes of data we initially reserve for each hash table bucket. */ +#define DEFAULT_DATASIZE_PER_BUCKET 1024 + + /* Wrapper functions with error checking for standard functions. */ extern void *xmalloc (size_t n); extern void *xcalloc (size_t n, size_t s); @@ -56,25 +62,11 @@ static gid_t *server_groups; #ifndef NGROUPS # define NGROUPS 32 #endif -static int server_ngroups = NGROUPS; +static int server_ngroups; static void begin_drop_privileges (void); static void finish_drop_privileges (void); - -/* Mapping of request type to database. */ -static const dbtype serv2db[LASTDBREQ + 1] = -{ - [GETPWBYNAME] = pwddb, - [GETPWBYUID] = pwddb, - [GETGRBYNAME] = grpdb, - [GETGRBYGID] = grpdb, - [GETHOSTBYNAME] = hstdb, - [GETHOSTBYNAMEv6] = hstdb, - [GETHOSTBYADDR] = hstdb, - [GETHOSTBYADDRv6] = hstdb, -}; - /* Map request type to a string. */ const char *serv2str[LASTREQ] = { @@ -92,43 +84,71 @@ const char *serv2str[LASTREQ] = }; /* The control data structures for the services. 
*/ -struct database dbs[lastdb] = +struct database_dyn dbs[lastdb] = { [pwddb] = { .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP, .enabled = 0, .check_file = 1, + .persistent = 0, .filename = "/etc/passwd", - .module = 211, + .db_filename = _PATH_NSCD_PASSWD_DB, .disabled_iov = &pwd_iov_disabled, .postimeout = 3600, - .negtimeout = 20 + .negtimeout = 20, + .wr_fd = -1, + .ro_fd = -1, + .mmap_used = false }, [grpdb] = { .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP, .enabled = 0, .check_file = 1, + .persistent = 0, .filename = "/etc/group", - .module = 211, + .db_filename = _PATH_NSCD_GROUP_DB, .disabled_iov = &grp_iov_disabled, .postimeout = 3600, - .negtimeout = 60 + .negtimeout = 60, + .wr_fd = -1, + .ro_fd = -1, + .mmap_used = false }, [hstdb] = { .lock = PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP, .enabled = 0, .check_file = 1, + .persistent = 0, .filename = "/etc/hosts", - .module = 211, + .db_filename = _PATH_NSCD_HOSTS_DB, .disabled_iov = &hst_iov_disabled, .postimeout = 3600, - .negtimeout = 20 + .negtimeout = 20, + .wr_fd = -1, + .ro_fd = -1, + .mmap_used = false } }; + +/* Mapping of request type to database. */ +static struct database_dyn *const serv2db[LASTDBREQ + 1] = +{ + [GETPWBYNAME] = &dbs[pwddb], + [GETPWBYUID] = &dbs[pwddb], + [GETGRBYNAME] = &dbs[grpdb], + [GETGRBYGID] = &dbs[grpdb], + [GETHOSTBYNAME] = &dbs[hstdb], + [GETHOSTBYNAMEv6] = &dbs[hstdb], + [GETHOSTBYADDR] = &dbs[hstdb], + [GETHOSTBYADDRv6] = &dbs[hstdb] +}; + + /* Number of seconds between two cache pruning runs. */ #define CACHE_PRUNE_INTERVAL 15 + /* Number of threads to use. */ int nthreads = -1; @@ -138,6 +158,9 @@ static int sock; /* Number of times clients had to wait. */ unsigned long int client_queued; +/* Alignment requirement of the beginning of the data region. */ +#define ALIGN 16 + /* Initialize database information structures. 
*/ void @@ -166,13 +189,256 @@ nscd_init (void) if (dbs[cnt].enabled) { pthread_rwlock_init (&dbs[cnt].lock, NULL); + pthread_mutex_init (&dbs[cnt].memlock, NULL); - dbs[cnt].array = (struct hashentry **) - calloc (dbs[cnt].module, sizeof (struct hashentry *)); - if (dbs[cnt].array == NULL) + if (dbs[cnt].persistent) { - dbg_log (_("while allocating cache: %s"), strerror (errno)); - exit (1); + /* Try to open the appropriate file on disk. */ + int fd = open (dbs[cnt].db_filename, O_RDWR); + if (fd != -1) + { + struct stat64 st; + void *mem; + size_t total; + struct database_pers_head head; + ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head, + sizeof (head))); + if (n != sizeof (head) || fstat64 (fd, &st) != 0) + { + fail_db: + dbg_log (_("invalid persistent database file \"%s\": %s"), + dbs[cnt].db_filename, strerror (errno)); + dbs[cnt].persistent = 0; + } + else if (head.module == 0 && head.data_size == 0) + { + /* The file has been created, but the head has not been + initialized yet. Remove the old file. */ + unlink (dbs[cnt].db_filename); + } + else if (head.header_size != (int) sizeof (head)) + { + dbg_log (_("invalid persistent database file \"%s\": %s"), + dbs[cnt].db_filename, + _("header size does not match")); + dbs[cnt].persistent = 0; + } + else if ((total = (sizeof (head) + + roundup (head.module + * sizeof (struct hashentry), + ALIGN) + + head.data_size)) + < st.st_size) + { + dbg_log (_("invalid persistent database file \"%s\": %s"), + dbs[cnt].db_filename, + _("file size does not match")); + dbs[cnt].persistent = 0; + } + else if ((mem = mmap (NULL, total, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0)) == MAP_FAILED) + goto fail_db; + else + { + /* Success. We have the database. 
*/ + dbs[cnt].head = mem; + dbs[cnt].memsize = total; + dbs[cnt].data = (char *) + &dbs[cnt].head->array[roundup (dbs[cnt].head->module, + ALIGN / sizeof (ref_t))]; + dbs[cnt].mmap_used = true; + + if (dbs[cnt].suggested_module > head.module) + dbg_log (_("suggested size of table for database %s larger than the persistent database's table"), + dbnames[cnt]); + + dbs[cnt].wr_fd = fd; + fd = -1; + /* We also need a read-only descriptor. */ + dbs[cnt].ro_fd = open (dbs[cnt].db_filename, O_RDONLY); + if (dbs[cnt].ro_fd == -1) + dbg_log (_("\ +cannot create read-only descriptor for \"%s\"; no mmap"), + dbs[cnt].db_filename); + + // XXX Shall we test whether the descriptors actually + // XXX point to the same file? + } + + /* Close the file descriptors in case something went + wrong in which case the variable have not been + assigned -1. */ + if (fd != -1) + close (fd); + } + } + + if (dbs[cnt].head == NULL) + { + /* No database loaded. Allocate the data structure, + possibly on disk. */ + struct database_pers_head head; + size_t total = (sizeof (head) + + roundup (dbs[cnt].suggested_module + * sizeof (ref_t), ALIGN) + + (dbs[cnt].suggested_module + * DEFAULT_DATASIZE_PER_BUCKET)); + + /* Try to create the database. If we do not need a + persistent database create a temporary file. */ + int fd; + int ro_fd = -1; + if (dbs[cnt].persistent) + { + fd = open (dbs[cnt].db_filename, + O_RDWR | O_CREAT | O_EXCL | O_TRUNC, + S_IRUSR | S_IWUSR); + if (fd != -1) + ro_fd = open (dbs[cnt].db_filename, O_RDONLY); + } + else + { + size_t slen = strlen (dbs[cnt].db_filename); + char fname[slen + 8]; + strcpy (mempcpy (fname, dbs[cnt].db_filename, slen), + ".XXXXXX"); + fd = mkstemp (fname); + + /* We do not need the file name anymore after we + opened another file descriptor in read-only mode. 
*/ + if (fd != -1) + { + ro_fd = open (fname, O_RDONLY); + + unlink (fname); + } + } + + if (fd == -1) + { + if (errno == EEXIST) + { + dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"), + dbnames[cnt], dbs[cnt].db_filename); + // XXX Correct way to terminate? + exit (1); + } + + if (dbs[cnt].persistent) + dbg_log (_("cannot create %s; no persistent database used"), + dbs[cnt].db_filename); + else + dbg_log (_("cannot create %s; no sharing possible"), + dbs[cnt].db_filename); + + dbs[cnt].persistent = 0; + // XXX remember: no mmap + } + else + { + /* Tell the user if we could not create the read-only + descriptor. */ + if (ro_fd == -1) + dbg_log (_("\ +cannot create read-only descriptor for \"%s\"; no mmap"), + dbs[cnt].db_filename); + + /* Before we create the header, initialiye the hash + table. So that if we get interrupted if writing + the header we can recognize a partially initialized + database. */ + size_t ps = sysconf (_SC_PAGESIZE); + char tmpbuf[ps]; + assert (~ENDREF == 0); + memset (tmpbuf, '\xff', ps); + + size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t); + off_t offset = sizeof (head); + + size_t towrite; + if (offset % ps != 0) + { + towrite = MIN (remaining, ps - (offset % ps)); + pwrite (fd, tmpbuf, towrite, offset); + offset += towrite; + remaining -= towrite; + } + + while (remaining > ps) + { + pwrite (fd, tmpbuf, ps, offset); + offset += ps; + remaining -= ps; + } + + if (remaining > 0) + pwrite (fd, tmpbuf, remaining, offset); + + /* Create the header of the file. 
*/ + struct database_pers_head head = + { + .version = DB_VERSION, + .header_size = sizeof (head), + .module = dbs[cnt].suggested_module, + .data_size = (dbs[cnt].suggested_module + * DEFAULT_DATASIZE_PER_BUCKET), + .first_free = 0 + }; + void *mem; + + if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head))) + != sizeof (head)) + || ftruncate (fd, total) != 0 + || (mem = mmap (NULL, total, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0)) == MAP_FAILED) + { + unlink (dbs[cnt].db_filename); + dbg_log (_("cannot write to database file %s: %s"), + dbs[cnt].db_filename, strerror (errno)); + dbs[cnt].persistent = 0; + } + else + { + /* Success. */ + dbs[cnt].head = mem; + dbs[cnt].data = (char *) + &dbs[cnt].head->array[roundup (dbs[cnt].head->module, + ALIGN / sizeof (ref_t))]; + dbs[cnt].memsize = total; + dbs[cnt].mmap_used = true; + + /* Remember the descriptors. */ + dbs[cnt].wr_fd = fd; + dbs[cnt].ro_fd = ro_fd; + fd = -1; + ro_fd = -1; + } + + if (fd != -1) + close (fd); + if (ro_fd != -1) + close (ro_fd); + } + } + + if (dbs[cnt].head == NULL) + { + /* We do not use the persistent database. Just + create an in-memory data structure. */ + assert (! dbs[cnt].persistent); + + dbs[cnt].head = xmalloc (sizeof (struct database_pers_head) + + (dbs[cnt].suggested_module + * sizeof (ref_t))); + memset (dbs[cnt].head, '\0', sizeof (dbs[cnt].head)); + assert (~ENDREF == 0); + memset (dbs[cnt].head->array, '\xff', + dbs[cnt].suggested_module * sizeof (ref_t)); + dbs[cnt].head->module = dbs[cnt].suggested_module; + dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET + * dbs[cnt].head->module); + dbs[cnt].data = xmalloc (dbs[cnt].head->data_size); + dbs[cnt].head->first_free = 0; } if (dbs[cnt].check_file) @@ -215,7 +481,7 @@ nscd_init (void) fcntl (sock, F_SETFL, fl | O_NONBLOCK); /* Set permissions for the socket. */ - chmod (_PATH_NSCDSOCKET, 0666); + chmod (_PATH_NSCDSOCKET, DEFFILEMODE); /* Set the socket up to accept connections. 
*/ if (listen (sock, SOMAXCONN) < 0) @@ -276,12 +542,11 @@ cannot handle old request version %d; current version is %d"), return; } + struct database_dyn *db = serv2db[req->type]; + if (__builtin_expect (req->type, GETPWBYNAME) >= GETPWBYNAME && __builtin_expect (req->type, LASTDBREQ) <= LASTDBREQ) { - struct hashentry *cached; - struct database *db = &dbs[serv2db[req->type]]; - if (__builtin_expect (debug_level, 0) > 0) { if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6) @@ -294,7 +559,7 @@ cannot handle old request version %d; current version is %d"), key, buf, sizeof (buf))); } else - dbg_log ("\t%s (%s)", serv2str[req->type], (char *)key); + dbg_log ("\t%s (%s)", serv2str[req->type], (char *) key); } /* Is this service enabled? */ @@ -318,18 +583,19 @@ cannot handle old request version %d; current version is %d"), /* Be sure we can read the data. */ if (__builtin_expect (pthread_rwlock_tryrdlock (&db->lock) != 0, 0)) { - ++db->rdlockdelayed; + ++db->head->rdlockdelayed; pthread_rwlock_rdlock (&db->lock); } /* See whether we can handle it from the cache. */ - cached = (struct hashentry *) cache_search (req->type, key, req->key_len, - db, uid); + struct datahead *cached; + cached = (struct datahead *) cache_search (req->type, key, req->key_len, + db, uid); if (cached != NULL) { /* Hurray it's in the cache. */ - if (TEMP_FAILURE_RETRY (write (fd, cached->packet, cached->total)) - != cached->total + if (TEMP_FAILURE_RETRY (write (fd, cached->data, cached->recsize)) + != cached->recsize && __builtin_expect (debug_level, 0) > 0) { /* We have problems sending the result. 
*/ @@ -349,45 +615,43 @@ cannot handle old request version %d; current version is %d"), { if (req->type == INVALIDATE) dbg_log ("\t%s (%s)", serv2str[req->type], (char *)key); - else if (req->type > LASTDBREQ && req->type < LASTREQ) - dbg_log ("\t%s", serv2str[req->type]); else - dbg_log (_("\tinvalid request type %d"), req->type); + dbg_log ("\t%s", serv2str[req->type]); } /* Handle the request. */ switch (req->type) { case GETPWBYNAME: - addpwbyname (&dbs[serv2db[req->type]], fd, req, key, uid); + addpwbyname (db, fd, req, key, uid); break; case GETPWBYUID: - addpwbyuid (&dbs[serv2db[req->type]], fd, req, key, uid); + addpwbyuid (db, fd, req, key, uid); break; case GETGRBYNAME: - addgrbyname (&dbs[serv2db[req->type]], fd, req, key, uid); + addgrbyname (db, fd, req, key, uid); break; case GETGRBYGID: - addgrbygid (&dbs[serv2db[req->type]], fd, req, key, uid); + addgrbygid (db, fd, req, key, uid); break; case GETHOSTBYNAME: - addhstbyname (&dbs[serv2db[req->type]], fd, req, key, uid); + addhstbyname (db, fd, req, key, uid); break; case GETHOSTBYNAMEv6: - addhstbynamev6 (&dbs[serv2db[req->type]], fd, req, key, uid); + addhstbynamev6 (db, fd, req, key, uid); break; case GETHOSTBYADDR: - addhstbyaddr (&dbs[serv2db[req->type]], fd, req, key, uid); + addhstbyaddr (db, fd, req, key, uid); break; case GETHOSTBYADDRv6: - addhstbyaddrv6 (&dbs[serv2db[req->type]], fd, req, key, uid); + addhstbyaddrv6 (db, fd, req, key, uid); break; case GETSTAT: @@ -484,6 +748,7 @@ nscd_run (void *p) prune_cache (&dbs[my_number], time(NULL)); now = time (NULL); next_prune = now + CACHE_PRUNE_INTERVAL; + goto try_get; } } @@ -538,7 +803,7 @@ nscd_run (void *p) } if (req.type < GETPWBYNAME || req.type > LASTDBREQ - || secure[serv2db[req.type]]) + || serv2db[req.type]->secure) uid = caller.uid; pid = caller.pid; @@ -646,9 +911,7 @@ start_threads (void) static void begin_drop_privileges (void) { - struct passwd *pwd; - - pwd = getpwnam (server_user); + struct passwd *pwd = getpwnam 
(server_user); if (pwd == NULL) { @@ -660,15 +923,15 @@ begin_drop_privileges (void) server_uid = pwd->pw_uid; server_gid = pwd->pw_gid; + if (getgrouplist (server_user, server_gid, NULL, &server_ngroups) == 0) + { + /* This really must never happen. */ + dbg_log (_("Failed to run nscd as user '%s'"), server_user); + error (EXIT_FAILURE, errno, _("initial getgrouplist failed")); + } + server_groups = (gid_t *) xmalloc (server_ngroups * sizeof (gid_t)); - if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups) - == 0) - return; - - server_groups = (gid_t *) xrealloc (server_groups, - server_ngroups * sizeof (gid_t)); - if (getgrouplist (server_user, server_gid, server_groups, &server_ngroups) == -1) { diff --git a/nscd/grpcache.c b/nscd/grpcache.c index 8f5238fe75..6ee65f8e4a 100644 --- a/nscd/grpcache.c +++ b/nscd/grpcache.c @@ -19,9 +19,11 @@ 02111-1307 USA. */ #include +#include #include #include #include +#include #include #include #include @@ -29,7 +31,7 @@ #include #include #include -#include +#include #include #include "nscd.h" @@ -66,51 +68,87 @@ static const gr_response_header notfound = }; -struct groupdata -{ - gr_response_header resp; - char strdata[0]; -}; - - static void -cache_addgr (struct database *db, int fd, request_header *req, void *key, - struct group *grp, uid_t owner, int type) +cache_addgr (struct database_dyn *db, int fd, request_header *req, + const void *key, struct group *grp, uid_t owner, + struct hashentry *he, struct datahead *dh, int errval) { ssize_t total; ssize_t written; time_t t = time (NULL); + /* We allocate all data in one memory block: the iov vector, + the response header and the dataset itself. */ + struct dataset + { + struct datahead head; + gr_response_header resp; + char strdata[0]; + } *dataset; + + assert (offsetof (struct dataset, resp) == offsetof (struct datahead, data)); + if (grp == NULL) { - /* We have no data. This means we send the standard reply for this - case. 
*/ - total = sizeof (notfound); - - written = TEMP_FAILURE_RETRY (write (fd, ¬found, total)); - - void *copy = malloc (req->key_len); - /* If we cannot allocate memory simply do not cache the information. */ - if (copy != NULL) + if (he != NULL && errval == EAGAIN) { - memcpy (copy, key, req->key_len); + /* If we have an old record available but cannot find one + now because the service is not available we keep the old + record and make sure it does not get removed. */ + if (reload_count != UINT_MAX) + /* Do not reset the value if we never not reload the record. */ + dh->nreloads = reload_count - 1; - /* Compute the timeout time. */ - t += db->negtimeout; + written = total = 0; + } + else + { + /* We have no data. This means we send the standard reply for this + case. */ + total = sizeof (notfound); - /* Now get the lock to safely insert the records. */ - pthread_rwlock_rdlock (&db->lock); + written = TEMP_FAILURE_RETRY (write (fd, ¬found, total)); - cache_add (req->type, copy, req->key_len, ¬found, - sizeof (notfound), (void *) -1, 0, t, db, owner); + dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len); + /* If we cannot permanently store the result, so be it. */ + if (dataset != NULL) + { + dataset->head.allocsize = sizeof (struct dataset) + req->key_len; + dataset->head.recsize = total; + dataset->head.notfound = true; + dataset->head.nreloads = 0; + dataset->head.usable = true; - pthread_rwlock_unlock (&db->lock); + /* Compute the timeout time. */ + dataset->head.timeout = t + db->negtimeout; + + /* This is the reply. */ + memcpy (&dataset->resp, ¬found, total); + + /* Copy the key data. */ + memcpy (dataset->strdata, key, req->key_len); + + /* Now get the lock to safely insert the records. */ + pthread_rwlock_rdlock (&db->lock); + + if (cache_add (req->type, &dataset->strdata, req->key_len, + &dataset->head, true, db, owner) < 0) + /* Ensure the data can be recovered. 
*/ + dataset->head.usable = false; + + pthread_rwlock_unlock (&db->lock); + + /* Mark the old entry as obsolete. */ + if (dh != NULL) + dh->usable = false; + } + else + ++db->head->addfailed; } } else { /* Determine the I/O structure. */ - struct groupdata *data; size_t gr_name_len = strlen (grp->gr_name) + 1; size_t gr_passwd_len = strlen (grp->gr_passwd) + 1; size_t gr_mem_cnt = 0; @@ -118,12 +156,16 @@ cache_addgr (struct database *db, int fd, request_header *req, void *key, size_t gr_mem_len_total = 0; char *gr_name; char *cp; - char buf[12]; + const size_t key_len = strlen (key); + const size_t buf_len = 3 + sizeof (grp->gr_gid) + key_len + 1; + char *buf = alloca (buf_len); ssize_t n; size_t cnt; /* We need this to insert the `bygid' entry. */ - n = snprintf (buf, sizeof (buf), "%d", grp->gr_gid) + 1; + int key_offset; + n = snprintf (buf, buf_len, "%d%c%n%s", grp->gr_gid, '\0', + &key_offset, (char *) key) + 1; /* Determine the length of all members. */ while (grp->gr_mem[gr_mem_cnt]) @@ -135,24 +177,52 @@ cache_addgr (struct database *db, int fd, request_header *req, void *key, gr_mem_len_total += gr_mem_len[gr_mem_cnt]; } - /* We allocate all data in one memory block: the iov vector, - the response header and the dataset itself. */ - total = (sizeof (struct groupdata) - + gr_mem_cnt * sizeof (uint32_t) - + gr_name_len + gr_passwd_len + gr_mem_len_total); - data = (struct groupdata *) malloc (total + n + req->key_len); - if (data == NULL) - /* There is no reason to go on. */ - error (EXIT_FAILURE, errno, _("while allocating cache entry")); + written = total = (sizeof (struct dataset) + + gr_mem_cnt * sizeof (uint32_t) + + gr_name_len + gr_passwd_len + gr_mem_len_total); - data->resp.version = NSCD_VERSION; - data->resp.found = 1; - data->resp.gr_name_len = gr_name_len; - data->resp.gr_passwd_len = gr_passwd_len; - data->resp.gr_gid = grp->gr_gid; - data->resp.gr_mem_cnt = gr_mem_cnt; + /* If we refill the cache, first assume the reconrd did not + change. 
Allocate memory on the cache since it is likely + discarded anyway. If it turns out to be necessary to have a + new record we can still allocate real memory. */ + bool alloca_used = false; + dataset = NULL; - cp = data->strdata; + if (he == NULL) + { + dataset = (struct dataset *) mempool_alloc (db, total + n); + if (dataset == NULL) + ++db->head->addfailed; + } + + if (dataset == NULL) + { + /* We cannot permanently add the result in the moment. But + we can provide the result as is. Store the data in some + temporary memory. */ + dataset = (struct dataset *) alloca (total + n); + + /* We cannot add this record to the permanent database. */ + alloca_used = true; + } + + dataset->head.allocsize = total + n; + dataset->head.recsize = total - offsetof (struct dataset, resp); + dataset->head.notfound = false; + dataset->head.nreloads = he == NULL ? 0 : (dh->nreloads + 1); + dataset->head.usable = true; + + /* Compute the timeout time. */ + dataset->head.timeout = t + db->postimeout; + + dataset->resp.version = NSCD_VERSION; + dataset->resp.found = 1; + dataset->resp.gr_name_len = gr_name_len; + dataset->resp.gr_passwd_len = gr_passwd_len; + dataset->resp.gr_gid = grp->gr_gid; + dataset->resp.gr_mem_cnt = gr_mem_cnt; + + cp = dataset->strdata; /* This is the member string length array. */ cp = mempcpy (cp, gr_mem_len, gr_mem_cnt * sizeof (uint32_t)); @@ -163,33 +233,120 @@ cache_addgr (struct database *db, int fd, request_header *req, void *key, for (cnt = 0; cnt < gr_mem_cnt; ++cnt) cp = mempcpy (cp, grp->gr_mem[cnt], gr_mem_len[cnt]); - /* Next the stringified GID value. */ + /* Finally the stringified GID value. */ memcpy (cp, buf, n); + char *key_copy = cp + key_offset; + assert (key_copy == (char *) rawmemchr (cp, '\0') + 1); - /* Copy of the key in case it differs. */ - char *key_copy = memcpy (cp + n, key, req->key_len); + /* Now we can determine whether on refill we have to create a new + record or not. 
*/ + if (he != NULL) + { + assert (fd == -1); - /* Write the result. */ - written = TEMP_FAILURE_RETRY (write (fd, &data->resp, total)); + if (total + n == dh->allocsize + && total - offsetof (struct dataset, resp) == dh->recsize + && memcmp (&dataset->resp, dh->data, + dh->allocsize - offsetof (struct dataset, resp)) == 0) + { + /* The data has not changed. We will just bump the + timeout value. Note that the new record has been + allocated on the stack and need not be freed. */ + dh->timeout = dataset->head.timeout; + ++dh->nreloads; + } + else + { + /* We have to create a new record. Just allocate + appropriate memory and copy it. */ + struct dataset *newp + = (struct dataset *) mempool_alloc (db, total + n); + if (newp != NULL) + { + /* Adjust pointers into the memory block. */ + gr_name = (char *) newp + (gr_name - (char *) dataset); + cp = (char *) newp + (cp - (char *) dataset); - /* Compute the timeout time. */ - t += db->postimeout; + dataset = memcpy (newp, dataset, total + n); + alloca_used = false; + } - /* Now get the lock to safely insert the records. */ - pthread_rwlock_rdlock (&db->lock); + /* Mark the old record as obsolete. */ + dh->usable = false; + } + } + else + { + /* We write the dataset before inserting it to the database + since while inserting this thread might block and so would + unnecessarily let the receiver wait. */ + assert (fd != -1); - /* We have to add the value for both, byname and byuid. */ - cache_add (GETGRBYNAME, gr_name, gr_name_len, data, - total, data, 0, t, db, owner); + written = TEMP_FAILURE_RETRY (write (fd, &dataset->resp, total)); + } - /* If the key is different from the name add a separate entry. */ - if (type == GETGRBYNAME && strcmp (key_copy, gr_name) != 0) - cache_add (GETGRBYNAME, key_copy, req->key_len, data, - total, data, 0, t, db, owner); + /* Add the record to the database. But only if it has not been + stored on the stack. */ + if (! alloca_used) + { + /* If necessary, we also propagate the data to disk. 
*/ + if (db->persistent) + // XXX async OK? + msync (dataset, total + n, MS_ASYNC); - cache_add (GETGRBYGID, cp, n, data, total, data, 1, t, db, owner); + /* Now get the lock to safely insert the records. */ + pthread_rwlock_rdlock (&db->lock); - pthread_rwlock_unlock (&db->lock); + /* NB: in the following code we always must add the entry + marked with FIRST first. Otherwise we end up with + dangling "pointers" in case a latter hash entry cannot be + added. */ + bool first = req->type == GETGRBYNAME; + + /* If the request was by GID, add that entry first. */ + if (req->type != GETGRBYNAME) + { + if (cache_add (GETGRBYGID, cp, n, &dataset->head, true, db, + owner) < 0) + { + /* Could not allocate memory. Make sure the data gets + discarded. */ + dataset->head.usable = false; + goto out; + } + } + /* If the key is different from the name add a separate entry. */ + else if (strcmp (key_copy, gr_name) != 0) + { + if (cache_add (GETGRBYNAME, key_copy, key_len + 1, + &dataset->head, first, db, owner) < 0) + { + /* Could not allocate memory. Make sure the data gets + discarded. */ + dataset->head.usable = false; + goto out; + } + + first = false; + } + + /* We have to add the value for both, byname and byuid. */ + if (__builtin_expect (cache_add (GETGRBYNAME, gr_name, gr_name_len, + &dataset->head, first, db, owner) + == 0, 1)) + { + if (req->type == GETGRBYNAME) + (void) cache_add (GETGRBYGID, cp, n, &dataset->head, + req->type != GETGRBYNAME, db, owner); + } + else if (first) + /* Could not allocate memory. Make sure the data gets + discarded. 
*/ + dataset->head.usable = false; + + out: + pthread_rwlock_unlock (&db->lock); + } } if (__builtin_expect (written != total, 0) && debug_level > 0) @@ -201,32 +358,57 @@ cache_addgr (struct database *db, int fd, request_header *req, void *key, } -void -addgrbyname (struct database *db, int fd, request_header *req, - void *key, uid_t uid) +union keytype +{ + void *v; + gid_t g; +}; + + +static int +lookup (int type, union keytype key, struct group *resultbufp, char *buffer, + size_t buflen, struct group **grp) +{ + if (type == GETGRBYNAME) + return __getgrnam_r (key.v, resultbufp, buffer, buflen, grp); + else + return __getgrgid_r (key.g, resultbufp, buffer, buflen, grp); +} + + +static void +addgrbyX (struct database_dyn *db, int fd, request_header *req, + union keytype key, const char *keystr, uid_t uid, + struct hashentry *he, struct datahead *dh) { /* Search for the entry matching the key. Please note that we don't look again in the table whether the dataset is now available. We simply insert it. It does not matter if it is in there twice. The pruning function only will look at the timestamp. */ - int buflen = 1024; + size_t buflen = 1024; char *buffer = (char *) alloca (buflen); struct group resultbuf; struct group *grp; uid_t oldeuid = 0; bool use_malloc = false; + int errval = 0; if (__builtin_expect (debug_level > 0, 0)) - dbg_log (_("Haven't found \"%s\" in group cache!"), (char *) key); + { + if (he == NULL) + dbg_log (_("Haven't found \"%s\" in group cache!"), keystr); + else + dbg_log (_("Reloading \"%s\" in group cache!"), keystr); + } - if (secure[grpdb]) + if (db->secure) { oldeuid = geteuid (); seteuid (uid); } - while (__getgrnam_r (key, &resultbuf, buffer, buflen, &grp) != 0 - && errno == ERANGE) + while (lookup (req->type, key, &resultbuf, buffer, buflen, &grp) != 0 + && (errval = errno) == ERANGE) { char *old_buffer = buffer; errno = 0; @@ -243,6 +425,11 @@ addgrbyname (struct database *db, int fd, request_header *req, never happen. 
*/ grp = NULL; buffer = old_buffer; + + /* We set the error to indicate this is (possibly) a + temporary error and that it does not mean the entry + is not available at all. */ + errval = EAGAIN; break; } use_malloc = true; @@ -253,10 +440,10 @@ addgrbyname (struct database *db, int fd, request_header *req, buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR); } - if (secure[grpdb]) + if (db->secure) seteuid (oldeuid); - cache_addgr (db, fd, req, key, grp, uid, GETGRBYNAME); + cache_addgr (db, fd, req, keystr, grp, uid, he, dh, errval); if (use_malloc) free (buffer); @@ -264,23 +451,38 @@ addgrbyname (struct database *db, int fd, request_header *req, void -addgrbygid (struct database *db, int fd, request_header *req, +addgrbyname (struct database_dyn *db, int fd, request_header *req, + void *key, uid_t uid) +{ + union keytype u = { .v = key }; + + addgrbyX (db, fd, req, u, key, uid, NULL, NULL); +} + + +void +readdgrbyname (struct database_dyn *db, struct hashentry *he, + struct datahead *dh) +{ + request_header req = + { + .type = GETGRBYNAME, + .key_len = he->len + }; + union keytype u = { .v = db->data + he->key }; + + addgrbyX (db, -1, &req, u, db->data + he->key, he->owner, he, dh); +} + + +void +addgrbygid (struct database_dyn *db, int fd, request_header *req, void *key, uid_t uid) { - /* Search for the entry matching the key. Please note that we don't - look again in the table whether the dataset is now available. We - simply insert it. It does not matter if it is in there twice. The - pruning function only will look at the timestamp. 
*/ - int buflen = 1024; - char *buffer = (char *) alloca (buflen); - struct group resultbuf; - struct group *grp; - uid_t oldeuid = 0; char *ep; - gid_t gid = strtoul ((char *)key, &ep, 10); - bool use_malloc = false; + gid_t gid = strtoul ((char *) key, &ep, 10); - if (*(char *) key == '\0' || *ep != '\0') /* invalid numeric gid */ + if (*(char *) key == '\0' || *ep != '\0') /* invalid numeric uid */ { if (debug_level > 0) dbg_log (_("Invalid numeric gid \"%s\"!"), (char *) key); @@ -289,47 +491,28 @@ addgrbygid (struct database *db, int fd, request_header *req, return; } - if (__builtin_expect (debug_level > 0, 0)) - dbg_log (_("Haven't found \"%d\" in group cache!"), gid); + union keytype u = { .g = gid }; - if (secure[grpdb]) - { - oldeuid = geteuid (); - seteuid (uid); - } - - while (__getgrgid_r (gid, &resultbuf, buffer, buflen, &grp) != 0 - && errno == ERANGE) - { - char *old_buffer = buffer; - errno = 0; - - if (__builtin_expect (buflen > 32768, 0)) - { - buflen += INCR; - buffer = (char *) realloc (use_malloc ? buffer : NULL, buflen); - if (buffer == NULL) - { - /* We ran out of memory. We cannot do anything but - sending a negative response. In reality this should - never happen. */ - grp = NULL; - buffer = old_buffer; - break; - } - use_malloc = true; - } - else - /* Allocate a new buffer on the stack. If possible combine it - with the previously allocated buffer. */ - buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR); - } - - if (secure[grpdb]) - seteuid (oldeuid); - - cache_addgr (db, fd, req, key, grp, uid, GETGRBYGID); - - if (use_malloc) - free (buffer); + addgrbyX (db, fd, req, u, key, uid, NULL, NULL); +} + + +void +readdgrbygid (struct database_dyn *db, struct hashentry *he, + struct datahead *dh) +{ + char *ep; + gid_t gid = strtoul (db->data + he->key, &ep, 10); + + /* Since the key has been added before it must be OK. 
*/ + assert (*(db->data + he->key) != '\0' && *ep == '\0'); + + request_header req = + { + .type = GETGRBYGID, + .key_len = he->len + }; + union keytype u = { .g = gid }; + + addgrbyX (db, -1, &req, u, db->data + he->key, he->owner, he, dh); } diff --git a/nscd/hstcache.c b/nscd/hstcache.c index 44b76aa7c0..5a536b3346 100644 --- a/nscd/hstcache.c +++ b/nscd/hstcache.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -30,9 +31,9 @@ #include #include #include -#include #include #include +#include #include #include "nscd.h" @@ -74,51 +75,88 @@ static const hst_response_header notfound = }; -struct hostdata -{ - hst_response_header resp; - char strdata[0]; -}; - - static void -cache_addhst (struct database *db, int fd, request_header *req, void *key, - struct hostent *hst, uid_t owner, int add_addr) +cache_addhst (struct database_dyn *db, int fd, request_header *req, + const void *key, struct hostent *hst, uid_t owner, int add_addr, + struct hashentry *he, struct datahead *dh, int errval) { ssize_t total; ssize_t written; time_t t = time (NULL); + /* We allocate all data in one memory block: the iov vector, + the response header and the dataset itself. */ + struct dataset + { + struct datahead head; + hst_response_header resp; + char strdata[0]; + } *dataset; + + assert (offsetof (struct dataset, resp) == offsetof (struct datahead, data)); + if (hst == NULL) { - /* We have no data. This means we send the standard reply for this - case. */ - total = sizeof (notfound); - - written = TEMP_FAILURE_RETRY (write (fd, ¬found, total)); - - void *copy = malloc (req->key_len); - /* If we cannot allocate memory simply do not cache the information. */ - if (copy != NULL) + if (he != NULL && errval == EAGAIN) { - memcpy (copy, key, req->key_len); + /* If we have an old record available but cannot find one + now because the service is not available we keep the old + record and make sure it does not get removed. 
*/ + if (reload_count != UINT_MAX) + /* Do not reset the value if we never not reload the record. */ + dh->nreloads = reload_count - 1; - /* Compute the timeout time. */ - t += db->negtimeout; + written = total = 0; + } + else + { + /* We have no data. This means we send the standard reply for this + case. */ + written = total = sizeof (notfound); - /* Now get the lock to safely insert the records. */ - pthread_rwlock_rdlock (&db->lock); + if (fd != -1) + written = TEMP_FAILURE_RETRY (write (fd, ¬found, total)); - cache_add (req->type, copy, req->key_len, ¬found, - sizeof (notfound), (void *) -1, 0, t, db, owner); + dataset = mempool_alloc (db, sizeof (struct dataset) + req->key_len); + /* If we cannot permanently store the result, so be it. */ + if (dataset != NULL) + { + dataset->head.allocsize = sizeof (struct dataset) + req->key_len; + dataset->head.recsize = total; + dataset->head.notfound = true; + dataset->head.nreloads = 0; + dataset->head.usable = true; - pthread_rwlock_unlock (&db->lock); + /* Compute the timeout time. */ + dataset->head.timeout = t + db->negtimeout; + + /* This is the reply. */ + memcpy (&dataset->resp, ¬found, total); + + /* Copy the key data. */ + memcpy (dataset->strdata, key, req->key_len); + + /* Now get the lock to safely insert the records. */ + pthread_rwlock_rdlock (&db->lock); + + if (cache_add (req->type, &dataset->strdata, req->key_len, + &dataset->head, true, db, owner) < 0) + /* Ensure the data can be recovered. */ + dataset->head.usable = false; + + pthread_rwlock_unlock (&db->lock); + + /* Mark the old entry as obsolete. */ + if (dh != NULL) + dh->usable = false; + } + else + ++db->head->addfailed; } } else { /* Determine the I/O structure. 
*/ - struct hostdata *data; size_t h_name_len = strlen (hst->h_name) + 1; size_t h_aliases_cnt; uint32_t *h_aliases_len; @@ -148,28 +186,66 @@ cache_addhst (struct database *db, int fd, request_header *req, void *key, for (cnt = 0; hst->h_addr_list[cnt]; ++cnt) ++h_addr_list_cnt; - /* We allocate all data in one memory block: the iov vector, - the response header and the dataset itself. */ - total += (sizeof (struct hostdata) + if (h_addr_list_cnt == 0) + /* Invalid entry. */ + return; + + total += (sizeof (struct dataset) + h_name_len + h_aliases_cnt * sizeof (uint32_t) + h_addr_list_cnt * hst->h_length); + written = total; - data = (struct hostdata *) malloc (total + req->key_len); - if (data == NULL) - /* There is no reason to go on. */ - error (EXIT_FAILURE, errno, _("while allocating cache entry")); + /* If we refill the cache, first assume the reconrd did not + change. Allocate memory on the cache since it is likely + discarded anyway. If it turns out to be necessary to have a + new record we can still allocate real memory. */ + bool alloca_used = false; + dataset = NULL; - data->resp.version = NSCD_VERSION; - data->resp.found = 1; - data->resp.h_name_len = h_name_len; - data->resp.h_aliases_cnt = h_aliases_cnt; - data->resp.h_addrtype = hst->h_addrtype; - data->resp.h_length = hst->h_length; - data->resp.h_addr_list_cnt = h_addr_list_cnt; - data->resp.error = NETDB_SUCCESS; + /* If the record contains more than one IP address (used for + load balancing etc) don't cache the entry. This is something + the current cache handling cannot handle and it is more than + questionable whether it is worthwhile complicating the cache + handling just for handling such a special case. 
*/ + if (he == NULL && (add_addr || hst->h_addr_list[1] == NULL)) + { + dataset = (struct dataset *) mempool_alloc (db, + total + req->key_len); + if (dataset == NULL) + ++db->head->addfailed; + } - cp = data->strdata; + if (dataset == NULL) + { + /* We cannot permanently add the result in the moment. But + we can provide the result as is. Store the data in some + temporary memory. */ + dataset = (struct dataset *) alloca (total + req->key_len); + + /* We cannot add this record to the permanent database. */ + alloca_used = true; + } + + dataset->head.allocsize = total + req->key_len; + dataset->head.recsize = total - offsetof (struct dataset, resp); + dataset->head.notfound = false; + dataset->head.nreloads = he == NULL ? 0 : (dh->nreloads + 1); + dataset->head.usable = true; + + /* Compute the timeout time. */ + dataset->head.timeout = t + db->postimeout; + + dataset->resp.version = NSCD_VERSION; + dataset->resp.found = 1; + dataset->resp.h_name_len = h_name_len; + dataset->resp.h_aliases_cnt = h_aliases_cnt; + dataset->resp.h_addrtype = hst->h_addrtype; + dataset->resp.h_length = hst->h_length; + dataset->resp.h_addr_list_cnt = h_addr_list_cnt; + dataset->resp.error = NETDB_SUCCESS; + + cp = dataset->strdata; cp = mempcpy (cp, hst->h_name, h_name_len); cp = mempcpy (cp, h_aliases_len, h_aliases_cnt * sizeof (uint32_t)); @@ -184,7 +260,9 @@ cache_addhst (struct database *db, int fd, request_header *req, void *key, for (cnt = 0; cnt < h_aliases_cnt; ++cnt) cp = mempcpy (cp, hst->h_aliases[cnt], h_aliases_len[cnt]); - assert (cp == data->strdata + total - sizeof (hst_response_header)); + assert (cp + == dataset->strdata + total - offsetof (struct dataset, + strdata)); /* If we are adding a GETHOSTBYNAME{,v6} entry we must be prepared that the answer we get from the NSS does not contain the key @@ -193,90 +271,221 @@ cache_addhst (struct database *db, int fd, request_header *req, void *key, we explicitly add the name here. 
*/ if (req->type == GETHOSTBYNAME || req->type == GETHOSTBYNAMEv6) key_copy = memcpy (cp, key, req->key_len); + else + memset (cp, '\0', req->key_len); - /* We write the dataset before inserting it to the database - since while inserting this thread might block and so would - unnecessarily let the receiver wait. */ - written = TEMP_FAILURE_RETRY (write (fd, data, total)); - - /* If the record contains more than one IP address (used for - load balancing etc) don't cache the entry. This is something - the current cache handling cannot handle and it is more than - questionable whether it is worthwhile complicating the cache - handling just for handling such a special case. */ - if (!add_addr && hst->h_addr_list[1] != NULL) + /* Now we can determine whether on refill we have to create a new + record or not. */ + if (he != NULL) { - free (data); - return; + assert (fd == -1); + + if (total + req->key_len == dh->allocsize + && total - offsetof (struct dataset, resp) == dh->recsize + && memcmp (&dataset->resp, dh->data, + dh->allocsize - offsetof (struct dataset, resp)) == 0) + { + /* The sata has not changed. We will just bump the + timeout value. Note that the new record has been + allocated on the stack and need not be freed. */ + dh->timeout = dataset->head.timeout; + ++dh->nreloads; + } + else + { + /* We have to create a new record. Just allocate + appropriate memory and copy it. */ + struct dataset *newp + = (struct dataset *) mempool_alloc (db, total + req->key_len); + if (newp != NULL) + { + /* Adjust pointers into the memory block. */ + addresses = (char *) newp + (addresses - (char *) dataset); + aliases = (char *) newp + (aliases - (char *) dataset); + if (key_copy != NULL) + key_copy = (char *) newp + (key_copy - (char *) dataset); + + dataset = memcpy (newp, dataset, total + req->key_len); + alloca_used = false; + } + + /* Mark the old record as obsolete. 
*/ + dh->usable = false; + } + } + else + { + /* We write the dataset before inserting it to the database + since while inserting this thread might block and so would + unnecessarily keep the receiver waiting. */ + assert (fd != -1); + + written = TEMP_FAILURE_RETRY (write (fd, &dataset->resp, total)); } - addr_list_type = (hst->h_length == NS_INADDRSZ - ? GETHOSTBYADDR : GETHOSTBYADDRv6); + /* Add the record to the database. But only if it has not been + stored on the stack. - /* Compute the timeout time. */ - t += db->postimeout; - - /* Now get the lock to safely insert the records. */ - pthread_rwlock_rdlock (&db->lock); - - /* First add all the aliases. */ - assert (add_addr || hst->h_addr_list[1] == NULL); - if (!add_addr) - for (cnt = 0; cnt < h_aliases_cnt; ++cnt) - { - if (addr_list_type == GETHOSTBYADDR) - cache_add (GETHOSTBYNAME, aliases, h_aliases_len[cnt], data, - total, data, 0, t, db, owner); - - cache_add (GETHOSTBYNAMEv6, aliases, h_aliases_len[cnt], data, - total, data, 0, t, db, owner); - - aliases += h_aliases_len[cnt]; - } - - /* Next the normal addresses. */ - if (add_addr) - for (cnt = 0; cnt < h_addr_list_cnt; ++cnt) - { - cache_add (addr_list_type, addresses, hst->h_length, data, total, - data, cnt + 1 == h_addr_list_cnt, t, db, owner); - addresses += hst->h_length; - } - - /* If necessary the IPv6 addresses. */ - if (add_addr && addr_list_type == GETHOSTBYADDR) - for (cnt = 0; cnt < h_addr_list_cnt; ++cnt) - { - cache_add (GETHOSTBYADDRv6, addresses, IN6ADDRSZ, data, total, - data, 0, t, db, owner); - addresses += IN6ADDRSZ; - } - - /* Avoid adding names if more than one address is available. See - above for more info. */ - if (!add_addr) + If the record contains more than one IP address (used for + load balancing etc) don't cache the entry. This is something + the current cache handling cannot handle and it is more than + questionable whether it is worthwhile complicating the cache + handling just for handling such a special case. 
*/ + if (! alloca_used) { - /* If necessary add the key for this request. + /* If necessary, we also propagate the data to disk. */ + if (db->persistent) + // XXX async OK? + msync (dataset, total + req->key_len, MS_ASYNC); - Note: hst->h_addr_list[1] == NULL. */ - if (req->type == GETHOSTBYNAME || req->type == GETHOSTBYNAMEv6) + addr_list_type = (hst->h_length == NS_INADDRSZ + ? GETHOSTBYADDR : GETHOSTBYADDRv6); + + /* Now get the lock to safely insert the records. */ + pthread_rwlock_rdlock (&db->lock); + + /* NB: the following code is really complicated. It has + seemlingly duplicated code paths which do the same. The + problem is that we always must add the hash table entry + with the FIRST flag set first. Otherwise we get dangling + pointers in case memory allocation fails. */ + assert (add_addr || hst->h_addr_list[1] == NULL); + + /* Add the normal addresses. */ + if (add_addr) { + for (cnt = 0; cnt < h_addr_list_cnt; ++cnt) + { + if (cache_add (addr_list_type, addresses, hst->h_length, + &dataset->head, cnt == 0, db, owner) < 0) + { + /* Ensure the data can be recovered. */ + if (cnt == 0) + dataset->head.usable = false; + goto out; + } + addresses += hst->h_length; + } + + /* If necessary the IPv6 addresses. */ if (addr_list_type == GETHOSTBYADDR) - cache_add (GETHOSTBYNAME, key_copy, req->key_len, data, total, - data, 0, t, db, owner); - cache_add (GETHOSTBYNAMEv6, key_copy, req->key_len, data, - total, data, 0, t, db, owner); + for (cnt = 0; cnt < h_addr_list_cnt; ++cnt) + { + if (cache_add (GETHOSTBYADDRv6, addresses, IN6ADDRSZ, + &dataset->head, false, db, owner) < 0) + goto out; + + addresses += IN6ADDRSZ; + } + } + /* Avoid adding names if more than one address is available. See + above for more info. */ + else + { + assert (req->type == GETHOSTBYNAME + || req->type == GETHOSTBYNAMEv6 + || req->type == GETHOSTBYADDR + || req->type == GETHOSTBYADDRv6); + + /* If necessary add the key for this request. 
*/ + if (req->type == GETHOSTBYNAME) + { + bool first = true; + if (addr_list_type == GETHOSTBYADDR) + { + if (cache_add (GETHOSTBYNAME, key_copy, req->key_len, + &dataset->head, true, db, owner) < 0) + { + /* Could not allocate memory. Make sure the + data gets discarded. */ + dataset->head.usable = false; + goto out; + } + + first = false; + } + if (cache_add (GETHOSTBYNAMEv6, key_copy, req->key_len, + &dataset->head, first, db, owner) < 0) + { + /* Could not allocate memory. Make sure the + data gets discarded. */ + if (first) + dataset->head.usable = false; + goto out; + } + } + else if (req->type == GETHOSTBYNAMEv6) + { + if (cache_add (GETHOSTBYNAMEv6, key_copy, req->key_len, + &dataset->head, true, db, owner) < 0) + { + /* Could not allocate memory. Make sure the + data gets discarded. */ + dataset->head.usable = false; + goto out; + } + + if (addr_list_type == GETHOSTBYADDR + && cache_add (GETHOSTBYNAME, key_copy, req->key_len, + &dataset->head, false, db, owner) < 0) + goto out; + } + + /* And finally the name. We mark this as the last entry. */ + if (addr_list_type == GETHOSTBYADDR + && req->type == GETHOSTBYADDR + && cache_add (GETHOSTBYNAME, dataset->strdata, h_name_len, + &dataset->head, true, db, owner) < 0) + { + /* Could not allocate memory. Make sure the + data gets discarded. */ + dataset->head.usable = false; + goto out; + } + + if (cache_add (GETHOSTBYNAMEv6, dataset->strdata, + h_name_len, &dataset->head, + ((req->type == GETHOSTBYADDR + && addr_list_type != GETHOSTBYADDR) + || req->type == GETHOSTBYADDRv6), db, + owner) < 0) + { + /* Could not allocate memory. Make sure the + data gets discarded. 
*/ + if ((req->type == GETHOSTBYADDR + && addr_list_type != GETHOSTBYADDR) + || req->type == GETHOSTBYADDRv6) + dataset->head.usable = false; + goto out; + } + + if (addr_list_type == GETHOSTBYADDR + && req->type != GETHOSTBYADDR + && cache_add (GETHOSTBYNAME, dataset->strdata, h_name_len, + &dataset->head, false, db, owner) < 0) + goto out; + + /* First add all the aliases. */ + for (cnt = 0; cnt < h_aliases_cnt; ++cnt) + { + if (addr_list_type == GETHOSTBYADDR) + if (cache_add (GETHOSTBYNAME, aliases, + h_aliases_len[cnt], &dataset->head, + false, db, owner) < 0) + break; + + if (cache_add (GETHOSTBYNAMEv6, aliases, + h_aliases_len[cnt], &dataset->head, + false, db, owner) < 0) + break; + + aliases += h_aliases_len[cnt]; + } } - /* And finally the name. We mark this as the last entry. */ - if (addr_list_type == GETHOSTBYADDR) - cache_add (GETHOSTBYNAME, data->strdata, h_name_len, data, total, - data, 0, t, db, owner); - cache_add (GETHOSTBYNAMEv6, data->strdata, h_name_len, data, - total, data, 1, t, db, owner); + out: + pthread_rwlock_unlock (&db->lock); } - - pthread_rwlock_unlock (&db->lock); } if (__builtin_expect (written != total, 0) && debug_level > 0) @@ -288,9 +497,28 @@ cache_addhst (struct database *db, int fd, request_header *req, void *key, } -void -addhstbyname (struct database *db, int fd, request_header *req, - void *key, uid_t uid) +static int +lookup (int type, void *key, struct hostent *resultbufp, char *buffer, + size_t buflen, struct hostent **hst) +{ + if (type == GETHOSTBYNAME) + return __gethostbyname2_r (key, AF_INET, resultbufp, buffer, buflen, hst, + &h_errno); + else if (type == GETHOSTBYNAMEv6) + return __gethostbyname2_r (key, AF_INET6, resultbufp, buffer, buflen, hst, + &h_errno); + else if (type == GETHOSTBYADDR) + return __gethostbyaddr_r (key, NS_INADDRSZ, AF_INET, resultbufp, buffer, + buflen, hst, &h_errno); + else + return __gethostbyaddr_r (key, NS_IN6ADDRSZ, AF_INET6, resultbufp, buffer, + buflen, hst, &h_errno); +} + + 
+/* Shared worker for the addhstby... and readdhstby... wrappers below.
+   Resolves KEY with lookup (req->type, ...), enlarging the result
+   buffer for as long as the lookup fails with ERANGE, and hands the
+   result to cache_addhst.  HE and DH are NULL for a new request and
+   non-NULL when an existing entry is reloaded; this selects between
+   the two debug messages.  When db->secure is set the lookup runs with
+   the effective UID switched to UID.  */
+static void
+addhstbyX (struct database_dyn *db, int fd, request_header *req,
+           void *key, uid_t uid, struct hashentry *he, struct datahead *dh)
 {
   /* Search for the entry matching the key.  Please note that we don't
      look again in the table whether the dataset is now available.  We
@@ -302,20 +530,25 @@ addhstbyname (struct database *db, int fd, request_header *req,
   struct hostent *hst;
   uid_t oldeuid = 0;
   bool use_malloc = false;
+  int errval = 0;
 
   if (__builtin_expect (debug_level > 0, 0))
-    dbg_log (_("Haven't found \"%s\" in hosts cache!"), (char *) key);
+    {
+      if (he == NULL)
+        dbg_log (_("Haven't found \"%s\" in hosts cache!"), (char *) key);
+      else
+        dbg_log (_("Reloading \"%s\" in hosts cache!"), (char *) key);
+    }
 
-  if (secure[hstdb])
+  if (db->secure)
     {
       oldeuid = geteuid ();
       seteuid (uid);
     }
 
-  while (__gethostbyname2_r (key, AF_INET, &resultbuf, buffer, buflen,
-                             &hst, &h_errno) != 0
+  while (lookup (req->type, key, &resultbuf, buffer, buflen, &hst) != 0
          && h_errno == NETDB_INTERNAL
-         && errno == ERANGE)
+         && (errval = errno) == ERANGE)
     {
       char *old_buffer = buffer;
       errno = 0;
@@ -332,6 +565,11 @@ addhstbyname (struct database *db, int fd, request_header *req,
                  never happen.  */
               hst = NULL;
               buffer = old_buffer;
+
+              /* We set the error to indicate this is (possibly) a
+                 temporary error and that it does not mean the entry
+                 is not available at all.  */
+              errval = EAGAIN;
               break;
             }
           use_malloc = true;
@@ -342,10 +580,11 @@ addhstbyname (struct database *db, int fd, request_header *req,
         buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
     }
 
-  if (secure[hstdb])
+  if (db->secure)
     seteuid (oldeuid);
 
-  cache_addhst (db, fd, req, key, hst, uid, 0);
+  cache_addhst (db, fd, req, key, hst, uid, 0, he, dh,
+                h_errno == TRY_AGAIN ? errval : 0);
 
   if (use_malloc)
     free (buffer);
@@ -353,197 +592,88 @@ addhstbyname (struct database *db, int fd, request_header *req,
 
 
+/* Forwards the request unchanged to addhstbyX.  HE and DH are NULL
+   because no existing cache entry is being reloaded.  */
 void
-addhstbyaddr (struct database *db, int fd, request_header *req,
+addhstbyname (struct database_dyn *db, int fd, request_header *req,
               void *key, uid_t uid)
 {
-  /* Search for the entry matching the key.  Please note that we don't
-     look again in the table whether the dataset is now available.  We
-     simply insert it.  It does not matter if it is in there twice.  The
-     pruning function only will look at the timestamp.  */
-  int buflen = 1024;
-  char *buffer = (char *) alloca (buflen);
-  struct hostent resultbuf;
-  struct hostent *hst;
-  uid_t oldeuid = 0;
-  bool use_malloc = false;
-
-  if (__builtin_expect (debug_level > 0, 0))
-    {
-      char buf[INET_ADDRSTRLEN];
-      dbg_log (_("Haven't found \"%s\" in hosts cache!"),
-               inet_ntop (AF_INET, key, buf, sizeof (buf)));
-    }
-
-  if (secure[hstdb])
-    {
-      oldeuid = geteuid ();
-      seteuid (uid);
-    }
-
-  while (__gethostbyaddr_r (key, NS_INADDRSZ, AF_INET, &resultbuf, buffer,
-                            buflen, &hst, &h_errno) != 0
-         && h_errno == NETDB_INTERNAL
-         && errno == ERANGE)
-    {
-      char *old_buffer = buffer;
-      errno = 0;
-
-      if (__builtin_expect (buflen > 32768, 0))
-        {
-          buflen += INCR;
-          buffer = (char *) realloc (use_malloc ? buffer : NULL, buflen);
-          if (buffer == NULL)
-            {
-              /* We ran out of memory.  We cannot do anything but
-                 sending a negative response.  In reality this should
-                 never happen.  */
-              hst = NULL;
-              buffer = old_buffer;
-              break;
-            }
-          use_malloc = true;
-        }
-      else
-        /* Allocate a new buffer on the stack.  If possible combine it
-           with the previously allocated buffer.  */
-        buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
-    }
-
-  if (secure[hstdb])
-    seteuid (oldeuid);
-
-  cache_addhst (db, fd, req, key, hst, uid, 1);
-
-  if (use_malloc)
-    free (buffer);
+  addhstbyX (db, fd, req, key, uid, NULL, NULL);
 }
 
 
+/* Reload the entry described by HE/DH: build a GETHOSTBYNAME request
+   whose key is the one stored in the database (db->data + he->key,
+   length he->len) and run it through addhstbyX as he->owner.  fd is -1
+   (presumably no client is waiting for a reply; confirm in
+   cache_addhst).  */
 void
-addhstbynamev6 (struct database *db, int fd, request_header *req,
-                void *key, uid_t uid)
+readdhstbyname (struct database_dyn *db, struct hashentry *he,
+                struct datahead *dh)
 {
-  /* Search for the entry matching the key.  Please note that we don't
-     look again in the table whether the dataset is now available.  We
-     simply insert it.  It does not matter if it is in there twice.  The
-     pruning function only will look at the timestamp.  */
-  int buflen = 1024;
-  char *buffer = (char *) alloca (buflen);
-  struct hostent resultbuf;
-  struct hostent *hst;
-  uid_t oldeuid = 0;
-  bool use_malloc = false;
-
-  if (__builtin_expect (debug_level > 0, 0))
-    dbg_log (_("Haven't found \"%s\" in hosts cache!"), (char *) key);
-
-  if (secure[hstdb])
+  request_header req =
     {
-      oldeuid = geteuid ();
-      seteuid (uid);
-    }
+      .type = GETHOSTBYNAME,
+      .key_len = he->len
+    };
 
-  while (__gethostbyname2_r (key, AF_INET6, &resultbuf, buffer, buflen,
-                             &hst, &h_errno) != 0
-         && h_errno == NETDB_INTERNAL
-         && errno == ERANGE)
-    {
-      char *old_buffer = buffer;
-      errno = 0;
-
-      if (__builtin_expect (buflen > 32768, 0))
-        {
-          buflen += INCR;
-          buffer = (char *) realloc (use_malloc ? buffer : NULL, buflen);
-          if (buffer == NULL)
-            {
-              /* We ran out of memory.  We cannot do anything but
-                 sending a negative response.  In reality this should
-                 never happen.  */
-              hst = NULL;
-              buffer = old_buffer;
-              break;
-            }
-          use_malloc = true;
-        }
-      else
-        /* Allocate a new buffer on the stack.  If possible combine it
-           with the previously allocated buffer.  */
-        buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
-    }
-
-  if (secure[hstdb])
-    seteuid (oldeuid);
-
-  cache_addhst (db, fd, req, key, hst, uid, 0);
-
-  if (use_malloc)
-    free (buffer);
+  addhstbyX (db, -1, &req, db->data + he->key, he->owner, he, dh);
 }
 
 
+/* Like addhstbyname: forwards to addhstbyX with HE and DH NULL.  The
+   lookup that is performed is selected by req->type.  */
 void
-addhstbyaddrv6 (struct database *db, int fd, request_header *req,
+addhstbyaddr (struct database_dyn *db, int fd, request_header *req,
+              void *key, uid_t uid)
+{
+  addhstbyX (db, fd, req, key, uid, NULL, NULL);
+}
+
+
+/* Like readdhstbyname, but the rebuilt request has type
+   GETHOSTBYADDR.  */
+void
+readdhstbyaddr (struct database_dyn *db, struct hashentry *he,
+                struct datahead *dh)
+{
+  request_header req =
+    {
+      .type = GETHOSTBYADDR,
+      .key_len = he->len
+    };
+
+  addhstbyX (db, -1, &req, db->data + he->key, he->owner, he, dh);
+}
+
+
+/* Like addhstbyname: forwards to addhstbyX with HE and DH NULL.  */
+void
+addhstbynamev6 (struct database_dyn *db, int fd, request_header *req,
                 void *key, uid_t uid)
 {
-  /* Search for the entry matching the key.  Please note that we don't
-     look again in the table whether the dataset is now available.  We
-     simply insert it.  It does not matter if it is in there twice.  The
-     pruning function only will look at the timestamp.  */
-  int buflen = 1024;
-  char *buffer = (char *) alloca (buflen);
-  struct hostent resultbuf;
-  struct hostent *hst;
-  uid_t oldeuid = 0;
-  bool use_malloc = false;
-
-  if (__builtin_expect (debug_level > 0, 0))
-    {
-      char buf[INET6_ADDRSTRLEN];
-      dbg_log (_("Haven't found \"%s\" in hosts cache!"),
-               inet_ntop (AF_INET6, key, buf, sizeof (buf)));
-    }
-
-  if (secure[hstdb])
-    {
-      oldeuid = geteuid ();
-      seteuid (uid);
-    }
-
-  while (__gethostbyaddr_r (key, NS_IN6ADDRSZ, AF_INET6, &resultbuf,
-                            buffer, buflen, &hst, &h_errno) != 0
-         && h_errno == NETDB_INTERNAL
-         && errno == ERANGE)
-    {
-      char *old_buffer = buffer;
-      errno = 0;
-
-      if (__builtin_expect (buflen > 32768, 0))
-        {
-          buflen += INCR;
-          buffer = (char *) realloc (use_malloc ? buffer : NULL, buflen);
-          if (buffer == NULL)
-            {
-              /* We ran out of memory.  We cannot do anything but
-                 sending a negative response.  In reality this should
-                 never happen.  */
-              hst = NULL;
-              buffer = old_buffer;
-              break;
-            }
-          use_malloc = true;
-        }
-      else
-        /* Allocate a new buffer on the stack.  If possible combine it
-           with the previously allocated buffer.  */
-        buffer = (char *) extend_alloca (buffer, buflen, buflen + INCR);
-    }
-
-  if (secure[hstdb])
-    seteuid (oldeuid);
-
-  cache_addhst (db, fd, req, key, hst, uid, 1);
-
-  if (use_malloc)
-    free (buffer);
+  addhstbyX (db, fd, req, key, uid, NULL, NULL);
+}
+
+
+/* Like readdhstbyname, but the rebuilt request has type
+   GETHOSTBYNAMEv6.  */
+void
+readdhstbynamev6 (struct database_dyn *db, struct hashentry *he,
+                  struct datahead *dh)
+{
+  request_header req =
+    {
+      .type = GETHOSTBYNAMEv6,
+      .key_len = he->len
+    };
+
+  addhstbyX (db, -1, &req, db->data + he->key, he->owner, he, dh);
+}
+
+
+/* Like addhstbyname: forwards to addhstbyX with HE and DH NULL.  */
+void
+addhstbyaddrv6 (struct database_dyn *db, int fd, request_header *req,
+                void *key, uid_t uid)
+{
+  addhstbyX (db, fd, req, key, uid, NULL, NULL);
+}
+
+
+/* Like readdhstbyname, but the rebuilt request has type
+   GETHOSTBYADDRv6.  */
+void
+readdhstbyaddrv6 (struct database_dyn *db, struct hashentry *he,
+                  struct datahead *dh)
+{
+  request_header req =
+    {
+      .type = GETHOSTBYADDRv6,
+      .key_len = he->len
+    };
+
+  addhstbyX (db, -1, &req, db->data + he->key, he->owner, he, dh);
 }
diff --git a/nscd/mem.c b/nscd/mem.c
new file mode 100644
index 0000000000..a4e30535c8
--- /dev/null
+++ b/nscd/mem.c
@@ -0,0 +1,515 @@
+/* Cache memory handling.
+   Copyright (C) 2004 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   Contributed by Ulrich Drepper , 2004.
+
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Lesser General Public
+   License as published by the Free Software Foundation; either
+   version 2.1 of the License, or (at your option) any later version.
+
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public
+   License along with the GNU C Library; if not, write to the Free
+   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+   02111-1307 USA.  */
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include "dbg_log.h"
+#include "nscd.h"
+
+
+/* Maximum alignment requirement we will encounter.  */
+#define BLOCK_ALIGN_LOG 3
+#define BLOCK_ALIGN (1 << BLOCK_ALIGN_LOG)
+#define BLOCK_ALIGN_M1 (BLOCK_ALIGN - 1)
+
+
+/* qsort comparator: orders pointers to hash entries by the address of
+   the hash entry itself.  */
+static int
+sort_he (const void *p1, const void *p2)
+{
+  struct hashentry *h1 = *(struct hashentry **) p1;
+  struct hashentry *h2 = *(struct hashentry **) p2;
+
+  if (h1 < h2)
+    return -1;
+  if (h1 > h2)
+    return 1;
+  return 0;
+}
+
+
+/* qsort comparator: orders pointers to hash entries by the offset of
+   the data packet they reference.  */
+static int
+sort_he_data (const void *p1, const void *p2)
+{
+  struct hashentry *h1 = *(struct hashentry **) p1;
+  struct hashentry *h2 = *(struct hashentry **) p2;
+
+  if (h1->packet < h2->packet)
+    return -1;
+  if (h1->packet > h2->packet)
+    return 1;
+  return 0;
+}
+
+
+/* Basic definitions for the bitmap implementation.  Only BITMAP_T
+   needs to be changed to choose a different word size.  ALLBITS is
+   built by complementing zero rather than by shifting 1 left by BITS:
+   that shift is undefined once BITMAP_T is as wide as int.  */
+#define BITMAP_T uint8_t
+#define BITS (CHAR_BIT * sizeof (BITMAP_T))
+#define ALLBITS ((BITMAP_T) ~(BITMAP_T) 0)
+#define HIGHBIT (((BITMAP_T) 1) << (BITS - 1))
+
+
+/* Set the bits in MARK which correspond to the LEN bytes starting at
+   data-area offset START.  One bit stands for one block of BLOCK_ALIGN
+   bytes.  LEN is rounded up to whole blocks; START is divided by
+   BLOCK_ALIGN without rounding, so it is expected to be block
+   aligned.  */
+static void
+markrange (BITMAP_T *mark, ref_t start, size_t len)
+{
+  /* Adjust parameters for block alignment.  */
+  start /= BLOCK_ALIGN;
+  len = (len + BLOCK_ALIGN_M1) / BLOCK_ALIGN;
+
+  size_t elem = start / BITS;
+
+  if (start % BITS != 0)
+    {
+      if (start % BITS + len <= BITS)
+        {
+          /* All fits in the partial byte.  */
+          mark[elem] |= (ALLBITS >> (BITS - len)) << (start % BITS);
+          return;
+        }
+
+      /* Fill the rest of the first element.  Use ALLBITS, not a
+         literal 0xff, so the code follows BITMAP_T.  */
+      mark[elem++] |= ALLBITS << (start % BITS);
+      len -= BITS - (start % BITS);
+    }
+
+  /* Whole elements.  */
+  while (len >= BITS)
+    {
+      mark[elem++] = ALLBITS;
+      len -= BITS;
+    }
+
+  /* Remaining low bits of the last element.  */
+  if (len > 0)
+    mark[elem] |= ALLBITS >> (BITS - len);
+}
+
+
+/* Garbage-collect the data area of DB.  Every block referenced from
+   the hash table is marked in a bitmap, then the used blocks are moved
+   towards the start of the area and the references to them are
+   adjusted.  db->lock (for writing) and db->memlock are held for the
+   whole run.  */
+void
+gc (struct database_dyn *db)
+{
+  /* We need write access.  */
+  pthread_rwlock_wrlock (&db->lock);
+
+  /* And the memory handling lock.  */
+  pthread_mutex_lock (&db->memlock);
+
+  /* We need an array representing the data area.  All memory
+     allocation is BLOCK_ALIGN aligned so this is the level at which
+     we have to look at the memory.  We use a mark and sweep algorithm
+     where the marks are placed in this array.  */
+  assert (db->head->first_free % BLOCK_ALIGN == 0);
+  BITMAP_T mark[(db->head->first_free / BLOCK_ALIGN + BITS - 1) / BITS];
+  memset (mark, '\0', sizeof (mark));
+
+  /* Create arrays which can hold pointers to all the hash entries.  */
+  struct hashentry *he[db->head->nentries];
+  struct hashentry *he_data[db->head->nentries];
+
+  size_t cnt = 0;
+  for (size_t idx = 0; idx < db->head->module; ++idx)
+    {
+      ref_t *prevp = &db->head->array[idx];
+      ref_t run = *prevp;
+
+      while (run != ENDREF)
+        {
+          assert (cnt < db->head->nentries);
+          he[cnt] = (struct hashentry *) (db->data + run);
+
+          he[cnt]->prevp = prevp;
+          prevp = &he[cnt]->next;
+
+          /* This is the hash entry itself.  */
+          markrange (mark, run, sizeof (struct hashentry));
+
+          /* Add the information for the data itself.  We do this
+             only for the one special entry marked with FIRST.  */
+          if (he[cnt]->first)
+            {
+              struct datahead *dh
+                = (struct datahead *) (db->data + he[cnt]->packet);
+              markrange (mark, he[cnt]->packet, dh->allocsize);
+            }
+
+          run = he[cnt]->next;
+
+          ++cnt;
+        }
+    }
+  assert (cnt == db->head->nentries);
+
+  /* Sort the entries by the addresses of the referenced data.  All
+     the entries pointing to the same DATAHEAD object will have the
+     same key.
+     Stability of the sorting is unimportant.  */
+  memcpy (he_data, he, cnt * sizeof (struct hashentry *));
+  qsort (he_data, cnt, sizeof (struct hashentry *), sort_he_data);
+
+  /* Sort the entries by their address.  */
+  qsort (he, cnt, sizeof (struct hashentry *), sort_he);
+
+  /* Determine the highest used address.  HIGH ends up as the number of
+     leading elements of MARK up to and including the last non-zero
+     one.  */
+  size_t high = sizeof (mark);
+  while (high > 0 && mark[high - 1] == 0)
+    --high;
+
+  /* No memory used.  */
+  if (high == 0)
+    {
+      db->head->first_free = 0;
+      goto out;
+    }
+
+  /* Determine the highest offset.  Afterwards MASK is the highest bit
+     set in mark[high - 1].  HIGHREF is computed alongside but is not
+     read again anywhere in this function.  */
+  BITMAP_T mask = HIGHBIT;
+  ref_t highref = (high * BITS - 1) * BLOCK_ALIGN;
+  while ((mark[high - 1] & mask) == 0)
+    {
+      mask >>= 1;
+      highref -= BLOCK_ALIGN;
+    }
+
+  /* Now we can iterate over the MARK array and find bits which are not
+     set.  These represent memory which can be recovered.  */
+  size_t byte = 0;
+  /* Find the first gap.  */
+  while (byte < high && mark[byte] == ALLBITS)
+    ++byte;
+
+  if (byte == high
+      || (byte == high - 1 && (mark[byte] & ~(mask | (mask - 1))) == 0))
+    /* No gap.
+       NOTE(review): MASK is still the highest bit set in
+       mark[high - 1], so the masked value above is always zero and the
+       second alternative reduces to BYTE == HIGH - 1.  Holes which lie
+       only inside the last used element therefore never start a
+       collection -- confirm this is intended.  */
+    goto out;
+
+  mask = 1;
+  cnt = 0;
+  while ((mark[byte] & mask) != 0)
+    {
+      ++cnt;
+      mask <<= 1;
+    }
+  ref_t off_free = (byte * BITS + cnt) * BLOCK_ALIGN;
+  assert (off_free <= db->head->first_free);
+
+  struct hashentry **next_hash = he;
+  struct hashentry **next_data = he_data;
+
+  /* Skip over the hash entries in the first block which does not get
+     moved.  */
+  while (next_hash < &he[db->head->nentries]
+         && *next_hash < (struct hashentry *) (db->data + off_free))
+    ++next_hash;
+
+  while (next_data < &he_data[db->head->nentries]
+         && (*next_data)->packet < off_free)
+    ++next_data;
+
+
+  /* We do not perform the move operations right away since the
+     he_data array is not sorted by the address of the data.  */
+  struct moveinfo
+  {
+    void *from;
+    void *to;
+    size_t size;
+    struct moveinfo *next;
+  } *moves = NULL;
+
+  while (byte < high)
+    {
+      /* Search for the next filled block.  BYTE is the index of the
+         entry in MARK, MASK is the bit, and CNT is the bit number.
+         OFF_ALLOC is the corresponding offset.  */
+      if ((mark[byte] & ~(mask - 1)) == 0)
+        {
+          /* No other bit set in the same element of MARK.  Search in the
+             following memory.  */
+          do
+            ++byte;
+          while (byte < high && mark[byte] == 0);
+
+          if (byte == high)
+            /* That was it.  */
+            break;
+
+          mask = 1;
+          cnt = 0;
+        }
+      /* Find the exact bit.  */
+      while ((mark[byte] & mask) == 0)
+        {
+          ++cnt;
+          mask <<= 1;
+        }
+
+      ref_t off_alloc = (byte * BITS + cnt) * BLOCK_ALIGN;
+      assert (off_alloc <= db->head->first_free);
+
+      /* Find the end of the used area.  */
+      if ((mark[byte] & ~(mask - 1)) == (BITMAP_T) ~(mask - 1))
+        {
+          /* All other bits set.  Search the next bytes in MARK.  */
+          do
+            ++byte;
+          while (byte < high && mark[byte] == ALLBITS);
+
+          mask = 1;
+          cnt = 0;
+        }
+      if (byte < high)
+        {
+          /* Find the exact bit.  */
+          while ((mark[byte] & mask) != 0)
+            {
+              ++cnt;
+              mask <<= 1;
+            }
+        }
+
+      ref_t off_allocend = (byte * BITS + cnt) * BLOCK_ALIGN;
+      assert (off_allocend <= db->head->first_free);
+      /* Now we know that we can copy the area from OFF_ALLOC to
+         OFF_ALLOCEND (not included) to the memory starting at
+         OFF_FREE.  First fix up all the entries for the
+         displacement.  */
+      ref_t disp = off_alloc - off_free;
+
+      struct moveinfo *new_move
+        = (struct moveinfo *) alloca (sizeof (*new_move));
+      new_move->from = db->data + off_alloc;
+      new_move->to = db->data + off_free;
+      new_move->size = off_allocend - off_alloc;
+      /* Create a circular list to be always able to append at the end.
+         MOVES points at the most recently added element, so
+         MOVES->next is the oldest one.  */
+      if (moves == NULL)
+        moves = new_move->next = new_move;
+      else
+        {
+          new_move->next = moves->next;
+          moves = moves->next = new_move;
+        }
+
+      /* The following loop will prepare to move this much data.  */
+      off_free += off_allocend - off_alloc;
+
+      while (off_alloc < off_allocend)
+        {
+          /* Determine whether the next entry is for a hash entry or
+             the data.
+             NOTE(review): NEXT_HASH is dereferenced without comparing
+             it against the end of HE; once every hash entry has been
+             consumed this reads one element past the array.  */
+          if ((struct hashentry *) (db->data + off_alloc) == *next_hash)
+            {
+              /* Just correct the forward reference.  */
+              *(*next_hash++)->prevp -= disp;
+
+              off_alloc += ((sizeof (struct hashentry) + BLOCK_ALIGN_M1)
+                            & ~BLOCK_ALIGN_M1);
+            }
+          else
+            {
+              assert (next_data < &he_data[db->head->nentries]);
+              assert ((*next_data)->packet == off_alloc);
+
+              struct datahead *dh = (struct datahead *) (db->data + off_alloc);
+              do
+                {
+                  assert ((*next_data)->key >= (*next_data)->packet);
+                  assert ((*next_data)->key + (*next_data)->len
+                          <= (*next_data)->packet + dh->allocsize);
+
+                  (*next_data)->packet -= disp;
+                  (*next_data)->key -= disp;
+                  ++next_data;
+                }
+              while (next_data < &he_data[db->head->nentries]
+                     && (*next_data)->packet == off_alloc);
+
+              off_alloc += (dh->allocsize + BLOCK_ALIGN_M1) & ~BLOCK_ALIGN_M1;
+            }
+        }
+      assert (off_alloc == off_allocend);
+
+      assert (off_alloc <= db->head->first_free);
+      if (off_alloc == db->head->first_free)
+        /* We are done, that was the last block.  */
+        break;
+    }
+  assert (next_hash == &he[db->head->nentries]);
+  assert (next_data == &he_data[db->head->nentries]);
+
+  /* Now perform the actual moves, oldest first.  */
+  if (moves != NULL)
+    {
+      struct moveinfo *runp = moves->next;
+      do
+        {
+          assert ((char *) runp->to >= db->data);
+          assert ((char *) runp->to + runp->size
+                  <= db->data  + db->head->first_free);
+          assert ((char *) runp->from >= db->data);
+          assert ((char *) runp->from + runp->size
+                  <= db->data  + db->head->first_free);
+
+          /* The regions may overlap.  */
+          memmove (runp->to, runp->from, runp->size);
+          runp = runp->next;
+        }
+      while (runp != moves->next);
+
+      if (__builtin_expect (debug_level >= 3, 0))
+        dbg_log (_("freed %zu bytes in %s cache"),
+                 db->head->first_free
+                 - ((char *) moves->to + moves->size - db->data),
+                 dbnames[db - dbs]);
+
+      /* The byte past the end of the last copied block is the next
+         available byte.  */
+      db->head->first_free = (char *) moves->to + moves->size - db->data;
+
+      /* Consistency check.  Only run at debug_level >= 3: walk every
+         hash chain and log any entry, key or packet which now lies
+         beyond the new FIRST_FREE.  */
+      if (__builtin_expect (debug_level >= 3, 0))
+        {
+          for (size_t idx = 0; idx < db->head->module; ++idx)
+            {
+              ref_t run = db->head->array[idx];
+              size_t cnt = 0;
+
+              while (run != ENDREF)
+                {
+                  if (run + sizeof (struct hashentry) > db->head->first_free)
+                    {
+                      dbg_log ("entry %zu in hash bucket %zu out of bounds: "
+                               "%" PRIu32 "+%zu > %zu\n",
+                               cnt, idx, run, sizeof (struct hashentry),
+                               db->head->first_free);
+                      break;
+                    }
+
+                  struct hashentry *he = (struct hashentry *) (db->data + run);
+
+                  if (he->key + he->len > db->head->first_free)
+                    dbg_log ("key of entry %zu in hash bucket %zu out of "
+                             "bounds: %" PRIu32 "+%zu > %zu\n",
+                             cnt, idx, he->key, he->len, db->head->first_free);
+
+                  if (he->packet + sizeof (struct datahead)
+                      > db->head->first_free)
+                    dbg_log ("packet of entry %zu in hash bucket %zu out of "
+                             "bounds: %" PRIu32 "+%zu > %zu\n",
+                             cnt, idx, he->packet, sizeof (struct datahead),
+                             db->head->first_free);
+                  else
+                    {
+                      struct datahead *dh = (struct datahead *) (db->data
+                                                                 + he->packet);
+                      if (he->packet + dh->allocsize
+                          > db->head->first_free)
+                        dbg_log ("full key of entry %zu in hash bucket %zu "
+                                 "out of bounds: %" PRIu32 "+%zu > %zu",
+                                 cnt, idx, he->packet, dh->allocsize,
+                                 db->head->first_free);
+                    }
+
+                  run = he->next;
+                  ++cnt;
+                }
+            }
+        }
+    }
+
+  /* Make sure the data on disk is updated.  */
+  if (db->persistent)
+    msync (db->head, db->data + db->head->first_free - (char *) db->head,
+           MS_ASYNC);
+
+  /* We are done.  */
+ out:
+  pthread_mutex_unlock (&db->memlock);
+  pthread_rwlock_unlock (&db->lock);
+}
+
+
+/* Allocate LEN bytes from the data area of DB.  Returns a pointer into
+   db->data, or NULL if the request does not fit.  db->memlock is taken
+   for the duration of the call.  */
+void *
+mempool_alloc (struct database_dyn *db, size_t len)
+{
+  /* Make sure LEN is a multiple of our maximum alignment so we can
+     keep track of used memory in multiples of this alignment value.
+     The rounding below adds at most BLOCK_ALIGN - 1 bytes.  */
+  if ((len & BLOCK_ALIGN_M1) != 0)
+    len += BLOCK_ALIGN - (len & BLOCK_ALIGN_M1);
+
+  pthread_mutex_lock (&db->memlock);
+
+  assert ((db->head->first_free & BLOCK_ALIGN_M1) == 0);
+
+  bool tried_resize = false;
+  void *res;
+ retry:
+  res = db->data + db->head->first_free;
+
+  if (__builtin_expect (db->head->first_free + len > db->head->data_size, 0))
+    {
+      if (! tried_resize)
+        {
+          /* Try to resize the database.  Grow size of 1/8th.  */
+          size_t new_data_size = db->head->data_size + db->head->data_size / 8;
+          size_t oldtotal = (sizeof (struct database_pers_head)
+                             + db->head->module * sizeof (ref_t)
+                             + db->head->data_size);
+          size_t newtotal = (sizeof (struct database_pers_head)
+                             + db->head->module * sizeof (ref_t)
+                             + new_data_size);
+
+          /* Growing is only attempted for a file-backed mapping: the
+             file must first be extended successfully (ftruncate
+             returns 0 on success), then the mapping is enlarged in
+             place (mremap returns MAP_FAILED on error, never 0).  The
+             tests used to be inverted, so the database could never
+             grow.
+             NOTE(review): db->memsize is not updated here; confirm
+             whether the msync at termination relies on it.  */
+          if (db->mmap_used
+              && ftruncate (db->wr_fd, newtotal) == 0
+              /* Try to resize the mapping.  Note: no MREMAP_MAYMOVE.  */
+              && mremap (db->head, oldtotal, newtotal, 0) != MAP_FAILED)
+            {
+              db->head->data_size = new_data_size;
+              tried_resize = true;
+              goto retry;
+            }
+        }
+
+      /* Log the failure only once per run of failed allocations.  */
+      if (! db->last_alloc_failed)
+        {
+          dbg_log (_("no more memory for database '%s'"), dbnames[db - dbs]);
+
+          db->last_alloc_failed = true;
+        }
+
+      /* No luck.  */
+      res = NULL;
+    }
+  else
+    {
+      db->head->first_free += len;
+
+      db->last_alloc_failed = false;
+    }
+
+  pthread_mutex_unlock (&db->memlock);
+
+  return res;
+}
diff --git a/nscd/nscd.c b/nscd/nscd.c
index e3040bb20f..35e48ca348 100644
--- a/nscd/nscd.c
+++ b/nscd/nscd.c
@@ -36,6 +36,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -69,7 +70,6 @@ int disabled_passwd;
 int disabled_group;
 int go_background = 1;
 
-int secure[lastdb];
 int secure_in_use;
 static const char *conffile = _PATH_NSCDCONF;
 
@@ -342,11 +342,11 @@ parse_opt (int key, char *arg, struct argp_state *state)
 
     case 'S':
       if (strcmp (arg, "passwd,yes") == 0)
-        secure_in_use = secure[pwddb] = 1;
+        secure_in_use = dbs[pwddb].secure = 1;
       else if (strcmp (arg, "group,yes") == 0)
-        secure_in_use = secure[grpdb] = 1;
+        secure_in_use = dbs[grpdb].secure = 1;
       else if (strcmp (arg, "hosts,yes") == 0)
-        secure_in_use = secure[hstdb] = 1;
+        secure_in_use = dbs[hstdb].secure = 1;
       break;
 
     default:
@@ -406,6 +406,14 @@ termination_handler (int signum)
   /* Clean up pid file.  */
   unlink (_PATH_NSCDPID);
 
+  // XXX Terminate threads.
+
+  /* Synchronize memory.  */
+  for (int cnt = 0; cnt < lastdb; ++cnt)
+    if (dbs[cnt].persistent)
+      // XXX async OK?
+      msync (dbs[cnt].head, dbs[cnt].memsize, MS_ASYNC);
+
   _exit (EXIT_SUCCESS);
 }
 
diff --git a/nscd/nscd.conf b/nscd/nscd.conf
index ed05ba495f..11d26b9efe 100644
--- a/nscd/nscd.conf
+++ b/nscd/nscd.conf
@@ -11,12 +11,14 @@
 #	server-user
 #	server-user is ignored if nscd is started with -S parameters
 #	stat-user
+#	reload-count		unlimited|
 #
 #	enable-cache
 #	positive-time-to-live