/* Copyright (c) 1998-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */

#include <assert.h>
#include <atomic.h>
#include <errno.h>
#include <error.h>
#include <inttypes.h>
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include <libintl.h>
#include <arpa/inet.h>
#include <sys/mman.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <nss.h>

#include "nscd.h"
#include "dbg_log.h"


/* Wrapper functions with error checking for standard functions.  */
extern void *xcalloc (size_t n, size_t s);


/* Number of times a value is reloaded without being used.  UINT_MAX
   means unlimited.  */
unsigned int reload_count = DEFAULT_RELOAD_LIMIT;


static time_t (*const readdfcts[LASTREQ]) (struct database_dyn *,
                                            struct hashentry *,
                                            struct datahead *) =
{
  [GETPWBYNAME] = readdpwbyname,
  [GETPWBYUID] = readdpwbyuid,
  [GETGRBYNAME] = readdgrbyname,
  [GETGRBYGID] = readdgrbygid,
  [GETHOSTBYNAME] = readdhstbyname,
  [GETHOSTBYNAMEv6] = readdhstbynamev6,
  [GETHOSTBYADDR] = readdhstbyaddr,
  [GETHOSTBYADDRv6] = readdhstbyaddrv6,
  [GETAI] = readdhstai,
  [INITGROUPS] = readdinitgroups,
  [GETSERVBYNAME] = readdservbyname,
  [GETSERVBYPORT] = readdservbyport,
  [GETNETGRENT] = readdgetnetgrent,
  [INNETGR] = readdinnetgr
};
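
/* Note: the table above is indexed by request_type; request types without
   a reload helper (e.g. GETSTAT or SHUTDOWN) stay NULL and must never
   reach the reload path.  A hypothetical dispatch looks like:

     if (req < LASTREQ && readdfcts[req] != NULL)
       timeout = readdfcts[req] (table, entry, dh);

   prune_cache below asserts exactly this before reloading an entry.  */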


/* Search the cache for a matching entry and return it when found.
   Both positive and negative ("not found") records are returned; the
   caller has to check the `notfound' flag of the returned datahead.
   Return NULL if no usable entry exists.

   This function must be called with the read-lock held.  */
struct datahead *
cache_search (request_type type, const void *key, size_t len,
              struct database_dyn *table, uid_t owner)
{
  unsigned long int hash = __nss_hash (key, len) % table->head->module;

  unsigned long int nsearched = 0;
  struct datahead *result = NULL;

  ref_t work = table->head->array[hash];
  while (work != ENDREF)
    {
      ++nsearched;

      struct hashentry *here = (struct hashentry *) (table->data + work);

      if (type == here->type && len == here->len
          && memcmp (key, table->data + here->key, len) == 0
          && here->owner == owner)
        {
          /* We found the entry.  Increment the appropriate counter.  */
          struct datahead *dh
            = (struct datahead *) (table->data + here->packet);

          /* See whether we must ignore the entry.  */
          if (dh->usable)
            {
              /* We do not synchronize the memory here.  The statistics
                 data are not crucial; they are synchronized only once in
                 a while by the cleanup threads.  */
              if (dh->notfound)
                ++table->head->neghit;
              else
                {
                  ++table->head->poshit;

                  if (dh->nreloads != 0)
                    dh->nreloads = 0;
                }

              result = dh;
              break;
            }
        }

      work = here->next;
    }

  if (nsearched > table->head->maxnsearched)
    table->head->maxnsearched = nsearched;

  return result;
}
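
/* Usage sketch (illustrative; `db', `key', `keylen' and `uid' stand in
   for values supplied by the request-handling code):

     pthread_rwlock_rdlock (&db->lock);
     struct datahead *dh = cache_search (GETPWBYNAME, key, keylen, db, uid);
     if (dh == NULL)
       ;  // miss: perform the NSS lookup and add the result via cache_add
     else if (dh->notfound)
       ;  // cached negative result: report "not found" to the client
     else
       ;  // positive hit: answer the client from dh
     pthread_rwlock_unlock (&db->lock);
*/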

/* Add a new entry to the cache.  The return value is zero if the function
   call was successful.

   This function must be called with the read-lock held.

   We modify the table but we nevertheless only acquire a read-lock.
   This is ok since we use operations which would be safe even without
   locking, provided the `prune_cache' function is not running at the
   same time (it takes the write-lock before modifying the table).
   Using the read-lock reduces the chance of conflicts.  */
int
cache_add (int type, const void *key, size_t len, struct datahead *packet,
           bool first, struct database_dyn *table,
           uid_t owner, bool prune_wakeup)
{
  if (__glibc_unlikely (debug_level >= 2))
    {
      const char *str;
      char buf[INET6_ADDRSTRLEN + 1];
      if (type == GETHOSTBYADDR || type == GETHOSTBYADDRv6)
        str = inet_ntop (type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                         key, buf, sizeof (buf));
      else
        str = key;

      dbg_log (_("add new entry \"%s\" of type %s for %s to cache%s"),
               str, serv2str[type], dbnames[table - dbs],
               first ? _(" (first)") : "");
    }

  unsigned long int hash = __nss_hash (key, len) % table->head->module;
  struct hashentry *newp;

  newp = mempool_alloc (table, sizeof (struct hashentry), 0);
  /* If we cannot allocate memory, just do not do anything.  */
  if (newp == NULL)
    {
      /* If necessary mark the entry as unusable so that lookups will
         not use it.  */
      if (first)
        packet->usable = false;

      return -1;
    }

  newp->type = type;
  newp->first = first;
  newp->len = len;
  newp->key = (char *) key - table->data;
  assert (newp->key + newp->len <= table->head->first_free);
  newp->owner = owner;
  newp->packet = (char *) packet - table->data;
  assert ((newp->packet & BLOCK_ALIGN_M1) == 0);

  /* Put the new entry in the first position.  */
  /* TODO Review concurrency.  Use atomic_exchange_release.  */
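  /* The entry is linked at the head of the per-bucket chain with a
     compare-and-swap loop: on failure the current head is reloaded into
     newp->next and the exchange is retried, so concurrent cache_add calls
     on the same bucket serialize without the write-lock, and the release
     ordering publishes the fully initialized hashentry before it becomes
     reachable.  */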
  newp->next = atomic_load_relaxed (&table->head->array[hash]);
  while (!atomic_compare_exchange_weak_release (&table->head->array[hash],
                                                (ref_t *) &newp->next,
                                                (ref_t) ((char *) newp
                                                         - table->data)));

  /* Update the statistics.  */
  if (packet->notfound)
    ++table->head->negmiss;
  else if (first)
    ++table->head->posmiss;

  /* We depend on this value being correct and at least as high as the
     real number of entries.  */
  atomic_fetch_add_relaxed (&table->head->nentries, 1);

  /* It does not matter that we are not loading the just-incremented
     value; this is only used for statistics.  */
  unsigned long int nentries = table->head->nentries;
  if (nentries > table->head->maxnentries)
    table->head->maxnentries = nentries;

  if (table->persistent)
    // XXX async OK?
    msync ((void *) table->head,
           (char *) &table->head->array[hash] - (char *) table->head
           + sizeof (ref_t), MS_ASYNC);

  /* We do not have to worry about the pruning thread if we are
     re-adding the data since this is done by the pruning thread.  We
     also do not have to do anything in case this is not the first
     time the data is entered since different data heads all have the
     same timeout.  */
  if (first && prune_wakeup)
    {
      /* The prune thread for the table might not run for a long time.
         Wake it if necessary.  */
      pthread_mutex_lock (&table->prune_lock);
      time_t next_wakeup = table->wakeup_time;
      bool do_wakeup = false;
      if (next_wakeup > packet->timeout + CACHE_PRUNE_INTERVAL)
        {
          table->wakeup_time = packet->timeout;
          do_wakeup = true;
        }
      pthread_mutex_unlock (&table->prune_lock);
      if (do_wakeup)
        pthread_cond_signal (&table->prune_cond);
    }

  return 0;
}
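
/* Usage sketch (illustrative; `db', `key', `keylen', `dh' and `uid' stand
   in for values from the surrounding request handling):

     if (cache_add (GETPWBYNAME, key, keylen, dh, true, db, uid, true) < 0)
       ;  // hash entry allocation failed; dh has been marked unusable

   The read-lock must already be held; `first' is true only for the key
   that owns the datahead, while derived keys added for the same data pass
   false.  */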

/* Walk through the table and remove all entries whose lifetime has ended.

   We have a problem here.  To actually remove the entries we must get
   the write-lock.  But since we want to keep the time we hold the lock
   as short as possible, we cannot simply acquire the lock when we
   start looking for timed-out entries.

   Therefore we do it in two stages: first we look for entries which
   must be invalidated and remember them.  Then we get the lock and
   actually remove them.  This is complicated by the way we have to
   free the data structures since some hash table entries share the same
   data.  */
time_t
prune_cache (struct database_dyn *table, time_t now, int fd)
{
  size_t cnt = table->head->module;

  /* If this table is not actually used, don't do anything.  */
  if (cnt == 0)
    {
      if (fd != -1)
        {
          /* Reply to the INVALIDATE initiator.  */
          int32_t resp = 0;
          writeall (fd, &resp, sizeof (resp));
        }

      /* No need to do this again anytime soon.  */
      return 24 * 60 * 60;
    }

  /* If we check the underlying file for modification, a change also
     invalidates the entries.  */
  if (table->check_file && now != LONG_MAX)
    {
      struct traced_file *runp = table->traced_files;

      while (runp != NULL)
        {
#ifdef HAVE_INOTIFY
          if (runp->inotify_descr[TRACED_FILE] == -1)
#endif
            {
              struct stat64 st;

              if (stat64 (runp->fname, &st) < 0)
                {
                  /* Print a diagnostic that the traced file was missing.
                     We must not disable tracing since the file might return
                     shortly and we want to reload it at the next pruning.
                     Disabling tracing here would go against the configuration
                     as specified by the user via check-files.  */
                  char buf[128];
                  dbg_log (_("checking for monitored file `%s': %s"),
                           runp->fname, strerror_r (errno, buf, sizeof (buf)));
                }
              else
                {
                  /* This must be `!=' to catch cases where users turn the
                     clocks back and we still want to detect any time
                     difference in mtime.  */
                  if (st.st_mtime != runp->mtime)
                    {
                      dbg_log (_("monitored file `%s` changed (mtime)"),
                               runp->fname);
                      /* The file changed.  Invalidate all entries.  */
                      now = LONG_MAX;
                      runp->mtime = st.st_mtime;
#ifdef HAVE_INOTIFY
                      /* Attempt to install a watch on the file.  */
                      install_watches (runp);
#endif
                    }
                }
            }

          runp = runp->next;
        }
    }

  /* We run through the table and find values which are not valid anymore.

     Note that for the initial step, finding the entries to be removed,
     we don't need to get any lock.  It is at all times assured that the
     linked lists are set up correctly and that no second thread prunes
     the cache.  */
  bool *mark;
  size_t memory_needed = cnt * sizeof (bool);
  bool mark_use_alloca;
  if (__glibc_likely (memory_needed <= MAX_STACK_USE))
    {
      mark = alloca (cnt * sizeof (bool));
      memset (mark, '\0', memory_needed);
      mark_use_alloca = true;
    }
  else
    {
      mark = xcalloc (1, memory_needed);
      mark_use_alloca = false;
    }
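  /* mark[i] is set during the scan below when hash bucket i contains at
     least one timed-out or unusable entry, so that only those buckets
     need to be walked again later while the write-lock is held.  */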
  size_t first = cnt + 1;
  size_t last = 0;
  char *const data = table->data;
  bool any = false;

  if (__glibc_unlikely (debug_level > 2))
    dbg_log (_("pruning %s cache; time %ld"),
             dbnames[table - dbs], (long int) now);

#define NO_TIMEOUT LONG_MAX
  time_t next_timeout = NO_TIMEOUT;
  do
    {
      ref_t run = table->head->array[--cnt];

      while (run != ENDREF)
        {
          struct hashentry *runp = (struct hashentry *) (data + run);
          struct datahead *dh = (struct datahead *) (data + runp->packet);

          /* Some debug support.  */
          if (__glibc_unlikely (debug_level > 2))
            {
              char buf[INET6_ADDRSTRLEN];
              const char *str;

              if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
                {
                  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                             data + runp->key, buf, sizeof (buf));
                  str = buf;
                }
              else
                str = data + runp->key;

              dbg_log (_("considering %s entry \"%s\", timeout %" PRIu64),
                       serv2str[runp->type], str, dh->timeout);
            }

          /* Check whether the entry timed out.  Timed out entries
             will be revalidated.  For unusable records, it is still
             necessary to record that the bucket needs to be scanned
             again below.  */
          if (dh->timeout < now || !dh->usable)
            {
              /* This hash bucket could contain entries which need to
                 be looked at.  */
              mark[cnt] = true;

              first = MIN (first, cnt);
              last = MAX (last, cnt);

              /* We only have to look at the data of the first entries
                 since the count information is kept in the data part
                 which is shared.  */
              if (runp->first && dh->usable)
                {
                  /* At this point there are two choices: we reload the
                     value or we discard it.  Do not change NRELOADS if
                     we do not reload the record.  */
                  if ((reload_count != UINT_MAX
                       && __builtin_expect (dh->nreloads >= reload_count, 0))
                      /* We always remove negative entries.  */
                      || dh->notfound
                      /* Discard everything if the user explicitly
                         requests it.  */
                      || now == LONG_MAX)
                    {
                      /* Remove the value.  */
                      dh->usable = false;
                    }
                  else
                    {
                      /* Reload the value.  We do this only for the
                         initially used key, not the additionally
                         added derived value.  */
                      assert (runp->type < LASTREQ
                              && readdfcts[runp->type] != NULL);

                      time_t timeout = readdfcts[runp->type] (table, runp, dh);
                      next_timeout = MIN (next_timeout, timeout);
                    }
                }

              /* If the entry has been replaced, we might need cleanup.  */
              any |= !dh->usable;
            }
          else
            /* Entry has not timed out and is usable.  */
            next_timeout = MIN (next_timeout, dh->timeout);

          run = runp->next;
        }
    }
  while (cnt > 0);

  if (__glibc_unlikely (fd != -1))
    {
      /* Reply to the INVALIDATE initiator that the cache has been
         invalidated.  */
      int32_t resp = 0;
      writeall (fd, &resp, sizeof (resp));
    }

  if (first <= last)
    {
      struct hashentry *head = NULL;

      /* Now we have to get the write lock since we are about to modify
         the table.  */
      if (__glibc_unlikely (pthread_rwlock_trywrlock (&table->lock) != 0))
        {
          ++table->head->wrlockdelayed;
          pthread_rwlock_wrlock (&table->lock);
        }

      /* Now we start modifying the data.  Make sure all readers of the
         data are aware of this and temporarily don't use the data.  */
      atomic_fetch_add_relaxed (&table->head->gc_cycle, 1);
      assert ((table->head->gc_cycle & 1) == 1);
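      /* gc_cycle is odd while the table is being modified and even
         otherwise.  Clients mapping the database read-only can compare
         the counter before and after reading an entry and retry (or fall
         back to asking the daemon) when they observe a change or an odd
         value.  */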

      while (first <= last)
        {
          if (mark[first])
            {
              ref_t *old = &table->head->array[first];
              ref_t run = table->head->array[first];

              assert (run != ENDREF);
              do
                {
                  struct hashentry *runp = (struct hashentry *) (data + run);
                  struct datahead *dh
                    = (struct datahead *) (data + runp->packet);

                  if (! dh->usable)
                    {
                      /* We need the list only for debugging but it is
                         more costly to avoid creating the list than
                         doing it.  */
                      runp->dellist = head;
                      head = runp;

                      /* No need for an atomic operation, we have the
                         write lock.  */
                      --table->head->nentries;

                      run = *old = runp->next;
                    }
                  else
                    {
                      old = &runp->next;
                      run = runp->next;
                    }
                }
              while (run != ENDREF);
            }

          ++first;
        }

      /* Now we are done modifying the data.  */
      atomic_fetch_add_relaxed (&table->head->gc_cycle, 1);
      assert ((table->head->gc_cycle & 1) == 0);

      /* It's all done.  */
      pthread_rwlock_unlock (&table->lock);

      /* Make sure the data is saved to disk.  */
      if (table->persistent)
        msync (table->head,
               data + table->head->first_free - (char *) table->head,
               MS_ASYNC);

      /* One extra pass if we do debugging.  */
      if (__glibc_unlikely (debug_level > 0))
        {
          struct hashentry *runp = head;

          while (runp != NULL)
            {
              char buf[INET6_ADDRSTRLEN];
              const char *str;

              if (runp->type == GETHOSTBYADDR || runp->type == GETHOSTBYADDRv6)
                {
                  inet_ntop (runp->type == GETHOSTBYADDR ? AF_INET : AF_INET6,
                             data + runp->key, buf, sizeof (buf));
                  str = buf;
                }
              else
                str = data + runp->key;

              dbg_log ("remove %s entry \"%s\"", serv2str[runp->type], str);

              runp = runp->dellist;
            }
        }
    }

  if (__glibc_unlikely (! mark_use_alloca))
    free (mark);

  /* Run garbage collection if any entry has been removed or replaced.  */
  if (any)
    gc (table);

  /* If there is no entry in the database and we therefore have no new
     timeout value, tell the caller to wake up in 24 hours.  */
  return next_timeout == NO_TIMEOUT ? 24 * 60 * 60 : next_timeout - now;
}
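
/* Usage sketch (illustrative): the per-database prune thread is expected
   to call prune_cache periodically and sleep for the number of seconds it
   returns, for example:

     time_t wait = prune_cache (db, time (NULL), -1);
     // wait on db->prune_cond for `wait' seconds, or until cache_add
     // signals it because a newly added entry expires sooner

   An fd other than -1 is passed only when the prune run was triggered by
   an INVALIDATE request, so that the requester receives a reply.  */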