1/* Inner loops of cache daemon.
2 Copyright (C) 1998-2024 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <https://www.gnu.org/licenses/>. */
17
18#include <alloca.h>
19#include <assert.h>
20#include <atomic.h>
21#include <error.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <grp.h>
25#include <ifaddrs.h>
26#include <libintl.h>
27#include <pthread.h>
28#include <pwd.h>
29#include <resolv.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <unistd.h>
33#include <stdint.h>
34#include <arpa/inet.h>
35#ifdef HAVE_NETLINK
36# include <linux/netlink.h>
37# include <linux/rtnetlink.h>
38#endif
39#ifdef HAVE_EPOLL
40# include <sys/epoll.h>
41#endif
42#ifdef HAVE_INOTIFY
43# include <sys/inotify.h>
44#endif
45#include <sys/mman.h>
46#include <sys/param.h>
47#include <sys/poll.h>
48#include <sys/socket.h>
49#include <sys/stat.h>
50#include <sys/un.h>
51
52#include "nscd.h"
53#include "dbg_log.h"
54#include "selinux.h"
55#include <resolv/resolv.h>
56
57#include <kernel-features.h>
58#include <libc-diag.h>
59
60
/* Fallback for systems whose headers do not define NGROUPS.  */
#ifndef NGROUPS
# define NGROUPS 32
#endif

/* Support to run nscd as an unprivileged user.  nscd_init only calls
   the privilege-dropping helpers when server_user is non-NULL.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
static gid_t *server_groups;
static int server_ngroups;

/* NOTE(review): presumably the user allowed to query statistics;
   these are set outside the part of the file reviewed here.  */
const char *stat_user;
uid_t stat_uid;

static pthread_attr_t attr;

/* Forward declarations; nscd_init calls the first before the databases
   and socket are set up and the second once they are ready.  */
static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
77
78/* Map request type to a string. */
79const char *const serv2str[LASTREQ] =
80{
81 [GETPWBYNAME] = "GETPWBYNAME",
82 [GETPWBYUID] = "GETPWBYUID",
83 [GETGRBYNAME] = "GETGRBYNAME",
84 [GETGRBYGID] = "GETGRBYGID",
85 [GETHOSTBYNAME] = "GETHOSTBYNAME",
86 [GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87 [GETHOSTBYADDR] = "GETHOSTBYADDR",
88 [GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89 [SHUTDOWN] = "SHUTDOWN",
90 [GETSTAT] = "GETSTAT",
91 [INVALIDATE] = "INVALIDATE",
92 [GETFDPW] = "GETFDPW",
93 [GETFDGR] = "GETFDGR",
94 [GETFDHST] = "GETFDHST",
95 [GETAI] = "GETAI",
96 [INITGROUPS] = "INITGROUPS",
97 [GETSERVBYNAME] = "GETSERVBYNAME",
98 [GETSERVBYPORT] = "GETSERVBYPORT",
99 [GETFDSERV] = "GETFDSERV",
100 [GETNETGRENT] = "GETNETGRENT",
101 [INNETGR] = "INNETGR",
102 [GETFDNETGR] = "GETFDNETGR"
103};
104
105#ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
106# define RWLOCK_INITIALIZER PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
107#else
108# define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
109#endif
110
111/* The control data structures for the services. */
112struct database_dyn dbs[lastdb] =
113{
114 [pwddb] = {
115 .lock = RWLOCK_INITIALIZER,
116 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
117 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
118 .enabled = 0,
119 .check_file = 1,
120 .persistent = 0,
121 .propagate = 1,
122 .shared = 0,
123 .max_db_size = DEFAULT_MAX_DB_SIZE,
124 .suggested_module = DEFAULT_SUGGESTED_MODULE,
125 .db_filename = _PATH_NSCD_PASSWD_DB,
126 .disabled_iov = &pwd_iov_disabled,
127 .postimeout = 3600,
128 .negtimeout = 20,
129 .wr_fd = -1,
130 .ro_fd = -1,
131 .mmap_used = false
132 },
133 [grpdb] = {
134 .lock = RWLOCK_INITIALIZER,
135 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
136 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
137 .enabled = 0,
138 .check_file = 1,
139 .persistent = 0,
140 .propagate = 1,
141 .shared = 0,
142 .max_db_size = DEFAULT_MAX_DB_SIZE,
143 .suggested_module = DEFAULT_SUGGESTED_MODULE,
144 .db_filename = _PATH_NSCD_GROUP_DB,
145 .disabled_iov = &grp_iov_disabled,
146 .postimeout = 3600,
147 .negtimeout = 60,
148 .wr_fd = -1,
149 .ro_fd = -1,
150 .mmap_used = false
151 },
152 [hstdb] = {
153 .lock = RWLOCK_INITIALIZER,
154 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
155 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
156 .enabled = 0,
157 .check_file = 1,
158 .persistent = 0,
159 .propagate = 0, /* Not used. */
160 .shared = 0,
161 .max_db_size = DEFAULT_MAX_DB_SIZE,
162 .suggested_module = DEFAULT_SUGGESTED_MODULE,
163 .db_filename = _PATH_NSCD_HOSTS_DB,
164 .disabled_iov = &hst_iov_disabled,
165 .postimeout = 3600,
166 .negtimeout = 20,
167 .wr_fd = -1,
168 .ro_fd = -1,
169 .mmap_used = false
170 },
171 [servdb] = {
172 .lock = RWLOCK_INITIALIZER,
173 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
174 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
175 .enabled = 0,
176 .check_file = 1,
177 .persistent = 0,
178 .propagate = 0, /* Not used. */
179 .shared = 0,
180 .max_db_size = DEFAULT_MAX_DB_SIZE,
181 .suggested_module = DEFAULT_SUGGESTED_MODULE,
182 .db_filename = _PATH_NSCD_SERVICES_DB,
183 .disabled_iov = &serv_iov_disabled,
184 .postimeout = 28800,
185 .negtimeout = 20,
186 .wr_fd = -1,
187 .ro_fd = -1,
188 .mmap_used = false
189 },
190 [netgrdb] = {
191 .lock = RWLOCK_INITIALIZER,
192 .prune_lock = PTHREAD_MUTEX_INITIALIZER,
193 .prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
194 .enabled = 0,
195 .check_file = 1,
196 .persistent = 0,
197 .propagate = 0, /* Not used. */
198 .shared = 0,
199 .max_db_size = DEFAULT_MAX_DB_SIZE,
200 .suggested_module = DEFAULT_SUGGESTED_MODULE,
201 .db_filename = _PATH_NSCD_NETGROUP_DB,
202 .disabled_iov = &netgroup_iov_disabled,
203 .postimeout = 28800,
204 .negtimeout = 20,
205 .wr_fd = -1,
206 .ro_fd = -1,
207 .mmap_used = false
208 }
209};
210
211
212/* Mapping of request type to database. */
213static struct
214{
215 bool data_request;
216 struct database_dyn *db;
217} const reqinfo[LASTREQ] =
218{
219 [GETPWBYNAME] = { true, .db: &dbs[pwddb] },
220 [GETPWBYUID] = { true, .db: &dbs[pwddb] },
221 [GETGRBYNAME] = { true, .db: &dbs[grpdb] },
222 [GETGRBYGID] = { true, .db: &dbs[grpdb] },
223 [GETHOSTBYNAME] = { true, .db: &dbs[hstdb] },
224 [GETHOSTBYNAMEv6] = { true, .db: &dbs[hstdb] },
225 [GETHOSTBYADDR] = { true, .db: &dbs[hstdb] },
226 [GETHOSTBYADDRv6] = { true, .db: &dbs[hstdb] },
227 [SHUTDOWN] = { false, NULL },
228 [GETSTAT] = { false, NULL },
229 [GETFDPW] = { false, .db: &dbs[pwddb] },
230 [GETFDGR] = { false, .db: &dbs[grpdb] },
231 [GETFDHST] = { false, .db: &dbs[hstdb] },
232 [GETAI] = { true, .db: &dbs[hstdb] },
233 [INITGROUPS] = { true, .db: &dbs[grpdb] },
234 [GETSERVBYNAME] = { true, .db: &dbs[servdb] },
235 [GETSERVBYPORT] = { true, .db: &dbs[servdb] },
236 [GETFDSERV] = { false, .db: &dbs[servdb] },
237 [GETNETGRENT] = { true, .db: &dbs[netgrdb] },
238 [INNETGR] = { true, .db: &dbs[netgrdb] },
239 [GETFDNETGR] = { false, .db: &dbs[netgrdb] }
240};
241
242
/* Thread pool sizing: the initial number of threads (-1 means "not
   configured"; nscd_init then falls back to 4) and the upper limit.  */
int nthreads = -1;
int max_nthreads = 32;

/* Socket for incoming connections; created, bound and put into
   listening state by nscd_init.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor, -1 while there is none.  */
int inotify_fd = -1;
#endif
255
#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;

/* Advance the netlink timestamp counter and return its current value.
   When the increment wraps the counter around to zero it is bumped
   once more, so that zero is stepped over.  */
static uint32_t
__bump_nl_timestamp (void)
{
  static uint32_t nl_timestamp;

  uint32_t bumped = atomic_fetch_add_relaxed (&nl_timestamp, 1) + 1;
  if (bumped == 0)
    /* Wrapped around: skip the zero value.  */
    atomic_fetch_add_relaxed (&nl_timestamp, 1);

  return nl_timestamp;
}
#endif

/* Number of times clients had to wait.  */
unsigned long int client_queued;
274
275
/* Send all LEN bytes of BUF on socket FD, resuming after short sends
   and after interruption by a signal (EINTR).  MSG_NOSIGNAL is passed
   to every send call.

   Returns the number of bytes actually sent, which is less than LEN
   only if send reported 0 before everything was written, or the
   negative send result if a send failed.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  const char *cursor = buf;
  size_t left = len;

  for (;;)
    {
      ssize_t sent;

      /* Retry for as long as the call is merely interrupted.  */
      do
        sent = send (fd, cursor, left, MSG_NOSIGNAL);
      while (sent == -1 && errno == EINTR);

      if (sent < 0)
        return sent;
      if (sent == 0)
        break;

      cursor += sent;
      left -= sent;
      if (left == 0)
        break;
    }

  return len - left;
}
292
293
/* Per-byte markers stored in the usage map that check_use builds while
   a persistent database is verified.  */
enum usekey
  {
    /* The byte is not claimed by any record.  */
    use_not = 0,

    /* Kind of record occupying the byte: hash entry or data.  */
    use_he = 1,
    use_data = 3,

    /* Flag bits that get OR-ed onto a kind.  */
    use_first = 1 << 4,
    use_begin = 1 << 5,
    use_end = 1 << 6,

    /* Named combinations of kind and flag bits.  */
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
310
311
312static int
313check_use (const char *data, nscd_ssize_t first_free, uint8_t *usemap,
314 enum usekey use, ref_t start, size_t len)
315{
316 if (len < 2)
317 return 0;
318
319 if (start > first_free || start + len > first_free
320 || (start & BLOCK_ALIGN_M1))
321 return 0;
322
323 if (usemap[start] == use_not)
324 {
325 /* Add the start marker. */
326 usemap[start] = use | use_begin;
327 use &= ~use_first;
328
329 while (--len > 0)
330 if (usemap[++start] != use_not)
331 return 0;
332 else
333 usemap[start] = use;
334
335 /* Add the end marker. */
336 usemap[start] = use | use_end;
337 }
338 else if ((usemap[start] & ~use_first) == ((use | use_begin) & ~use_first))
339 {
340 /* Hash entries can't be shared. */
341 if (use == use_he)
342 return 0;
343
344 usemap[start] |= (use & use_first);
345 use &= ~use_first;
346
347 while (--len > 1)
348 if (usemap[++start] != use)
349 return 0;
350
351 if (usemap[++start] != (use | use_end))
352 return 0;
353 }
354 else
355 /* Points to a wrong object or somewhere in the middle. */
356 return 0;
357
358 return 1;
359}
360
361
362/* Verify data in persistent database. */
363static int
364verify_persistent_db (void *mem, struct database_pers_head *readhead, int dbnr)
365{
366 assert (dbnr == pwddb || dbnr == grpdb || dbnr == hstdb || dbnr == servdb
367 || dbnr == netgrdb);
368
369 time_t now = time (NULL);
370
371 struct database_pers_head *head = mem;
372 struct database_pers_head head_copy = *head;
373
374 /* Check that the header that was read matches the head in the database. */
375 if (memcmp (s1: head, s2: readhead, n: sizeof (*head)) != 0)
376 return 0;
377
378 /* First some easy tests: make sure the database header is sane. */
379 if (head->version != DB_VERSION
380 || head->header_size != sizeof (*head)
381 /* We allow a timestamp to be one hour ahead of the current time.
382 This should cover daylight saving time changes. */
383 || head->timestamp > now + 60 * 60 + 60
384 || (head->gc_cycle & 1)
385 || head->module == 0
386 || (size_t) head->module > INT32_MAX / sizeof (ref_t)
387 || (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
388 || head->first_free < 0
389 || head->first_free > head->data_size
390 || (head->first_free & BLOCK_ALIGN_M1) != 0
391 || head->maxnentries < 0
392 || head->maxnsearched < 0)
393 return 0;
394
395 uint8_t *usemap = calloc (nmemb: head->first_free, size: 1);
396 if (usemap == NULL)
397 return 0;
398
399 const char *data = (char *) &head->array[roundup (head->module,
400 ALIGN / sizeof (ref_t))];
401
402 nscd_ssize_t he_cnt = 0;
403 for (nscd_ssize_t cnt = 0; cnt < head->module; ++cnt)
404 {
405 ref_t trail = head->array[cnt];
406 ref_t work = trail;
407 int tick = 0;
408
409 while (work != ENDREF)
410 {
411 if (! check_use (data, first_free: head->first_free, usemap, use: use_he, start: work,
412 len: sizeof (struct hashentry)))
413 goto fail;
414
415 /* Now we know we can dereference the record. */
416 struct hashentry *here = (struct hashentry *) (data + work);
417
418 ++he_cnt;
419
420 /* Make sure the record is for this type of service. */
421 if (here->type >= LASTREQ
422 || reqinfo[here->type].db != &dbs[dbnr])
423 goto fail;
424
425 /* Validate boolean field value. */
426 if (here->first != false && here->first != true)
427 goto fail;
428
429 if (here->len < 0)
430 goto fail;
431
432 /* Now the data. */
433 if (here->packet < 0
434 || here->packet > head->first_free
435 || here->packet + sizeof (struct datahead) > head->first_free)
436 goto fail;
437
438 struct datahead *dh = (struct datahead *) (data + here->packet);
439
440 if (! check_use (data, first_free: head->first_free, usemap,
441 use: use_data | (here->first ? use_first : 0),
442 start: here->packet, len: dh->allocsize))
443 goto fail;
444
445 if (dh->allocsize < sizeof (struct datahead)
446 || dh->recsize > dh->allocsize
447 || (dh->notfound != false && dh->notfound != true)
448 || (dh->usable != false && dh->usable != true))
449 goto fail;
450
451 if (here->key < here->packet + sizeof (struct datahead)
452 || here->key > here->packet + dh->allocsize
453 || here->key + here->len > here->packet + dh->allocsize)
454 goto fail;
455
456 work = here->next;
457
458 if (work == trail)
459 /* A circular list, this must not happen. */
460 goto fail;
461 if (tick)
462 trail = ((struct hashentry *) (data + trail))->next;
463 tick = 1 - tick;
464 }
465 }
466
467 if (he_cnt != head->nentries)
468 goto fail;
469
470 /* See if all data and keys had at least one reference from
471 he->first == true hashentry. */
472 for (ref_t idx = 0; idx < head->first_free; ++idx)
473 {
474 if (usemap[idx] == use_data_begin)
475 goto fail;
476 }
477
478 /* Finally, make sure the database hasn't changed since the first test. */
479 if (memcmp (s1: mem, s2: &head_copy, n: sizeof (*head)) != 0)
480 goto fail;
481
482 free (ptr: usemap);
483 return 1;
484
485fail:
486 free (ptr: usemap);
487 return 0;
488}
489
490
491/* Initialize database information structures. */
492void
493nscd_init (void)
494{
495 /* Look up unprivileged uid/gid/groups before we start listening on the
496 socket */
497 if (server_user != NULL)
498 begin_drop_privileges ();
499
500 if (nthreads == -1)
501 /* No configuration for this value, assume a default. */
502 nthreads = 4;
503
504 for (size_t cnt = 0; cnt < lastdb; ++cnt)
505 if (dbs[cnt].enabled)
506 {
507 pthread_rwlock_init (rwlock: &dbs[cnt].lock, NULL);
508 pthread_mutex_init (mutex: &dbs[cnt].memlock, NULL);
509
510 if (dbs[cnt].persistent)
511 {
512 /* Try to open the appropriate file on disk. */
513 int fd = open (file: dbs[cnt].db_filename, O_RDWR | O_CLOEXEC);
514 if (fd != -1)
515 {
516 char *msg = NULL;
517 struct stat64 st;
518 void *mem;
519 size_t total;
520 struct database_pers_head head;
521 ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
522 sizeof (head)));
523 if (n != sizeof (head) || fstat64 (fd: fd, buf: &st) != 0)
524 {
525 fail_db_errno:
526 /* The code is single-threaded at this point so
527 using strerror is just fine. */
528 msg = strerror (errno);
529 fail_db:
530 dbg_log (_("invalid persistent database file \"%s\": %s"),
531 dbs[cnt].db_filename, msg);
532 unlink (name: dbs[cnt].db_filename);
533 }
534 else if (head.module == 0 && head.data_size == 0)
535 {
536 /* The file has been created, but the head has not
537 been initialized yet. */
538 msg = _("uninitialized header");
539 goto fail_db;
540 }
541 else if (head.header_size != (int) sizeof (head))
542 {
543 msg = _("header size does not match");
544 goto fail_db;
545 }
546 else if ((total = (sizeof (head)
547 + roundup (head.module * sizeof (ref_t),
548 ALIGN)
549 + head.data_size))
550 > st.st_size
551 || total < sizeof (head))
552 {
553 msg = _("file size does not match");
554 goto fail_db;
555 }
556 /* Note we map with the maximum size allowed for the
557 database. This is likely much larger than the
558 actual file size. This is OK on most OSes since
559 extensions of the underlying file will
560 automatically translate more pages available for
561 memory access. */
562 else if ((mem = mmap (NULL, len: dbs[cnt].max_db_size,
563 PROT_READ | PROT_WRITE,
564 MAP_SHARED, fd: fd, offset: 0))
565 == MAP_FAILED)
566 goto fail_db_errno;
567 else if (!verify_persistent_db (mem, readhead: &head, dbnr: cnt))
568 {
569 munmap (addr: mem, len: total);
570 msg = _("verification failed");
571 goto fail_db;
572 }
573 else
574 {
575 /* Success. We have the database. */
576 dbs[cnt].head = mem;
577 dbs[cnt].memsize = total;
578 dbs[cnt].data = (char *)
579 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
580 ALIGN / sizeof (ref_t))];
581 dbs[cnt].mmap_used = true;
582
583 if (dbs[cnt].suggested_module > head.module)
584 dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
585 dbnames[cnt]);
586
587 dbs[cnt].wr_fd = fd;
588 fd = -1;
589 /* We also need a read-only descriptor. */
590 if (dbs[cnt].shared)
591 {
592 dbs[cnt].ro_fd = open (file: dbs[cnt].db_filename,
593 O_RDONLY | O_CLOEXEC);
594 if (dbs[cnt].ro_fd == -1)
595 dbg_log (_("\
596cannot create read-only descriptor for \"%s\"; no mmap"),
597 dbs[cnt].db_filename);
598 }
599
600 // XXX Shall we test whether the descriptors actually
601 // XXX point to the same file?
602 }
603
604 /* Close the file descriptors in case something went
605 wrong in which case the variable have not been
606 assigned -1. */
607 if (fd != -1)
608 close (fd: fd);
609 }
610 else if (errno == EACCES)
611 do_exit (EXIT_FAILURE, errnum: 0, _("cannot access '%s'"),
612 dbs[cnt].db_filename);
613 }
614
615 if (dbs[cnt].head == NULL)
616 {
617 /* No database loaded. Allocate the data structure,
618 possibly on disk. */
619 struct database_pers_head head;
620 size_t total = (sizeof (head)
621 + roundup (dbs[cnt].suggested_module
622 * sizeof (ref_t), ALIGN)
623 + (dbs[cnt].suggested_module
624 * DEFAULT_DATASIZE_PER_BUCKET));
625
626 /* Try to create the database. If we do not need a
627 persistent database create a temporary file. */
628 int fd;
629 int ro_fd = -1;
630 if (dbs[cnt].persistent)
631 {
632 fd = open (file: dbs[cnt].db_filename,
633 O_RDWR | O_CREAT | O_EXCL | O_TRUNC | O_CLOEXEC,
634 S_IRUSR | S_IWUSR);
635 if (fd != -1 && dbs[cnt].shared)
636 ro_fd = open (file: dbs[cnt].db_filename,
637 O_RDONLY | O_CLOEXEC);
638 }
639 else
640 {
641 char fname[] = _PATH_NSCD_XYZ_DB_TMP;
642 fd = mkostemp (template: fname, O_CLOEXEC);
643
644 /* We do not need the file name anymore after we
645 opened another file descriptor in read-only mode. */
646 if (fd != -1)
647 {
648 if (dbs[cnt].shared)
649 ro_fd = open (file: fname, O_RDONLY | O_CLOEXEC);
650
651 unlink (name: fname);
652 }
653 }
654
655 if (fd == -1)
656 {
657 if (errno == EEXIST)
658 {
659 dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
660 dbnames[cnt], dbs[cnt].db_filename);
661 do_exit (child_ret: 1, errnum: 0, NULL);
662 }
663
664 if (dbs[cnt].persistent)
665 dbg_log (_("cannot create %s; no persistent database used"),
666 dbs[cnt].db_filename);
667 else
668 dbg_log (_("cannot create %s; no sharing possible"),
669 dbs[cnt].db_filename);
670
671 dbs[cnt].persistent = 0;
672 // XXX remember: no mmap
673 }
674 else
675 {
676 /* Tell the user if we could not create the read-only
677 descriptor. */
678 if (ro_fd == -1 && dbs[cnt].shared)
679 dbg_log (_("\
680cannot create read-only descriptor for \"%s\"; no mmap"),
681 dbs[cnt].db_filename);
682
683 /* Before we create the header, initialize the hash
684 table. That way if we get interrupted while writing
685 the header we can recognize a partially initialized
686 database. */
687 size_t ps = sysconf (_SC_PAGESIZE);
688 char tmpbuf[ps];
689 assert (~ENDREF == 0);
690 memset (s: tmpbuf, c: '\xff', n: ps);
691
692 size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
693 off_t offset = sizeof (head);
694
695 size_t towrite;
696 if (offset % ps != 0)
697 {
698 towrite = MIN (remaining, ps - (offset % ps));
699 if (pwrite (fd: fd, buf: tmpbuf, nbytes: towrite, offset: offset) != towrite)
700 goto write_fail;
701 offset += towrite;
702 remaining -= towrite;
703 }
704
705 while (remaining > ps)
706 {
707 if (pwrite (fd: fd, buf: tmpbuf, nbytes: ps, offset: offset) == -1)
708 goto write_fail;
709 offset += ps;
710 remaining -= ps;
711 }
712
713 if (remaining > 0
714 && pwrite (fd: fd, buf: tmpbuf, nbytes: remaining, offset: offset) != remaining)
715 goto write_fail;
716
717 /* Create the header of the file. */
718 struct database_pers_head head =
719 {
720 .version = DB_VERSION,
721 .header_size = sizeof (head),
722 .module = dbs[cnt].suggested_module,
723 .data_size = (dbs[cnt].suggested_module
724 * DEFAULT_DATASIZE_PER_BUCKET),
725 .first_free = 0
726 };
727 void *mem;
728
729 if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
730 != sizeof (head))
731 || (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, 0, total))
732 != 0)
733 || (mem = mmap (NULL, len: dbs[cnt].max_db_size,
734 PROT_READ | PROT_WRITE,
735 MAP_SHARED, fd: fd, offset: 0)) == MAP_FAILED)
736 {
737 write_fail:
738 unlink (name: dbs[cnt].db_filename);
739 dbg_log (_("cannot write to database file %s: %s"),
740 dbs[cnt].db_filename, strerror (errno));
741 dbs[cnt].persistent = 0;
742 }
743 else
744 {
745 /* Success. */
746 dbs[cnt].head = mem;
747 dbs[cnt].data = (char *)
748 &dbs[cnt].head->array[roundup (dbs[cnt].head->module,
749 ALIGN / sizeof (ref_t))];
750 dbs[cnt].memsize = total;
751 dbs[cnt].mmap_used = true;
752
753 /* Remember the descriptors. */
754 dbs[cnt].wr_fd = fd;
755 dbs[cnt].ro_fd = ro_fd;
756 fd = -1;
757 ro_fd = -1;
758 }
759
760 if (fd != -1)
761 close (fd: fd);
762 if (ro_fd != -1)
763 close (fd: ro_fd);
764 }
765 }
766
767 if (dbs[cnt].head == NULL)
768 {
769 /* We do not use the persistent database. Just
770 create an in-memory data structure. */
771 assert (! dbs[cnt].persistent);
772
773 dbs[cnt].head = xmalloc (n: sizeof (struct database_pers_head)
774 + (dbs[cnt].suggested_module
775 * sizeof (ref_t)));
776 memset (s: dbs[cnt].head, c: '\0', n: sizeof (struct database_pers_head));
777 assert (~ENDREF == 0);
778 memset (s: dbs[cnt].head->array, c: '\xff',
779 n: dbs[cnt].suggested_module * sizeof (ref_t));
780 dbs[cnt].head->module = dbs[cnt].suggested_module;
781 dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
782 * dbs[cnt].head->module);
783 dbs[cnt].data = xmalloc (n: dbs[cnt].head->data_size);
784 dbs[cnt].head->first_free = 0;
785
786 dbs[cnt].shared = 0;
787 assert (dbs[cnt].ro_fd == -1);
788 }
789 }
790
791 /* Create the socket. */
792 sock = socket (AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, protocol: 0);
793 if (sock < 0)
794 {
795 dbg_log (_("cannot open socket: %s"), strerror (errno));
796 do_exit (errno == EACCES ? 4 : 1, errnum: 0, NULL);
797 }
798 /* Bind a name to the socket. */
799 struct sockaddr_un sock_addr;
800 sock_addr.sun_family = AF_UNIX;
801 strcpy (dest: sock_addr.sun_path, _PATH_NSCDSOCKET);
802 if (bind (fd: sock, addr: (struct sockaddr *) &sock_addr, len: sizeof (sock_addr)) < 0)
803 {
804 dbg_log (str: "%s: %s", _PATH_NSCDSOCKET, strerror (errno));
805 do_exit (errno == EACCES ? 4 : 1, errnum: 0, NULL);
806 }
807
808 /* Set permissions for the socket. */
809 chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
810
811 /* Set the socket up to accept connections. */
812 if (listen (fd: sock, SOMAXCONN) < 0)
813 {
814 dbg_log (_("cannot enable socket to accept connections: %s"),
815 strerror (errno));
816 do_exit (child_ret: 1, errnum: 0, NULL);
817 }
818
819#ifdef HAVE_NETLINK
820 if (dbs[hstdb].enabled)
821 {
822 /* Try to open netlink socket to monitor network setting changes. */
823 nl_status_fd = socket (AF_NETLINK,
824 SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK,
825 NETLINK_ROUTE);
826 if (nl_status_fd != -1)
827 {
828 struct sockaddr_nl snl;
829 memset (s: &snl, c: '\0', n: sizeof (snl));
830 snl.nl_family = AF_NETLINK;
831 /* XXX Is this the best set to use? */
832 snl.nl_groups = (RTMGRP_IPV4_IFADDR | RTMGRP_TC | RTMGRP_IPV4_MROUTE
833 | RTMGRP_IPV4_ROUTE | RTMGRP_IPV4_RULE
834 | RTMGRP_IPV6_IFADDR | RTMGRP_IPV6_MROUTE
835 | RTMGRP_IPV6_ROUTE | RTMGRP_IPV6_IFINFO
836 | RTMGRP_IPV6_PREFIX);
837
838 if (bind (fd: nl_status_fd, addr: (struct sockaddr *) &snl, len: sizeof (snl)) != 0)
839 {
840 close (fd: nl_status_fd);
841 nl_status_fd = -1;
842 }
843 else
844 {
845 /* Start the timestamp process. */
846 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
847 = __bump_nl_timestamp ();
848 }
849 }
850 }
851#endif
852
853 /* Change to unprivileged uid/gid/groups if specified in config file */
854 if (server_user != NULL)
855 finish_drop_privileges ();
856}
857
#ifdef HAVE_INOTIFY
#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)

/* Install the inotify watches for the traced file FINFO on inotify_fd:
   one on the file itself and one on its parent directory.  A watch is
   only added if its descriptor slot in FINFO is currently negative, so
   the function can be called again to re-establish lost watches.

   If the file watch cannot be installed the problem is logged and the
   directory watch is not attempted.  */
void
install_watches (struct traced_file *finfo)
{
  /* Use inotify support if we have it.  */
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
                                                           finfo->fname,
                                                           TRACED_FILE_MASK);
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    {
      dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
               finfo->fname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring file `%s` (%d)"),
           finfo->fname, finfo->inotify_descr[TRACED_FILE]);
  /* Additionally listen for events in the file's parent directory.
     We do this because the file to be watched might be
     deleted and then added back again.  When it is added back again
     we must re-add the watch.  We must also cover IN_MOVED_TO to
     detect a file being moved into the directory.  */
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
                                                          finfo->dname,
                                                          TRACED_DIR_MASK);
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    {
      /* Report the directory whose watch failed, not the file.  */
      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
               finfo->dname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring directory `%s` (%d)"),
           finfo->dname, finfo->inotify_descr[TRACED_DIR]);
}
#endif
896
897/* Register the file in FINFO as a traced file for the database DBS[DBIX].
898
899 We support registering multiple files per database. Each call to
900 register_traced_file adds to the list of registered files.
901
902 When we prune the database, either through timeout or a request to
903 invalidate, we will check to see if any of the registered files has changed.
904 When we accept new connections to handle a cache request we will also
905 check to see if any of the registered files has changed.
906
907 If we have inotify support then we install an inotify fd to notify us of
908 file deletion or modification, both of which will require we invalidate
909 the cache for the database. Without inotify support we stat the file and
910 store st_mtime to determine if the file has been modified. */
911void
912register_traced_file (size_t dbidx, struct traced_file *finfo)
913{
914 /* If the database is disabled or file checking is disabled
915 then ignore the registration. */
916 if (! dbs[dbidx].enabled || ! dbs[dbidx].check_file)
917 return;
918
919 if (__glibc_unlikely (debug_level > 0))
920 dbg_log (_("monitoring file %s for database %s"),
921 finfo->fname, dbnames[dbidx]);
922
923#ifdef HAVE_INOTIFY
924 install_watches (finfo);
925#endif
926 struct stat64 st;
927 if (stat64 (file: finfo->fname, buf: &st) < 0)
928 {
929 /* We cannot stat() the file. Set mtime to zero and try again later. */
930 dbg_log (_("stat failed for file `%s'; will try again later: %s"),
931 finfo->fname, strerror (errno));
932 finfo->mtime = 0;
933 }
934 else
935 finfo->mtime = st.st_mtime;
936
937 /* Queue up the file name. */
938 finfo->next = dbs[dbidx].traced_files;
939 dbs[dbidx].traced_files = finfo;
940}
941
942
943/* Close the connections. */
944void
945close_sockets (void)
946{
947 close (fd: sock);
948}
949
950
951static void
952invalidate_cache (char *key, int fd)
953{
954 dbtype number;
955 int32_t resp;
956
957 for (number = pwddb; number < lastdb; ++number)
958 if (strcmp (s1: key, s2: dbnames[number]) == 0)
959 {
960 struct traced_file *runp = dbs[number].traced_files;
961 while (runp != NULL)
962 {
963 /* Make sure we reload from file when checking mtime. */
964 runp->mtime = 0;
965#ifdef HAVE_INOTIFY
966 /* During an invalidation we try to reload the traced
967 file watches. This allows the user to re-sync if
968 inotify events were lost. Similar to what we do during
969 pruning. */
970 install_watches (finfo: runp);
971#endif
972 if (runp->call_res_init)
973 {
974 res_init ();
975 break;
976 }
977 runp = runp->next;
978 }
979 break;
980 }
981
982 if (number == lastdb)
983 {
984 resp = EINVAL;
985 writeall (fd, buf: &resp, len: sizeof (resp));
986 return;
987 }
988
989 if (dbs[number].enabled)
990 {
991 pthread_mutex_lock (mutex: &dbs[number].prune_run_lock);
992 prune_cache (table: &dbs[number], LONG_MAX, fd);
993 pthread_mutex_unlock (mutex: &dbs[number].prune_run_lock);
994 }
995 else
996 {
997 resp = 0;
998 writeall (fd, buf: &resp, len: sizeof (resp));
999 }
1000}
1001
1002
1003#ifdef SCM_RIGHTS
1004static void
1005send_ro_fd (struct database_dyn *db, char *key, int fd)
1006{
1007 /* If we do not have an read-only file descriptor do nothing. */
1008 if (db->ro_fd == -1)
1009 return;
1010
1011 /* We need to send some data along with the descriptor. */
1012 uint64_t mapsize = (db->head->data_size
1013 + roundup (db->head->module * sizeof (ref_t), ALIGN)
1014 + sizeof (struct database_pers_head));
1015 struct iovec iov[2];
1016 iov[0].iov_base = key;
1017 iov[0].iov_len = strlen (s: key) + 1;
1018 iov[1].iov_base = &mapsize;
1019 iov[1].iov_len = sizeof (mapsize);
1020
1021 /* Prepare the control message to transfer the descriptor. */
1022 union
1023 {
1024 struct cmsghdr hdr;
1025 char bytes[CMSG_SPACE (sizeof (int))];
1026 } buf;
1027 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
1028 .msg_control = buf.bytes,
1029 .msg_controllen = sizeof (buf) };
1030 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1031
1032 cmsg->cmsg_level = SOL_SOCKET;
1033 cmsg->cmsg_type = SCM_RIGHTS;
1034 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1035
1036 int *ip = (int *) CMSG_DATA (cmsg);
1037 *ip = db->ro_fd;
1038
1039 msg.msg_controllen = cmsg->cmsg_len;
1040
1041 /* Send the control message. We repeat when we are interrupted but
1042 everything else is ignored. */
1043#ifndef MSG_NOSIGNAL
1044# define MSG_NOSIGNAL 0
1045#endif
1046 (void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1047
1048 if (__glibc_unlikely (debug_level > 0))
1049 dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1050}
1051#endif /* SCM_RIGHTS */
1052
1053
1054/* Handle new request. */
1055static void
1056handle_request (int fd, request_header *req, void *key, uid_t uid, pid_t pid)
1057{
1058 if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1059 {
1060 if (debug_level > 0)
1061 dbg_log (_("\
1062cannot handle old request version %d; current version is %d"),
1063 req->version, NSCD_VERSION);
1064 return;
1065 }
1066
1067 /* Perform the SELinux check before we go on to the standard checks. */
1068 if (selinux_enabled && nscd_request_avc_has_perm (fd, req: req->type) != 0)
1069 {
1070 if (debug_level > 0)
1071 {
1072#ifdef SO_PEERCRED
1073 char pbuf[sizeof ("/proc//exe") + 3 * sizeof (long int)];
1074# ifdef PATH_MAX
1075 char buf[PATH_MAX];
1076# else
1077 char buf[4096];
1078# endif
1079
1080 snprintf (s: pbuf, maxlen: sizeof (pbuf), format: "/proc/%ld/exe", (long int) pid);
1081 ssize_t n = readlink (path: pbuf, buf: buf, len: sizeof (buf) - 1);
1082
1083 if (n <= 0)
1084 dbg_log (_("\
1085request from %ld not handled due to missing permission"), (long int) pid);
1086 else
1087 {
1088 buf[n] = '\0';
1089 dbg_log (_("\
1090request from '%s' [%ld] not handled due to missing permission"),
1091 buf, (long int) pid);
1092 }
1093#else
1094 dbg_log (_("request not handled due to missing permission"));
1095#endif
1096 }
1097 return;
1098 }
1099
1100 struct database_dyn *db = reqinfo[req->type].db;
1101
1102 /* See whether we can service the request from the cache. */
1103 if (__builtin_expect (reqinfo[req->type].data_request, true))
1104 {
1105 if (__builtin_expect (debug_level, 0) > 0)
1106 {
1107 if (req->type == GETHOSTBYADDR || req->type == GETHOSTBYADDRv6)
1108 {
1109 char buf[INET6_ADDRSTRLEN];
1110
1111 dbg_log (str: "\t%s (%s)", serv2str[req->type],
1112 inet_ntop (af: req->type == GETHOSTBYADDR
1113 ? AF_INET : AF_INET6,
1114 cp: key, buf: buf, len: sizeof (buf)));
1115 }
1116 else
1117 dbg_log (str: "\t%s (%s)", serv2str[req->type], (char *) key);
1118 }
1119
1120 /* Is this service enabled? */
1121 if (__glibc_unlikely (!db->enabled))
1122 {
1123 /* No, sent the prepared record. */
1124 if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1125 db->disabled_iov->iov_len,
1126 MSG_NOSIGNAL))
1127 != (ssize_t) db->disabled_iov->iov_len
1128 && __builtin_expect (debug_level, 0) > 0)
1129 {
1130 /* We have problems sending the result. */
1131 char buf[256];
1132 dbg_log (_("cannot write result: %s"),
1133 strerror_r (errno, buf: buf, buflen: sizeof (buf)));
1134 }
1135
1136 return;
1137 }
1138
1139 /* Be sure we can read the data. */
1140 if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != 0))
1141 {
1142 ++db->head->rdlockdelayed;
1143 pthread_rwlock_rdlock (rwlock: &db->lock);
1144 }
1145
1146 /* See whether we can handle it from the cache. */
1147 struct datahead *cached;
1148 cached = (struct datahead *) cache_search (req->type, key, len: req->key_len,
1149 table: db, owner: uid);
1150 if (cached != NULL)
1151 {
1152 /* Hurray it's in the cache. */
1153 if (writeall (fd, buf: cached->data, len: cached->recsize) != cached->recsize
1154 && __glibc_unlikely (debug_level > 0))
1155 {
1156 /* We have problems sending the result. */
1157 char buf[256];
1158 dbg_log (_("cannot write result: %s"),
1159 strerror_r (errno, buf: buf, buflen: sizeof (buf)));
1160 }
1161
1162 pthread_rwlock_unlock (rwlock: &db->lock);
1163
1164 return;
1165 }
1166
1167 pthread_rwlock_unlock (rwlock: &db->lock);
1168 }
1169 else if (__builtin_expect (debug_level, 0) > 0)
1170 {
1171 if (req->type == INVALIDATE)
1172 dbg_log (str: "\t%s (%s)", serv2str[req->type], (char *) key);
1173 else
1174 dbg_log (str: "\t%s", serv2str[req->type]);
1175 }
1176
1177 /* Handle the request. */
1178 switch (req->type)
1179 {
1180 case GETPWBYNAME:
1181 addpwbyname (db, fd, req, key, uid);
1182 break;
1183
1184 case GETPWBYUID:
1185 addpwbyuid (db, fd, req, key, uid);
1186 break;
1187
1188 case GETGRBYNAME:
1189 addgrbyname (db, fd, req, key, uid);
1190 break;
1191
1192 case GETGRBYGID:
1193 addgrbygid (db, fd, req, key, uid);
1194 break;
1195
1196 case GETHOSTBYNAME:
1197 addhstbyname (db, fd, req, key, uid);
1198 break;
1199
1200 case GETHOSTBYNAMEv6:
1201 addhstbynamev6 (db, fd, req, key, uid);
1202 break;
1203
1204 case GETHOSTBYADDR:
1205 addhstbyaddr (db, fd, req, key, uid);
1206 break;
1207
1208 case GETHOSTBYADDRv6:
1209 addhstbyaddrv6 (db, fd, req, key, uid);
1210 break;
1211
1212 case GETAI:
1213 addhstai (db, fd, req, key, uid);
1214 break;
1215
1216 case INITGROUPS:
1217 addinitgroups (db, fd, req, key, uid);
1218 break;
1219
1220 case GETSERVBYNAME:
1221 addservbyname (db, fd, req, key, uid);
1222 break;
1223
1224 case GETSERVBYPORT:
1225 addservbyport (db, fd, req, key, uid);
1226 break;
1227
1228 case GETNETGRENT:
1229 addgetnetgrent (db, fd, req, key, uid);
1230 break;
1231
1232 case INNETGR:
1233 addinnetgr (db, fd, req, key, uid);
1234 break;
1235
1236 case GETSTAT:
1237 case SHUTDOWN:
1238 case INVALIDATE:
1239 {
1240 /* Get the callers credentials. */
1241#ifdef SO_PEERCRED
1242 struct ucred caller;
1243 socklen_t optlen = sizeof (caller);
1244
1245 if (getsockopt (fd: fd, SOL_SOCKET, SO_PEERCRED, optval: &caller, optlen: &optlen) < 0)
1246 {
1247 char buf[256];
1248
1249 dbg_log (_("error getting caller's id: %s"),
1250 strerror_r (errno, buf: buf, buflen: sizeof (buf)));
1251 break;
1252 }
1253
1254 uid = caller.uid;
1255#else
1256 /* Some systems have no SO_PEERCRED implementation. They don't
1257 care about security so we don't as well. */
1258 uid = 0;
1259#endif
1260 }
1261
1262 /* Accept shutdown, getstat and invalidate only from root. For
1263 the stat call also allow the user specified in the config file. */
1264 if (req->type == GETSTAT)
1265 {
1266 if (uid == 0 || uid == stat_uid)
1267 send_stats (fd, dbs);
1268 }
1269 else if (uid == 0)
1270 {
1271 if (req->type == INVALIDATE)
1272 invalidate_cache (key, fd);
1273 else
1274 termination_handler (signum: 0);
1275 }
1276 break;
1277
1278 case GETFDPW:
1279 case GETFDGR:
1280 case GETFDHST:
1281 case GETFDSERV:
1282 case GETFDNETGR:
1283#ifdef SCM_RIGHTS
1284 send_ro_fd (db: reqinfo[req->type].db, key, fd);
1285#endif
1286 break;
1287
1288 default:
1289 /* Ignore the command, it's nothing we know. */
1290 break;
1291 }
1292}
1293
/* Read the complete content of /proc/self/cmdline into a buffer
   allocated with malloc.

   On success the buffer is returned and the number of bytes read is
   stored in *SIZE.  The byte following the data is always set to NUL
   (it is not counted in *SIZE) so that string functions applied to the
   last argument stop inside the buffer even if the data read does not
   end with a NUL byte.  The caller must free the buffer.

   On failure NULL is returned, errno is set (ENOMEM if the buffer
   could not be allocated or enlarged) and *SIZE is not modified.  */
static char *
read_cmdline (size_t *size)
{
  int fd = open ("/proc/self/cmdline", O_RDONLY);
  if (fd < 0)
    return NULL;

  size_t current = 0;
  size_t limit = 1024;
  int saved_errno = ENOMEM;
  char *buffer = malloc (limit);
  if (buffer == NULL)
    goto fail;

  while (1)
    {
      if (current == limit)
        {
          /* The buffer is full.  Double it, but guard against the size
             computation wrapping around.  */
          char *newptr;
          if (limit > SIZE_MAX / 2
              || (newptr = realloc (buffer, 2 * limit)) == NULL)
            {
              saved_errno = ENOMEM;
              goto fail;
            }
          buffer = newptr;
          limit *= 2;
        }

      /* At this point CURRENT < LIMIT always holds.  */
      ssize_t n = TEMP_FAILURE_RETRY (read (fd, buffer + current,
                                            limit - current));
      if (n == -1)
        {
          saved_errno = errno;
          goto fail;
        }
      if (n == 0)
        break;
      current += n;
    }

  /* The loop is only left right after a read which had room for at
     least one byte, therefore BUFFER[CURRENT] is inside the buffer.  */
  buffer[current] = '\0';

  close (fd);
  *size = current;
  return buffer;

 fail:
  /* free and close may modify errno; report the original reason.  */
  free (buffer);
  close (fd);
  errno = saved_errno;
  return NULL;
}
1345
1346
1347/* Restart the process. */
1348static void
1349restart (void)
1350{
1351 /* First determine the parameters. We do not use the parameters
1352 passed to main because then nscd would use the system libc after
1353 restarting even if it was started by a non-system dynamic linker
1354 during glibc testing. */
1355 size_t readlen;
1356 char *cmdline = read_cmdline (size: &readlen);
1357 if (cmdline == NULL)
1358 {
1359 dbg_log (_("\
1360cannot open /proc/self/cmdline: %m; disabling paranoia mode"));
1361 paranoia = 0;
1362 return;
1363 }
1364
1365 /* Parse the command line. Worst case scenario: every two
1366 characters form one parameter (one character plus NUL). */
1367 char **argv = alloca ((readlen / 2 + 1) * sizeof (argv[0]));
1368 int argc = 0;
1369
1370 for (char *cp = cmdline; cp < cmdline + readlen;)
1371 {
1372 argv[argc++] = cp;
1373 cp = strchr (s: cp, c: '\0') + 1;
1374 }
1375 argv[argc] = NULL;
1376
1377 /* Second, change back to the old user if we changed it. */
1378 if (server_user != NULL)
1379 {
1380 if (setresuid (ruid: old_uid, euid: old_uid, suid: old_uid) != 0)
1381 {
1382 dbg_log (_("\
1383cannot change to old UID: %s; disabling paranoia mode"),
1384 strerror (errno));
1385
1386 paranoia = 0;
1387 free (ptr: cmdline);
1388 return;
1389 }
1390
1391 if (setresgid (rgid: old_gid, egid: old_gid, sgid: old_gid) != 0)
1392 {
1393 dbg_log (_("\
1394cannot change to old GID: %s; disabling paranoia mode"),
1395 strerror (errno));
1396
1397 ignore_value (setuid (server_uid));
1398 paranoia = 0;
1399 free (ptr: cmdline);
1400 return;
1401 }
1402 }
1403
1404 /* Next change back to the old working directory. */
1405 if (chdir (path: oldcwd) == -1)
1406 {
1407 dbg_log (_("\
1408cannot change to old working directory: %s; disabling paranoia mode"),
1409 strerror (errno));
1410
1411 if (server_user != NULL)
1412 {
1413 ignore_value (setuid (server_uid));
1414 ignore_value (setgid (server_gid));
1415 }
1416 paranoia = 0;
1417 free (ptr: cmdline);
1418 return;
1419 }
1420
1421 /* Synchronize memory. */
1422 int32_t certainly[lastdb];
1423 for (int cnt = 0; cnt < lastdb; ++cnt)
1424 if (dbs[cnt].enabled)
1425 {
1426 /* Make sure nobody keeps using the database. */
1427 dbs[cnt].head->timestamp = 0;
1428 certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1429 dbs[cnt].head->nscd_certainly_running = 0;
1430
1431 if (dbs[cnt].persistent)
1432 // XXX async OK?
1433 msync (addr: dbs[cnt].head, len: dbs[cnt].memsize, MS_ASYNC);
1434 }
1435
1436 /* The preparations are done. */
1437#ifdef PATH_MAX
1438 char pathbuf[PATH_MAX];
1439#else
1440 char pathbuf[256];
1441#endif
1442 /* Try to exec the real nscd program so the process name (as reported
1443 in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1444 if readlink or the exec with the result of the readlink call fails. */
1445 ssize_t n = readlink (path: "/proc/self/exe", buf: pathbuf, len: sizeof (pathbuf) - 1);
1446 if (n != -1)
1447 {
1448 pathbuf[n] = '\0';
1449 execv (path: pathbuf, argv: argv);
1450 }
1451 execv (path: "/proc/self/exe", argv: argv);
1452
1453 /* If we come here, we will never be able to re-exec. */
1454 dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1455 strerror (errno));
1456
1457 if (server_user != NULL)
1458 {
1459 ignore_value (setuid (server_uid));
1460 ignore_value (setgid (server_gid));
1461 }
1462 if (chdir (path: "/") != 0)
1463 dbg_log (_("cannot change current working directory to \"/\": %s"),
1464 strerror (errno));
1465 paranoia = 0;
1466 free (ptr: cmdline);
1467
1468 /* Re-enable the databases. */
1469 time_t now = time (NULL);
1470 for (int cnt = 0; cnt < lastdb; ++cnt)
1471 if (dbs[cnt].enabled)
1472 {
1473 dbs[cnt].head->timestamp = now;
1474 dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1475 }
1476}
1477
1478
/* List of file descriptors.  The entries are stored in the FDLIST array;
   fd_ready treats an entry whose NEXT member is a null pointer as unused
   and nscd_run_worker resets NEXT to a null pointer once it has taken
   the descriptor out of an entry.  */
struct fdlist
{
  /* The descriptor waiting to be served.  */
  int fd;
  /* Next entry of the ready list, or NULL if the entry is unused.  */
  struct fdlist *next;
};
/* Memory allocated for the list.  fd_ready searches the first NCONNS
   elements for an unused entry.  */
static struct fdlist *fdlist;
/* List of currently ready-to-read file descriptors.  The list is
   circular: READYLIST points to the entry queued last and
   READYLIST->next is the entry the next worker thread takes.  A null
   pointer means the list is empty.  */
static struct fdlist *readylist;

/* Conditional variable and mutex to signal availability of entries in
   READYLIST.  The condvar is initialized dynamically since we might
   use a different clock depending on availability.
   NOTE(review): only the static initializers are visible here;
   presumably the condvar is initialized again somewhere else --
   confirm.  */
static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;

/* The clock to use with the condvar.  nscd_run_prune also uses it to
   compute the absolute timeouts for its timed waits.  */
static clockid_t timeout_clock = CLOCK_REALTIME;

/* Number of threads ready to handle the READYLIST.  nscd_run_worker
   modifies the counter while holding READYLIST_LOCK.  */
static unsigned long int nready;
1501
1502
1503/* Function for the clean-up threads. */
1504static void *
1505__attribute__ ((__noreturn__))
1506nscd_run_prune (void *p)
1507{
1508 const long int my_number = (long int) p;
1509 assert (dbs[my_number].enabled);
1510
1511 int dont_need_update = setup_thread (&dbs[my_number]);
1512
1513 time_t now = time (NULL);
1514
1515 /* We are running. */
1516 dbs[my_number].head->timestamp = now;
1517
1518 struct timespec prune_ts;
1519 if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -1))
1520 /* Should never happen. */
1521 abort ();
1522
1523 /* Compute the initial timeout time. Prevent all the timers to go
1524 off at the same time by adding a db-based value. */
1525 prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1526 dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1527
1528 pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1529 pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1530 pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1531
1532 pthread_mutex_lock (mutex: prune_lock);
1533 while (1)
1534 {
1535 /* Wait, but not forever. */
1536 int e = 0;
1537 if (! dbs[my_number].clear_cache)
1538 e = pthread_cond_timedwait (cond: prune_cond, mutex: prune_lock, abstime: &prune_ts);
1539 assert (__builtin_expect (e == 0 || e == ETIMEDOUT, 1));
1540
1541 time_t next_wait;
1542 now = time (NULL);
1543 if (e == ETIMEDOUT || now >= dbs[my_number].wakeup_time
1544 || dbs[my_number].clear_cache)
1545 {
1546 /* We will determine the new timeout values based on the
1547 cache content. Should there be concurrent additions to
1548 the cache which are not accounted for in the cache
1549 pruning we want to know about it. Therefore set the
1550 timeout to the maximum. It will be decreased when adding
1551 new entries to the cache, if necessary. */
1552 dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1553
1554 /* Unconditionally reset the flag. */
1555 time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1556 dbs[my_number].clear_cache = 0;
1557
1558 pthread_mutex_unlock (mutex: prune_lock);
1559
1560 /* We use a separate lock for running the prune function (instead
1561 of keeping prune_lock locked) because this enables concurrent
1562 invocations of cache_add which might modify the timeout value. */
1563 pthread_mutex_lock (mutex: prune_run_lock);
1564 next_wait = prune_cache (table: &dbs[my_number], now: prune_now, fd: -1);
1565 pthread_mutex_unlock (mutex: prune_run_lock);
1566
1567 next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1568 /* If clients cannot determine for sure whether nscd is running
1569 we need to wake up occasionally to update the timestamp.
1570 Wait 90% of the update period. */
1571#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1572 if (__glibc_unlikely (! dont_need_update))
1573 {
1574 next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1575 dbs[my_number].head->timestamp = now;
1576 }
1577
1578 pthread_mutex_lock (mutex: prune_lock);
1579
1580 /* Make it known when we will wake up again. */
1581 if (now + next_wait < dbs[my_number].wakeup_time)
1582 dbs[my_number].wakeup_time = now + next_wait;
1583 else
1584 next_wait = dbs[my_number].wakeup_time - now;
1585 }
1586 else
1587 /* The cache was just pruned. Do not do it again now. Just
1588 use the new timeout value. */
1589 next_wait = dbs[my_number].wakeup_time - now;
1590
1591 if (clock_gettime (clock_id: timeout_clock, tp: &prune_ts) == -1)
1592 /* Should never happen. */
1593 abort ();
1594
1595 /* Compute next timeout time. */
1596 prune_ts.tv_sec += next_wait;
1597 }
1598}
1599
1600
1601/* This is the main loop. It is replicated in different threads but
1602 the use of the ready list makes sure only one thread handles an
1603 incoming connection. */
1604static void *
1605__attribute__ ((__noreturn__))
1606nscd_run_worker (void *p)
1607{
1608 char buf[256];
1609
1610 /* Initial locking. */
1611 pthread_mutex_lock (mutex: &readylist_lock);
1612
1613 /* One more thread available. */
1614 ++nready;
1615
1616 while (1)
1617 {
1618 while (readylist == NULL)
1619 pthread_cond_wait (cond: &readylist_cond, mutex: &readylist_lock);
1620
1621 struct fdlist *it = readylist->next;
1622 if (readylist->next == readylist)
1623 /* Just one entry on the list. */
1624 readylist = NULL;
1625 else
1626 readylist->next = it->next;
1627
1628 /* Extract the information and mark the record ready to be used
1629 again. */
1630 int fd = it->fd;
1631 it->next = NULL;
1632
1633 /* One more thread available. */
1634 --nready;
1635
1636 /* We are done with the list. */
1637 pthread_mutex_unlock (mutex: &readylist_lock);
1638
1639 /* Now read the request. */
1640 request_header req;
1641 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1642 != sizeof (req), 0))
1643 {
1644 /* We failed to read data. Note that this also might mean we
1645 failed because we would have blocked. */
1646 if (debug_level > 0)
1647 dbg_log (_("short read while reading request: %s"),
1648 strerror_r (errno, buf: buf, buflen: sizeof (buf)));
1649 goto close_and_out;
1650 }
1651
1652 /* Check whether this is a valid request type. */
1653 if (req.type < GETPWBYNAME || req.type >= LASTREQ)
1654 goto close_and_out;
1655
1656 /* Some systems have no SO_PEERCRED implementation. They don't
1657 care about security so we don't as well. */
1658 uid_t uid = -1;
1659#ifdef SO_PEERCRED
1660 pid_t pid = 0;
1661
1662 if (__glibc_unlikely (debug_level > 0))
1663 {
1664 struct ucred caller;
1665 socklen_t optlen = sizeof (caller);
1666
1667 if (getsockopt (fd: fd, SOL_SOCKET, SO_PEERCRED, optval: &caller, optlen: &optlen) == 0)
1668 pid = caller.pid;
1669 }
1670#else
1671 const pid_t pid = 0;
1672#endif
1673
1674 /* It should not be possible to crash the nscd with a silly
1675 request (i.e., a terribly large key). We limit the size to 1kb. */
1676 if (__builtin_expect (req.key_len, 1) < 0
1677 || __builtin_expect (req.key_len, 1) > MAXKEYLEN)
1678 {
1679 if (debug_level > 0)
1680 dbg_log (_("key length in request too long: %d"), req.key_len);
1681 }
1682 else
1683 {
1684 /* Get the key. */
1685 char keybuf[MAXKEYLEN + 1];
1686
1687 if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1688 req.key_len))
1689 != req.key_len, 0))
1690 {
1691 /* Again, this can also mean we would have blocked. */
1692 if (debug_level > 0)
1693 dbg_log (_("short read while reading request key: %s"),
1694 strerror_r (errno, buf: buf, buflen: sizeof (buf)));
1695 goto close_and_out;
1696 }
1697 keybuf[req.key_len] = '\0';
1698
1699 if (__builtin_expect (debug_level, 0) > 0)
1700 {
1701#ifdef SO_PEERCRED
1702 if (pid != 0)
1703 dbg_log (_("\
1704handle_request: request received (Version = %d) from PID %ld"),
1705 req.version, (long int) pid);
1706 else
1707#endif
1708 dbg_log (_("\
1709handle_request: request received (Version = %d)"), req.version);
1710 }
1711
1712 /* Phew, we got all the data, now process it. */
1713 handle_request (fd, req: &req, key: keybuf, uid, pid);
1714 }
1715
1716 close_and_out:
1717 /* We are done. */
1718 close (fd: fd);
1719
1720 /* Re-locking. */
1721 pthread_mutex_lock (mutex: &readylist_lock);
1722
1723 /* One more thread available. */
1724 ++nready;
1725 }
1726 /* NOTREACHED */
1727}
1728
1729
1730static unsigned int nconns;
1731
1732static void
1733fd_ready (int fd)
1734{
1735 pthread_mutex_lock (mutex: &readylist_lock);
1736
1737 /* Find an empty entry in FDLIST. */
1738 size_t inner;
1739 for (inner = 0; inner < nconns; ++inner)
1740 if (fdlist[inner].next == NULL)
1741 break;
1742 assert (inner < nconns);
1743
1744 fdlist[inner].fd = fd;
1745
1746 if (readylist == NULL)
1747 readylist = fdlist[inner].next = &fdlist[inner];
1748 else
1749 {
1750 fdlist[inner].next = readylist->next;
1751 readylist = readylist->next = &fdlist[inner];
1752 }
1753
1754 bool do_signal = true;
1755 if (__glibc_unlikely (nready == 0))
1756 {
1757 ++client_queued;
1758 do_signal = false;
1759
1760 /* Try to start another thread to help out. */
1761 pthread_t th;
1762 if (nthreads < max_nthreads
1763 && pthread_create (newthread: &th, attr: &attr, start_routine: nscd_run_worker,
1764 arg: (void *) (long int) nthreads) == 0)
1765 {
1766 /* We got another thread. */
1767 ++nthreads;
1768 /* The new thread might need a kick. */
1769 do_signal = true;
1770 }
1771
1772 }
1773
1774 pthread_mutex_unlock (mutex: &readylist_lock);
1775
1776 /* Tell one of the worker threads there is work to do. */
1777 if (do_signal)
1778 pthread_cond_signal (cond: &readylist_cond);
1779}
1780
1781
1782/* Check whether restarting should happen. */
1783static bool
1784restart_p (time_t now)
1785{
1786 return (paranoia && readylist == NULL && nready == nthreads
1787 && now >= restart_time);
1788}
1789
1790
/* Array for times a connection was accepted.  main_loop_poll indexes it
   by the slot number in its pollfd array, main_loop_epoll by the file
   descriptor itself.  */
static time_t *starttime;
1793
1794#ifdef HAVE_INOTIFY
/* Inotify event for changed file.  handle_inotify_events reads into BUF
   and the first event in it is then accessed through I.  */
union __inev
{
  struct inotify_event i;
  /* Fallback for systems which do not define PATH_MAX.  */
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif
  /* Room for an event header followed by a name of up to PATH_MAX
     bytes.  */
  char buf[sizeof (struct inotify_event) + PATH_MAX];
};
1804
1805/* Returns 0 if the file is there otherwise -1. */
1806int
1807check_file (struct traced_file *finfo)
1808{
1809 struct stat64 st;
1810 /* We could check mtime and if different re-add
1811 the watches, and invalidate the database, but we
1812 don't because we are called from inotify_check_files
1813 which should be doing that work. If sufficient inotify
1814 events were lost then the next pruning or invalidation
1815 will do the stat and mtime check. We don't do it here to
1816 keep the logic simple. */
1817 if (stat64 (file: finfo->fname, buf: &st) < 0)
1818 return -1;
1819 return 0;
1820}
1821
1822/* Process the inotify event in INEV. If the event matches any of the files
1823 registered with a database then mark that database as requiring its cache
1824 to be cleared. We indicate the cache needs clearing by setting
1825 TO_CLEAR[DBCNT] to true for the matching database. */
1826static void
1827inotify_check_files (bool *to_clear, union __inev *inev)
1828{
1829 /* Check which of the files changed. */
1830 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1831 {
1832 struct traced_file *finfo = dbs[dbcnt].traced_files;
1833
1834 while (finfo != NULL)
1835 {
1836 /* The configuration file was moved or deleted.
1837 We stop watching it at that point, and reinitialize. */
1838 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1839 && ((inev->i.mask & IN_MOVE_SELF)
1840 || (inev->i.mask & IN_DELETE_SELF)
1841 || (inev->i.mask & IN_IGNORED)))
1842 {
1843 int ret;
1844 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1845
1846 if (check_file (finfo) == 0)
1847 {
1848 dbg_log (_("ignored inotify event for `%s` (file exists)"),
1849 finfo->fname);
1850 return;
1851 }
1852
1853 dbg_log (_("monitored file `%s` was %s, removing watch"),
1854 finfo->fname, moved ? "moved" : "deleted");
1855 /* File was moved out, remove the watch. Watches are
1856 automatically removed when the file is deleted. */
1857 if (moved)
1858 {
1859 ret = inotify_rm_watch (fd: inotify_fd, wd: inev->i.wd);
1860 if (ret < 0)
1861 dbg_log (_("failed to remove file watch `%s`: %s"),
1862 finfo->fname, strerror (errno));
1863 }
1864 finfo->inotify_descr[TRACED_FILE] = -1;
1865 to_clear[dbcnt] = true;
1866 if (finfo->call_res_init)
1867 res_init ();
1868 return;
1869 }
1870 /* The configuration file was open for writing and has just closed.
1871 We reset the cache and reinitialize. */
1872 if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1873 && inev->i.mask & IN_CLOSE_WRITE)
1874 {
1875 /* Mark cache as needing to be cleared and reinitialize. */
1876 dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1877 to_clear[dbcnt] = true;
1878 if (finfo->call_res_init)
1879 res_init ();
1880 return;
1881 }
1882 /* The parent directory was moved or deleted. We trigger one last
1883 invalidation. At the next pruning or invalidation we may add
1884 this watch back if the file is present again. */
1885 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1886 && ((inev->i.mask & IN_DELETE_SELF)
1887 || (inev->i.mask & IN_MOVE_SELF)
1888 || (inev->i.mask & IN_IGNORED)))
1889 {
1890 bool moved = (inev->i.mask & IN_MOVE_SELF) != 0;
1891 /* The directory watch may have already been removed
1892 but we don't know so we just remove it again and
1893 ignore the error. Then we remove the file watch.
1894 Note: watches are automatically removed for deleted
1895 files. */
1896 if (moved)
1897 inotify_rm_watch (fd: inotify_fd, wd: inev->i.wd);
1898 if (finfo->inotify_descr[TRACED_FILE] != -1)
1899 {
1900 dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1901 finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1902 if (inotify_rm_watch (fd: inotify_fd, wd: finfo->inotify_descr[TRACED_FILE]) < 0)
1903 dbg_log (_("failed to remove file watch `%s`: %s"),
1904 finfo->dname, strerror (errno));
1905 }
1906 finfo->inotify_descr[TRACED_FILE] = -1;
1907 finfo->inotify_descr[TRACED_DIR] = -1;
1908 to_clear[dbcnt] = true;
1909 if (finfo->call_res_init)
1910 res_init ();
1911 /* Continue to the next entry since this might be the
1912 parent directory for multiple registered files and
1913 we want to remove watches for all registered files. */
1914 continue;
1915 }
1916 /* The parent directory had a create or moved to event. */
1917 if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1918 && ((inev->i.mask & IN_MOVED_TO)
1919 || (inev->i.mask & IN_CREATE))
1920 && strcmp (s1: inev->i.name, s2: finfo->sfname) == 0)
1921 {
1922 /* We detected a directory change. We look for the creation
1923 of the file we are tracking or the move of the same file
1924 into the directory. */
1925 int ret;
1926 dbg_log (_("monitored file `%s` was %s, adding watch"),
1927 finfo->fname,
1928 inev->i.mask & IN_CREATE ? "created" : "moved into place");
1929 /* File was moved in or created. Regenerate the watch. */
1930 if (finfo->inotify_descr[TRACED_FILE] != -1)
1931 inotify_rm_watch (fd: inotify_fd,
1932 wd: finfo->inotify_descr[TRACED_FILE]);
1933
1934 ret = inotify_add_watch (fd: inotify_fd,
1935 name: finfo->fname,
1936 TRACED_FILE_MASK);
1937 if (ret < 0)
1938 dbg_log (_("failed to add file watch `%s`: %s"),
1939 finfo->fname, strerror (errno));
1940
1941 finfo->inotify_descr[TRACED_FILE] = ret;
1942
1943 /* The file is new or moved so mark cache as needing to
1944 be cleared and reinitialize. */
1945 to_clear[dbcnt] = true;
1946 if (finfo->call_res_init)
1947 res_init ();
1948
1949 /* Done re-adding the watch. Don't return, we may still
1950 have other files in this same directory, same watch
1951 descriptor, and need to process them. */
1952 }
1953 /* Other events are ignored, and we move on to the next file. */
1954 finfo = finfo->next;
1955 }
1956 }
1957}
1958
1959/* If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache
1960 for the associated database, otherwise do nothing. The TO_CLEAR array must
1961 have LASTDB entries. */
1962static inline void
1963clear_db_cache (bool *to_clear)
1964{
1965 for (size_t dbcnt = 0; dbcnt < lastdb; ++dbcnt)
1966 if (to_clear[dbcnt])
1967 {
1968 pthread_mutex_lock (mutex: &dbs[dbcnt].prune_lock);
1969 dbs[dbcnt].clear_cache = 1;
1970 pthread_mutex_unlock (mutex: &dbs[dbcnt].prune_lock);
1971 pthread_cond_signal (cond: &dbs[dbcnt].prune_cond);
1972 }
1973}
1974
1975int
1976handle_inotify_events (void)
1977{
1978 bool to_clear[lastdb] = { false, };
1979 union __inev inev;
1980
1981 /* Read all inotify events for files registered via
1982 register_traced_file(). */
1983 while (1)
1984 {
1985 /* Potentially read multiple events into buf. */
1986 ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
1987 &inev.buf,
1988 sizeof (inev)));
1989 if (nb < (ssize_t) sizeof (struct inotify_event))
1990 {
1991 /* Not even 1 event. */
1992 if (__glibc_unlikely (nb == -1 && errno != EAGAIN))
1993 return -1;
1994 /* Done reading events that are ready. */
1995 break;
1996 }
1997 /* Process all events. The normal inotify interface delivers
1998 complete events on a read and never a partial event. */
1999 char *eptr = &inev.buf[0];
2000 ssize_t count;
2001 while (1)
2002 {
2003 /* Check which of the files changed. */
2004 inotify_check_files (to_clear, inev: &inev);
2005 count = sizeof (struct inotify_event) + inev.i.len;
2006 eptr += count;
2007 nb -= count;
2008 if (nb >= (ssize_t) sizeof (struct inotify_event))
2009 memcpy (dest: &inev, src: eptr, n: nb);
2010 else
2011 break;
2012 }
2013 continue;
2014 }
2015 /* Actually perform the cache clearing. */
2016 clear_db_cache (to_clear);
2017 return 0;
2018}
2019
2020#endif
2021
2022static void
2023__attribute__ ((__noreturn__))
2024main_loop_poll (void)
2025{
2026 struct pollfd *conns = (struct pollfd *) xmalloc (n: nconns
2027 * sizeof (conns[0]));
2028
2029 conns[0].fd = sock;
2030 conns[0].events = POLLRDNORM;
2031 size_t nused = 1;
2032 size_t firstfree = 1;
2033
2034#ifdef HAVE_INOTIFY
2035 if (inotify_fd != -1)
2036 {
2037 conns[1].fd = inotify_fd;
2038 conns[1].events = POLLRDNORM;
2039 nused = 2;
2040 firstfree = 2;
2041 }
2042#endif
2043
2044#ifdef HAVE_NETLINK
2045 size_t idx_nl_status_fd = 0;
2046 if (nl_status_fd != -1)
2047 {
2048 idx_nl_status_fd = nused;
2049 conns[nused].fd = nl_status_fd;
2050 conns[nused].events = POLLRDNORM;
2051 ++nused;
2052 firstfree = nused;
2053 }
2054#endif
2055
2056 while (1)
2057 {
2058 /* Wait for any event. We wait at most a couple of seconds so
2059 that we can check whether we should close any of the accepted
2060 connections since we have not received a request. */
2061#define MAX_ACCEPT_TIMEOUT 30
2062#define MIN_ACCEPT_TIMEOUT 5
2063#define MAIN_THREAD_TIMEOUT \
2064 (MAX_ACCEPT_TIMEOUT * 1000 \
2065 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2066
2067 int n = poll (fds: conns, nfds: nused, MAIN_THREAD_TIMEOUT);
2068
2069 time_t now = time (NULL);
2070
2071 /* If there is a descriptor ready for reading or there is a new
2072 connection, process this now. */
2073 if (n > 0)
2074 {
2075 if (conns[0].revents != 0)
2076 {
2077 /* We have a new incoming connection. Accept the connection. */
2078 int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2079 SOCK_NONBLOCK));
2080
2081 /* Use the descriptor if we have not reached the limit. */
2082 if (fd >= 0)
2083 {
2084 if (firstfree < nconns)
2085 {
2086 conns[firstfree].fd = fd;
2087 conns[firstfree].events = POLLRDNORM;
2088 starttime[firstfree] = now;
2089 if (firstfree >= nused)
2090 nused = firstfree + 1;
2091
2092 do
2093 ++firstfree;
2094 while (firstfree < nused && conns[firstfree].fd != -1);
2095 }
2096 else
2097 /* We cannot use the connection so close it. */
2098 close (fd: fd);
2099 }
2100
2101 --n;
2102 }
2103
2104 size_t first = 1;
2105#ifdef HAVE_INOTIFY
2106 if (inotify_fd != -1 && conns[1].fd == inotify_fd)
2107 {
2108 if (conns[1].revents != 0)
2109 {
2110 int ret;
2111 ret = handle_inotify_events ();
2112 if (ret == -1)
2113 {
2114 /* Something went wrong when reading the inotify
2115 data. Better disable inotify. */
2116 dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2117 conns[1].fd = -1;
2118 firstfree = 1;
2119 if (nused == 2)
2120 nused = 1;
2121 close (fd: inotify_fd);
2122 inotify_fd = -1;
2123 }
2124 --n;
2125 }
2126
2127 first = 2;
2128 }
2129#endif
2130
2131#ifdef HAVE_NETLINK
2132 if (idx_nl_status_fd != 0 && conns[idx_nl_status_fd].revents != 0)
2133 {
2134 char buf[4096];
2135 /* Read all the data. We do not interpret it here. */
2136 while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2137 sizeof (buf))) != -1)
2138 ;
2139
2140 dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2141 = __bump_nl_timestamp ();
2142 }
2143#endif
2144
2145 for (size_t cnt = first; cnt < nused && n > 0; ++cnt)
2146 if (conns[cnt].revents != 0)
2147 {
2148 fd_ready (fd: conns[cnt].fd);
2149
2150 /* Clean up the CONNS array. */
2151 conns[cnt].fd = -1;
2152 if (cnt < firstfree)
2153 firstfree = cnt;
2154 if (cnt == nused - 1)
2155 do
2156 --nused;
2157 while (conns[nused - 1].fd == -1);
2158
2159 --n;
2160 }
2161 }
2162
2163 /* Now find entries which have timed out. */
2164 assert (nused > 0);
2165
2166 /* We make the timeout length depend on the number of file
2167 descriptors currently used. */
2168#define ACCEPT_TIMEOUT \
2169 (MAX_ACCEPT_TIMEOUT \
2170 - ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2171 time_t laststart = now - ACCEPT_TIMEOUT;
2172
2173 for (size_t cnt = nused - 1; cnt > 0; --cnt)
2174 {
2175 if (conns[cnt].fd != -1 && starttime[cnt] < laststart)
2176 {
2177 /* Remove the entry, it timed out. */
2178 (void) close (fd: conns[cnt].fd);
2179 conns[cnt].fd = -1;
2180
2181 if (cnt < firstfree)
2182 firstfree = cnt;
2183 if (cnt == nused - 1)
2184 do
2185 --nused;
2186 while (conns[nused - 1].fd == -1);
2187 }
2188 }
2189
2190 if (restart_p (now))
2191 restart ();
2192 }
2193}
2194
2195
#ifdef HAVE_EPOLL
/* Main connection loop, epoll flavour.

   EFD is an epoll descriptor owned by the caller; this function never
   closes it.  The listening socket SOCK and, when they are open, the
   inotify and netlink status descriptors are registered with EFD.  The
   function then loops forever: it accepts new connections, hands
   descriptors with pending requests to fd_ready, closes accepted
   connections which stayed silent for longer than ACCEPT_TIMEOUT, and
   calls restart when restart_p says so.

   The function returns only when one of the initial epoll_ctl
   registrations fails.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Descriptors counted for the ACCEPT_TIMEOUT computation (the macro
     reads NUSED and NCONNS): the listening socket, the inotify
     descriptor when registered, and every accepted connection which is
     still waiting to deliver its request.  */
  int nused = 1;
  /* Upper bound for the descriptor numbers which may have a non-zero
     STARTTIME slot.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      /* A negative result (error) simply skips the event loop below;
         the timeout sweep and the restart check still run.  */
      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  EV.events still holds
                   EPOLLRDNORM from the registrations above.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large (it is used as an index
                     into STARTTIME) or something went wrong.  Close the
                     descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            int ret;
            ret = handle_inotify_events ();
            if (ret == -1)
              {
                /* Something went wrong when reading the inotify
                   data.  Better disable inotify.  */
                dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
                (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
                close (inotify_fd);
                inotify_fd = -1;
                /* The descriptor was counted when it was registered;
                   it no longer is in use.  */
                --nused;
                /* The remaining events of this batch are skipped.  */
                break;
              }
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
              = __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      /* Only accepted connections ever get a non-zero start time, so
         the service descriptors must never look like timeout
         candidates.  */
      assert (starttime[sock] == 0);
# ifdef HAVE_INOTIFY
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
# endif
# ifdef HAVE_NETLINK
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
# endif
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection was counted when it was accepted.  Without
               this the count only ever grows on timeouts and
               ACCEPT_TIMEOUT, which is derived from it, keeps
               shrinking.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          /* Trim unused slots at the top of the range.  */
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2348
2349
2350/* Start all the threads we want. The initial process is thread no. 1. */
2351void
2352start_threads (void)
2353{
2354 /* Initialize the conditional variable we will use. The only
2355 non-standard attribute we might use is the clock selection. */
2356 pthread_condattr_t condattr;
2357 pthread_condattr_init (attr: &condattr);
2358
2359#if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2360 && defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2361 /* Determine whether the monotonous clock is available. */
2362 struct timespec dummy;
2363# if _POSIX_MONOTONIC_CLOCK == 0
2364 if (sysconf (_SC_MONOTONIC_CLOCK) > 0)
2365# endif
2366# if _POSIX_CLOCK_SELECTION == 0
2367 if (sysconf (_SC_CLOCK_SELECTION) > 0)
2368# endif
2369 if (clock_getres (CLOCK_MONOTONIC, res: &dummy) == 0
2370 && pthread_condattr_setclock (attr: &condattr, CLOCK_MONOTONIC) == 0)
2371 timeout_clock = CLOCK_MONOTONIC;
2372#endif
2373
2374 /* Create the attribute for the threads. They are all created
2375 detached. */
2376 pthread_attr_init (attr: &attr);
2377 pthread_attr_setdetachstate (attr: &attr, PTHREAD_CREATE_DETACHED);
2378 /* Use 1MB stacks, twice as much for 64-bit architectures. */
2379 pthread_attr_setstacksize (attr: &attr, NSCD_THREAD_STACKSIZE);
2380
2381 /* We allow less than LASTDB threads only for debugging. */
2382 if (debug_level == 0)
2383 nthreads = MAX (nthreads, lastdb);
2384
2385 /* Create the threads which prune the databases. */
2386 // XXX Ideally this work would be done by some of the worker threads.
2387 // XXX But this is problematic since we would need to be able to wake
2388 // XXX them up explicitly as well as part of the group handling the
2389 // XXX ready-list. This requires an operation where we can wait on
2390 // XXX two conditional variables at the same time. This operation
2391 // XXX does not exist (yet).
2392 for (long int i = 0; i < lastdb; ++i)
2393 {
2394 /* Initialize the conditional variable. */
2395 if (pthread_cond_init (cond: &dbs[i].prune_cond, cond_attr: &condattr) != 0)
2396 {
2397 dbg_log (_("could not initialize conditional variable"));
2398 do_exit (child_ret: 1, errnum: 0, NULL);
2399 }
2400
2401 pthread_t th;
2402 if (dbs[i].enabled
2403 && pthread_create (newthread: &th, attr: &attr, start_routine: nscd_run_prune, arg: (void *) i) != 0)
2404 {
2405 dbg_log (_("could not start clean-up thread; terminating"));
2406 do_exit (child_ret: 1, errnum: 0, NULL);
2407 }
2408 }
2409
2410 pthread_condattr_destroy (attr: &condattr);
2411
2412 for (long int i = 0; i < nthreads; ++i)
2413 {
2414 pthread_t th;
2415 if (pthread_create (newthread: &th, attr: &attr, start_routine: nscd_run_worker, NULL) != 0)
2416 {
2417 if (i == 0)
2418 {
2419 dbg_log (_("could not start any worker thread; terminating"));
2420 do_exit (child_ret: 1, errnum: 0, NULL);
2421 }
2422
2423 break;
2424 }
2425 }
2426
2427 /* Now it is safe to let the parent know that we're doing fine and it can
2428 exit. */
2429 notify_parent (child_ret: 0);
2430
2431 /* Determine how much room for descriptors we should initially
2432 allocate. This might need to change later if we cap the number
2433 with MAXCONN. */
2434 const long int nfds = sysconf (_SC_OPEN_MAX);
2435#define MINCONN 32
2436#define MAXCONN 16384
2437 if (nfds == -1 || nfds > MAXCONN)
2438 nconns = MAXCONN;
2439 else if (nfds < MINCONN)
2440 nconns = MINCONN;
2441 else
2442 nconns = nfds;
2443
2444 /* We need memory to pass descriptors on to the worker threads. */
2445 fdlist = (struct fdlist *) xcalloc (n: nconns, s: sizeof (fdlist[0]));
2446 /* Array to keep track when connection was accepted. */
2447 starttime = (time_t *) xcalloc (n: nconns, s: sizeof (starttime[0]));
2448
2449 /* In the main thread we execute the loop which handles incoming
2450 connections. */
2451#ifdef HAVE_EPOLL
2452 int efd = epoll_create (size: 100);
2453 if (efd != -1)
2454 {
2455 main_loop_epoll (efd);
2456 close (fd: efd);
2457 }
2458#endif
2459
2460 main_loop_poll ();
2461}
2462
2463
2464/* Look up the uid, gid, and supplementary groups to run nscd as. When
2465 this function is called, we are not listening on the nscd socket yet so
2466 we can just use the ordinary lookup functions without causing a lockup */
2467static void
2468begin_drop_privileges (void)
2469{
2470 struct passwd *pwd = getpwnam (name: server_user);
2471
2472 if (pwd == NULL)
2473 {
2474 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2475 do_exit (EXIT_FAILURE, errnum: 0,
2476 _("Failed to run nscd as user '%s'"), server_user);
2477 }
2478
2479 server_uid = pwd->pw_uid;
2480 server_gid = pwd->pw_gid;
2481
2482 /* Save the old UID/GID if we have to change back. */
2483 if (paranoia)
2484 {
2485 old_uid = getuid ();
2486 old_gid = getgid ();
2487 }
2488
2489 if (getgrouplist (user: server_user, group: server_gid, NULL, ngroups: &server_ngroups) == 0)
2490 {
2491 /* This really must never happen. */
2492 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2493 do_exit (EXIT_FAILURE, errno,
2494 _("initial getgrouplist failed"));
2495 }
2496
2497 server_groups = (gid_t *) xmalloc (n: server_ngroups * sizeof (gid_t));
2498
2499 if (getgrouplist (user: server_user, group: server_gid, groups: server_groups, ngroups: &server_ngroups)
2500 == -1)
2501 {
2502 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2503 do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2504 }
2505}
2506
2507
2508/* Call setgroups(), setgid(), and setuid() to drop root privileges and
2509 run nscd as the user specified in the configuration file. */
2510static void
2511finish_drop_privileges (void)
2512{
2513#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2514 /* We need to preserve the capabilities to connect to the audit daemon. */
2515 cap_t new_caps = preserve_capabilities ();
2516#endif
2517
2518 if (setgroups (n: server_ngroups, groups: server_groups) == -1)
2519 {
2520 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2521 do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2522 }
2523
2524 int res;
2525 if (paranoia)
2526 res = setresgid (rgid: server_gid, egid: server_gid, sgid: old_gid);
2527 else
2528 res = setgid (server_gid);
2529 if (res == -1)
2530 {
2531 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2532 do_exit (child_ret: 4, errno, format: "setgid");
2533 }
2534
2535 if (paranoia)
2536 res = setresuid (ruid: server_uid, euid: server_uid, suid: old_uid);
2537 else
2538 res = setuid (server_uid);
2539 if (res == -1)
2540 {
2541 dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2542 do_exit (child_ret: 4, errno, format: "setuid");
2543 }
2544
2545#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2546 /* Remove the temporary capabilities. */
2547 install_real_capabilities (new_caps);
2548#endif
2549}
2550

source code of glibc/nscd/connections.c