connections.c source code [glibc/nscd/connections.c]

/* Inner loops of cache daemon.
   Copyright (C) 1998-2024 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <https://www.gnu.org/licenses/>.  */
17
18	#include <alloca.h>
19	#include <assert.h>
20	#include <atomic.h>
21	#include <error.h>
22	#include <errno.h>
23	#include <fcntl.h>
24	#include <grp.h>
25	#include <ifaddrs.h>
26	#include <libintl.h>
27	#include <pthread.h>
28	#include <pwd.h>
29	#include <resolv.h>
30	#include <stdio.h>
31	#include <stdlib.h>
32	#include <unistd.h>
33	#include <stdint.h>
34	#include <arpa/inet.h>
35	#ifdef HAVE_NETLINK
36	# include <linux/netlink.h>
37	# include <linux/rtnetlink.h>
38	#endif
39	#ifdef HAVE_EPOLL
40	# include <sys/epoll.h>
41	#endif
42	#ifdef HAVE_INOTIFY
43	# include <sys/inotify.h>
44	#endif
45	#include <sys/mman.h>
46	#include <sys/param.h>
47	#include <sys/poll.h>
48	#include <sys/socket.h>
49	#include <sys/stat.h>
50	#include <sys/un.h>
51
52	#include "nscd.h"
53	#include "dbg_log.h"
54	#include "selinux.h"
55	#include <resolv/resolv.h>
56
57	#include <kernel-features.h>
58	#include <libc-diag.h>
59
60
/* Support to run nscd as an unprivileged user.  SERVER_USER is checked
   against NULL in nscd_init to decide whether begin_drop_privileges and
   finish_drop_privileges are called.  */
const char *server_user;
static uid_t server_uid;
static gid_t server_gid;
const char *stat_user;
uid_t stat_uid;
static gid_t *server_groups;
/* Fallback for systems whose headers do not provide NGROUPS.  */
#ifndef NGROUPS
# define NGROUPS 32
#endif
static int server_ngroups;

static pthread_attr_t attr;

static void begin_drop_privileges (void);
static void finish_drop_privileges (void);
77
78	/ Map request type to a string. /
79	const char *const serv2str[LASTREQ] =
80	{
81	[GETPWBYNAME] = "GETPWBYNAME",
82	[GETPWBYUID] = "GETPWBYUID",
83	[GETGRBYNAME] = "GETGRBYNAME",
84	[GETGRBYGID] = "GETGRBYGID",
85	[GETHOSTBYNAME] = "GETHOSTBYNAME",
86	[GETHOSTBYNAMEv6] = "GETHOSTBYNAMEv6",
87	[GETHOSTBYADDR] = "GETHOSTBYADDR",
88	[GETHOSTBYADDRv6] = "GETHOSTBYADDRv6",
89	[SHUTDOWN] = "SHUTDOWN",
90	[GETSTAT] = "GETSTAT",
91	[INVALIDATE] = "INVALIDATE",
92	[GETFDPW] = "GETFDPW",
93	[GETFDGR] = "GETFDGR",
94	[GETFDHST] = "GETFDHST",
95	[GETAI] = "GETAI",
96	[INITGROUPS] = "INITGROUPS",
97	[GETSERVBYNAME] = "GETSERVBYNAME",
98	[GETSERVBYPORT] = "GETSERVBYPORT",
99	[GETFDSERV] = "GETFDSERV",
100	[GETNETGRENT] = "GETNETGRENT",
101	[INNETGR] = "INNETGR",
102	[GETFDNETGR] = "GETFDNETGR"
103	};
104
/* Static initializer for the per-database rwlocks: use the
   non-recursive writer-preferring initializer when the C library
   offers it, otherwise fall back to the default one.  */
#ifdef PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
# define RWLOCK_INITIALIZER PTHREAD_RWLOCK_WRITER_NONRECURSIVE_INITIALIZER_NP
#else
# define RWLOCK_INITIALIZER PTHREAD_RWLOCK_INITIALIZER
#endif
110
111	/ The control data structures for the services. /
112	struct database_dyn dbs[lastdb] =
113	{
114	[pwddb] = {
115	.lock = RWLOCK_INITIALIZER,
116	.prune_lock = PTHREAD_MUTEX_INITIALIZER,
117	.prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
118	.enabled = `0`,
119	.check_file = `1`,
120	.persistent = `0`,
121	.propagate = `1`,
122	.shared = `0`,
123	.max_db_size = DEFAULT_MAX_DB_SIZE,
124	.suggested_module = DEFAULT_SUGGESTED_MODULE,
125	.db_filename = _PATH_NSCD_PASSWD_DB,
126	.disabled_iov = &pwd_iov_disabled,
127	.postimeout = `3600`,
128	.negtimeout = `20`,
129	.wr_fd = -`1`,
130	.ro_fd = -`1`,
131	.mmap_used = false
132	},
133	[grpdb] = {
134	.lock = RWLOCK_INITIALIZER,
135	.prune_lock = PTHREAD_MUTEX_INITIALIZER,
136	.prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
137	.enabled = `0`,
138	.check_file = `1`,
139	.persistent = `0`,
140	.propagate = `1`,
141	.shared = `0`,
142	.max_db_size = DEFAULT_MAX_DB_SIZE,
143	.suggested_module = DEFAULT_SUGGESTED_MODULE,
144	.db_filename = _PATH_NSCD_GROUP_DB,
145	.disabled_iov = &grp_iov_disabled,
146	.postimeout = `3600`,
147	.negtimeout = `60`,
148	.wr_fd = -`1`,
149	.ro_fd = -`1`,
150	.mmap_used = false
151	},
152	[hstdb] = {
153	.lock = RWLOCK_INITIALIZER,
154	.prune_lock = PTHREAD_MUTEX_INITIALIZER,
155	.prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
156	.enabled = `0`,
157	.check_file = `1`,
158	.persistent = `0`,
159	.propagate = `0`, / Not used. /
160	.shared = `0`,
161	.max_db_size = DEFAULT_MAX_DB_SIZE,
162	.suggested_module = DEFAULT_SUGGESTED_MODULE,
163	.db_filename = _PATH_NSCD_HOSTS_DB,
164	.disabled_iov = &hst_iov_disabled,
165	.postimeout = `3600`,
166	.negtimeout = `20`,
167	.wr_fd = -`1`,
168	.ro_fd = -`1`,
169	.mmap_used = false
170	},
171	[servdb] = {
172	.lock = RWLOCK_INITIALIZER,
173	.prune_lock = PTHREAD_MUTEX_INITIALIZER,
174	.prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
175	.enabled = `0`,
176	.check_file = `1`,
177	.persistent = `0`,
178	.propagate = `0`, / Not used. /
179	.shared = `0`,
180	.max_db_size = DEFAULT_MAX_DB_SIZE,
181	.suggested_module = DEFAULT_SUGGESTED_MODULE,
182	.db_filename = _PATH_NSCD_SERVICES_DB,
183	.disabled_iov = &serv_iov_disabled,
184	.postimeout = `28800`,
185	.negtimeout = `20`,
186	.wr_fd = -`1`,
187	.ro_fd = -`1`,
188	.mmap_used = false
189	},
190	[netgrdb] = {
191	.lock = RWLOCK_INITIALIZER,
192	.prune_lock = PTHREAD_MUTEX_INITIALIZER,
193	.prune_run_lock = PTHREAD_MUTEX_INITIALIZER,
194	.enabled = `0`,
195	.check_file = `1`,
196	.persistent = `0`,
197	.propagate = `0`, / Not used. /
198	.shared = `0`,
199	.max_db_size = DEFAULT_MAX_DB_SIZE,
200	.suggested_module = DEFAULT_SUGGESTED_MODULE,
201	.db_filename = _PATH_NSCD_NETGROUP_DB,
202	.disabled_iov = &netgroup_iov_disabled,
203	.postimeout = `28800`,
204	.negtimeout = `20`,
205	.wr_fd = -`1`,
206	.ro_fd = -`1`,
207	.mmap_used = false
208	}
209	};
210
211
212	/ Mapping of request type to database. /
213	static struct
214	{
215	bool data_request;
216	struct database_dyn *db;
217	} const reqinfo[LASTREQ] =
218	{
219	[GETPWBYNAME] = { true, .db: &dbs[pwddb] },
220	[GETPWBYUID] = { true, .db: &dbs[pwddb] },
221	[GETGRBYNAME] = { true, .db: &dbs[grpdb] },
222	[GETGRBYGID] = { true, .db: &dbs[grpdb] },
223	[GETHOSTBYNAME] = { true, .db: &dbs[hstdb] },
224	[GETHOSTBYNAMEv6] = { true, .db: &dbs[hstdb] },
225	[GETHOSTBYADDR] = { true, .db: &dbs[hstdb] },
226	[GETHOSTBYADDRv6] = { true, .db: &dbs[hstdb] },
227	[SHUTDOWN] = { false, NULL },
228	[GETSTAT] = { false, NULL },
229	[GETFDPW] = { false, .db: &dbs[pwddb] },
230	[GETFDGR] = { false, .db: &dbs[grpdb] },
231	[GETFDHST] = { false, .db: &dbs[hstdb] },
232	[GETAI] = { true, .db: &dbs[hstdb] },
233	[INITGROUPS] = { true, .db: &dbs[grpdb] },
234	[GETSERVBYNAME] = { true, .db: &dbs[servdb] },
235	[GETSERVBYPORT] = { true, .db: &dbs[servdb] },
236	[GETFDSERV] = { false, .db: &dbs[servdb] },
237	[GETNETGRENT] = { true, .db: &dbs[netgrdb] },
238	[INNETGR] = { true, .db: &dbs[netgrdb] },
239	[GETFDNETGR] = { false, .db: &dbs[netgrdb] }
240	};
241
242
/* Initial number of threads to use.  -1 means "not configured";
   nscd_init replaces it with a default.  */
int nthreads = -1;
/* Maximum number of threads to use.  */
int max_nthreads = 32;

/* Socket for incoming connections.  */
static int sock;

#ifdef HAVE_INOTIFY
/* Inotify descriptor.  */
int inotify_fd = -1;
#endif
255
#ifdef HAVE_NETLINK
/* Descriptor for netlink status updates.  */
static int nl_status_fd = -1;

/* Advance the function-local netlink timestamp counter by one and
   return its current value.  If the increment wraps the counter to
   zero it is incremented a second time so that zero is skipped.  */
static uint32_t
__bump_nl_timestamp (void)
{
  static uint32_t nl_timestamp;

  if (atomic_fetch_add_relaxed (&nl_timestamp, 1) + 1 == 0)
    atomic_fetch_add_relaxed (&nl_timestamp, 1);

  return nl_timestamp;
}
#endif
271
/* Number of times clients had to wait.  */
unsigned long int client_queued;
274
275
/* Send all LEN bytes of BUF over the socket FD, retrying after short
   writes and after interruption by a signal.  MSG_NOSIGNAL is passed
   to send.

   Returns a negative value if send reported an error, otherwise the
   number of bytes actually sent; this is less than LEN only when send
   returned zero before everything was written.  */
ssize_t
writeall (int fd, const void *buf, size_t len)
{
  size_t n = len;
  ssize_t ret;
  do
    {
      ret = TEMP_FAILURE_RETRY (send (fd, buf, n, MSG_NOSIGNAL));
      if (ret <= 0)
        break;
      /* Skip over the part that has been sent already.  */
      buf = (const char *) buf + ret;
      n -= ret;
    }
  while (n > 0);
  return ret < 0 ? ret : len - n;
}
292
293
/* Markers stored in the per-byte usage map built while verifying a
   persistent database.  The low bits say what kind of object occupies
   a byte (hash entry or data); the begin/end/first flags are OR-ed in
   on the first and last byte of an object.  */
enum usekey
  {
    use_not = 0,
    /* The following three are not really used, they are symbolic constants.  */
    use_first = 16,
    use_begin = 32,
    use_end = 64,

    use_he = 1,
    use_he_begin = use_he | use_begin,
    use_he_end = use_he | use_end,
    use_data = 3,
    use_data_begin = use_data | use_begin,
    use_data_end = use_data | use_end,
    use_data_first = use_data_begin | use_first
  };
310
311
312	static int
313	check_use (const char data, nscd_ssize_t first_free, uint8_t usemap,
314	enum usekey use, ref_t start, size_t len)
315	{
316	if (len < `2`)
317	return `0`;
318
319	if (start > first_free \|\| start + len > first_free
320	\|\| (start & BLOCK_ALIGN_M1))
321	return `0`;
322
323	if (usemap[start] == use_not)
324	{
325	/ Add the start marker. /
326	usemap[start] = use \| use_begin;
327	use &= ~use_first;
328
329	while (--len > `0`)
330	if (usemap[++start] != use_not)
331	return `0`;
332	else
333	usemap[start] = use;
334
335	/ Add the end marker. /
336	usemap[start] = use \| use_end;
337	}
338	else if ((usemap[start] & ~use_first) == ((use \| use_begin) & ~use_first))
339	{
340	/ Hash entries can't be shared. /
341	if (use == use_he)
342	return `0`;
343
344	usemap[start] \|= (use & use_first);
345	use &= ~use_first;
346
347	while (--len > `1`)
348	if (usemap[++start] != use)
349	return `0`;
350
351	if (usemap[++start] != (use \| use_end))
352	return `0`;
353	}
354	else
355	/ Points to a wrong object or somewhere in the middle. /
356	return `0`;
357
358	return `1`;
359	}
360
361
362	/ Verify data in persistent database. /
363	static int
364	verify_persistent_db (void mem, struct* database_pers_head readhead, int* dbnr)
365	{
366	assert (dbnr == pwddb \|\| dbnr == grpdb \|\| dbnr == hstdb \|\| dbnr == servdb
367	\|\| dbnr == netgrdb);
368
369	time_t now = time (NULL);
370
371	struct database_pers_head *head = mem;
372	struct database_pers_head head_copy = *head;
373
374	/ Check that the header that was read matches the head in the database. /
375	if (memcmp (s1: head, s2: readhead, n: sizeof (*head)) != `0`)
376	return `0`;
377
378	/ First some easy tests: make sure the database header is sane. /
379	if (head->version != DB_VERSION
380	\|\| head->header_size != sizeof (*head)
381	/ We allow a timestamp to be one hour ahead of the current time.*
382	This should cover daylight saving time changes. /*
383	\|\| head->timestamp > now + `60` * `60` + `60`
384	\|\| (head->gc_cycle & `1`)
385	\|\| head->module == `0`
386	\|\| (size_t) head->module > INT32_MAX / sizeof (ref_t)
387	\|\| (size_t) head->data_size > INT32_MAX - head->module * sizeof (ref_t)
388	\|\| head->first_free < `0`
389	\|\| head->first_free > head->data_size
390	\|\| (head->first_free & BLOCK_ALIGN_M1) != `0`
391	\|\| head->maxnentries < `0`
392	\|\| head->maxnsearched < `0`)
393	return `0`;
394
395	uint8_t *usemap = calloc (nmemb: head->first_free, size: `1`);
396	if (usemap == NULL)
397	return `0`;
398
399	const char data = (char* *) &head->array[roundup (head->module,
400	ALIGN / sizeof (ref_t))];
401
402	nscd_ssize_t he_cnt = `0`;
403	for (nscd_ssize_t cnt = `0`; cnt < head->module; ++cnt)
404	{
405	ref_t trail = head->array[cnt];
406	ref_t work = trail;
407	int tick = `0`;
408
409	while (work != ENDREF)
410	{
411	if (! check_use (data, first_free: head->first_free, usemap, use: use_he, start: work,
412	len: sizeof (struct hashentry)))
413	goto fail;
414
415	/ Now we know we can dereference the record. /
416	struct hashentry here = (struct* hashentry *) (data + work);
417
418	++he_cnt;
419
420	/ Make sure the record is for this type of service. /
421	if (here->type >= LASTREQ
422	\|\| reqinfo[here->type].db != &dbs[dbnr])
423	goto fail;
424
425	/ Validate boolean field value. /
426	if (here->first != false && here->first != true)
427	goto fail;
428
429	if (here->len < `0`)
430	goto fail;
431
432	/ Now the data. /
433	if (here->packet < `0`
434	\|\| here->packet > head->first_free
435	\|\| here->packet + sizeof (struct datahead) > head->first_free)
436	goto fail;
437
438	struct datahead dh = (struct* datahead *) (data + here->packet);
439
440	if (! check_use (data, first_free: head->first_free, usemap,
441	use: use_data \| (here->first ? use_first : `0`),
442	start: here->packet, len: dh->allocsize))
443	goto fail;
444
445	if (dh->allocsize < sizeof (struct datahead)
446	\|\| dh->recsize > dh->allocsize
447	\|\| (dh->notfound != false && dh->notfound != true)
448	\|\| (dh->usable != false && dh->usable != true))
449	goto fail;
450
451	if (here->key < here->packet + sizeof (struct datahead)
452	\|\| here->key > here->packet + dh->allocsize
453	\|\| here->key + here->len > here->packet + dh->allocsize)
454	goto fail;
455
456	work = here->next;
457
458	if (work == trail)
459	/ A circular list, this must not happen. /
460	goto fail;
461	if (tick)
462	trail = ((struct hashentry *) (data + trail))->next;
463	tick = `1` - tick;
464	}
465	}
466
467	if (he_cnt != head->nentries)
468	goto fail;
469
470	/ See if all data and keys had at least one reference from*
471	he->first == true hashentry. /*
472	for (ref_t idx = `0`; idx < head->first_free; ++idx)
473	{
474	if (usemap[idx] == use_data_begin)
475	goto fail;
476	}
477
478	/ Finally, make sure the database hasn't changed since the first test. /
479	if (memcmp (s1: mem, s2: &head_copy, n: sizeof (*head)) != `0`)
480	goto fail;
481
482	free (ptr: usemap);
483	return `1`;
484
485	fail:
486	free (ptr: usemap);
487	return `0`;
488	}
489
490
491	/ Initialize database information structures. /
492	void
493	nscd_init (void)
494	{
495	/ Look up unprivileged uid/gid/groups before we start listening on the*
496	socket /*
497	if (server_user != NULL)
498	begin_drop_privileges ();
499
500	if (nthreads == -`1`)
501	/ No configuration for this value, assume a default. /
502	nthreads = `4`;
503
504	for (size_t cnt = `0`; cnt < lastdb; ++cnt)
505	if (dbs[cnt].enabled)
506	{
507	pthread_rwlock_init (rwlock: &dbs[cnt].lock, NULL);
508	pthread_mutex_init (mutex: &dbs[cnt].memlock, NULL);
509
510	if (dbs[cnt].persistent)
511	{
512	/ Try to open the appropriate file on disk. /
513	int fd = open (file: dbs[cnt].db_filename, O_RDWR \| O_CLOEXEC);
514	if (fd != -`1`)
515	{
516	char *msg = NULL;
517	struct stat64 st;
518	void *mem;
519	size_t total;
520	struct database_pers_head head;
521	ssize_t n = TEMP_FAILURE_RETRY (read (fd, &head,
522	sizeof (head)));
523	if (n != sizeof (head) \|\| fstat64 (fd: fd, buf: &st) != `0`)
524	{
525	fail_db_errno:
526	/ The code is single-threaded at this point so*
527	using strerror is just fine. /*
528	msg = strerror (errno);
529	fail_db:
530	dbg_log (_("invalid persistent database file \"%s\": %s"),
531	dbs[cnt].db_filename, msg);
532	unlink (name: dbs[cnt].db_filename);
533	}
534	else if (head.module == `0` && head.data_size == `0`)
535	{
536	/ The file has been created, but the head has not*
537	been initialized yet. /*
538	msg = _("uninitialized header");
539	goto fail_db;
540	}
541	else if (head.header_size != (int) sizeof (head))
542	{
543	msg = _("header size does not match");
544	goto fail_db;
545	}
546	else if ((total = (sizeof (head)
547	+ roundup (head.module * sizeof (ref_t),
548	ALIGN)
549	+ head.data_size))
550	> st.st_size
551	\|\| total < sizeof (head))
552	{
553	msg = _("file size does not match");
554	goto fail_db;
555	}
556	/ Note we map with the maximum size allowed for the*
557	database. This is likely much larger than the
558	actual file size. This is OK on most OSes since
559	extensions of the underlying file will
560	automatically translate more pages available for
561	memory access. /*
562	else if ((mem = mmap (NULL, len: dbs[cnt].max_db_size,
563	PROT_READ \| PROT_WRITE,
564	MAP_SHARED, fd: fd, offset: `0`))
565	== MAP_FAILED)
566	goto fail_db_errno;
567	else if (!verify_persistent_db (mem, readhead: &head, dbnr: cnt))
568	{
569	munmap (addr: mem, len: total);
570	msg = _("verification failed");
571	goto fail_db;
572	}
573	else
574	{
575	/ Success. We have the database. /
576	dbs[cnt].head = mem;
577	dbs[cnt].memsize = total;
578	dbs[cnt].data = (char *)
579	&dbs[cnt].head->array[roundup (dbs[cnt].head->module,
580	ALIGN / sizeof (ref_t))];
581	dbs[cnt].mmap_used = true;
582
583	if (dbs[cnt].suggested_module > head.module)
584	dbg_log (_("suggested size of table for database %s larger than the persistent database's table"),
585	dbnames[cnt]);
586
587	dbs[cnt].wr_fd = fd;
588	fd = -`1`;
589	/ We also need a read-only descriptor. /
590	if (dbs[cnt].shared)
591	{
592	dbs[cnt].ro_fd = open (file: dbs[cnt].db_filename,
593	O_RDONLY \| O_CLOEXEC);
594	if (dbs[cnt].ro_fd == -`1`)
595	dbg_log (_("\
596	cannot create read-only descriptor for \"%s\"; no mmap"),
597	dbs[cnt].db_filename);
598	}
599
600	// XXX Shall we test whether the descriptors actually
601	// XXX point to the same file?
602	}
603
604	/ Close the file descriptors in case something went*
605	wrong in which case the variable have not been
606	assigned -1. /*
607	if (fd != -`1`)
608	close (fd: fd);
609	}
610	else if (errno == EACCES)
611	do_exit (EXIT_FAILURE, errnum: `0`, _("cannot access '%s'"),
612	dbs[cnt].db_filename);
613	}
614
615	if (dbs[cnt].head == NULL)
616	{
617	/ No database loaded. Allocate the data structure,*
618	possibly on disk. /*
619	struct database_pers_head head;
620	size_t total = (sizeof (head)
621	+ roundup (dbs[cnt].suggested_module
622	* sizeof (ref_t), ALIGN)
623	+ (dbs[cnt].suggested_module
624	* DEFAULT_DATASIZE_PER_BUCKET));
625
626	/ Try to create the database. If we do not need a*
627	persistent database create a temporary file. /*
628	int fd;
629	int ro_fd = -`1`;
630	if (dbs[cnt].persistent)
631	{
632	fd = open (file: dbs[cnt].db_filename,
633	O_RDWR \| O_CREAT \| O_EXCL \| O_TRUNC \| O_CLOEXEC,
634	S_IRUSR \| S_IWUSR);
635	if (fd != -`1` && dbs[cnt].shared)
636	ro_fd = open (file: dbs[cnt].db_filename,
637	O_RDONLY \| O_CLOEXEC);
638	}
639	else
640	{
641	char fname[] = _PATH_NSCD_XYZ_DB_TMP;
642	fd = mkostemp (template: fname, O_CLOEXEC);
643
644	/ We do not need the file name anymore after we*
645	opened another file descriptor in read-only mode. /*
646	if (fd != -`1`)
647	{
648	if (dbs[cnt].shared)
649	ro_fd = open (file: fname, O_RDONLY \| O_CLOEXEC);
650
651	unlink (name: fname);
652	}
653	}
654
655	if (fd == -`1`)
656	{
657	if (errno == EEXIST)
658	{
659	dbg_log (_("database for %s corrupted or simultaneously used; remove %s manually if necessary and restart"),
660	dbnames[cnt], dbs[cnt].db_filename);
661	do_exit (child_ret: `1`, errnum: `0`, NULL);
662	}
663
664	if (dbs[cnt].persistent)
665	dbg_log (_("cannot create %s; no persistent database used"),
666	dbs[cnt].db_filename);
667	else
668	dbg_log (_("cannot create %s; no sharing possible"),
669	dbs[cnt].db_filename);
670
671	dbs[cnt].persistent = `0`;
672	// XXX remember: no mmap
673	}
674	else
675	{
676	/ Tell the user if we could not create the read-only*
677	descriptor. /*
678	if (ro_fd == -`1` && dbs[cnt].shared)
679	dbg_log (_("\
680	cannot create read-only descriptor for \"%s\"; no mmap"),
681	dbs[cnt].db_filename);
682
683	/ Before we create the header, initialize the hash*
684	table. That way if we get interrupted while writing
685	the header we can recognize a partially initialized
686	database. /*
687	size_t ps = sysconf (_SC_PAGESIZE);
688	char tmpbuf[ps];
689	assert (~ENDREF == `0`);
690	memset (s: tmpbuf, c: `'\xff'`, n: ps);
691
692	size_t remaining = dbs[cnt].suggested_module * sizeof (ref_t);
693	off_t offset = sizeof (head);
694
695	size_t towrite;
696	if (offset % ps != `0`)
697	{
698	towrite = MIN (remaining, ps - (offset % ps));
699	if (pwrite (fd: fd, buf: tmpbuf, nbytes: towrite, offset: offset) != towrite)
700	goto write_fail;
701	offset += towrite;
702	remaining -= towrite;
703	}
704
705	while (remaining > ps)
706	{
707	if (pwrite (fd: fd, buf: tmpbuf, nbytes: ps, offset: offset) == -`1`)
708	goto write_fail;
709	offset += ps;
710	remaining -= ps;
711	}
712
713	if (remaining > `0`
714	&& pwrite (fd: fd, buf: tmpbuf, nbytes: remaining, offset: offset) != remaining)
715	goto write_fail;
716
717	/ Create the header of the file. /
718	struct database_pers_head head =
719	{
720	.version = DB_VERSION,
721	.header_size = sizeof (head),
722	.module = dbs[cnt].suggested_module,
723	.data_size = (dbs[cnt].suggested_module
724	* DEFAULT_DATASIZE_PER_BUCKET),
725	.first_free = `0`
726	};
727	void *mem;
728
729	if ((TEMP_FAILURE_RETRY (write (fd, &head, sizeof (head)))
730	!= sizeof (head))
731	\|\| (TEMP_FAILURE_RETRY_VAL (posix_fallocate (fd, `0`, total))
732	!= `0`)
733	\|\| (mem = mmap (NULL, len: dbs[cnt].max_db_size,
734	PROT_READ \| PROT_WRITE,
735	MAP_SHARED, fd: fd, offset: `0`)) == MAP_FAILED)
736	{
737	write_fail:
738	unlink (name: dbs[cnt].db_filename);
739	dbg_log (_("cannot write to database file %s: %s"),
740	dbs[cnt].db_filename, strerror (errno));
741	dbs[cnt].persistent = `0`;
742	}
743	else
744	{
745	/ Success. /
746	dbs[cnt].head = mem;
747	dbs[cnt].data = (char *)
748	&dbs[cnt].head->array[roundup (dbs[cnt].head->module,
749	ALIGN / sizeof (ref_t))];
750	dbs[cnt].memsize = total;
751	dbs[cnt].mmap_used = true;
752
753	/ Remember the descriptors. /
754	dbs[cnt].wr_fd = fd;
755	dbs[cnt].ro_fd = ro_fd;
756	fd = -`1`;
757	ro_fd = -`1`;
758	}
759
760	if (fd != -`1`)
761	close (fd: fd);
762	if (ro_fd != -`1`)
763	close (fd: ro_fd);
764	}
765	}
766
767	if (dbs[cnt].head == NULL)
768	{
769	/ We do not use the persistent database. Just*
770	create an in-memory data structure. /*
771	assert (! dbs[cnt].persistent);
772
773	dbs[cnt].head = xmalloc (n: sizeof (struct database_pers_head)
774	+ (dbs[cnt].suggested_module
775	* sizeof (ref_t)));
776	memset (s: dbs[cnt].head, c: `'\0'`, n: sizeof (struct database_pers_head));
777	assert (~ENDREF == `0`);
778	memset (s: dbs[cnt].head->array, c: `'\xff'`,
779	n: dbs[cnt].suggested_module * sizeof (ref_t));
780	dbs[cnt].head->module = dbs[cnt].suggested_module;
781	dbs[cnt].head->data_size = (DEFAULT_DATASIZE_PER_BUCKET
782	* dbs[cnt].head->module);
783	dbs[cnt].data = xmalloc (n: dbs[cnt].head->data_size);
784	dbs[cnt].head->first_free = `0`;
785
786	dbs[cnt].shared = `0`;
787	assert (dbs[cnt].ro_fd == -`1`);
788	}
789	}
790
791	/ Create the socket. /
792	sock = socket (AF_UNIX, SOCK_STREAM \| SOCK_CLOEXEC \| SOCK_NONBLOCK, protocol: `0`);
793	if (sock < `0`)
794	{
795	dbg_log (_("cannot open socket: %s"), strerror (errno));
796	do_exit (errno == EACCES ? `4` : `1`, errnum: `0`, NULL);
797	}
798	/ Bind a name to the socket. /
799	struct sockaddr_un sock_addr;
800	sock_addr.sun_family = AF_UNIX;
801	strcpy (dest: sock_addr.sun_path, _PATH_NSCDSOCKET);
802	if (bind (fd: sock, addr: (struct sockaddr ) &sock_addr, len: sizeof* (sock_addr)) < `0`)
803	{
804	dbg_log (str: "%s: %s", _PATH_NSCDSOCKET, strerror (errno));
805	do_exit (errno == EACCES ? `4` : `1`, errnum: `0`, NULL);
806	}
807
808	/ Set permissions for the socket. /
809	chmod (_PATH_NSCDSOCKET, DEFFILEMODE);
810
811	/ Set the socket up to accept connections. /
812	if (listen (fd: sock, SOMAXCONN) < `0`)
813	{
814	dbg_log (_("cannot enable socket to accept connections: %s"),
815	strerror (errno));
816	do_exit (child_ret: `1`, errnum: `0`, NULL);
817	}
818
819	#ifdef HAVE_NETLINK
820	if (dbs[hstdb].enabled)
821	{
822	/ Try to open netlink socket to monitor network setting changes. /
823	nl_status_fd = socket (AF_NETLINK,
824	SOCK_RAW \| SOCK_CLOEXEC \| SOCK_NONBLOCK,
825	NETLINK_ROUTE);
826	if (nl_status_fd != -`1`)
827	{
828	struct sockaddr_nl snl;
829	memset (s: &snl, c: `'\0'`, n: sizeof (snl));
830	snl.nl_family = AF_NETLINK;
831	/ XXX Is this the best set to use? /
832	snl.nl_groups = (RTMGRP_IPV4_IFADDR \| RTMGRP_TC \| RTMGRP_IPV4_MROUTE
833	\| RTMGRP_IPV4_ROUTE \| RTMGRP_IPV4_RULE
834	\| RTMGRP_IPV6_IFADDR \| RTMGRP_IPV6_MROUTE
835	\| RTMGRP_IPV6_ROUTE \| RTMGRP_IPV6_IFINFO
836	\| RTMGRP_IPV6_PREFIX);
837
838	if (bind (fd: nl_status_fd, addr: (struct sockaddr ) &snl, len: sizeof* (snl)) != `0`)
839	{
840	close (fd: nl_status_fd);
841	nl_status_fd = -`1`;
842	}
843	else
844	{
845	/ Start the timestamp process. /
846	dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
847	= __bump_nl_timestamp ();
848	}
849	}
850	}
851	#endif
852
853	/ Change to unprivileged uid/gid/groups if specified in config file /
854	if (server_user != NULL)
855	finish_drop_privileges ();
856	}
857
#ifdef HAVE_INOTIFY
#define TRACED_FILE_MASK (IN_DELETE_SELF | IN_CLOSE_WRITE | IN_MOVE_SELF)
#define TRACED_DIR_MASK (IN_DELETE_SELF | IN_CREATE | IN_MOVED_TO | IN_MOVE_SELF)
/* Install inotify watches for the file described by FINFO and for its
   parent directory.  A watch whose descriptor in FINFO is already
   non-negative is left alone.  If the file watch cannot be installed
   the directory watch is not attempted.  Success and failure are both
   reported through dbg_log.  */
void
install_watches (struct traced_file *finfo)
{
  /* Use inotify support if we have it.  */
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    finfo->inotify_descr[TRACED_FILE] = inotify_add_watch (inotify_fd,
                                                           finfo->fname,
                                                           TRACED_FILE_MASK);
  if (finfo->inotify_descr[TRACED_FILE] < 0)
    {
      dbg_log (_("disabled inotify-based monitoring for file `%s': %s"),
               finfo->fname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring file `%s` (%d)"),
           finfo->fname, finfo->inotify_descr[TRACED_FILE]);
  /* Additionally listen for events in the file's parent directory.
     We do this because the file to be watched might be
     deleted and then added back again.  When it is added back again
     we must re-add the watch.  We must also cover IN_MOVED_TO to
     detect a file being moved into the directory.  */
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    finfo->inotify_descr[TRACED_DIR] = inotify_add_watch (inotify_fd,
                                                          finfo->dname,
                                                          TRACED_DIR_MASK);
  if (finfo->inotify_descr[TRACED_DIR] < 0)
    {
      /* Report the directory name: that is the watch that failed.  */
      dbg_log (_("disabled inotify-based monitoring for directory `%s': %s"),
               finfo->dname, strerror (errno));
      return;
    }
  dbg_log (_("monitoring directory `%s` (%d)"),
           finfo->dname, finfo->inotify_descr[TRACED_DIR]);
}
#endif
896
897	/ Register the file in FINFO as a traced file for the database DBS[DBIX].*
898
899	We support registering multiple files per database. Each call to
900	register_traced_file adds to the list of registered files.
901
902	When we prune the database, either through timeout or a request to
903	invalidate, we will check to see if any of the registered files has changed.
904	When we accept new connections to handle a cache request we will also
905	check to see if any of the registered files has changed.
906
907	If we have inotify support then we install an inotify fd to notify us of
908	file deletion or modification, both of which will require we invalidate
909	the cache for the database. Without inotify support we stat the file and
910	store st_mtime to determine if the file has been modified. /*
911	void
912	register_traced_file (size_t dbidx, struct traced_file *finfo)
913	{
914	/ If the database is disabled or file checking is disabled*
915	then ignore the registration. /*
916	if (! dbs[dbidx].enabled \|\| ! dbs[dbidx].check_file)
917	return;
918
919	if (__glibc_unlikely (debug_level > `0`))
920	dbg_log (_("monitoring file %s for database %s"),
921	finfo->fname, dbnames[dbidx]);
922
923	#ifdef HAVE_INOTIFY
924	install_watches (finfo);
925	#endif
926	struct stat64 st;
927	if (stat64 (file: finfo->fname, buf: &st) < `0`)
928	{
929	/ We cannot stat() the file. Set mtime to zero and try again later. /
930	dbg_log (_("stat failed for file `%s'; will try again later: %s"),
931	finfo->fname, strerror (errno));
932	finfo->mtime = `0`;
933	}
934	else
935	finfo->mtime = st.st_mtime;
936
937	/ Queue up the file name. /
938	finfo->next = dbs[dbidx].traced_files;
939	dbs[dbidx].traced_files = finfo;
940	}
941
942
943	/ Close the connections. /
944	void
945	close_sockets (void)
946	{
947	close (fd: sock);
948	}
949
950
951	static void
952	invalidate_cache (char key, int* fd)
953	{
954	dbtype number;
955	int32_t resp;
956
957	for (number = pwddb; number < lastdb; ++number)
958	if (strcmp (s1: key, s2: dbnames[number]) == `0`)
959	{
960	struct traced_file *runp = dbs[number].traced_files;
961	while (runp != NULL)
962	{
963	/ Make sure we reload from file when checking mtime. /
964	runp->mtime = `0`;
965	#ifdef HAVE_INOTIFY
966	/ During an invalidation we try to reload the traced*
967	file watches. This allows the user to re-sync if
968	inotify events were lost. Similar to what we do during
969	pruning. /*
970	install_watches (finfo: runp);
971	#endif
972	if (runp->call_res_init)
973	{
974	res_init ();
975	break;
976	}
977	runp = runp->next;
978	}
979	break;
980	}
981
982	if (number == lastdb)
983	{
984	resp = EINVAL;
985	writeall (fd, buf: &resp, len: sizeof (resp));
986	return;
987	}
988
989	if (dbs[number].enabled)
990	{
991	pthread_mutex_lock (mutex: &dbs[number].prune_run_lock);
992	prune_cache (table: &dbs[number], LONG_MAX, fd);
993	pthread_mutex_unlock (mutex: &dbs[number].prune_run_lock);
994	}
995	else
996	{
997	resp = `0`;
998	writeall (fd, buf: &resp, len: sizeof (resp));
999	}
1000	}
1001
1002
1003	#ifdef SCM_RIGHTS
1004	static void
1005	send_ro_fd (struct database_dyn db, char* key, int* fd)
1006	{
1007	/ If we do not have an read-only file descriptor do nothing. /
1008	if (db->ro_fd == -`1`)
1009	return;
1010
1011	/ We need to send some data along with the descriptor. /
1012	uint64_t mapsize = (db->head->data_size
1013	+ roundup (db->head->module * sizeof (ref_t), ALIGN)
1014	+ sizeof (struct database_pers_head));
1015	struct iovec iov[`2`];
1016	iov[`0`].iov_base = key;
1017	iov[`0`].iov_len = strlen (s: key) + `1`;
1018	iov[`1`].iov_base = &mapsize;
1019	iov[`1`].iov_len = sizeof (mapsize);
1020
1021	/ Prepare the control message to transfer the descriptor. /
1022	union
1023	{
1024	struct cmsghdr hdr;
1025	char bytes[CMSG_SPACE (sizeof (int))];
1026	} buf;
1027	struct msghdr msg = { .msg_iov = iov, .msg_iovlen = `2`,
1028	.msg_control = buf.bytes,
1029	.msg_controllen = sizeof (buf) };
1030	struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
1031
1032	cmsg->cmsg_level = SOL_SOCKET;
1033	cmsg->cmsg_type = SCM_RIGHTS;
1034	cmsg->cmsg_len = CMSG_LEN (sizeof (int));
1035
1036	int ip = (int* *) CMSG_DATA (cmsg);
1037	*ip = db->ro_fd;
1038
1039	msg.msg_controllen = cmsg->cmsg_len;
1040
1041	/ Send the control message. We repeat when we are interrupted but*
1042	everything else is ignored. /*
1043	#ifndef MSG_NOSIGNAL
1044	# define MSG_NOSIGNAL 0
1045	#endif
1046	(void) TEMP_FAILURE_RETRY (sendmsg (fd, &msg, MSG_NOSIGNAL));
1047
1048	if (__glibc_unlikely (debug_level > `0`))
1049	dbg_log (_("provide access to FD %d, for %s"), db->ro_fd, key);
1050	}
1051	#endif /* SCM_RIGHTS */
1052
1053
1054	/ Handle new request. /
1055	static void
1056	handle_request (int fd, request_header req, void* *key, uid_t uid, pid_t pid)
1057	{
1058	if (__builtin_expect (req->version, NSCD_VERSION) != NSCD_VERSION)
1059	{
1060	if (debug_level > `0`)
1061	dbg_log (_("\
1062	cannot handle old request version %d; current version is %d"),
1063	req->version, NSCD_VERSION);
1064	return;
1065	}
1066
1067	/ Perform the SELinux check before we go on to the standard checks. /
1068	if (selinux_enabled && nscd_request_avc_has_perm (fd, req: req->type) != `0`)
1069	{
1070	if (debug_level > `0`)
1071	{
1072	#ifdef SO_PEERCRED
1073	char pbuf[sizeof ("/proc//exe") + `3` * sizeof (long int)];
1074	# ifdef PATH_MAX
1075	char buf[PATH_MAX];
1076	# else
1077	char buf[`4096`];
1078	# endif
1079
1080	snprintf (s: pbuf, maxlen: sizeof (pbuf), format: "/proc/%ld/exe", (long int) pid);
1081	ssize_t n = readlink (path: pbuf, buf: buf, len: sizeof (buf) - `1`);
1082
1083	if (n <= `0`)
1084	dbg_log (_("\
1085	request from %ld not handled due to missing permission"), (long int) pid);
1086	else
1087	{
1088	buf[n] = `'\0'`;
1089	dbg_log (_("\
1090	request from '%s' [%ld] not handled due to missing permission"),
1091	buf, (long int) pid);
1092	}
1093	#else
1094	dbg_log (_("request not handled due to missing permission"));
1095	#endif
1096	}
1097	return;
1098	}
1099
1100	struct database_dyn *db = reqinfo[req->type].db;
1101
1102	/ See whether we can service the request from the cache. /
1103	if (__builtin_expect (reqinfo[req->type].data_request, true))
1104	{
1105	if (__builtin_expect (debug_level, `0`) > `0`)
1106	{
1107	if (req->type == GETHOSTBYADDR \|\| req->type == GETHOSTBYADDRv6)
1108	{
1109	char buf[INET6_ADDRSTRLEN];
1110
1111	dbg_log (str: "\t%s (%s)", serv2str[req->type],
1112	inet_ntop (af: req->type == GETHOSTBYADDR
1113	? AF_INET : AF_INET6,
1114	cp: key, buf: buf, len: sizeof (buf)));
1115	}
1116	else
1117	dbg_log (str: "\t%s (%s)", serv2str[req->type], (char *) key);
1118	}
1119
1120	/ Is this service enabled? /
1121	if (__glibc_unlikely (!db->enabled))
1122	{
1123	/ No, sent the prepared record. /
1124	if (TEMP_FAILURE_RETRY (send (fd, db->disabled_iov->iov_base,
1125	db->disabled_iov->iov_len,
1126	MSG_NOSIGNAL))
1127	!= (ssize_t) db->disabled_iov->iov_len
1128	&& __builtin_expect (debug_level, `0`) > `0`)
1129	{
1130	/ We have problems sending the result. /
1131	char buf[`256`];
1132	dbg_log (_("cannot write result: %s"),
1133	strerror_r (errno, buf: buf, buflen: sizeof (buf)));
1134	}
1135
1136	return;
1137	}
1138
1139	/ Be sure we can read the data. /
1140	if (__glibc_unlikely (pthread_rwlock_tryrdlock (&db->lock) != `0`))
1141	{
1142	++db->head->rdlockdelayed;
1143	pthread_rwlock_rdlock (rwlock: &db->lock);
1144	}
1145
1146	/ See whether we can handle it from the cache. /
1147	struct datahead *cached;
1148	cached = (struct datahead *) cache_search (req->type, key, len: req->key_len,
1149	table: db, owner: uid);
1150	if (cached != NULL)
1151	{
1152	/ Hurray it's in the cache. /
1153	if (writeall (fd, buf: cached->data, len: cached->recsize) != cached->recsize
1154	&& __glibc_unlikely (debug_level > `0`))
1155	{
1156	/ We have problems sending the result. /
1157	char buf[`256`];
1158	dbg_log (_("cannot write result: %s"),
1159	strerror_r (errno, buf: buf, buflen: sizeof (buf)));
1160	}
1161
1162	pthread_rwlock_unlock (rwlock: &db->lock);
1163
1164	return;
1165	}
1166
1167	pthread_rwlock_unlock (rwlock: &db->lock);
1168	}
1169	else if (__builtin_expect (debug_level, `0`) > `0`)
1170	{
1171	if (req->type == INVALIDATE)
1172	dbg_log (str: "\t%s (%s)", serv2str[req->type], (char *) key);
1173	else
1174	dbg_log (str: "\t%s", serv2str[req->type]);
1175	}
1176
1177	/ Handle the request. /
1178	switch (req->type)
1179	{
1180	case GETPWBYNAME:
1181	addpwbyname (db, fd, req, key, uid);
1182	break;
1183
1184	case GETPWBYUID:
1185	addpwbyuid (db, fd, req, key, uid);
1186	break;
1187
1188	case GETGRBYNAME:
1189	addgrbyname (db, fd, req, key, uid);
1190	break;
1191
1192	case GETGRBYGID:
1193	addgrbygid (db, fd, req, key, uid);
1194	break;
1195
1196	case GETHOSTBYNAME:
1197	addhstbyname (db, fd, req, key, uid);
1198	break;
1199
1200	case GETHOSTBYNAMEv6:
1201	addhstbynamev6 (db, fd, req, key, uid);
1202	break;
1203
1204	case GETHOSTBYADDR:
1205	addhstbyaddr (db, fd, req, key, uid);
1206	break;
1207
1208	case GETHOSTBYADDRv6:
1209	addhstbyaddrv6 (db, fd, req, key, uid);
1210	break;
1211
1212	case GETAI:
1213	addhstai (db, fd, req, key, uid);
1214	break;
1215
1216	case INITGROUPS:
1217	addinitgroups (db, fd, req, key, uid);
1218	break;
1219
1220	case GETSERVBYNAME:
1221	addservbyname (db, fd, req, key, uid);
1222	break;
1223
1224	case GETSERVBYPORT:
1225	addservbyport (db, fd, req, key, uid);
1226	break;
1227
1228	case GETNETGRENT:
1229	addgetnetgrent (db, fd, req, key, uid);
1230	break;
1231
1232	case INNETGR:
1233	addinnetgr (db, fd, req, key, uid);
1234	break;
1235
1236	case GETSTAT:
1237	case SHUTDOWN:
1238	case INVALIDATE:
1239	{
1240	/ Get the callers credentials. /
1241	#ifdef SO_PEERCRED
1242	struct ucred caller;
1243	socklen_t optlen = sizeof (caller);
1244
1245	if (getsockopt (fd: fd, SOL_SOCKET, SO_PEERCRED, optval: &caller, optlen: &optlen) < `0`)
1246	{
1247	char buf[`256`];
1248
1249	dbg_log (_("error getting caller's id: %s"),
1250	strerror_r (errno, buf: buf, buflen: sizeof (buf)));
1251	break;
1252	}
1253
1254	uid = caller.uid;
1255	#else
1256	/ Some systems have no SO_PEERCRED implementation. They don't*
1257	care about security so we don't as well. /*
1258	uid = `0`;
1259	#endif
1260	}
1261
1262	/ Accept shutdown, getstat and invalidate only from root. For*
1263	the stat call also allow the user specified in the config file. /*
1264	if (req->type == GETSTAT)
1265	{
1266	if (uid == `0` \|\| uid == stat_uid)
1267	send_stats (fd, dbs);
1268	}
1269	else if (uid == `0`)
1270	{
1271	if (req->type == INVALIDATE)
1272	invalidate_cache (key, fd);
1273	else
1274	termination_handler (signum: `0`);
1275	}
1276	break;
1277
1278	case GETFDPW:
1279	case GETFDGR:
1280	case GETFDHST:
1281	case GETFDSERV:
1282	case GETFDNETGR:
1283	#ifdef SCM_RIGHTS
1284	send_ro_fd (db: reqinfo[req->type].db, key, fd);
1285	#endif
1286	break;
1287
1288	default:
1289	/ Ignore the command, it's nothing we know. /
1290	break;
1291	}
1292	}
1293
/* Read the whole of /proc/self/cmdline into a malloc'ed buffer.

   On success the buffer is returned and *SIZE is set to the number of
   bytes read; the caller owns the buffer and must free it.  The byte
   following the data is always set to NUL (it is not counted in
   *SIZE), so the buffer can be walked with string functions even if
   the file content does not end in a NUL byte.

   On failure NULL is returned, errno describes the error and *SIZE is
   left untouched.  */
static char *
read_cmdline (size_t *size)
{
  int fd = open ("/proc/self/cmdline", O_RDONLY);
  if (fd < 0)
    return NULL;

  size_t current = 0;
  size_t limit = 1024;
  int saved_errno = ENOMEM;
  char *buffer = malloc (limit);
  if (buffer == NULL)
    goto fail;

  while (1)
    {
      /* Buffer full: double it, guarding against size_t overflow.  */
      if (current == limit)
        {
          char *newptr;
          if (2 * limit < limit
              || (newptr = realloc (buffer, 2 * limit)) == NULL)
            {
              saved_errno = ENOMEM;
              goto fail;
            }
          buffer = newptr;
          limit *= 2;
        }

      ssize_t n = TEMP_FAILURE_RETRY (read (fd, buffer + current,
                                            limit - current));
      if (n == -1)
        {
          saved_errno = errno;
          goto fail;
        }
      if (n == 0)
        /* End of file.  */
        break;
      current += n;
    }

  /* The growth step at the top of the loop runs before every read, so
     CURRENT < LIMIT holds here and there is room for the terminator.  */
  buffer[current] = '\0';

  close (fd);
  *size = current;
  return buffer;

 fail:
  /* free and close may clobber errno, so restore it afterwards.  */
  free (buffer);
  close (fd);
  errno = saved_errno;
  return NULL;
}
1345
1346
1347	/ Restart the process. /
1348	static void
1349	restart (void)
1350	{
1351	/ First determine the parameters. We do not use the parameters*
1352	passed to main because then nscd would use the system libc after
1353	restarting even if it was started by a non-system dynamic linker
1354	during glibc testing. /*
1355	size_t readlen;
1356	char *cmdline = read_cmdline (size: &readlen);
1357	if (cmdline == NULL)
1358	{
1359	dbg_log (_("\
1360	cannot open /proc/self/cmdline: %m; disabling paranoia mode"));
1361	paranoia = `0`;
1362	return;
1363	}
1364
1365	/ Parse the command line. Worst case scenario: every two*
1366	characters form one parameter (one character plus NUL). /*
1367	char *argv = alloca ((readlen / `2` + `1`) sizeof (argv[`0`]));
1368	int argc = `0`;
1369
1370	for (char *cp = cmdline; cp < cmdline + readlen;)
1371	{
1372	argv[argc++] = cp;
1373	cp = strchr (s: cp, c: `'\0'`) + `1`;
1374	}
1375	argv[argc] = NULL;
1376
1377	/ Second, change back to the old user if we changed it. /
1378	if (server_user != NULL)
1379	{
1380	if (setresuid (ruid: old_uid, euid: old_uid, suid: old_uid) != `0`)
1381	{
1382	dbg_log (_("\
1383	cannot change to old UID: %s; disabling paranoia mode"),
1384	strerror (errno));
1385
1386	paranoia = `0`;
1387	free (ptr: cmdline);
1388	return;
1389	}
1390
1391	if (setresgid (rgid: old_gid, egid: old_gid, sgid: old_gid) != `0`)
1392	{
1393	dbg_log (_("\
1394	cannot change to old GID: %s; disabling paranoia mode"),
1395	strerror (errno));
1396
1397	ignore_value (setuid (server_uid));
1398	paranoia = `0`;
1399	free (ptr: cmdline);
1400	return;
1401	}
1402	}
1403
1404	/ Next change back to the old working directory. /
1405	if (chdir (path: oldcwd) == -`1`)
1406	{
1407	dbg_log (_("\
1408	cannot change to old working directory: %s; disabling paranoia mode"),
1409	strerror (errno));
1410
1411	if (server_user != NULL)
1412	{
1413	ignore_value (setuid (server_uid));
1414	ignore_value (setgid (server_gid));
1415	}
1416	paranoia = `0`;
1417	free (ptr: cmdline);
1418	return;
1419	}
1420
1421	/ Synchronize memory. /
1422	int32_t certainly[lastdb];
1423	for (int cnt = `0`; cnt < lastdb; ++cnt)
1424	if (dbs[cnt].enabled)
1425	{
1426	/ Make sure nobody keeps using the database. /
1427	dbs[cnt].head->timestamp = `0`;
1428	certainly[cnt] = dbs[cnt].head->nscd_certainly_running;
1429	dbs[cnt].head->nscd_certainly_running = `0`;
1430
1431	if (dbs[cnt].persistent)
1432	// XXX async OK?
1433	msync (addr: dbs[cnt].head, len: dbs[cnt].memsize, MS_ASYNC);
1434	}
1435
1436	/ The preparations are done. /
1437	#ifdef PATH_MAX
1438	char pathbuf[PATH_MAX];
1439	#else
1440	char pathbuf[`256`];
1441	#endif
1442	/ Try to exec the real nscd program so the process name (as reported*
1443	in /proc/PID/status) will be 'nscd', but fall back to /proc/self/exe
1444	if readlink or the exec with the result of the readlink call fails. /*
1445	ssize_t n = readlink (path: "/proc/self/exe", buf: pathbuf, len: sizeof (pathbuf) - `1`);
1446	if (n != -`1`)
1447	{
1448	pathbuf[n] = `'\0'`;
1449	execv (path: pathbuf, argv: argv);
1450	}
1451	execv (path: "/proc/self/exe", argv: argv);
1452
1453	/ If we come here, we will never be able to re-exec. /
1454	dbg_log (_("re-exec failed: %s; disabling paranoia mode"),
1455	strerror (errno));
1456
1457	if (server_user != NULL)
1458	{
1459	ignore_value (setuid (server_uid));
1460	ignore_value (setgid (server_gid));
1461	}
1462	if (chdir (path: "/") != `0`)
1463	dbg_log (_("cannot change current working directory to \"/\": %s"),
1464	strerror (errno));
1465	paranoia = `0`;
1466	free (ptr: cmdline);
1467
1468	/ Re-enable the databases. /
1469	time_t now = time (NULL);
1470	for (int cnt = `0`; cnt < lastdb; ++cnt)
1471	if (dbs[cnt].enabled)
1472	{
1473	dbs[cnt].head->timestamp = now;
1474	dbs[cnt].head->nscd_certainly_running = certainly[cnt];
1475	}
1476	}
1477
1478
/* One node of the file descriptor list: a descriptor plus the link to
   the following node.  */
struct fdlist
{
  int fd;
  struct fdlist *next;
};
1485	/ Memory allocated for the list. /
1486	static struct fdlist *fdlist;
1487	/ List of currently ready-to-read file descriptors. /
1488	static struct fdlist *readylist;
1489
1490	/ Conditional variable and mutex to signal availability of entries in*
1491	READYLIST. The condvar is initialized dynamically since we might
1492	use a different clock depending on availability. /*
1493	static pthread_cond_t readylist_cond = PTHREAD_COND_INITIALIZER;
1494	static pthread_mutex_t readylist_lock = PTHREAD_MUTEX_INITIALIZER;
1495
1496	/ The clock to use with the condvar. /
1497	static clockid_t timeout_clock = CLOCK_REALTIME;
1498
1499	/ Number of threads ready to handle the READYLIST. /
1500	static unsigned long int nready;
1501
1502
1503	/ Function for the clean-up threads. /
1504	static void *
1505	__attribute__ ((__noreturn__))
1506	nscd_run_prune (void *p)
1507	{
1508	const long int my_number = (long int) p;
1509	assert (dbs[my_number].enabled);
1510
1511	int dont_need_update = setup_thread (&dbs[my_number]);
1512
1513	time_t now = time (NULL);
1514
1515	/ We are running. /
1516	dbs[my_number].head->timestamp = now;
1517
1518	struct timespec prune_ts;
1519	if (__glibc_unlikely (clock_gettime (timeout_clock, &prune_ts) == -`1`))
1520	/ Should never happen. /
1521	abort ();
1522
1523	/ Compute the initial timeout time. Prevent all the timers to go*
1524	off at the same time by adding a db-based value. /*
1525	prune_ts.tv_sec += CACHE_PRUNE_INTERVAL + my_number;
1526	dbs[my_number].wakeup_time = now + CACHE_PRUNE_INTERVAL + my_number;
1527
1528	pthread_mutex_t *prune_lock = &dbs[my_number].prune_lock;
1529	pthread_mutex_t *prune_run_lock = &dbs[my_number].prune_run_lock;
1530	pthread_cond_t *prune_cond = &dbs[my_number].prune_cond;
1531
1532	pthread_mutex_lock (mutex: prune_lock);
1533	while (`1`)
1534	{
1535	/ Wait, but not forever. /
1536	int e = `0`;
1537	if (! dbs[my_number].clear_cache)
1538	e = pthread_cond_timedwait (cond: prune_cond, mutex: prune_lock, abstime: &prune_ts);
1539	assert (__builtin_expect (e == `0` \|\| e == ETIMEDOUT, `1`));
1540
1541	time_t next_wait;
1542	now = time (NULL);
1543	if (e == ETIMEDOUT \|\| now >= dbs[my_number].wakeup_time
1544	\|\| dbs[my_number].clear_cache)
1545	{
1546	/ We will determine the new timeout values based on the*
1547	cache content. Should there be concurrent additions to
1548	the cache which are not accounted for in the cache
1549	pruning we want to know about it. Therefore set the
1550	timeout to the maximum. It will be decreased when adding
1551	new entries to the cache, if necessary. /*
1552	dbs[my_number].wakeup_time = MAX_TIMEOUT_VALUE;
1553
1554	/ Unconditionally reset the flag. /
1555	time_t prune_now = dbs[my_number].clear_cache ? LONG_MAX : now;
1556	dbs[my_number].clear_cache = `0`;
1557
1558	pthread_mutex_unlock (mutex: prune_lock);
1559
1560	/ We use a separate lock for running the prune function (instead*
1561	of keeping prune_lock locked) because this enables concurrent
1562	invocations of cache_add which might modify the timeout value. /*
1563	pthread_mutex_lock (mutex: prune_run_lock);
1564	next_wait = prune_cache (table: &dbs[my_number], now: prune_now, fd: -`1`);
1565	pthread_mutex_unlock (mutex: prune_run_lock);
1566
1567	next_wait = MAX (next_wait, CACHE_PRUNE_INTERVAL);
1568	/ If clients cannot determine for sure whether nscd is running*
1569	we need to wake up occasionally to update the timestamp.
1570	Wait 90% of the update period. /*
1571	#define UPDATE_MAPPING_TIMEOUT (MAPPING_TIMEOUT * 9 / 10)
1572	if (__glibc_unlikely (! dont_need_update))
1573	{
1574	next_wait = MIN (UPDATE_MAPPING_TIMEOUT, next_wait);
1575	dbs[my_number].head->timestamp = now;
1576	}
1577
1578	pthread_mutex_lock (mutex: prune_lock);
1579
1580	/ Make it known when we will wake up again. /
1581	if (now + next_wait < dbs[my_number].wakeup_time)
1582	dbs[my_number].wakeup_time = now + next_wait;
1583	else
1584	next_wait = dbs[my_number].wakeup_time - now;
1585	}
1586	else
1587	/ The cache was just pruned. Do not do it again now. Just*
1588	use the new timeout value. /*
1589	next_wait = dbs[my_number].wakeup_time - now;
1590
1591	if (clock_gettime (clock_id: timeout_clock, tp: &prune_ts) == -`1`)
1592	/ Should never happen. /
1593	abort ();
1594
1595	/ Compute next timeout time. /
1596	prune_ts.tv_sec += next_wait;
1597	}
1598	}
1599
1600
1601	/ This is the main loop. It is replicated in different threads but*
1602	the use of the ready list makes sure only one thread handles an
1603	incoming connection. /*
1604	static void *
1605	__attribute__ ((__noreturn__))
1606	nscd_run_worker (void *p)
1607	{
1608	char buf[`256`];
1609
1610	/ Initial locking. /
1611	pthread_mutex_lock (mutex: &readylist_lock);
1612
1613	/ One more thread available. /
1614	++nready;
1615
1616	while (`1`)
1617	{
1618	while (readylist == NULL)
1619	pthread_cond_wait (cond: &readylist_cond, mutex: &readylist_lock);
1620
1621	struct fdlist *it = readylist->next;
1622	if (readylist->next == readylist)
1623	/ Just one entry on the list. /
1624	readylist = NULL;
1625	else
1626	readylist->next = it->next;
1627
1628	/ Extract the information and mark the record ready to be used*
1629	again. /*
1630	int fd = it->fd;
1631	it->next = NULL;
1632
1633	/ One more thread available. /
1634	--nready;
1635
1636	/ We are done with the list. /
1637	pthread_mutex_unlock (mutex: &readylist_lock);
1638
1639	/ Now read the request. /
1640	request_header req;
1641	if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, &req, sizeof (req)))
1642	!= sizeof (req), `0`))
1643	{
1644	/ We failed to read data. Note that this also might mean we*
1645	failed because we would have blocked. /*
1646	if (debug_level > `0`)
1647	dbg_log (_("short read while reading request: %s"),
1648	strerror_r (errno, buf: buf, buflen: sizeof (buf)));
1649	goto close_and_out;
1650	}
1651
1652	/ Check whether this is a valid request type. /
1653	if (req.type < GETPWBYNAME \|\| req.type >= LASTREQ)
1654	goto close_and_out;
1655
1656	/ Some systems have no SO_PEERCRED implementation. They don't*
1657	care about security so we don't as well. /*
1658	uid_t uid = -`1`;
1659	#ifdef SO_PEERCRED
1660	pid_t pid = `0`;
1661
1662	if (__glibc_unlikely (debug_level > `0`))
1663	{
1664	struct ucred caller;
1665	socklen_t optlen = sizeof (caller);
1666
1667	if (getsockopt (fd: fd, SOL_SOCKET, SO_PEERCRED, optval: &caller, optlen: &optlen) == `0`)
1668	pid = caller.pid;
1669	}
1670	#else
1671	const pid_t pid = `0`;
1672	#endif
1673
1674	/ It should not be possible to crash the nscd with a silly*
1675	request (i.e., a terribly large key). We limit the size to 1kb. /*
1676	if (__builtin_expect (req.key_len, `1`) < `0`
1677	\|\| __builtin_expect (req.key_len, `1`) > MAXKEYLEN)
1678	{
1679	if (debug_level > `0`)
1680	dbg_log (_("key length in request too long: %d"), req.key_len);
1681	}
1682	else
1683	{
1684	/ Get the key. /
1685	char keybuf[MAXKEYLEN + `1`];
1686
1687	if (__builtin_expect (TEMP_FAILURE_RETRY (read (fd, keybuf,
1688	req.key_len))
1689	!= req.key_len, `0`))
1690	{
1691	/ Again, this can also mean we would have blocked. /
1692	if (debug_level > `0`)
1693	dbg_log (_("short read while reading request key: %s"),
1694	strerror_r (errno, buf: buf, buflen: sizeof (buf)));
1695	goto close_and_out;
1696	}
1697	keybuf[req.key_len] = `'\0'`;
1698
1699	if (__builtin_expect (debug_level, `0`) > `0`)
1700	{
1701	#ifdef SO_PEERCRED
1702	if (pid != `0`)
1703	dbg_log (_("\
1704	handle_request: request received (Version = %d) from PID %ld"),
1705	req.version, (long int) pid);
1706	else
1707	#endif
1708	dbg_log (_("\
1709	handle_request: request received (Version = %d)"), req.version);
1710	}
1711
1712	/ Phew, we got all the data, now process it. /
1713	handle_request (fd, req: &req, key: keybuf, uid, pid);
1714	}
1715
1716	close_and_out:
1717	/ We are done. /
1718	close (fd: fd);
1719
1720	/ Re-locking. /
1721	pthread_mutex_lock (mutex: &readylist_lock);
1722
1723	/ One more thread available. /
1724	++nready;
1725	}
1726	/ NOTREACHED /
1727	}
1728
1729
1730	static unsigned int nconns;
1731
1732	static void
1733	fd_ready (int fd)
1734	{
1735	pthread_mutex_lock (mutex: &readylist_lock);
1736
1737	/ Find an empty entry in FDLIST. /
1738	size_t inner;
1739	for (inner = `0`; inner < nconns; ++inner)
1740	if (fdlist[inner].next == NULL)
1741	break;
1742	assert (inner < nconns);
1743
1744	fdlist[inner].fd = fd;
1745
1746	if (readylist == NULL)
1747	readylist = fdlist[inner].next = &fdlist[inner];
1748	else
1749	{
1750	fdlist[inner].next = readylist->next;
1751	readylist = readylist->next = &fdlist[inner];
1752	}
1753
1754	bool do_signal = true;
1755	if (__glibc_unlikely (nready == `0`))
1756	{
1757	++client_queued;
1758	do_signal = false;
1759
1760	/ Try to start another thread to help out. /
1761	pthread_t th;
1762	if (nthreads < max_nthreads
1763	&& pthread_create (newthread: &th, attr: &attr, start_routine: nscd_run_worker,
1764	arg: (void ) (long* int) nthreads) == `0`)
1765	{
1766	/ We got another thread. /
1767	++nthreads;
1768	/ The new thread might need a kick. /
1769	do_signal = true;
1770	}
1771
1772	}
1773
1774	pthread_mutex_unlock (mutex: &readylist_lock);
1775
1776	/ Tell one of the worker threads there is work to do. /
1777	if (do_signal)
1778	pthread_cond_signal (cond: &readylist_cond);
1779	}
1780
1781
1782	/ Check whether restarting should happen. /
1783	static bool
1784	restart_p (time_t now)
1785	{
1786	return (paranoia && readylist == NULL && nready == nthreads
1787	&& now >= restart_time);
1788	}
1789
1790
/* Array with the time each connection was accepted.  The main loops
   store the accept time in it and compare it against the timeout.  */
static time_t *starttime;
1793
1794	#ifdef HAVE_INOTIFY
/* Fallback for systems which do not define PATH_MAX.  */
# ifndef PATH_MAX
#  define PATH_MAX 1024
# endif

/* Inotify event for changed file.  The character array provides room
   for the event header followed by a name of up to PATH_MAX bytes and
   doubles as the buffer the events are read into.  */
union __inev
{
  struct inotify_event i;
  char buf[sizeof (struct inotify_event) + PATH_MAX];
};
1804
1805	/ Returns 0 if the file is there otherwise -1. /
1806	int
1807	check_file (struct traced_file *finfo)
1808	{
1809	struct stat64 st;
1810	/ We could check mtime and if different re-add*
1811	the watches, and invalidate the database, but we
1812	don't because we are called from inotify_check_files
1813	which should be doing that work. If sufficient inotify
1814	events were lost then the next pruning or invalidation
1815	will do the stat and mtime check. We don't do it here to
1816	keep the logic simple. /*
1817	if (stat64 (file: finfo->fname, buf: &st) < `0`)
1818	return -`1`;
1819	return `0`;
1820	}
1821
1822	/ Process the inotify event in INEV. If the event matches any of the files*
1823	registered with a database then mark that database as requiring its cache
1824	to be cleared. We indicate the cache needs clearing by setting
1825	TO_CLEAR[DBCNT] to true for the matching database. /*
1826	static void
1827	inotify_check_files (bool to_clear, union* __inev *inev)
1828	{
1829	/ Check which of the files changed. /
1830	for (size_t dbcnt = `0`; dbcnt < lastdb; ++dbcnt)
1831	{
1832	struct traced_file *finfo = dbs[dbcnt].traced_files;
1833
1834	while (finfo != NULL)
1835	{
1836	/ The configuration file was moved or deleted.*
1837	We stop watching it at that point, and reinitialize. /*
1838	if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1839	&& ((inev->i.mask & IN_MOVE_SELF)
1840	\|\| (inev->i.mask & IN_DELETE_SELF)
1841	\|\| (inev->i.mask & IN_IGNORED)))
1842	{
1843	int ret;
1844	bool moved = (inev->i.mask & IN_MOVE_SELF) != `0`;
1845
1846	if (check_file (finfo) == `0`)
1847	{
1848	dbg_log (_("ignored inotify event for `%s` (file exists)"),
1849	finfo->fname);
1850	return;
1851	}
1852
1853	dbg_log (_("monitored file `%s` was %s, removing watch"),
1854	finfo->fname, moved ? "moved" : "deleted");
1855	/ File was moved out, remove the watch. Watches are*
1856	automatically removed when the file is deleted. /*
1857	if (moved)
1858	{
1859	ret = inotify_rm_watch (fd: inotify_fd, wd: inev->i.wd);
1860	if (ret < `0`)
1861	dbg_log (_("failed to remove file watch `%s`: %s"),
1862	finfo->fname, strerror (errno));
1863	}
1864	finfo->inotify_descr[TRACED_FILE] = -`1`;
1865	to_clear[dbcnt] = true;
1866	if (finfo->call_res_init)
1867	res_init ();
1868	return;
1869	}
1870	/ The configuration file was open for writing and has just closed.*
1871	We reset the cache and reinitialize. /*
1872	if (finfo->inotify_descr[TRACED_FILE] == inev->i.wd
1873	&& inev->i.mask & IN_CLOSE_WRITE)
1874	{
1875	/ Mark cache as needing to be cleared and reinitialize. /
1876	dbg_log (_("monitored file `%s` was written to"), finfo->fname);
1877	to_clear[dbcnt] = true;
1878	if (finfo->call_res_init)
1879	res_init ();
1880	return;
1881	}
1882	/ The parent directory was moved or deleted. We trigger one last*
1883	invalidation. At the next pruning or invalidation we may add
1884	this watch back if the file is present again. /*
1885	if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1886	&& ((inev->i.mask & IN_DELETE_SELF)
1887	\|\| (inev->i.mask & IN_MOVE_SELF)
1888	\|\| (inev->i.mask & IN_IGNORED)))
1889	{
1890	bool moved = (inev->i.mask & IN_MOVE_SELF) != `0`;
1891	/ The directory watch may have already been removed*
1892	but we don't know so we just remove it again and
1893	ignore the error. Then we remove the file watch.
1894	Note: watches are automatically removed for deleted
1895	files. /*
1896	if (moved)
1897	inotify_rm_watch (fd: inotify_fd, wd: inev->i.wd);
1898	if (finfo->inotify_descr[TRACED_FILE] != -`1`)
1899	{
1900	dbg_log (_("monitored parent directory `%s` was %s, removing watch on `%s`"),
1901	finfo->dname, moved ? "moved" : "deleted", finfo->fname);
1902	if (inotify_rm_watch (fd: inotify_fd, wd: finfo->inotify_descr[TRACED_FILE]) < `0`)
1903	dbg_log (_("failed to remove file watch `%s`: %s"),
1904	finfo->dname, strerror (errno));
1905	}
1906	finfo->inotify_descr[TRACED_FILE] = -`1`;
1907	finfo->inotify_descr[TRACED_DIR] = -`1`;
1908	to_clear[dbcnt] = true;
1909	if (finfo->call_res_init)
1910	res_init ();
1911	/ Continue to the next entry since this might be the*
1912	parent directory for multiple registered files and
1913	we want to remove watches for all registered files. /*
1914	continue;
1915	}
1916	/ The parent directory had a create or moved to event. /
1917	if (finfo->inotify_descr[TRACED_DIR] == inev->i.wd
1918	&& ((inev->i.mask & IN_MOVED_TO)
1919	\|\| (inev->i.mask & IN_CREATE))
1920	&& strcmp (s1: inev->i.name, s2: finfo->sfname) == `0`)
1921	{
1922	/ We detected a directory change. We look for the creation*
1923	of the file we are tracking or the move of the same file
1924	into the directory. /*
1925	int ret;
1926	dbg_log (_("monitored file `%s` was %s, adding watch"),
1927	finfo->fname,
1928	inev->i.mask & IN_CREATE ? "created" : "moved into place");
1929	/ File was moved in or created. Regenerate the watch. /
1930	if (finfo->inotify_descr[TRACED_FILE] != -`1`)
1931	inotify_rm_watch (fd: inotify_fd,
1932	wd: finfo->inotify_descr[TRACED_FILE]);
1933
1934	ret = inotify_add_watch (fd: inotify_fd,
1935	name: finfo->fname,
1936	TRACED_FILE_MASK);
1937	if (ret < `0`)
1938	dbg_log (_("failed to add file watch `%s`: %s"),
1939	finfo->fname, strerror (errno));
1940
1941	finfo->inotify_descr[TRACED_FILE] = ret;
1942
1943	/ The file is new or moved so mark cache as needing to*
1944	be cleared and reinitialize. /*
1945	to_clear[dbcnt] = true;
1946	if (finfo->call_res_init)
1947	res_init ();
1948
1949	/ Done re-adding the watch. Don't return, we may still*
1950	have other files in this same directory, same watch
1951	descriptor, and need to process them. /*
1952	}
1953	/ Other events are ignored, and we move on to the next file. /
1954	finfo = finfo->next;
1955	}
1956	}
1957	}
1958
1959	/ If an entry in the array of booleans TO_CLEAR is TRUE then clear the cache*
1960	for the associated database, otherwise do nothing. The TO_CLEAR array must
1961	have LASTDB entries. /*
1962	static inline void
1963	clear_db_cache (bool *to_clear)
1964	{
1965	for (size_t dbcnt = `0`; dbcnt < lastdb; ++dbcnt)
1966	if (to_clear[dbcnt])
1967	{
1968	pthread_mutex_lock (mutex: &dbs[dbcnt].prune_lock);
1969	dbs[dbcnt].clear_cache = `1`;
1970	pthread_mutex_unlock (mutex: &dbs[dbcnt].prune_lock);
1971	pthread_cond_signal (cond: &dbs[dbcnt].prune_cond);
1972	}
1973	}
1974
1975	int
1976	handle_inotify_events (void)
1977	{
1978	bool to_clear[lastdb] = { false, };
1979	union __inev inev;
1980
1981	/ Read all inotify events for files registered via*
1982	register_traced_file(). /*
1983	while (`1`)
1984	{
1985	/ Potentially read multiple events into buf. /
1986	ssize_t nb = TEMP_FAILURE_RETRY (read (inotify_fd,
1987	&inev.buf,
1988	sizeof (inev)));
1989	if (nb < (ssize_t) sizeof (struct inotify_event))
1990	{
1991	/ Not even 1 event. /
1992	if (__glibc_unlikely (nb == -`1` && errno != EAGAIN))
1993	return -`1`;
1994	/ Done reading events that are ready. /
1995	break;
1996	}
1997	/ Process all events. The normal inotify interface delivers*
1998	complete events on a read and never a partial event. /*
1999	char *eptr = &inev.buf[`0`];
2000	ssize_t count;
2001	while (`1`)
2002	{
2003	/ Check which of the files changed. /
2004	inotify_check_files (to_clear, inev: &inev);
2005	count = sizeof (struct inotify_event) + inev.i.len;
2006	eptr += count;
2007	nb -= count;
2008	if (nb >= (ssize_t) sizeof (struct inotify_event))
2009	memcpy (dest: &inev, src: eptr, n: nb);
2010	else
2011	break;
2012	}
2013	continue;
2014	}
2015	/ Actually perform the cache clearing. /
2016	clear_db_cache (to_clear);
2017	return `0`;
2018	}
2019
2020	#endif
2021
2022	static void
2023	__attribute__ ((__noreturn__))
2024	main_loop_poll (void)
2025	{
2026	struct pollfd conns = (struct* pollfd *) xmalloc (n: nconns
2027	* sizeof (conns[`0`]));
2028
2029	conns[`0`].fd = sock;
2030	conns[`0`].events = POLLRDNORM;
2031	size_t nused = `1`;
2032	size_t firstfree = `1`;
2033
2034	#ifdef HAVE_INOTIFY
2035	if (inotify_fd != -`1`)
2036	{
2037	conns[`1`].fd = inotify_fd;
2038	conns[`1`].events = POLLRDNORM;
2039	nused = `2`;
2040	firstfree = `2`;
2041	}
2042	#endif
2043
2044	#ifdef HAVE_NETLINK
2045	size_t idx_nl_status_fd = `0`;
2046	if (nl_status_fd != -`1`)
2047	{
2048	idx_nl_status_fd = nused;
2049	conns[nused].fd = nl_status_fd;
2050	conns[nused].events = POLLRDNORM;
2051	++nused;
2052	firstfree = nused;
2053	}
2054	#endif
2055
2056	while (`1`)
2057	{
2058	/ Wait for any event. We wait at most a couple of seconds so*
2059	that we can check whether we should close any of the accepted
2060	connections since we have not received a request. /*
2061	#define MAX_ACCEPT_TIMEOUT 30
2062	#define MIN_ACCEPT_TIMEOUT 5
2063	#define MAIN_THREAD_TIMEOUT \
2064	(MAX_ACCEPT_TIMEOUT * 1000 \
2065	- ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * 1000 * nused) / (2 * nconns))
2066
2067	int n = poll (fds: conns, nfds: nused, MAIN_THREAD_TIMEOUT);
2068
2069	time_t now = time (NULL);
2070
2071	/ If there is a descriptor ready for reading or there is a new*
2072	connection, process this now. /*
2073	if (n > `0`)
2074	{
2075	if (conns[`0`].revents != `0`)
2076	{
2077	/ We have a new incoming connection. Accept the connection. /
2078	int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
2079	SOCK_NONBLOCK));
2080
2081	/ Use the descriptor if we have not reached the limit. /
2082	if (fd >= `0`)
2083	{
2084	if (firstfree < nconns)
2085	{
2086	conns[firstfree].fd = fd;
2087	conns[firstfree].events = POLLRDNORM;
2088	starttime[firstfree] = now;
2089	if (firstfree >= nused)
2090	nused = firstfree + `1`;
2091
2092	do
2093	++firstfree;
2094	while (firstfree < nused && conns[firstfree].fd != -`1`);
2095	}
2096	else
2097	/ We cannot use the connection so close it. /
2098	close (fd: fd);
2099	}
2100
2101	--n;
2102	}
2103
2104	size_t first = `1`;
2105	#ifdef HAVE_INOTIFY
2106	if (inotify_fd != -`1` && conns[`1`].fd == inotify_fd)
2107	{
2108	if (conns[`1`].revents != `0`)
2109	{
2110	int ret;
2111	ret = handle_inotify_events ();
2112	if (ret == -`1`)
2113	{
2114	/ Something went wrong when reading the inotify*
2115	data. Better disable inotify. /*
2116	dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
2117	conns[`1`].fd = -`1`;
2118	firstfree = `1`;
2119	if (nused == `2`)
2120	nused = `1`;
2121	close (fd: inotify_fd);
2122	inotify_fd = -`1`;
2123	}
2124	--n;
2125	}
2126
2127	first = `2`;
2128	}
2129	#endif
2130
2131	#ifdef HAVE_NETLINK
2132	if (idx_nl_status_fd != `0` && conns[idx_nl_status_fd].revents != `0`)
2133	{
2134	char buf[`4096`];
2135	/ Read all the data. We do not interpret it here. /
2136	while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
2137	sizeof (buf))) != -`1`)
2138	;
2139
2140	dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
2141	= __bump_nl_timestamp ();
2142	}
2143	#endif
2144
2145	for (size_t cnt = first; cnt < nused && n > `0`; ++cnt)
2146	if (conns[cnt].revents != `0`)
2147	{
2148	fd_ready (fd: conns[cnt].fd);
2149
2150	/ Clean up the CONNS array. /
2151	conns[cnt].fd = -`1`;
2152	if (cnt < firstfree)
2153	firstfree = cnt;
2154	if (cnt == nused - `1`)
2155	do
2156	--nused;
2157	while (conns[nused - `1`].fd == -`1`);
2158
2159	--n;
2160	}
2161	}
2162
2163	/ Now find entries which have timed out. /
2164	assert (nused > `0`);
2165
2166	/ We make the timeout length depend on the number of file*
2167	descriptors currently used. /*
2168	#define ACCEPT_TIMEOUT \
2169	(MAX_ACCEPT_TIMEOUT \
2170	- ((MAX_ACCEPT_TIMEOUT - MIN_ACCEPT_TIMEOUT) * nused) / nconns)
2171	time_t laststart = now - ACCEPT_TIMEOUT;
2172
2173	for (size_t cnt = nused - `1`; cnt > `0`; --cnt)
2174	{
2175	if (conns[cnt].fd != -`1` && starttime[cnt] < laststart)
2176	{
2177	/ Remove the entry, it timed out. /
2178	(void) close (fd: conns[cnt].fd);
2179	conns[cnt].fd = -`1`;
2180
2181	if (cnt < firstfree)
2182	firstfree = cnt;
2183	if (cnt == nused - `1`)
2184	do
2185	--nused;
2186	while (conns[nused - `1`].fd == -`1`);
2187	}
2188	}
2189
2190	if (restart_p (now))
2191	restart ();
2192	}
2193	}
2194
2195
#ifdef HAVE_EPOLL
/* Main-thread event loop based on epoll.

   EFD is an epoll descriptor supplied by the caller.  The listening
   socket SOCK and, when they are open, the inotify and netlink status
   descriptors are registered with it.  The function returns only if
   one of these initial registrations fails; the loop itself contains
   no exit.

   Inside the loop:
   - new connections are accepted and registered with EFD;
   - descriptors which became readable are removed from EFD and passed
     to fd_ready;
   - accepted connections which stayed silent for longer than
     ACCEPT_TIMEOUT are closed.

   STARTTIME[fd] holds the accept time of a connection that is still
   waiting and zero for every other descriptor.  */
static void
main_loop_epoll (int efd)
{
  struct epoll_event ev = { 0, };
  /* Count of descriptors considered in use.  In this function it only
     feeds the ACCEPT_TIMEOUT computation.  */
  int nused = 1;
  /* Upper bound for the scan over STARTTIME.  */
  size_t highest = 0;

  /* Add the socket.  */
  ev.events = EPOLLRDNORM;
  ev.data.fd = sock;
  if (epoll_ctl (efd, EPOLL_CTL_ADD, sock, &ev) == -1)
    /* We cannot use epoll.  */
    return;

# ifdef HAVE_INOTIFY
  if (inotify_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = inotify_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, inotify_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
      nused = 2;
    }
# endif

# ifdef HAVE_NETLINK
  if (nl_status_fd != -1)
    {
      ev.events = EPOLLRDNORM;
      ev.data.fd = nl_status_fd;
      if (epoll_ctl (efd, EPOLL_CTL_ADD, nl_status_fd, &ev) == -1)
        /* We cannot use epoll.  */
        return;
    }
# endif

  while (1)
    {
      struct epoll_event revs[100];
# define nrevs (sizeof (revs) / sizeof (revs[0]))

      /* A negative result (error) simply skips the event loop below;
         the timeout scan and the restart check still run.  */
      int n = epoll_wait (efd, revs, nrevs, MAIN_THREAD_TIMEOUT);

      time_t now = time (NULL);

      for (int cnt = 0; cnt < n; ++cnt)
        if (revs[cnt].data.fd == sock)
          {
            /* A new connection.  */
            int fd = TEMP_FAILURE_RETRY (accept4 (sock, NULL, NULL,
                                                  SOCK_NONBLOCK));

            /* Use the descriptor if we have not reached the limit.  */
            if (fd >= 0)
              {
                /* Try to add the new descriptor.  EV.events still
                   holds EPOLLRDNORM from the set-up code above.  */
                ev.data.fd = fd;
                if (fd >= nconns
                    || epoll_ctl (efd, EPOLL_CTL_ADD, fd, &ev) == -1)
                  /* The descriptor is too large or something went
                     wrong.  Close the descriptor.  */
                  close (fd);
                else
                  {
                    /* Remember when we accepted the connection.  */
                    starttime[fd] = now;

                    if (fd > highest)
                      highest = fd;

                    ++nused;
                  }
              }
          }
# ifdef HAVE_INOTIFY
        else if (revs[cnt].data.fd == inotify_fd)
          {
            int ret;
            ret = handle_inotify_events ();
            if (ret == -1)
              {
                /* Something went wrong when reading the inotify
                   data.  Better disable inotify.  */
                dbg_log (_("disabled inotify-based monitoring after read error %d"), errno);
                (void) epoll_ctl (efd, EPOLL_CTL_DEL, inotify_fd, NULL);
                close (inotify_fd);
                inotify_fd = -1;
                /* Abandon the rest of this batch of events.  */
                break;
              }
          }
# endif
# ifdef HAVE_NETLINK
        else if (revs[cnt].data.fd == nl_status_fd)
          {
            char buf[4096];
            /* Read all the data.  We do not interpret it here.  */
            while (TEMP_FAILURE_RETRY (read (nl_status_fd, buf,
                                             sizeof (buf))) != -1)
              ;

            dbs[hstdb].head->extra_data[NSCD_HST_IDX_CONF_TIMESTAMP]
              = __bump_nl_timestamp ();
          }
# endif
        else
          {
            /* Remove the descriptor from the epoll descriptor.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, revs[cnt].data.fd, NULL);

            /* Get a worker to handle the request.  */
            fd_ready (revs[cnt].data.fd);

            /* Reset the time.  */
            starttime[revs[cnt].data.fd] = 0;
            if (revs[cnt].data.fd == highest)
              do
                --highest;
              while (highest > 0 && starttime[highest] == 0);

            --nused;
          }

      /* Now look for descriptors for accepted connections which have
         no reply in too long of a time.  */
      time_t laststart = now - ACCEPT_TIMEOUT;
      assert (starttime[sock] == 0);
# ifdef HAVE_INOTIFY
      assert (inotify_fd == -1 || starttime[inotify_fd] == 0);
# endif
# ifdef HAVE_NETLINK
      /* Guarded like every other use of NL_STATUS_FD in this
         function.  */
      assert (nl_status_fd == -1 || starttime[nl_status_fd] == 0);
# endif
      for (int cnt = highest; cnt > STDERR_FILENO; --cnt)
        if (starttime[cnt] != 0 && starttime[cnt] < laststart)
          {
            /* We are waiting for this one for too long.  Close it.  */
            (void) epoll_ctl (efd, EPOLL_CTL_DEL, cnt, NULL);

            (void) close (cnt);

            starttime[cnt] = 0;
            if (cnt == highest)
              --highest;

            /* The connection was counted when it was accepted and will
               never reach the fd_ready path, so drop it from the count
               here.  Without this NUSED grows with every timed-out
               connection and ACCEPT_TIMEOUT, which is derived from it,
               keeps shrinking.  */
            --nused;
          }
        else if (cnt != sock && starttime[cnt] == 0 && cnt == highest)
          --highest;

      if (restart_p (now))
        restart ();
    }
}
#endif
2348
2349
2350	/ Start all the threads we want. The initial process is thread no. 1. /
2351	void
2352	start_threads (void)
2353	{
2354	/ Initialize the conditional variable we will use. The only*
2355	non-standard attribute we might use is the clock selection. /*
2356	pthread_condattr_t condattr;
2357	pthread_condattr_init (attr: &condattr);
2358
2359	#if defined _POSIX_CLOCK_SELECTION && _POSIX_CLOCK_SELECTION >= 0 \
2360	&& defined _POSIX_MONOTONIC_CLOCK && _POSIX_MONOTONIC_CLOCK >= 0
2361	/ Determine whether the monotonous clock is available. /
2362	struct timespec dummy;
2363	# if _POSIX_MONOTONIC_CLOCK == 0
2364	if (sysconf (_SC_MONOTONIC_CLOCK) > `0`)
2365	# endif
2366	# if _POSIX_CLOCK_SELECTION == 0
2367	if (sysconf (_SC_CLOCK_SELECTION) > `0`)
2368	# endif
2369	if (clock_getres (CLOCK_MONOTONIC, res: &dummy) == `0`
2370	&& pthread_condattr_setclock (attr: &condattr, CLOCK_MONOTONIC) == `0`)
2371	timeout_clock = CLOCK_MONOTONIC;
2372	#endif
2373
2374	/ Create the attribute for the threads. They are all created*
2375	detached. /*
2376	pthread_attr_init (attr: &attr);
2377	pthread_attr_setdetachstate (attr: &attr, PTHREAD_CREATE_DETACHED);
2378	/ Use 1MB stacks, twice as much for 64-bit architectures. /
2379	pthread_attr_setstacksize (attr: &attr, NSCD_THREAD_STACKSIZE);
2380
2381	/ We allow less than LASTDB threads only for debugging. /
2382	if (debug_level == `0`)
2383	nthreads = MAX (nthreads, lastdb);
2384
2385	/ Create the threads which prune the databases. /
2386	// XXX Ideally this work would be done by some of the worker threads.
2387	// XXX But this is problematic since we would need to be able to wake
2388	// XXX them up explicitly as well as part of the group handling the
2389	// XXX ready-list. This requires an operation where we can wait on
2390	// XXX two conditional variables at the same time. This operation
2391	// XXX does not exist (yet).
2392	for (long int i = `0`; i < lastdb; ++i)
2393	{
2394	/ Initialize the conditional variable. /
2395	if (pthread_cond_init (cond: &dbs[i].prune_cond, cond_attr: &condattr) != `0`)
2396	{
2397	dbg_log (_("could not initialize conditional variable"));
2398	do_exit (child_ret: `1`, errnum: `0`, NULL);
2399	}
2400
2401	pthread_t th;
2402	if (dbs[i].enabled
2403	&& pthread_create (newthread: &th, attr: &attr, start_routine: nscd_run_prune, arg: (void *) i) != `0`)
2404	{
2405	dbg_log (_("could not start clean-up thread; terminating"));
2406	do_exit (child_ret: `1`, errnum: `0`, NULL);
2407	}
2408	}
2409
2410	pthread_condattr_destroy (attr: &condattr);
2411
2412	for (long int i = `0`; i < nthreads; ++i)
2413	{
2414	pthread_t th;
2415	if (pthread_create (newthread: &th, attr: &attr, start_routine: nscd_run_worker, NULL) != `0`)
2416	{
2417	if (i == `0`)
2418	{
2419	dbg_log (_("could not start any worker thread; terminating"));
2420	do_exit (child_ret: `1`, errnum: `0`, NULL);
2421	}
2422
2423	break;
2424	}
2425	}
2426
2427	/ Now it is safe to let the parent know that we're doing fine and it can*
2428	exit. /*
2429	notify_parent (child_ret: `0`);
2430
2431	/ Determine how much room for descriptors we should initially*
2432	allocate. This might need to change later if we cap the number
2433	with MAXCONN. /*
2434	const long int nfds = sysconf (_SC_OPEN_MAX);
2435	#define MINCONN 32
2436	#define MAXCONN 16384
2437	if (nfds == -`1` \|\| nfds > MAXCONN)
2438	nconns = MAXCONN;
2439	else if (nfds < MINCONN)
2440	nconns = MINCONN;
2441	else
2442	nconns = nfds;
2443
2444	/ We need memory to pass descriptors on to the worker threads. /
2445	fdlist = (struct fdlist ) xcalloc (n: nconns, s: sizeof* (fdlist[`0`]));
2446	/ Array to keep track when connection was accepted. /
2447	starttime = (time_t ) xcalloc (n: nconns, s: sizeof* (starttime[`0`]));
2448
2449	/ In the main thread we execute the loop which handles incoming*
2450	connections. /*
2451	#ifdef HAVE_EPOLL
2452	int efd = epoll_create (size: `100`);
2453	if (efd != -`1`)
2454	{
2455	main_loop_epoll (efd);
2456	close (fd: efd);
2457	}
2458	#endif
2459
2460	main_loop_poll ();
2461	}
2462
2463
2464	/ Look up the uid, gid, and supplementary groups to run nscd as. When*
2465	this function is called, we are not listening on the nscd socket yet so
2466	we can just use the ordinary lookup functions without causing a lockup /*
2467	static void
2468	begin_drop_privileges (void)
2469	{
2470	struct passwd *pwd = getpwnam (name: server_user);
2471
2472	if (pwd == NULL)
2473	{
2474	dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2475	do_exit (EXIT_FAILURE, errnum: `0`,
2476	_("Failed to run nscd as user '%s'"), server_user);
2477	}
2478
2479	server_uid = pwd->pw_uid;
2480	server_gid = pwd->pw_gid;
2481
2482	/ Save the old UID/GID if we have to change back. /
2483	if (paranoia)
2484	{
2485	old_uid = getuid ();
2486	old_gid = getgid ();
2487	}
2488
2489	if (getgrouplist (user: server_user, group: server_gid, NULL, ngroups: &server_ngroups) == `0`)
2490	{
2491	/ This really must never happen. /
2492	dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2493	do_exit (EXIT_FAILURE, errno,
2494	_("initial getgrouplist failed"));
2495	}
2496
2497	server_groups = (gid_t ) xmalloc (n: server_ngroups sizeof (gid_t));
2498
2499	if (getgrouplist (user: server_user, group: server_gid, groups: server_groups, ngroups: &server_ngroups)
2500	== -`1`)
2501	{
2502	dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2503	do_exit (EXIT_FAILURE, errno, _("getgrouplist failed"));
2504	}
2505	}
2506
2507
2508	/ Call setgroups(), setgid(), and setuid() to drop root privileges and*
2509	run nscd as the user specified in the configuration file. /*
2510	static void
2511	finish_drop_privileges (void)
2512	{
2513	#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2514	/ We need to preserve the capabilities to connect to the audit daemon. /
2515	cap_t new_caps = preserve_capabilities ();
2516	#endif
2517
2518	if (setgroups (n: server_ngroups, groups: server_groups) == -`1`)
2519	{
2520	dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2521	do_exit (EXIT_FAILURE, errno, _("setgroups failed"));
2522	}
2523
2524	int res;
2525	if (paranoia)
2526	res = setresgid (rgid: server_gid, egid: server_gid, sgid: old_gid);
2527	else
2528	res = setgid (server_gid);
2529	if (res == -`1`)
2530	{
2531	dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2532	do_exit (child_ret: `4`, errno, format: "setgid");
2533	}
2534
2535	if (paranoia)
2536	res = setresuid (ruid: server_uid, euid: server_uid, suid: old_uid);
2537	else
2538	res = setuid (server_uid);
2539	if (res == -`1`)
2540	{
2541	dbg_log (_("Failed to run nscd as user '%s'"), server_user);
2542	do_exit (child_ret: `4`, errno, format: "setuid");
2543	}
2544
2545	#if defined HAVE_LIBAUDIT && defined HAVE_LIBCAP
2546	/ Remove the temporary capabilities. /
2547	install_real_capabilities (new_caps);
2548	#endif
2549	}
2550

source code of glibc/nscd/connections.c