1/* Copyright (C) 1998-2022 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <https://www.gnu.org/licenses/>. */
17
18#include <assert.h>
19#include <errno.h>
20#include <fcntl.h>
21#include <stdbool.h>
22#include <stddef.h>
23#include <stdlib.h>
24#include <string.h>
25#include <time.h>
26#include <unistd.h>
27#include <stdint.h>
28#include <sys/mman.h>
29#include <sys/param.h>
30#include <sys/poll.h>
31#include <sys/socket.h>
32#include <sys/stat.h>
33#include <sys/time.h>
34#include <sys/uio.h>
35#include <sys/un.h>
36#include <not-cancel.h>
37#include <kernel-features.h>
38#include <nss.h>
39#include <struct___timespec64.h>
40
41#include "nscd-client.h"
42
43/* Extra time we wait if the socket is still receiving data. This
44 value is in milliseconds. Note that the other side is nscd on the
45 local machine and it is already transmitting data. So the wait
46 time need not be long. */
47#define EXTRA_RECEIVE_TIME 200
48
49
50static int
51wait_on_socket (int sock, long int usectmo)
52{
53 struct pollfd fds[1];
54 fds[0].fd = sock;
55 fds[0].events = POLLIN | POLLERR | POLLHUP;
56 int n = __poll (fds, 1, usectmo);
57 if (n == -1 && __builtin_expect (errno == EINTR, 0))
58 {
59 /* Handle the case where the poll() call is interrupted by a
60 signal. We cannot just use TEMP_FAILURE_RETRY since it might
61 lead to infinite loops. */
62 struct __timespec64 now;
63 __clock_gettime64 (CLOCK_REALTIME, &now);
64 int64_t end = (now.tv_sec * 1000 + usectmo
65 + (now.tv_nsec + 500000) / 1000000);
66 long int timeout = usectmo;
67 while (1)
68 {
69 n = __poll (fds, 1, timeout);
70 if (n != -1 || errno != EINTR)
71 break;
72
73 /* Recompute the timeout time. */
74 __clock_gettime64 (CLOCK_REALTIME, &now);
75 timeout = end - ((now.tv_sec * 1000
76 + (now.tv_nsec + 500000) / 1000000));
77 }
78 }
79
80 return n;
81}
82
83
84ssize_t
85__readall (int fd, void *buf, size_t len)
86{
87 size_t n = len;
88 ssize_t ret;
89 do
90 {
91 again:
92 ret = TEMP_FAILURE_RETRY (__read (fd, buf, n));
93 if (ret <= 0)
94 {
95 if (__builtin_expect (ret < 0 && errno == EAGAIN, 0)
96 /* The socket is still receiving data. Wait a bit more. */
97 && wait_on_socket (sock: fd, EXTRA_RECEIVE_TIME) > 0)
98 goto again;
99
100 break;
101 }
102 buf = (char *) buf + ret;
103 n -= ret;
104 }
105 while (n > 0);
106 return ret < 0 ? ret : len - n;
107}
108
109
110ssize_t
111__readvall (int fd, const struct iovec *iov, int iovcnt)
112{
113 ssize_t ret = TEMP_FAILURE_RETRY (__readv (fd, iov, iovcnt));
114 if (ret <= 0)
115 {
116 if (__glibc_likely (ret == 0 || errno != EAGAIN))
117 /* A genuine error or no data to read. */
118 return ret;
119
120 /* The data has not all yet been received. Do as if we have not
121 read anything yet. */
122 ret = 0;
123 }
124
125 size_t total = 0;
126 for (int i = 0; i < iovcnt; ++i)
127 total += iov[i].iov_len;
128
129 if (ret < total)
130 {
131 struct iovec iov_buf[iovcnt];
132 ssize_t r = ret;
133
134 struct iovec *iovp = memcpy (iov_buf, iov, iovcnt * sizeof (*iov));
135 do
136 {
137 while (iovp->iov_len <= r)
138 {
139 r -= iovp->iov_len;
140 --iovcnt;
141 ++iovp;
142 }
143 iovp->iov_base = (char *) iovp->iov_base + r;
144 iovp->iov_len -= r;
145 again:
146 r = TEMP_FAILURE_RETRY (__readv (fd, iovp, iovcnt));
147 if (r <= 0)
148 {
149 if (__builtin_expect (r < 0 && errno == EAGAIN, 0)
150 /* The socket is still receiving data. Wait a bit more. */
151 && wait_on_socket (sock: fd, EXTRA_RECEIVE_TIME) > 0)
152 goto again;
153
154 break;
155 }
156 ret += r;
157 }
158 while (ret < total);
159 if (r < 0)
160 ret = r;
161 }
162 return ret;
163}
164
165
166static int
167open_socket (request_type type, const char *key, size_t keylen)
168{
169 int sock;
170
171 sock = __socket (PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC | SOCK_NONBLOCK, 0);
172 if (sock < 0)
173 return -1;
174
175 size_t real_sizeof_reqdata = sizeof (request_header) + keylen;
176 struct
177 {
178 request_header req;
179 char key[];
180 } *reqdata = alloca (real_sizeof_reqdata);
181
182 struct sockaddr_un sun;
183 sun.sun_family = AF_UNIX;
184 strcpy (sun.sun_path, _PATH_NSCDSOCKET);
185 if (__connect (sock, (struct sockaddr *) &sun, sizeof (sun)) < 0
186 && errno != EINPROGRESS)
187 goto out;
188
189 reqdata->req.version = NSCD_VERSION;
190 reqdata->req.type = type;
191 reqdata->req.key_len = keylen;
192
193 memcpy (reqdata->key, key, keylen);
194
195 bool first_try = true;
196 struct __timespec64 tvend = { 0, 0 };
197 while (1)
198 {
199#ifndef MSG_NOSIGNAL
200# define MSG_NOSIGNAL 0
201#endif
202 ssize_t wres = TEMP_FAILURE_RETRY (__send (sock, reqdata,
203 real_sizeof_reqdata,
204 MSG_NOSIGNAL));
205 if (__glibc_likely (wres == (ssize_t) real_sizeof_reqdata))
206 /* We managed to send the request. */
207 return sock;
208
209 if (wres != -1 || errno != EAGAIN)
210 /* Something is really wrong, no chance to continue. */
211 break;
212
213 /* The daemon is busy wait for it. */
214 int to;
215 struct __timespec64 now;
216 __clock_gettime64 (CLOCK_REALTIME, &now);
217 if (first_try)
218 {
219 tvend.tv_nsec = now.tv_nsec;
220 tvend.tv_sec = now.tv_sec + 5;
221 to = 5 * 1000;
222 first_try = false;
223 }
224 else
225 to = ((tvend.tv_sec - now.tv_sec) * 1000
226 + (tvend.tv_nsec - now.tv_nsec) / 1000000);
227
228 struct pollfd fds[1];
229 fds[0].fd = sock;
230 fds[0].events = POLLOUT | POLLERR | POLLHUP;
231 if (__poll (fds, 1, to) <= 0)
232 /* The connection timed out or broke down. */
233 break;
234
235 /* We try to write again. */
236 }
237
238 out:
239 __close_nocancel_nostatus (fd: sock);
240
241 return -1;
242}
243
244
245void
246__nscd_unmap (struct mapped_database *mapped)
247{
248 assert (mapped->counter == 0);
249 __munmap ((void *) mapped->head, mapped->mapsize);
250 free (ptr: mapped);
251}
252
253
254/* Try to get a file descriptor for the shared meory segment
255 containing the database. */
256struct mapped_database *
257__nscd_get_mapping (request_type type, const char *key,
258 struct mapped_database **mappedp)
259{
260 struct mapped_database *result = NO_MAPPING;
261#ifdef SCM_RIGHTS
262 const size_t keylen = strlen (key) + 1;
263 int saved_errno = errno;
264
265 int mapfd = -1;
266 char resdata[keylen];
267
268 /* Open a socket and send the request. */
269 int sock = open_socket (type, key, keylen);
270 if (sock < 0)
271 goto out;
272
273 /* Room for the data sent along with the file descriptor. We expect
274 the key name back. */
275 uint64_t mapsize;
276 struct iovec iov[2];
277 iov[0].iov_base = resdata;
278 iov[0].iov_len = keylen;
279 iov[1].iov_base = &mapsize;
280 iov[1].iov_len = sizeof (mapsize);
281
282 union
283 {
284 struct cmsghdr hdr;
285 char bytes[CMSG_SPACE (sizeof (int))];
286 } buf;
287 struct msghdr msg = { .msg_iov = iov, .msg_iovlen = 2,
288 .msg_control = buf.bytes,
289 .msg_controllen = sizeof (buf) };
290 struct cmsghdr *cmsg = CMSG_FIRSTHDR (&msg);
291
292 cmsg->cmsg_level = SOL_SOCKET;
293 cmsg->cmsg_type = SCM_RIGHTS;
294 cmsg->cmsg_len = CMSG_LEN (sizeof (int));
295
296 /* This access is well-aligned since BUF is correctly aligned for an
297 int and CMSG_DATA preserves this alignment. */
298 memset (CMSG_DATA (cmsg), '\xff', sizeof (int));
299
300 msg.msg_controllen = cmsg->cmsg_len;
301
302 if (wait_on_socket (sock, usectmo: 5 * 1000) <= 0)
303 goto out_close2;
304
305# ifndef MSG_CMSG_CLOEXEC
306# define MSG_CMSG_CLOEXEC 0
307# endif
308 ssize_t n = TEMP_FAILURE_RETRY (__recvmsg (sock, &msg, MSG_CMSG_CLOEXEC));
309
310 if (__builtin_expect (CMSG_FIRSTHDR (&msg) == NULL
311 || (CMSG_FIRSTHDR (&msg)->cmsg_len
312 != CMSG_LEN (sizeof (int))), 0))
313 goto out_close2;
314
315 int *ip = (void *) CMSG_DATA (cmsg);
316 mapfd = *ip;
317
318 if (__glibc_unlikely (n != keylen && n != keylen + sizeof (mapsize)))
319 goto out_close;
320
321 if (__glibc_unlikely (strcmp (resdata, key) != 0))
322 goto out_close;
323
324 if (__glibc_unlikely (n == keylen))
325 {
326 struct __stat64_t64 st;
327 if (__glibc_unlikely (__fstat64_time64 (mapfd, &st) != 0)
328 || __builtin_expect (st.st_size < sizeof (struct database_pers_head),
329 0))
330 goto out_close;
331
332 mapsize = st.st_size;
333 }
334
335 /* The file is large enough, map it now. */
336 void *mapping = __mmap (NULL, mapsize, PROT_READ, MAP_SHARED, mapfd, 0);
337 if (__glibc_likely (mapping != MAP_FAILED))
338 {
339 /* Check whether the database is correct and up-to-date. */
340 struct database_pers_head *head = mapping;
341
342 if (__builtin_expect (head->version != DB_VERSION, 0)
343 || __builtin_expect (head->header_size != sizeof (*head), 0)
344 /* Catch some misconfiguration. The server should catch
345 them now but some older versions did not. */
346 || __builtin_expect (head->module == 0, 0)
347 /* This really should not happen but who knows, maybe the update
348 thread got stuck. */
349 || __builtin_expect (! head->nscd_certainly_running
350 && (head->timestamp + MAPPING_TIMEOUT
351 < time_now ()), 0))
352 {
353 out_unmap:
354 __munmap (mapping, mapsize);
355 goto out_close;
356 }
357
358 size_t size = (sizeof (*head) + roundup (head->module * sizeof (ref_t),
359 ALIGN)
360 + head->data_size);
361
362 if (__glibc_unlikely (mapsize < size))
363 goto out_unmap;
364
365 /* Allocate a record for the mapping. */
366 struct mapped_database *newp = malloc (size: sizeof (*newp));
367 if (newp == NULL)
368 /* Ugh, after all we went through the memory allocation failed. */
369 goto out_unmap;
370
371 newp->head = mapping;
372 newp->data = ((char *) mapping + head->header_size
373 + roundup (head->module * sizeof (ref_t), ALIGN));
374 newp->mapsize = size;
375 newp->datasize = head->data_size;
376 /* Set counter to 1 to show it is usable. */
377 newp->counter = 1;
378
379 result = newp;
380 }
381
382 out_close:
383 __close (mapfd);
384 out_close2:
385 __close (sock);
386 out:
387 __set_errno (saved_errno);
388#endif /* SCM_RIGHTS */
389
390 struct mapped_database *oldval = *mappedp;
391 *mappedp = result;
392
393 if (oldval != NULL && atomic_decrement_val (&oldval->counter) == 0)
394 __nscd_unmap (mapped: oldval);
395
396 return result;
397}
398
399struct mapped_database *
400__nscd_get_map_ref (request_type type, const char *name,
401 volatile struct locked_map_ptr *mapptr, int *gc_cyclep)
402{
403 struct mapped_database *cur = mapptr->mapped;
404 if (cur == NO_MAPPING)
405 return cur;
406
407 if (!__nscd_acquire_maplock (mapptr))
408 return NO_MAPPING;
409
410 cur = mapptr->mapped;
411
412 if (__glibc_likely (cur != NO_MAPPING))
413 {
414 /* If not mapped or timestamp not updated, request new map. */
415 if (cur == NULL
416 || (cur->head->nscd_certainly_running == 0
417 && cur->head->timestamp + MAPPING_TIMEOUT < time_now ())
418 || cur->head->data_size > cur->datasize)
419 cur = __nscd_get_mapping (type, key: name,
420 mappedp: (struct mapped_database **) &mapptr->mapped);
421
422 if (__glibc_likely (cur != NO_MAPPING))
423 {
424 if (__builtin_expect (((*gc_cyclep = cur->head->gc_cycle) & 1) != 0,
425 0))
426 cur = NO_MAPPING;
427 else
428 atomic_increment (&cur->counter);
429 }
430 }
431
432 mapptr->lock = 0;
433
434 return cur;
435}
436
437
438/* Using sizeof (hashentry) is not always correct to determine the size of
439 the data structure as found in the nscd cache. The program could be
440 a 64-bit process and nscd could be a 32-bit process. In this case
441 sizeof (hashentry) would overestimate the size. The following is
442 the minimum size of such an entry, good enough for our tests here. */
443#define MINIMUM_HASHENTRY_SIZE \
444 (offsetof (struct hashentry, dellist) + sizeof (int32_t))
445
/* Look up KEY (KEYLEN bytes) of request TYPE in the mapped nscd cache
   MAPPED, requiring at least DATALEN bytes of payload to lie inside
   the mapping.  Returns the matching datahead record or NULL if no
   valid entry is found.  All offsets read from the shared mapping are
   bounds-checked against MAPPED->datasize because the data is written
   concurrently by nscd and may be inconsistent mid-GC.

   Don't return const struct datahead *, as even though the record
   is normally constant, it can change arbitrarily during nscd
   garbage collection.  */
struct datahead *
__nscd_cache_search (request_type type, const char *key, size_t keylen,
		     const struct mapped_database *mapped, size_t datalen)
{
  /* Pick the hash bucket for KEY.  */
  unsigned long int hash = __nss_hash (key, keylen) % mapped->head->module;
  size_t datasize = mapped->datasize;

  /* TRAIL is a second, half-speed cursor used for cycle detection;
     WORK walks the bucket's chain.  */
  ref_t trail = mapped->head->array[hash];
  trail = atomic_forced_read (trail);
  ref_t work = trail;
  /* Upper bound on chain steps: no valid chain can have more entries
     than fit in the data area.  */
  size_t loop_cnt = datasize / (MINIMUM_HASHENTRY_SIZE
				+ offsetof (struct datahead, data) / 2);
  int tick = 0;

  while (work != ENDREF && work + MINIMUM_HASHENTRY_SIZE <= datasize)
    {
      struct hashentry *here = (struct hashentry *) (mapped->data + work);
      ref_t here_key, here_packet;

#if !_STRING_ARCH_unaligned
      /* Although during garbage collection when moving struct hashentry
	 records around we first copy from old to new location and then
	 adjust pointer from previous hashentry to it, there is no barrier
	 between those memory writes.  It is very unlikely to hit it,
	 so check alignment only if a misaligned load can crash the
	 application.  */
      if ((uintptr_t) here & (__alignof__ (*here) - 1))
	return NULL;
#endif

      /* Match type, key length and key bytes; every offset must stay
	 within the mapping before it is dereferenced.  */
      if (type == here->type
	  && keylen == here->len
	  && (here_key = atomic_forced_read (here->key)) + keylen <= datasize
	  && memcmp (key, mapped->data + here_key, keylen) == 0
	  && ((here_packet = atomic_forced_read (here->packet))
	      + sizeof (struct datahead) <= datasize))
	{
	  /* We found the entry.  Increment the appropriate counter.  */
	  struct datahead *dh
	    = (struct datahead *) (mapped->data + here_packet);

#if !_STRING_ARCH_unaligned
	  if ((uintptr_t) dh & (__alignof__ (*dh) - 1))
	    return NULL;
#endif

	  /* See whether we must ignore the entry or whether something
	     is wrong because garbage collection is in progress.  */
	  if (dh->usable
	      && here_packet + dh->allocsize <= datasize
	      && (here_packet + offsetof (struct datahead, data) + datalen
		  <= datasize))
	    return dh;
	}

      work = atomic_forced_read (here->next);
      /* Prevent endless loops.  This should never happen but perhaps
	 the database got corrupted, accidentally or deliberately.  */
      if (work == trail || loop_cnt-- == 0)
	break;
      /* Advance TRAIL every other iteration (tortoise-and-hare style)
	 so a cycle in the chain is eventually detected above.  */
      if (tick)
	{
	  struct hashentry *trailelem;
	  trailelem = (struct hashentry *) (mapped->data + trail);

#if !_STRING_ARCH_unaligned
	  /* We have to redo the checks.  Maybe the data changed.  */
	  if ((uintptr_t) trailelem & (__alignof__ (*trailelem) - 1))
	    return NULL;
#endif

	  if (trail + MINIMUM_HASHENTRY_SIZE > datasize)
	    return NULL;

	  trail = atomic_forced_read (trailelem->next);
	}
      tick = 1 - tick;
    }

  return NULL;
}
530
531
532/* Create a socket connected to a name. */
533int
534__nscd_open_socket (const char *key, size_t keylen, request_type type,
535 void *response, size_t responselen)
536{
537 /* This should never happen and it is something the nscd daemon
538 enforces, too. He it helps to limit the amount of stack
539 used. */
540 if (keylen > MAXKEYLEN)
541 return -1;
542
543 int saved_errno = errno;
544
545 int sock = open_socket (type, key, keylen);
546 if (sock >= 0)
547 {
548 /* Wait for data. */
549 if (wait_on_socket (sock, usectmo: 5 * 1000) > 0)
550 {
551 ssize_t nbytes = TEMP_FAILURE_RETRY (__read (sock, response,
552 responselen));
553 if (nbytes == (ssize_t) responselen)
554 return sock;
555 }
556
557 __close_nocancel_nostatus (fd: sock);
558 }
559
560 __set_errno (saved_errno);
561
562 return -1;
563}
564

/* Source: glibc nscd/nscd_helper.c.  */