1 | /* Guts of both `select' and `poll' for Hurd. |
2 | Copyright (C) 1991-2024 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <https://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <sys/time.h> |
20 | #include <sys/types.h> |
21 | #include <sys/poll.h> |
22 | #include <hurd.h> |
23 | #include <hurd/fd.h> |
24 | #include <hurd/io_request.h> |
25 | #include <mach_rpc.h> |
26 | #include <stdlib.h> |
27 | #include <string.h> |
28 | #include <assert.h> |
29 | #include <stdint.h> |
30 | #include <limits.h> |
31 | #include <time.h> |
32 | #include <sysdep-cancel.h> |
33 | |
/* All user select types: the union of the three request bits a caller may
   ask for.  */
#define SELECT_ALL (SELECT_READ | SELECT_WRITE | SELECT_URG)

/* Used to record that a particular select rpc returned.  Must be distinct
   from SELECT_ALL (which better not have the high bit set).  It is built
   from the bits of SELECT_ALL shifted left by one that are not themselves
   part of SELECT_ALL.  */
#define SELECT_RETURNED ((SELECT_ALL << 1) & ~SELECT_ALL)
/* Marks a descriptor whose lookup, request or reply failed; _hurd_select
   keeps the error code for such an entry in its per-descriptor `error'
   field.  Derived by shifting SELECT_RETURNED left by one.  */
#define SELECT_ERROR (SELECT_RETURNED << 1)
41 | |
42 | /* Check the first NFDS descriptors either in POLLFDS (if nonnnull) or in |
43 | each of READFDS, WRITEFDS, EXCEPTFDS that is nonnull. If TIMEOUT is not |
44 | NULL, time out after waiting the interval specified therein. Returns |
45 | the number of ready descriptors, or -1 for errors. */ |
46 | int |
47 | _hurd_select (int nfds, |
48 | struct pollfd *pollfds, |
49 | fd_set *readfds, fd_set *writefds, fd_set *exceptfds, |
50 | const struct timespec *timeout, const sigset_t *sigmask) |
51 | { |
52 | int i; |
53 | mach_port_t portset, sigport; |
54 | int got, ready; |
55 | error_t err; |
56 | fd_set rfds, wfds, xfds; |
57 | int firstfd, lastfd; |
58 | mach_msg_id_t reply_msgid; |
59 | mach_msg_timeout_t to; |
60 | struct timespec ts; |
61 | struct |
62 | { |
63 | struct hurd_userlink ulink; |
64 | struct hurd_fd *cell; |
65 | mach_port_t io_port; |
66 | int type; |
67 | mach_port_t reply_port; |
68 | int error; |
69 | } d[nfds]; |
70 | sigset_t oset; |
71 | struct hurd_sigstate *ss = NULL; |
72 | |
73 | if (nfds < 0 || (pollfds == NULL && nfds > FD_SETSIZE)) |
74 | return __hurd_fail (EINVAL); |
75 | |
76 | #define IO_SELECT_REPLY_MSGID (21012 + 100) /* XXX */ |
77 | #define IO_SELECT_TIMEOUT_REPLY_MSGID (21031 + 100) /* XXX */ |
78 | |
79 | if (timeout == NULL) |
80 | reply_msgid = IO_SELECT_REPLY_MSGID; |
81 | else |
82 | { |
83 | struct timespec now; |
84 | |
85 | if (timeout->tv_sec < 0 || ! valid_nanoseconds (ns: timeout->tv_nsec)) |
86 | return __hurd_fail (EINVAL); |
87 | |
88 | err = __clock_gettime (CLOCK_REALTIME, &now); |
89 | if (err) |
90 | return -1; |
91 | |
92 | ts.tv_sec = now.tv_sec + timeout->tv_sec; |
93 | ts.tv_nsec = now.tv_nsec + timeout->tv_nsec; |
94 | |
95 | if (ts.tv_nsec >= 1000000000) |
96 | { |
97 | ts.tv_sec++; |
98 | ts.tv_nsec -= 1000000000; |
99 | } |
100 | |
101 | if (ts.tv_sec < 0) |
102 | ts.tv_sec = LONG_MAX; /* XXX */ |
103 | |
104 | reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID; |
105 | } |
106 | |
107 | if (sigmask) |
108 | { |
109 | /* Add a port to the portset for the case when we get the signal even |
110 | before calling __mach_msg. */ |
111 | |
112 | sigport = __mach_reply_port (); |
113 | |
114 | ss = _hurd_self_sigstate (); |
115 | _hurd_sigstate_lock (ss); |
116 | /* And tell the signal thread to message us when a signal arrives. */ |
117 | ss->suspended = sigport; |
118 | _hurd_sigstate_unlock (ss); |
119 | |
120 | if (__sigprocmask (SIG_SETMASK, set: sigmask, oset: &oset)) |
121 | { |
122 | _hurd_sigstate_lock (ss); |
123 | ss->suspended = MACH_PORT_NULL; |
124 | _hurd_sigstate_unlock (ss); |
125 | __mach_port_destroy (__mach_task_self (), sigport); |
126 | return -1; |
127 | } |
128 | } |
129 | else |
130 | sigport = MACH_PORT_NULL; |
131 | |
132 | if (pollfds) |
133 | { |
134 | int error = 0; |
135 | /* Collect interesting descriptors from the user's `pollfd' array. |
136 | We do a first pass that reads the user's array before taking |
137 | any locks. The second pass then only touches our own stack, |
138 | and gets the port references. */ |
139 | |
140 | for (i = 0; i < nfds; ++i) |
141 | if (pollfds[i].fd >= 0) |
142 | { |
143 | int type = 0; |
144 | if (pollfds[i].events & POLLIN) |
145 | type |= SELECT_READ; |
146 | if (pollfds[i].events & POLLOUT) |
147 | type |= SELECT_WRITE; |
148 | if (pollfds[i].events & POLLPRI) |
149 | type |= SELECT_URG; |
150 | |
151 | d[i].io_port = pollfds[i].fd; |
152 | d[i].type = type; |
153 | } |
154 | else |
155 | d[i].type = 0; |
156 | |
157 | HURD_CRITICAL_BEGIN; |
158 | __mutex_lock (&_hurd_dtable_lock); |
159 | |
160 | for (i = 0; i < nfds; ++i) |
161 | if (d[i].type != 0) |
162 | { |
163 | const int fd = (int) d[i].io_port; |
164 | |
165 | if (fd < _hurd_dtablesize) |
166 | { |
167 | d[i].cell = _hurd_dtable[fd]; |
168 | if (d[i].cell != NULL) |
169 | { |
170 | d[i].io_port = _hurd_port_get (&d[i].cell->port, |
171 | &d[i].ulink); |
172 | if (d[i].io_port != MACH_PORT_NULL) |
173 | continue; |
174 | } |
175 | } |
176 | |
177 | /* Bogus descriptor, make it EBADF already. */ |
178 | d[i].error = EBADF; |
179 | d[i].type = SELECT_ERROR; |
180 | error = 1; |
181 | } |
182 | |
183 | __mutex_unlock (&_hurd_dtable_lock); |
184 | HURD_CRITICAL_END; |
185 | |
186 | if (error) |
187 | { |
188 | /* Set timeout to 0. */ |
189 | err = __clock_gettime (CLOCK_REALTIME, &ts); |
190 | if (err) |
191 | { |
192 | /* Really bad luck. */ |
193 | err = errno; |
194 | HURD_CRITICAL_BEGIN; |
195 | __mutex_lock (&_hurd_dtable_lock); |
196 | while (i-- > 0) |
197 | if (d[i].type & ~SELECT_ERROR != 0) |
198 | _hurd_port_free (&d[i].cell->port, &d[i].ulink, |
199 | d[i].io_port); |
200 | __mutex_unlock (&_hurd_dtable_lock); |
201 | HURD_CRITICAL_END; |
202 | if (sigmask) |
203 | __sigprocmask (SIG_SETMASK, set: &oset, NULL); |
204 | errno = err; |
205 | return -1; |
206 | } |
207 | reply_msgid = IO_SELECT_TIMEOUT_REPLY_MSGID; |
208 | } |
209 | |
210 | lastfd = i - 1; |
211 | firstfd = i == 0 ? lastfd : 0; |
212 | } |
213 | else |
214 | { |
215 | /* Collect interested descriptors from the user's fd_set arguments. |
216 | Use local copies so we can't crash from user bogosity. */ |
217 | |
218 | if (readfds == NULL) |
219 | FD_ZERO (&rfds); |
220 | else |
221 | rfds = *readfds; |
222 | if (writefds == NULL) |
223 | FD_ZERO (&wfds); |
224 | else |
225 | wfds = *writefds; |
226 | if (exceptfds == NULL) |
227 | FD_ZERO (&xfds); |
228 | else |
229 | xfds = *exceptfds; |
230 | |
231 | HURD_CRITICAL_BEGIN; |
232 | __mutex_lock (&_hurd_dtable_lock); |
233 | |
234 | /* Collect the ports for interesting FDs. */ |
235 | firstfd = lastfd = -1; |
236 | for (i = 0; i < nfds; ++i) |
237 | { |
238 | int type = 0; |
239 | if (readfds != NULL && FD_ISSET (i, &rfds)) |
240 | type |= SELECT_READ; |
241 | if (writefds != NULL && FD_ISSET (i, &wfds)) |
242 | type |= SELECT_WRITE; |
243 | if (exceptfds != NULL && FD_ISSET (i, &xfds)) |
244 | type |= SELECT_URG; |
245 | d[i].type = type; |
246 | if (type) |
247 | { |
248 | if (i < _hurd_dtablesize) |
249 | { |
250 | d[i].cell = _hurd_dtable[i]; |
251 | if (d[i].cell != NULL) |
252 | d[i].io_port = _hurd_port_get (&d[i].cell->port, |
253 | &d[i].ulink); |
254 | } |
255 | if (i >= _hurd_dtablesize || d[i].cell == NULL || |
256 | d[i].io_port == MACH_PORT_NULL) |
257 | { |
258 | /* If one descriptor is bogus, we fail completely. */ |
259 | while (i-- > 0) |
260 | if (d[i].type != 0) |
261 | _hurd_port_free (&d[i].cell->port, &d[i].ulink, |
262 | d[i].io_port); |
263 | break; |
264 | } |
265 | lastfd = i; |
266 | if (firstfd == -1) |
267 | firstfd = i; |
268 | } |
269 | } |
270 | |
271 | __mutex_unlock (&_hurd_dtable_lock); |
272 | HURD_CRITICAL_END; |
273 | |
274 | if (i < nfds) |
275 | { |
276 | if (sigmask) |
277 | __sigprocmask (SIG_SETMASK, set: &oset, NULL); |
278 | return __hurd_fail (EBADF); |
279 | } |
280 | |
281 | if (nfds > _hurd_dtablesize) |
282 | nfds = _hurd_dtablesize; |
283 | } |
284 | |
285 | |
286 | err = 0; |
287 | got = 0; |
288 | |
289 | /* Send them all io_select request messages. */ |
290 | |
291 | if (firstfd == -1) |
292 | { |
293 | if (sigport == MACH_PORT_NULL) |
294 | /* But not if there were no ports to deal with at all. |
295 | We are just a pure timeout. */ |
296 | portset = __mach_reply_port (); |
297 | else |
298 | portset = sigport; |
299 | } |
300 | else |
301 | { |
302 | portset = MACH_PORT_NULL; |
303 | |
304 | for (i = firstfd; i <= lastfd; ++i) |
305 | if (!(d[i].type & ~SELECT_ERROR)) |
306 | d[i].reply_port = MACH_PORT_NULL; |
307 | else |
308 | { |
309 | int type = d[i].type; |
310 | d[i].reply_port = __mach_reply_port (); |
311 | if (timeout == NULL) |
312 | err = __io_select_request (d[i].io_port, d[i].reply_port, type); |
313 | else |
314 | err = __io_select_timeout_request (d[i].io_port, d[i].reply_port, |
315 | ts, type); |
316 | if (!err) |
317 | { |
318 | if (firstfd == lastfd && sigport == MACH_PORT_NULL) |
319 | /* When there's a single descriptor, we don't need a |
320 | portset, so just pretend we have one, but really |
321 | use the single reply port. */ |
322 | portset = d[i].reply_port; |
323 | else if (got == 0) |
324 | /* We've got multiple reply ports, so we need a port set to |
325 | multiplex them. */ |
326 | { |
327 | /* We will wait again for a reply later. */ |
328 | if (portset == MACH_PORT_NULL) |
329 | /* Create the portset to receive all the replies on. */ |
330 | err = __mach_port_allocate (__mach_task_self (), |
331 | MACH_PORT_RIGHT_PORT_SET, |
332 | &portset); |
333 | if (! err) |
334 | /* Put this reply port in the port set. */ |
335 | __mach_port_move_member (__mach_task_self (), |
336 | d[i].reply_port, portset); |
337 | } |
338 | } |
339 | else |
340 | { |
341 | /* No error should happen, but record it for later |
342 | processing. */ |
343 | d[i].error = err; |
344 | d[i].type |= SELECT_ERROR; |
345 | ++got; |
346 | } |
347 | _hurd_port_free (&d[i].cell->port, &d[i].ulink, d[i].io_port); |
348 | } |
349 | |
350 | if (got == 0 && sigport != MACH_PORT_NULL) |
351 | { |
352 | if (portset == MACH_PORT_NULL) |
353 | /* Create the portset to receive the signal message on. */ |
354 | __mach_port_allocate (__mach_task_self (), MACH_PORT_RIGHT_PORT_SET, |
355 | &portset); |
356 | /* Put the signal reply port in the port set. */ |
357 | __mach_port_move_member (__mach_task_self (), sigport, portset); |
358 | } |
359 | } |
360 | |
361 | /* GOT is the number of replies (or errors), while READY is the number of |
362 | replies with at least one type bit set. */ |
363 | ready = 0; |
364 | |
365 | /* Now wait for reply messages. */ |
366 | if (!err && got == 0) |
367 | { |
368 | /* Now wait for io_select_reply messages on PORT, |
369 | timing out as appropriate. */ |
370 | |
371 | union |
372 | { |
373 | mach_msg_header_t head; |
374 | #ifdef MACH_MSG_TRAILER_MINIMUM_SIZE |
375 | struct |
376 | { |
377 | mach_msg_header_t head; |
378 | NDR_record_t ndr; |
379 | error_t err; |
380 | } error; |
381 | struct |
382 | { |
383 | mach_msg_header_t head; |
384 | NDR_record_t ndr; |
385 | error_t err; |
386 | int result; |
387 | mach_msg_trailer_t trailer; |
388 | } success; |
389 | #else |
390 | struct |
391 | { |
392 | mach_msg_header_t head; |
393 | mach_msg_type_t err_type; |
394 | error_t err; |
395 | } error; |
396 | struct |
397 | { |
398 | mach_msg_header_t head; |
399 | mach_msg_type_t err_type; |
400 | error_t err; |
401 | mach_msg_type_t result_type; |
402 | int result; |
403 | } success; |
404 | #endif |
405 | } msg; |
406 | mach_msg_option_t options; |
407 | error_t msgerr; |
408 | |
409 | /* We rely on servers to implement the timeout, but when there are none, |
410 | do it on the client side. */ |
411 | if (timeout != NULL && firstfd == -1) |
412 | { |
413 | options = MACH_RCV_TIMEOUT; |
414 | to = timeout->tv_sec * 1000 + (timeout->tv_nsec + 999999) / 1000000; |
415 | } |
416 | else |
417 | { |
418 | options = 0; |
419 | to = MACH_MSG_TIMEOUT_NONE; |
420 | } |
421 | |
422 | int cancel_oldtype = LIBC_CANCEL_ASYNC(); |
423 | while ((msgerr = __mach_msg (&msg.head, |
424 | MACH_RCV_MSG | MACH_RCV_INTERRUPT | options, |
425 | 0, sizeof msg, portset, to, |
426 | MACH_PORT_NULL)) == MACH_MSG_SUCCESS) |
427 | { |
428 | LIBC_CANCEL_RESET (cancel_oldtype); |
429 | |
430 | /* We got a message. Decode it. */ |
431 | #ifdef MACH_MSG_TYPE_BIT |
432 | static const mach_msg_type_t inttype = { |
433 | .msgt_name = MACH_MSG_TYPE_INTEGER_T, |
434 | .msgt_size = sizeof (integer_t) * 8, |
435 | .msgt_number = 1, |
436 | .msgt_inline = TRUE, |
437 | .msgt_longform = FALSE, |
438 | .msgt_deallocate = FALSE, |
439 | .msgt_unused = 0 |
440 | }; |
441 | #endif |
442 | |
443 | if (sigport != MACH_PORT_NULL && sigport == msg.head.msgh_local_port) |
444 | { |
445 | /* We actually got interrupted by a signal before |
446 | __mach_msg; poll for further responses and then |
447 | return quickly. */ |
448 | err = EINTR; |
449 | goto poll; |
450 | } |
451 | |
452 | if (msg.head.msgh_id == reply_msgid |
453 | && msg.head.msgh_size >= sizeof msg.error |
454 | && !(msg.head.msgh_bits & MACH_MSGH_BITS_COMPLEX) |
455 | #ifdef MACH_MSG_TYPE_BIT |
456 | && !BAD_TYPECHECK (&msg.error.err_type, &inttype) |
457 | #endif |
458 | ) |
459 | { |
460 | /* This is a properly formatted message so far. |
461 | See if it is a success or a failure. */ |
462 | if (msg.error.err == EINTR |
463 | && msg.head.msgh_size == sizeof msg.error) |
464 | { |
465 | /* EINTR response; poll for further responses |
466 | and then return quickly. */ |
467 | err = EINTR; |
468 | goto poll; |
469 | } |
470 | /* Keep in mind msg.success.result can be 0 if a timeout |
471 | occurred. */ |
472 | if (msg.error.err |
473 | #ifdef MACH_MSG_TYPE_BIT |
474 | || BAD_TYPECHECK (&msg.success.result_type, &inttype) |
475 | #endif |
476 | || msg.head.msgh_size != sizeof msg.success) |
477 | { |
478 | /* Error or bogus reply. */ |
479 | if (!msg.error.err) |
480 | msg.error.err = EIO; |
481 | __mach_msg_destroy (&msg.head); |
482 | } |
483 | |
484 | /* Look up the respondent's reply port and record its |
485 | readiness. */ |
486 | { |
487 | int had = got; |
488 | if (firstfd != -1) |
489 | for (i = firstfd; i <= lastfd; ++i) |
490 | if (d[i].type |
491 | && d[i].reply_port == msg.head.msgh_local_port) |
492 | { |
493 | if (msg.error.err) |
494 | { |
495 | d[i].error = msg.error.err; |
496 | d[i].type = SELECT_ERROR; |
497 | ++ready; |
498 | } |
499 | else |
500 | { |
501 | d[i].type &= msg.success.result; |
502 | if (d[i].type) |
503 | ++ready; |
504 | } |
505 | |
506 | d[i].type |= SELECT_RETURNED; |
507 | ++got; |
508 | } |
509 | assert (got > had); |
510 | } |
511 | } |
512 | |
513 | if (msg.head.msgh_remote_port != MACH_PORT_NULL) |
514 | __mach_port_deallocate (__mach_task_self (), |
515 | msg.head.msgh_remote_port); |
516 | |
517 | if (got) |
518 | poll: |
519 | { |
520 | /* Poll for another message. */ |
521 | to = 0; |
522 | options |= MACH_RCV_TIMEOUT; |
523 | } |
524 | } |
525 | LIBC_CANCEL_RESET (cancel_oldtype); |
526 | |
527 | if (msgerr == MACH_RCV_INTERRUPTED) |
528 | /* Interruption on our side (e.g. signal reception). */ |
529 | err = EINTR; |
530 | |
531 | if (ready) |
532 | /* At least one descriptor is known to be ready now, so we will |
533 | return success. */ |
534 | err = 0; |
535 | } |
536 | |
537 | if (firstfd != -1) |
538 | for (i = firstfd; i <= lastfd; ++i) |
539 | if (d[i].reply_port != MACH_PORT_NULL) |
540 | __mach_port_destroy (__mach_task_self (), d[i].reply_port); |
541 | |
542 | if (sigport != MACH_PORT_NULL) |
543 | { |
544 | _hurd_sigstate_lock (ss); |
545 | ss->suspended = MACH_PORT_NULL; |
546 | _hurd_sigstate_unlock (ss); |
547 | __mach_port_destroy (__mach_task_self (), sigport); |
548 | } |
549 | |
550 | if ((firstfd == -1 && sigport == MACH_PORT_NULL) |
551 | || ((firstfd != lastfd || sigport != MACH_PORT_NULL) && portset != MACH_PORT_NULL)) |
552 | /* Destroy PORTSET, but only if it's not actually the reply port for a |
553 | single descriptor (in which case it's destroyed in the previous loop; |
554 | not doing it here is just a bit more efficient). */ |
555 | __mach_port_destroy (__mach_task_self (), portset); |
556 | |
557 | if (err) |
558 | { |
559 | if (sigmask) |
560 | __sigprocmask (SIG_SETMASK, set: &oset, NULL); |
561 | return __hurd_fail (err); |
562 | } |
563 | |
564 | if (pollfds) |
565 | /* Fill in the `revents' members of the user's array. */ |
566 | for (i = 0; i < nfds; ++i) |
567 | { |
568 | int type = d[i].type; |
569 | int revents = 0; |
570 | |
571 | if (type & SELECT_ERROR) |
572 | switch (d[i].error) |
573 | { |
574 | case EPIPE: |
575 | revents = POLLHUP; |
576 | break; |
577 | case EBADF: |
578 | revents = POLLNVAL; |
579 | break; |
580 | default: |
581 | revents = POLLERR; |
582 | break; |
583 | } |
584 | else |
585 | if (type & SELECT_RETURNED) |
586 | { |
587 | if (type & SELECT_READ) |
588 | revents |= POLLIN; |
589 | if (type & SELECT_WRITE) |
590 | revents |= POLLOUT; |
591 | if (type & SELECT_URG) |
592 | revents |= POLLPRI; |
593 | } |
594 | |
595 | pollfds[i].revents = revents; |
596 | } |
597 | else |
598 | { |
599 | /* Below we recalculate READY to include an increment for each operation |
600 | allowed on each fd. */ |
601 | ready = 0; |
602 | |
603 | /* Set the user bitarrays. We only ever have to clear bits, as all |
604 | desired ones are initially set. */ |
605 | if (firstfd != -1) |
606 | for (i = firstfd; i <= lastfd; ++i) |
607 | { |
608 | int type = d[i].type; |
609 | |
610 | if ((type & SELECT_RETURNED) == 0) |
611 | type = 0; |
612 | |
613 | /* Callers of select don't expect to see errors, so we simulate |
614 | readiness of the erring object and the next call hopefully |
615 | will get the error again. */ |
616 | if (type & SELECT_ERROR) |
617 | { |
618 | type = 0; |
619 | if (readfds != NULL && FD_ISSET (i, readfds)) |
620 | type |= SELECT_READ; |
621 | if (writefds != NULL && FD_ISSET (i, writefds)) |
622 | type |= SELECT_WRITE; |
623 | if (exceptfds != NULL && FD_ISSET (i, exceptfds)) |
624 | type |= SELECT_URG; |
625 | } |
626 | |
627 | if (type & SELECT_READ) |
628 | ready++; |
629 | else if (readfds) |
630 | FD_CLR (i, readfds); |
631 | if (type & SELECT_WRITE) |
632 | ready++; |
633 | else if (writefds) |
634 | FD_CLR (i, writefds); |
635 | if (type & SELECT_URG) |
636 | ready++; |
637 | else if (exceptfds) |
638 | FD_CLR (i, exceptfds); |
639 | } |
640 | } |
641 | |
642 | if (sigmask && __sigprocmask (SIG_SETMASK, set: &oset, NULL)) |
643 | return -1; |
644 | |
645 | return ready; |
646 | } |
647 | |