1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Fd transport layer. Includes deprecated socket layer. |
4 | * |
5 | * Copyright (C) 2006 by Russ Cox <rsc@swtch.com> |
6 | * Copyright (C) 2004-2005 by Latchesar Ionkov <lucho@ionkov.net> |
7 | * Copyright (C) 2004-2008 by Eric Van Hensbergen <ericvh@gmail.com> |
8 | * Copyright (C) 1997-2002 by Ron Minnich <rminnich@sarnoff.com> |
9 | */ |
10 | |
11 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt |
12 | |
13 | #include <linux/in.h> |
14 | #include <linux/module.h> |
15 | #include <linux/net.h> |
16 | #include <linux/ipv6.h> |
17 | #include <linux/kthread.h> |
18 | #include <linux/errno.h> |
19 | #include <linux/kernel.h> |
20 | #include <linux/un.h> |
21 | #include <linux/uaccess.h> |
22 | #include <linux/inet.h> |
23 | #include <linux/file.h> |
24 | #include <linux/parser.h> |
25 | #include <linux/slab.h> |
26 | #include <linux/seq_file.h> |
27 | #include <net/9p/9p.h> |
28 | #include <net/9p/client.h> |
29 | #include <net/9p/transport.h> |
30 | |
31 | #include <linux/syscalls.h> /* killme */ |
32 | |
33 | #define P9_PORT 564 |
34 | #define MAX_SOCK_BUF (1024*1024) |
35 | #define MAXPOLLWADDR 2 |
36 | |
37 | static struct p9_trans_module p9_tcp_trans; |
38 | static struct p9_trans_module p9_fd_trans; |
39 | |
40 | /** |
41 | * struct p9_fd_opts - per-transport options |
42 | * @rfd: file descriptor for reading (trans=fd) |
43 | * @wfd: file descriptor for writing (trans=fd) |
44 | * @port: port to connect to (trans=tcp) |
45 | * @privport: port is privileged |
46 | */ |
47 | |
48 | struct p9_fd_opts { |
49 | int rfd; |
50 | int wfd; |
51 | u16 port; |
52 | bool privport; |
53 | }; |
54 | |
55 | /* |
56 | * Option Parsing (code inspired by NFS code) |
57 | * - a little lazy - parse all fd-transport options |
58 | */ |
59 | |
/* Token values returned by match_token() for the mount options below. */
enum {
	/* Options that take integer arguments */
	Opt_port, Opt_rfdno, Opt_wfdno, Opt_err,
	/* Options that take no arguments */
	Opt_privport,
};
66 | |
/* Mount option patterns recognized by parse_opts(). */
static const match_table_t tokens = {
	{Opt_port, "port=%u"},
	{Opt_rfdno, "rfdno=%u"},
	{Opt_wfdno, "wfdno=%u"},
	{Opt_privport, "privport"},
	{Opt_err, NULL},
};
74 | |
/* Scheduling-state bits kept in p9_conn->wsched. */
enum {
	Rworksched = 1,		/* read work scheduled or running */
	Rpending = 2,		/* can read */
	Wworksched = 4,		/* write work scheduled or running */
	Wpending = 8,		/* can write */
};
81 | |
/* One registered wait-queue slot; lets the wakeup callback find its conn. */
struct p9_poll_wait {
	struct p9_conn *conn;		/* back-pointer to owning connection */
	wait_queue_entry_t wait;	/* entry registered on @wait_addr */
	wait_queue_head_t *wait_addr;	/* NULL means this slot is free */
};
87 | |
88 | /** |
89 | * struct p9_conn - fd mux connection state information |
90 | * @mux_list: list link for mux to manage multiple connections (?) |
91 | * @client: reference to client instance for this connection |
92 | * @err: error state |
93 | * @req_lock: lock protecting req_list and requests statuses |
94 | * @req_list: accounting for requests which have been sent |
95 | * @unsent_req_list: accounting for requests that haven't been sent |
96 | * @rreq: read request |
97 | * @wreq: write request |
98 | * @req: current request being processed (if any) |
99 | * @tmp_buf: temporary buffer to read in header |
100 | * @rc: temporary fcall for reading current frame |
101 | * @wpos: write position for current frame |
102 | * @wsize: amount of data to write for current frame |
103 | * @wbuf: current write buffer |
104 | * @poll_pending_link: pending links to be polled per conn |
105 | * @poll_wait: array of wait_q's for various worker threads |
106 | * @pt: poll state |
107 | * @rq: current read work |
108 | * @wq: current write work |
109 | * @wsched: ???? |
110 | * |
111 | */ |
112 | |
113 | struct p9_conn { |
114 | struct list_head mux_list; |
115 | struct p9_client *client; |
116 | int err; |
117 | spinlock_t req_lock; |
118 | struct list_head req_list; |
119 | struct list_head unsent_req_list; |
120 | struct p9_req_t *rreq; |
121 | struct p9_req_t *wreq; |
122 | char tmp_buf[P9_HDRSZ]; |
123 | struct p9_fcall rc; |
124 | int wpos; |
125 | int wsize; |
126 | char *wbuf; |
127 | struct list_head poll_pending_link; |
128 | struct p9_poll_wait poll_wait[MAXPOLLWADDR]; |
129 | poll_table pt; |
130 | struct work_struct rq; |
131 | struct work_struct wq; |
132 | unsigned long wsched; |
133 | }; |
134 | |
135 | /** |
136 | * struct p9_trans_fd - transport state |
137 | * @rd: reference to file to read from |
138 | * @wr: reference of file to write to |
139 | * @conn: connection state reference |
140 | * |
141 | */ |
142 | |
143 | struct p9_trans_fd { |
144 | struct file *rd; |
145 | struct file *wr; |
146 | struct p9_conn conn; |
147 | }; |
148 | |
static void p9_poll_workfn(struct work_struct *work);

/* Global poll machinery: connections with pending poll events are queued
 * on p9_poll_pending_list (protected by p9_poll_lock) and drained by
 * p9_poll_work.
 */
static DEFINE_SPINLOCK(p9_poll_lock);
static LIST_HEAD(p9_poll_pending_list);
static DECLARE_WORK(p9_poll_work, p9_poll_workfn);

/* Default reserved-port range used when the 'privport' option is set. */
static unsigned int p9_ipport_resv_min = P9_DEF_MIN_RESVPORT;
static unsigned int p9_ipport_resv_max = P9_DEF_MAX_RESVPORT;
157 | |
158 | static void p9_mux_poll_stop(struct p9_conn *m) |
159 | { |
160 | unsigned long flags; |
161 | int i; |
162 | |
163 | for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { |
164 | struct p9_poll_wait *pwait = &m->poll_wait[i]; |
165 | |
166 | if (pwait->wait_addr) { |
167 | remove_wait_queue(wq_head: pwait->wait_addr, wq_entry: &pwait->wait); |
168 | pwait->wait_addr = NULL; |
169 | } |
170 | } |
171 | |
172 | spin_lock_irqsave(&p9_poll_lock, flags); |
173 | list_del_init(entry: &m->poll_pending_link); |
174 | spin_unlock_irqrestore(lock: &p9_poll_lock, flags); |
175 | |
176 | flush_work(work: &p9_poll_work); |
177 | } |
178 | |
179 | /** |
180 | * p9_conn_cancel - cancel all pending requests with error |
181 | * @m: mux data |
182 | * @err: error code |
183 | * |
184 | */ |
185 | |
186 | static void p9_conn_cancel(struct p9_conn *m, int err) |
187 | { |
188 | struct p9_req_t *req, *rtmp; |
189 | LIST_HEAD(cancel_list); |
190 | |
191 | p9_debug(P9_DEBUG_ERROR, "mux %p err %d\n" , m, err); |
192 | |
193 | spin_lock(lock: &m->req_lock); |
194 | |
195 | if (m->err) { |
196 | spin_unlock(lock: &m->req_lock); |
197 | return; |
198 | } |
199 | |
200 | m->err = err; |
201 | |
202 | list_for_each_entry_safe(req, rtmp, &m->req_list, req_list) { |
203 | list_move(list: &req->req_list, head: &cancel_list); |
204 | WRITE_ONCE(req->status, REQ_STATUS_ERROR); |
205 | } |
206 | list_for_each_entry_safe(req, rtmp, &m->unsent_req_list, req_list) { |
207 | list_move(list: &req->req_list, head: &cancel_list); |
208 | WRITE_ONCE(req->status, REQ_STATUS_ERROR); |
209 | } |
210 | |
211 | spin_unlock(lock: &m->req_lock); |
212 | |
213 | list_for_each_entry_safe(req, rtmp, &cancel_list, req_list) { |
214 | p9_debug(P9_DEBUG_ERROR, "call back req %p\n" , req); |
215 | list_del(entry: &req->req_list); |
216 | if (!req->t_err) |
217 | req->t_err = err; |
218 | p9_client_cb(c: m->client, req, status: REQ_STATUS_ERROR); |
219 | } |
220 | } |
221 | |
222 | static __poll_t |
223 | p9_fd_poll(struct p9_client *client, struct poll_table_struct *pt, int *err) |
224 | { |
225 | __poll_t ret; |
226 | struct p9_trans_fd *ts = NULL; |
227 | |
228 | if (client && client->status == Connected) |
229 | ts = client->trans; |
230 | |
231 | if (!ts) { |
232 | if (err) |
233 | *err = -EREMOTEIO; |
234 | return EPOLLERR; |
235 | } |
236 | |
237 | ret = vfs_poll(file: ts->rd, pt); |
238 | if (ts->rd != ts->wr) |
239 | ret = (ret & ~EPOLLOUT) | (vfs_poll(file: ts->wr, pt) & ~EPOLLIN); |
240 | return ret; |
241 | } |
242 | |
243 | /** |
244 | * p9_fd_read- read from a fd |
245 | * @client: client instance |
246 | * @v: buffer to receive data into |
247 | * @len: size of receive buffer |
248 | * |
249 | */ |
250 | |
251 | static int p9_fd_read(struct p9_client *client, void *v, int len) |
252 | { |
253 | int ret; |
254 | struct p9_trans_fd *ts = NULL; |
255 | loff_t pos; |
256 | |
257 | if (client && client->status != Disconnected) |
258 | ts = client->trans; |
259 | |
260 | if (!ts) |
261 | return -EREMOTEIO; |
262 | |
263 | if (!(ts->rd->f_flags & O_NONBLOCK)) |
264 | p9_debug(P9_DEBUG_ERROR, "blocking read ...\n" ); |
265 | |
266 | pos = ts->rd->f_pos; |
267 | ret = kernel_read(ts->rd, v, len, &pos); |
268 | if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) |
269 | client->status = Disconnected; |
270 | return ret; |
271 | } |
272 | |
273 | /** |
274 | * p9_read_work - called when there is some data to be read from a transport |
275 | * @work: container of work to be done |
276 | * |
277 | */ |
278 | |
279 | static void p9_read_work(struct work_struct *work) |
280 | { |
281 | __poll_t n; |
282 | int err; |
283 | struct p9_conn *m; |
284 | |
285 | m = container_of(work, struct p9_conn, rq); |
286 | |
287 | if (m->err < 0) |
288 | return; |
289 | |
290 | p9_debug(P9_DEBUG_TRANS, "start mux %p pos %zd\n" , m, m->rc.offset); |
291 | |
292 | if (!m->rc.sdata) { |
293 | m->rc.sdata = m->tmp_buf; |
294 | m->rc.offset = 0; |
295 | m->rc.capacity = P9_HDRSZ; /* start by reading header */ |
296 | } |
297 | |
298 | clear_bit(nr: Rpending, addr: &m->wsched); |
299 | p9_debug(P9_DEBUG_TRANS, "read mux %p pos %zd size: %zd = %zd\n" , |
300 | m, m->rc.offset, m->rc.capacity, |
301 | m->rc.capacity - m->rc.offset); |
302 | err = p9_fd_read(client: m->client, v: m->rc.sdata + m->rc.offset, |
303 | len: m->rc.capacity - m->rc.offset); |
304 | p9_debug(P9_DEBUG_TRANS, "mux %p got %d bytes\n" , m, err); |
305 | if (err == -EAGAIN) |
306 | goto end_clear; |
307 | |
308 | if (err <= 0) |
309 | goto error; |
310 | |
311 | m->rc.offset += err; |
312 | |
313 | /* header read in */ |
314 | if ((!m->rreq) && (m->rc.offset == m->rc.capacity)) { |
315 | p9_debug(P9_DEBUG_TRANS, "got new header\n" ); |
316 | |
317 | /* Header size */ |
318 | m->rc.size = P9_HDRSZ; |
319 | err = p9_parse_header(pdu: &m->rc, size: &m->rc.size, NULL, NULL, rewind: 0); |
320 | if (err) { |
321 | p9_debug(P9_DEBUG_ERROR, |
322 | "error parsing header: %d\n" , err); |
323 | goto error; |
324 | } |
325 | |
326 | p9_debug(P9_DEBUG_TRANS, |
327 | "mux %p pkt: size: %d bytes tag: %d\n" , |
328 | m, m->rc.size, m->rc.tag); |
329 | |
330 | m->rreq = p9_tag_lookup(c: m->client, tag: m->rc.tag); |
331 | if (!m->rreq || (m->rreq->status != REQ_STATUS_SENT)) { |
332 | p9_debug(P9_DEBUG_ERROR, "Unexpected packet tag %d\n" , |
333 | m->rc.tag); |
334 | err = -EIO; |
335 | goto error; |
336 | } |
337 | |
338 | if (m->rc.size > m->rreq->rc.capacity) { |
339 | p9_debug(P9_DEBUG_ERROR, |
340 | "requested packet size too big: %d for tag %d with capacity %zd\n" , |
341 | m->rc.size, m->rc.tag, m->rreq->rc.capacity); |
342 | err = -EIO; |
343 | goto error; |
344 | } |
345 | |
346 | if (!m->rreq->rc.sdata) { |
347 | p9_debug(P9_DEBUG_ERROR, |
348 | "No recv fcall for tag %d (req %p), disconnecting!\n" , |
349 | m->rc.tag, m->rreq); |
350 | p9_req_put(c: m->client, r: m->rreq); |
351 | m->rreq = NULL; |
352 | err = -EIO; |
353 | goto error; |
354 | } |
355 | m->rc.sdata = m->rreq->rc.sdata; |
356 | memcpy(m->rc.sdata, m->tmp_buf, m->rc.capacity); |
357 | m->rc.capacity = m->rc.size; |
358 | } |
359 | |
360 | /* packet is read in |
361 | * not an else because some packets (like clunk) have no payload |
362 | */ |
363 | if ((m->rreq) && (m->rc.offset == m->rc.capacity)) { |
364 | p9_debug(P9_DEBUG_TRANS, "got new packet\n" ); |
365 | m->rreq->rc.size = m->rc.offset; |
366 | spin_lock(lock: &m->req_lock); |
367 | if (m->rreq->status == REQ_STATUS_SENT) { |
368 | list_del(entry: &m->rreq->req_list); |
369 | p9_client_cb(c: m->client, req: m->rreq, status: REQ_STATUS_RCVD); |
370 | } else if (m->rreq->status == REQ_STATUS_FLSHD) { |
371 | /* Ignore replies associated with a cancelled request. */ |
372 | p9_debug(P9_DEBUG_TRANS, |
373 | "Ignore replies associated with a cancelled request\n" ); |
374 | } else { |
375 | spin_unlock(lock: &m->req_lock); |
376 | p9_debug(P9_DEBUG_ERROR, |
377 | "Request tag %d errored out while we were reading the reply\n" , |
378 | m->rc.tag); |
379 | err = -EIO; |
380 | goto error; |
381 | } |
382 | spin_unlock(lock: &m->req_lock); |
383 | m->rc.sdata = NULL; |
384 | m->rc.offset = 0; |
385 | m->rc.capacity = 0; |
386 | p9_req_put(c: m->client, r: m->rreq); |
387 | m->rreq = NULL; |
388 | } |
389 | |
390 | end_clear: |
391 | clear_bit(nr: Rworksched, addr: &m->wsched); |
392 | |
393 | if (!list_empty(head: &m->req_list)) { |
394 | if (test_and_clear_bit(nr: Rpending, addr: &m->wsched)) |
395 | n = EPOLLIN; |
396 | else |
397 | n = p9_fd_poll(client: m->client, NULL, NULL); |
398 | |
399 | if ((n & EPOLLIN) && !test_and_set_bit(nr: Rworksched, addr: &m->wsched)) { |
400 | p9_debug(P9_DEBUG_TRANS, "sched read work %p\n" , m); |
401 | schedule_work(work: &m->rq); |
402 | } |
403 | } |
404 | |
405 | return; |
406 | error: |
407 | p9_conn_cancel(m, err); |
408 | clear_bit(nr: Rworksched, addr: &m->wsched); |
409 | } |
410 | |
411 | /** |
412 | * p9_fd_write - write to a socket |
413 | * @client: client instance |
414 | * @v: buffer to send data from |
415 | * @len: size of send buffer |
416 | * |
417 | */ |
418 | |
419 | static int p9_fd_write(struct p9_client *client, void *v, int len) |
420 | { |
421 | ssize_t ret; |
422 | struct p9_trans_fd *ts = NULL; |
423 | |
424 | if (client && client->status != Disconnected) |
425 | ts = client->trans; |
426 | |
427 | if (!ts) |
428 | return -EREMOTEIO; |
429 | |
430 | if (!(ts->wr->f_flags & O_NONBLOCK)) |
431 | p9_debug(P9_DEBUG_ERROR, "blocking write ...\n" ); |
432 | |
433 | ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos); |
434 | if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) |
435 | client->status = Disconnected; |
436 | return ret; |
437 | } |
438 | |
439 | /** |
440 | * p9_write_work - called when a transport can send some data |
441 | * @work: container for work to be done |
442 | * |
443 | */ |
444 | |
445 | static void p9_write_work(struct work_struct *work) |
446 | { |
447 | __poll_t n; |
448 | int err; |
449 | struct p9_conn *m; |
450 | struct p9_req_t *req; |
451 | |
452 | m = container_of(work, struct p9_conn, wq); |
453 | |
454 | if (m->err < 0) { |
455 | clear_bit(nr: Wworksched, addr: &m->wsched); |
456 | return; |
457 | } |
458 | |
459 | if (!m->wsize) { |
460 | spin_lock(lock: &m->req_lock); |
461 | if (list_empty(head: &m->unsent_req_list)) { |
462 | clear_bit(nr: Wworksched, addr: &m->wsched); |
463 | spin_unlock(lock: &m->req_lock); |
464 | return; |
465 | } |
466 | |
467 | req = list_entry(m->unsent_req_list.next, struct p9_req_t, |
468 | req_list); |
469 | WRITE_ONCE(req->status, REQ_STATUS_SENT); |
470 | p9_debug(P9_DEBUG_TRANS, "move req %p\n" , req); |
471 | list_move_tail(list: &req->req_list, head: &m->req_list); |
472 | |
473 | m->wbuf = req->tc.sdata; |
474 | m->wsize = req->tc.size; |
475 | m->wpos = 0; |
476 | p9_req_get(r: req); |
477 | m->wreq = req; |
478 | spin_unlock(lock: &m->req_lock); |
479 | } |
480 | |
481 | p9_debug(P9_DEBUG_TRANS, "mux %p pos %d size %d\n" , |
482 | m, m->wpos, m->wsize); |
483 | clear_bit(nr: Wpending, addr: &m->wsched); |
484 | err = p9_fd_write(client: m->client, v: m->wbuf + m->wpos, len: m->wsize - m->wpos); |
485 | p9_debug(P9_DEBUG_TRANS, "mux %p sent %d bytes\n" , m, err); |
486 | if (err == -EAGAIN) |
487 | goto end_clear; |
488 | |
489 | |
490 | if (err < 0) |
491 | goto error; |
492 | else if (err == 0) { |
493 | err = -EREMOTEIO; |
494 | goto error; |
495 | } |
496 | |
497 | m->wpos += err; |
498 | if (m->wpos == m->wsize) { |
499 | m->wpos = m->wsize = 0; |
500 | p9_req_put(c: m->client, r: m->wreq); |
501 | m->wreq = NULL; |
502 | } |
503 | |
504 | end_clear: |
505 | clear_bit(nr: Wworksched, addr: &m->wsched); |
506 | |
507 | if (m->wsize || !list_empty(head: &m->unsent_req_list)) { |
508 | if (test_and_clear_bit(nr: Wpending, addr: &m->wsched)) |
509 | n = EPOLLOUT; |
510 | else |
511 | n = p9_fd_poll(client: m->client, NULL, NULL); |
512 | |
513 | if ((n & EPOLLOUT) && |
514 | !test_and_set_bit(nr: Wworksched, addr: &m->wsched)) { |
515 | p9_debug(P9_DEBUG_TRANS, "sched write work %p\n" , m); |
516 | schedule_work(work: &m->wq); |
517 | } |
518 | } |
519 | |
520 | return; |
521 | |
522 | error: |
523 | p9_conn_cancel(m, err); |
524 | clear_bit(nr: Wworksched, addr: &m->wsched); |
525 | } |
526 | |
527 | static int p9_pollwake(wait_queue_entry_t *wait, unsigned int mode, int sync, void *key) |
528 | { |
529 | struct p9_poll_wait *pwait = |
530 | container_of(wait, struct p9_poll_wait, wait); |
531 | struct p9_conn *m = pwait->conn; |
532 | unsigned long flags; |
533 | |
534 | spin_lock_irqsave(&p9_poll_lock, flags); |
535 | if (list_empty(head: &m->poll_pending_link)) |
536 | list_add_tail(new: &m->poll_pending_link, head: &p9_poll_pending_list); |
537 | spin_unlock_irqrestore(lock: &p9_poll_lock, flags); |
538 | |
539 | schedule_work(work: &p9_poll_work); |
540 | return 1; |
541 | } |
542 | |
543 | /** |
544 | * p9_pollwait - add poll task to the wait queue |
545 | * @filp: file pointer being polled |
546 | * @wait_address: wait_q to block on |
547 | * @p: poll state |
548 | * |
549 | * called by files poll operation to add v9fs-poll task to files wait queue |
550 | */ |
551 | |
552 | static void |
553 | p9_pollwait(struct file *filp, wait_queue_head_t *wait_address, poll_table *p) |
554 | { |
555 | struct p9_conn *m = container_of(p, struct p9_conn, pt); |
556 | struct p9_poll_wait *pwait = NULL; |
557 | int i; |
558 | |
559 | for (i = 0; i < ARRAY_SIZE(m->poll_wait); i++) { |
560 | if (m->poll_wait[i].wait_addr == NULL) { |
561 | pwait = &m->poll_wait[i]; |
562 | break; |
563 | } |
564 | } |
565 | |
566 | if (!pwait) { |
567 | p9_debug(P9_DEBUG_ERROR, "not enough wait_address slots\n" ); |
568 | return; |
569 | } |
570 | |
571 | pwait->conn = m; |
572 | pwait->wait_addr = wait_address; |
573 | init_waitqueue_func_entry(wq_entry: &pwait->wait, func: p9_pollwake); |
574 | add_wait_queue(wq_head: wait_address, wq_entry: &pwait->wait); |
575 | } |
576 | |
577 | /** |
578 | * p9_conn_create - initialize the per-session mux data |
579 | * @client: client instance |
580 | * |
581 | * Note: Creates the polling task if this is the first session. |
582 | */ |
583 | |
584 | static void p9_conn_create(struct p9_client *client) |
585 | { |
586 | __poll_t n; |
587 | struct p9_trans_fd *ts = client->trans; |
588 | struct p9_conn *m = &ts->conn; |
589 | |
590 | p9_debug(P9_DEBUG_TRANS, "client %p msize %d\n" , client, client->msize); |
591 | |
592 | INIT_LIST_HEAD(list: &m->mux_list); |
593 | m->client = client; |
594 | |
595 | spin_lock_init(&m->req_lock); |
596 | INIT_LIST_HEAD(list: &m->req_list); |
597 | INIT_LIST_HEAD(list: &m->unsent_req_list); |
598 | INIT_WORK(&m->rq, p9_read_work); |
599 | INIT_WORK(&m->wq, p9_write_work); |
600 | INIT_LIST_HEAD(list: &m->poll_pending_link); |
601 | init_poll_funcptr(pt: &m->pt, qproc: p9_pollwait); |
602 | |
603 | n = p9_fd_poll(client, pt: &m->pt, NULL); |
604 | if (n & EPOLLIN) { |
605 | p9_debug(P9_DEBUG_TRANS, "mux %p can read\n" , m); |
606 | set_bit(nr: Rpending, addr: &m->wsched); |
607 | } |
608 | |
609 | if (n & EPOLLOUT) { |
610 | p9_debug(P9_DEBUG_TRANS, "mux %p can write\n" , m); |
611 | set_bit(nr: Wpending, addr: &m->wsched); |
612 | } |
613 | } |
614 | |
615 | /** |
616 | * p9_poll_mux - polls a mux and schedules read or write works if necessary |
617 | * @m: connection to poll |
618 | * |
619 | */ |
620 | |
621 | static void p9_poll_mux(struct p9_conn *m) |
622 | { |
623 | __poll_t n; |
624 | int err = -ECONNRESET; |
625 | |
626 | if (m->err < 0) |
627 | return; |
628 | |
629 | n = p9_fd_poll(client: m->client, NULL, err: &err); |
630 | if (n & (EPOLLERR | EPOLLHUP | EPOLLNVAL)) { |
631 | p9_debug(P9_DEBUG_TRANS, "error mux %p err %d\n" , m, n); |
632 | p9_conn_cancel(m, err); |
633 | } |
634 | |
635 | if (n & EPOLLIN) { |
636 | set_bit(nr: Rpending, addr: &m->wsched); |
637 | p9_debug(P9_DEBUG_TRANS, "mux %p can read\n" , m); |
638 | if (!test_and_set_bit(nr: Rworksched, addr: &m->wsched)) { |
639 | p9_debug(P9_DEBUG_TRANS, "sched read work %p\n" , m); |
640 | schedule_work(work: &m->rq); |
641 | } |
642 | } |
643 | |
644 | if (n & EPOLLOUT) { |
645 | set_bit(nr: Wpending, addr: &m->wsched); |
646 | p9_debug(P9_DEBUG_TRANS, "mux %p can write\n" , m); |
647 | if ((m->wsize || !list_empty(head: &m->unsent_req_list)) && |
648 | !test_and_set_bit(nr: Wworksched, addr: &m->wsched)) { |
649 | p9_debug(P9_DEBUG_TRANS, "sched write work %p\n" , m); |
650 | schedule_work(work: &m->wq); |
651 | } |
652 | } |
653 | } |
654 | |
655 | /** |
656 | * p9_fd_request - send 9P request |
657 | * The function can sleep until the request is scheduled for sending. |
658 | * The function can be interrupted. Return from the function is not |
659 | * a guarantee that the request is sent successfully. |
660 | * |
661 | * @client: client instance |
662 | * @req: request to be sent |
663 | * |
664 | */ |
665 | |
666 | static int p9_fd_request(struct p9_client *client, struct p9_req_t *req) |
667 | { |
668 | __poll_t n; |
669 | struct p9_trans_fd *ts = client->trans; |
670 | struct p9_conn *m = &ts->conn; |
671 | |
672 | p9_debug(P9_DEBUG_TRANS, "mux %p task %p tcall %p id %d\n" , |
673 | m, current, &req->tc, req->tc.id); |
674 | |
675 | spin_lock(lock: &m->req_lock); |
676 | |
677 | if (m->err < 0) { |
678 | spin_unlock(lock: &m->req_lock); |
679 | return m->err; |
680 | } |
681 | |
682 | WRITE_ONCE(req->status, REQ_STATUS_UNSENT); |
683 | list_add_tail(new: &req->req_list, head: &m->unsent_req_list); |
684 | spin_unlock(lock: &m->req_lock); |
685 | |
686 | if (test_and_clear_bit(nr: Wpending, addr: &m->wsched)) |
687 | n = EPOLLOUT; |
688 | else |
689 | n = p9_fd_poll(client: m->client, NULL, NULL); |
690 | |
691 | if (n & EPOLLOUT && !test_and_set_bit(nr: Wworksched, addr: &m->wsched)) |
692 | schedule_work(work: &m->wq); |
693 | |
694 | return 0; |
695 | } |
696 | |
697 | static int p9_fd_cancel(struct p9_client *client, struct p9_req_t *req) |
698 | { |
699 | struct p9_trans_fd *ts = client->trans; |
700 | struct p9_conn *m = &ts->conn; |
701 | int ret = 1; |
702 | |
703 | p9_debug(P9_DEBUG_TRANS, "client %p req %p\n" , client, req); |
704 | |
705 | spin_lock(lock: &m->req_lock); |
706 | |
707 | if (req->status == REQ_STATUS_UNSENT) { |
708 | list_del(entry: &req->req_list); |
709 | WRITE_ONCE(req->status, REQ_STATUS_FLSHD); |
710 | p9_req_put(c: client, r: req); |
711 | ret = 0; |
712 | } |
713 | spin_unlock(lock: &m->req_lock); |
714 | |
715 | return ret; |
716 | } |
717 | |
718 | static int p9_fd_cancelled(struct p9_client *client, struct p9_req_t *req) |
719 | { |
720 | struct p9_trans_fd *ts = client->trans; |
721 | struct p9_conn *m = &ts->conn; |
722 | |
723 | p9_debug(P9_DEBUG_TRANS, "client %p req %p\n" , client, req); |
724 | |
725 | spin_lock(lock: &m->req_lock); |
726 | /* Ignore cancelled request if message has been received |
727 | * before lock. |
728 | */ |
729 | if (req->status == REQ_STATUS_RCVD) { |
730 | spin_unlock(lock: &m->req_lock); |
731 | return 0; |
732 | } |
733 | |
734 | /* we haven't received a response for oldreq, |
735 | * remove it from the list. |
736 | */ |
737 | list_del(entry: &req->req_list); |
738 | WRITE_ONCE(req->status, REQ_STATUS_FLSHD); |
739 | spin_unlock(lock: &m->req_lock); |
740 | |
741 | p9_req_put(c: client, r: req); |
742 | |
743 | return 0; |
744 | } |
745 | |
746 | static int p9_fd_show_options(struct seq_file *m, struct p9_client *clnt) |
747 | { |
748 | if (clnt->trans_mod == &p9_tcp_trans) { |
749 | if (clnt->trans_opts.tcp.port != P9_PORT) |
750 | seq_printf(m, fmt: ",port=%u" , clnt->trans_opts.tcp.port); |
751 | } else if (clnt->trans_mod == &p9_fd_trans) { |
752 | if (clnt->trans_opts.fd.rfd != ~0) |
753 | seq_printf(m, fmt: ",rfd=%u" , clnt->trans_opts.fd.rfd); |
754 | if (clnt->trans_opts.fd.wfd != ~0) |
755 | seq_printf(m, fmt: ",wfd=%u" , clnt->trans_opts.fd.wfd); |
756 | } |
757 | return 0; |
758 | } |
759 | |
760 | /** |
761 | * parse_opts - parse mount options into p9_fd_opts structure |
762 | * @params: options string passed from mount |
763 | * @opts: fd transport-specific structure to parse options into |
764 | * |
765 | * Returns 0 upon success, -ERRNO upon failure |
766 | */ |
767 | |
768 | static int parse_opts(char *params, struct p9_fd_opts *opts) |
769 | { |
770 | char *p; |
771 | substring_t args[MAX_OPT_ARGS]; |
772 | int option; |
773 | char *options, *tmp_options; |
774 | |
775 | opts->port = P9_PORT; |
776 | opts->rfd = ~0; |
777 | opts->wfd = ~0; |
778 | opts->privport = false; |
779 | |
780 | if (!params) |
781 | return 0; |
782 | |
783 | tmp_options = kstrdup(s: params, GFP_KERNEL); |
784 | if (!tmp_options) { |
785 | p9_debug(P9_DEBUG_ERROR, |
786 | "failed to allocate copy of option string\n" ); |
787 | return -ENOMEM; |
788 | } |
789 | options = tmp_options; |
790 | |
791 | while ((p = strsep(&options, "," )) != NULL) { |
792 | int token; |
793 | int r; |
794 | if (!*p) |
795 | continue; |
796 | token = match_token(p, table: tokens, args); |
797 | if ((token != Opt_err) && (token != Opt_privport)) { |
798 | r = match_int(&args[0], result: &option); |
799 | if (r < 0) { |
800 | p9_debug(P9_DEBUG_ERROR, |
801 | "integer field, but no integer?\n" ); |
802 | continue; |
803 | } |
804 | } |
805 | switch (token) { |
806 | case Opt_port: |
807 | opts->port = option; |
808 | break; |
809 | case Opt_rfdno: |
810 | opts->rfd = option; |
811 | break; |
812 | case Opt_wfdno: |
813 | opts->wfd = option; |
814 | break; |
815 | case Opt_privport: |
816 | opts->privport = true; |
817 | break; |
818 | default: |
819 | continue; |
820 | } |
821 | } |
822 | |
823 | kfree(objp: tmp_options); |
824 | return 0; |
825 | } |
826 | |
827 | static int p9_fd_open(struct p9_client *client, int rfd, int wfd) |
828 | { |
829 | struct p9_trans_fd *ts = kzalloc(size: sizeof(struct p9_trans_fd), |
830 | GFP_KERNEL); |
831 | if (!ts) |
832 | return -ENOMEM; |
833 | |
834 | ts->rd = fget(fd: rfd); |
835 | if (!ts->rd) |
836 | goto out_free_ts; |
837 | if (!(ts->rd->f_mode & FMODE_READ)) |
838 | goto out_put_rd; |
839 | /* Prevent workers from hanging on IO when fd is a pipe. |
840 | * It's technically possible for userspace or concurrent mounts to |
841 | * modify this flag concurrently, which will likely result in a |
842 | * broken filesystem. However, just having bad flags here should |
843 | * not crash the kernel or cause any other sort of bug, so mark this |
844 | * particular data race as intentional so that tooling (like KCSAN) |
845 | * can allow it and detect further problems. |
846 | */ |
847 | data_race(ts->rd->f_flags |= O_NONBLOCK); |
848 | ts->wr = fget(fd: wfd); |
849 | if (!ts->wr) |
850 | goto out_put_rd; |
851 | if (!(ts->wr->f_mode & FMODE_WRITE)) |
852 | goto out_put_wr; |
853 | data_race(ts->wr->f_flags |= O_NONBLOCK); |
854 | |
855 | client->trans = ts; |
856 | client->status = Connected; |
857 | |
858 | return 0; |
859 | |
860 | out_put_wr: |
861 | fput(ts->wr); |
862 | out_put_rd: |
863 | fput(ts->rd); |
864 | out_free_ts: |
865 | kfree(objp: ts); |
866 | return -EIO; |
867 | } |
868 | |
869 | static int p9_socket_open(struct p9_client *client, struct socket *csocket) |
870 | { |
871 | struct p9_trans_fd *p; |
872 | struct file *file; |
873 | |
874 | p = kzalloc(size: sizeof(struct p9_trans_fd), GFP_KERNEL); |
875 | if (!p) { |
876 | sock_release(sock: csocket); |
877 | return -ENOMEM; |
878 | } |
879 | |
880 | csocket->sk->sk_allocation = GFP_NOIO; |
881 | csocket->sk->sk_use_task_frag = false; |
882 | file = sock_alloc_file(sock: csocket, flags: 0, NULL); |
883 | if (IS_ERR(ptr: file)) { |
884 | pr_err("%s (%d): failed to map fd\n" , |
885 | __func__, task_pid_nr(current)); |
886 | kfree(objp: p); |
887 | return PTR_ERR(ptr: file); |
888 | } |
889 | |
890 | get_file(f: file); |
891 | p->wr = p->rd = file; |
892 | client->trans = p; |
893 | client->status = Connected; |
894 | |
895 | p->rd->f_flags |= O_NONBLOCK; |
896 | |
897 | p9_conn_create(client); |
898 | return 0; |
899 | } |
900 | |
901 | /** |
902 | * p9_conn_destroy - cancels all pending requests of mux |
903 | * @m: mux to destroy |
904 | * |
905 | */ |
906 | |
907 | static void p9_conn_destroy(struct p9_conn *m) |
908 | { |
909 | p9_debug(P9_DEBUG_TRANS, "mux %p prev %p next %p\n" , |
910 | m, m->mux_list.prev, m->mux_list.next); |
911 | |
912 | p9_mux_poll_stop(m); |
913 | cancel_work_sync(work: &m->rq); |
914 | if (m->rreq) { |
915 | p9_req_put(c: m->client, r: m->rreq); |
916 | m->rreq = NULL; |
917 | } |
918 | cancel_work_sync(work: &m->wq); |
919 | if (m->wreq) { |
920 | p9_req_put(c: m->client, r: m->wreq); |
921 | m->wreq = NULL; |
922 | } |
923 | |
924 | p9_conn_cancel(m, err: -ECONNRESET); |
925 | |
926 | m->client = NULL; |
927 | } |
928 | |
929 | /** |
930 | * p9_fd_close - shutdown file descriptor transport |
931 | * @client: client instance |
932 | * |
933 | */ |
934 | |
935 | static void p9_fd_close(struct p9_client *client) |
936 | { |
937 | struct p9_trans_fd *ts; |
938 | |
939 | if (!client) |
940 | return; |
941 | |
942 | ts = client->trans; |
943 | if (!ts) |
944 | return; |
945 | |
946 | client->status = Disconnected; |
947 | |
948 | p9_conn_destroy(m: &ts->conn); |
949 | |
950 | if (ts->rd) |
951 | fput(ts->rd); |
952 | if (ts->wr) |
953 | fput(ts->wr); |
954 | |
955 | kfree(objp: ts); |
956 | } |
957 | |
958 | /* |
959 | * stolen from NFS - maybe should be made a generic function? |
960 | */ |
961 | static inline int valid_ipaddr4(const char *buf) |
962 | { |
963 | int rc, count, in[4]; |
964 | |
965 | rc = sscanf(buf, "%d.%d.%d.%d" , &in[0], &in[1], &in[2], &in[3]); |
966 | if (rc != 4) |
967 | return -EINVAL; |
968 | for (count = 0; count < 4; count++) { |
969 | if (in[count] > 255) |
970 | return -EINVAL; |
971 | } |
972 | return 0; |
973 | } |
974 | |
975 | static int p9_bind_privport(struct socket *sock) |
976 | { |
977 | struct sockaddr_in cl; |
978 | int port, err = -EINVAL; |
979 | |
980 | memset(&cl, 0, sizeof(cl)); |
981 | cl.sin_family = AF_INET; |
982 | cl.sin_addr.s_addr = htonl(INADDR_ANY); |
983 | for (port = p9_ipport_resv_max; port >= p9_ipport_resv_min; port--) { |
984 | cl.sin_port = htons((ushort)port); |
985 | err = kernel_bind(sock, addr: (struct sockaddr *)&cl, addrlen: sizeof(cl)); |
986 | if (err != -EADDRINUSE) |
987 | break; |
988 | } |
989 | return err; |
990 | } |
991 | |
992 | |
993 | static int |
994 | p9_fd_create_tcp(struct p9_client *client, const char *addr, char *args) |
995 | { |
996 | int err; |
997 | struct socket *csocket; |
998 | struct sockaddr_in sin_server; |
999 | struct p9_fd_opts opts; |
1000 | |
1001 | err = parse_opts(params: args, opts: &opts); |
1002 | if (err < 0) |
1003 | return err; |
1004 | |
1005 | if (addr == NULL || valid_ipaddr4(buf: addr) < 0) |
1006 | return -EINVAL; |
1007 | |
1008 | csocket = NULL; |
1009 | |
1010 | client->trans_opts.tcp.port = opts.port; |
1011 | client->trans_opts.tcp.privport = opts.privport; |
1012 | sin_server.sin_family = AF_INET; |
1013 | sin_server.sin_addr.s_addr = in_aton(str: addr); |
1014 | sin_server.sin_port = htons(opts.port); |
1015 | err = __sock_create(current->nsproxy->net_ns, PF_INET, |
1016 | type: SOCK_STREAM, IPPROTO_TCP, res: &csocket, kern: 1); |
1017 | if (err) { |
1018 | pr_err("%s (%d): problem creating socket\n" , |
1019 | __func__, task_pid_nr(current)); |
1020 | return err; |
1021 | } |
1022 | |
1023 | if (opts.privport) { |
1024 | err = p9_bind_privport(sock: csocket); |
1025 | if (err < 0) { |
1026 | pr_err("%s (%d): problem binding to privport\n" , |
1027 | __func__, task_pid_nr(current)); |
1028 | sock_release(sock: csocket); |
1029 | return err; |
1030 | } |
1031 | } |
1032 | |
1033 | err = READ_ONCE(csocket->ops)->connect(csocket, |
1034 | (struct sockaddr *)&sin_server, |
1035 | sizeof(struct sockaddr_in), 0); |
1036 | if (err < 0) { |
1037 | pr_err("%s (%d): problem connecting socket to %s\n" , |
1038 | __func__, task_pid_nr(current), addr); |
1039 | sock_release(sock: csocket); |
1040 | return err; |
1041 | } |
1042 | |
1043 | return p9_socket_open(client, csocket); |
1044 | } |
1045 | |
1046 | static int |
1047 | p9_fd_create_unix(struct p9_client *client, const char *addr, char *args) |
1048 | { |
1049 | int err; |
1050 | struct socket *csocket; |
1051 | struct sockaddr_un sun_server; |
1052 | |
1053 | csocket = NULL; |
1054 | |
1055 | if (!addr || !strlen(addr)) |
1056 | return -EINVAL; |
1057 | |
1058 | if (strlen(addr) >= UNIX_PATH_MAX) { |
1059 | pr_err("%s (%d): address too long: %s\n" , |
1060 | __func__, task_pid_nr(current), addr); |
1061 | return -ENAMETOOLONG; |
1062 | } |
1063 | |
1064 | sun_server.sun_family = PF_UNIX; |
1065 | strcpy(p: sun_server.sun_path, q: addr); |
1066 | err = __sock_create(current->nsproxy->net_ns, PF_UNIX, |
1067 | type: SOCK_STREAM, proto: 0, res: &csocket, kern: 1); |
1068 | if (err < 0) { |
1069 | pr_err("%s (%d): problem creating socket\n" , |
1070 | __func__, task_pid_nr(current)); |
1071 | |
1072 | return err; |
1073 | } |
1074 | err = READ_ONCE(csocket->ops)->connect(csocket, (struct sockaddr *)&sun_server, |
1075 | sizeof(struct sockaddr_un) - 1, 0); |
1076 | if (err < 0) { |
1077 | pr_err("%s (%d): problem connecting socket: %s: %d\n" , |
1078 | __func__, task_pid_nr(current), addr, err); |
1079 | sock_release(sock: csocket); |
1080 | return err; |
1081 | } |
1082 | |
1083 | return p9_socket_open(client, csocket); |
1084 | } |
1085 | |
1086 | static int |
1087 | p9_fd_create(struct p9_client *client, const char *addr, char *args) |
1088 | { |
1089 | int err; |
1090 | struct p9_fd_opts opts; |
1091 | |
1092 | err = parse_opts(params: args, opts: &opts); |
1093 | if (err < 0) |
1094 | return err; |
1095 | client->trans_opts.fd.rfd = opts.rfd; |
1096 | client->trans_opts.fd.wfd = opts.wfd; |
1097 | |
1098 | if (opts.rfd == ~0 || opts.wfd == ~0) { |
1099 | pr_err("Insufficient options for proto=fd\n" ); |
1100 | return -ENOPROTOOPT; |
1101 | } |
1102 | |
1103 | err = p9_fd_open(client, rfd: opts.rfd, wfd: opts.wfd); |
1104 | if (err < 0) |
1105 | return err; |
1106 | |
1107 | p9_conn_create(client); |
1108 | |
1109 | return 0; |
1110 | } |
1111 | |
1112 | static struct p9_trans_module p9_tcp_trans = { |
1113 | .name = "tcp" , |
1114 | .maxsize = MAX_SOCK_BUF, |
1115 | .pooled_rbuffers = false, |
1116 | .def = 0, |
1117 | .create = p9_fd_create_tcp, |
1118 | .close = p9_fd_close, |
1119 | .request = p9_fd_request, |
1120 | .cancel = p9_fd_cancel, |
1121 | .cancelled = p9_fd_cancelled, |
1122 | .show_options = p9_fd_show_options, |
1123 | .owner = THIS_MODULE, |
1124 | }; |
1125 | MODULE_ALIAS_9P("tcp" ); |
1126 | |
1127 | static struct p9_trans_module p9_unix_trans = { |
1128 | .name = "unix" , |
1129 | .maxsize = MAX_SOCK_BUF, |
1130 | .def = 0, |
1131 | .create = p9_fd_create_unix, |
1132 | .close = p9_fd_close, |
1133 | .request = p9_fd_request, |
1134 | .cancel = p9_fd_cancel, |
1135 | .cancelled = p9_fd_cancelled, |
1136 | .show_options = p9_fd_show_options, |
1137 | .owner = THIS_MODULE, |
1138 | }; |
1139 | MODULE_ALIAS_9P("unix" ); |
1140 | |
1141 | static struct p9_trans_module p9_fd_trans = { |
1142 | .name = "fd" , |
1143 | .maxsize = MAX_SOCK_BUF, |
1144 | .def = 0, |
1145 | .create = p9_fd_create, |
1146 | .close = p9_fd_close, |
1147 | .request = p9_fd_request, |
1148 | .cancel = p9_fd_cancel, |
1149 | .cancelled = p9_fd_cancelled, |
1150 | .show_options = p9_fd_show_options, |
1151 | .owner = THIS_MODULE, |
1152 | }; |
1153 | MODULE_ALIAS_9P("fd" ); |
1154 | |
1155 | /** |
1156 | * p9_poll_workfn - poll worker thread |
1157 | * @work: work queue |
1158 | * |
1159 | * polls all v9fs transports for new events and queues the appropriate |
1160 | * work to the work queue |
1161 | * |
1162 | */ |
1163 | |
1164 | static void p9_poll_workfn(struct work_struct *work) |
1165 | { |
1166 | unsigned long flags; |
1167 | |
1168 | p9_debug(P9_DEBUG_TRANS, "start %p\n" , current); |
1169 | |
1170 | spin_lock_irqsave(&p9_poll_lock, flags); |
1171 | while (!list_empty(head: &p9_poll_pending_list)) { |
1172 | struct p9_conn *conn = list_first_entry(&p9_poll_pending_list, |
1173 | struct p9_conn, |
1174 | poll_pending_link); |
1175 | list_del_init(entry: &conn->poll_pending_link); |
1176 | spin_unlock_irqrestore(lock: &p9_poll_lock, flags); |
1177 | |
1178 | p9_poll_mux(m: conn); |
1179 | |
1180 | spin_lock_irqsave(&p9_poll_lock, flags); |
1181 | } |
1182 | spin_unlock_irqrestore(lock: &p9_poll_lock, flags); |
1183 | |
1184 | p9_debug(P9_DEBUG_TRANS, "finish\n" ); |
1185 | } |
1186 | |
1187 | static int __init p9_trans_fd_init(void) |
1188 | { |
1189 | v9fs_register_trans(m: &p9_tcp_trans); |
1190 | v9fs_register_trans(m: &p9_unix_trans); |
1191 | v9fs_register_trans(m: &p9_fd_trans); |
1192 | |
1193 | return 0; |
1194 | } |
1195 | |
1196 | static void __exit p9_trans_fd_exit(void) |
1197 | { |
1198 | flush_work(work: &p9_poll_work); |
1199 | v9fs_unregister_trans(m: &p9_tcp_trans); |
1200 | v9fs_unregister_trans(m: &p9_unix_trans); |
1201 | v9fs_unregister_trans(m: &p9_fd_trans); |
1202 | } |
1203 | |
1204 | module_init(p9_trans_fd_init); |
1205 | module_exit(p9_trans_fd_exit); |
1206 | |
1207 | MODULE_AUTHOR("Eric Van Hensbergen <ericvh@gmail.com>" ); |
1208 | MODULE_DESCRIPTION("Filedescriptor Transport for 9P" ); |
1209 | MODULE_LICENSE("GPL" ); |
1210 | |