1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * linux/net/sunrpc/svc.c |
4 | * |
5 | * High-level RPC service routines |
6 | * |
7 | * Copyright (C) 1995, 1996 Olaf Kirch <okir@monad.swb.de> |
8 | * |
9 | * Multiple threads pools and NUMAisation |
10 | * Copyright (c) 2006 Silicon Graphics, Inc. |
11 | * by Greg Banks <gnb@melbourne.sgi.com> |
12 | */ |
13 | |
14 | #include <linux/linkage.h> |
15 | #include <linux/sched/signal.h> |
16 | #include <linux/errno.h> |
17 | #include <linux/net.h> |
18 | #include <linux/in.h> |
19 | #include <linux/mm.h> |
20 | #include <linux/interrupt.h> |
21 | #include <linux/module.h> |
22 | #include <linux/kthread.h> |
23 | #include <linux/slab.h> |
24 | |
25 | #include <linux/sunrpc/types.h> |
26 | #include <linux/sunrpc/xdr.h> |
27 | #include <linux/sunrpc/stats.h> |
28 | #include <linux/sunrpc/svcsock.h> |
29 | #include <linux/sunrpc/clnt.h> |
30 | #include <linux/sunrpc/bc_xprt.h> |
31 | |
32 | #include <trace/events/sunrpc.h> |
33 | |
34 | #include "fail.h" |
35 | |
#define RPCDBG_FACILITY RPCDBG_SVCDSP

/* Forward declaration; used during service setup and teardown below. */
static void svc_unregister(const struct svc_serv *serv, struct net *net);

/* Pool mode used unless overridden by the "pool_mode" module parameter. */
#define SVC_POOL_DEFAULT SVC_POOL_GLOBAL
41 | |
42 | /* |
43 | * Mode for mapping cpus to pools. |
44 | */ |
45 | enum { |
46 | SVC_POOL_AUTO = -1, /* choose one of the others */ |
47 | SVC_POOL_GLOBAL, /* no mapping, just a single global pool |
48 | * (legacy & UP mode) */ |
49 | SVC_POOL_PERCPU, /* one pool per cpu */ |
50 | SVC_POOL_PERNODE /* one pool per numa node */ |
51 | }; |
52 | |
53 | /* |
54 | * Structure for mapping cpus to pools and vice versa. |
55 | * Setup once during sunrpc initialisation. |
56 | */ |
57 | |
58 | struct svc_pool_map { |
59 | int count; /* How many svc_servs use us */ |
60 | int mode; /* Note: int not enum to avoid |
61 | * warnings about "enumeration value |
62 | * not handled in switch" */ |
63 | unsigned int npools; |
64 | unsigned int *pool_to; /* maps pool id to cpu or node */ |
65 | unsigned int *to_pool; /* maps cpu or node to pool id */ |
66 | }; |
67 | |
/* The single global cpu<->pool map; unused until first svc_pool_map_get(). */
static struct svc_pool_map svc_pool_map = {
	.mode = SVC_POOL_DEFAULT
};

static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */
73 | |
74 | static int |
75 | param_set_pool_mode(const char *val, const struct kernel_param *kp) |
76 | { |
77 | int *ip = (int *)kp->arg; |
78 | struct svc_pool_map *m = &svc_pool_map; |
79 | int err; |
80 | |
81 | mutex_lock(&svc_pool_map_mutex); |
82 | |
83 | err = -EBUSY; |
84 | if (m->count) |
85 | goto out; |
86 | |
87 | err = 0; |
88 | if (!strncmp(val, "auto" , 4)) |
89 | *ip = SVC_POOL_AUTO; |
90 | else if (!strncmp(val, "global" , 6)) |
91 | *ip = SVC_POOL_GLOBAL; |
92 | else if (!strncmp(val, "percpu" , 6)) |
93 | *ip = SVC_POOL_PERCPU; |
94 | else if (!strncmp(val, "pernode" , 7)) |
95 | *ip = SVC_POOL_PERNODE; |
96 | else |
97 | err = -EINVAL; |
98 | |
99 | out: |
100 | mutex_unlock(lock: &svc_pool_map_mutex); |
101 | return err; |
102 | } |
103 | |
104 | static int |
105 | param_get_pool_mode(char *buf, const struct kernel_param *kp) |
106 | { |
107 | int *ip = (int *)kp->arg; |
108 | |
109 | switch (*ip) |
110 | { |
111 | case SVC_POOL_AUTO: |
112 | return sysfs_emit(buf, fmt: "auto\n" ); |
113 | case SVC_POOL_GLOBAL: |
114 | return sysfs_emit(buf, fmt: "global\n" ); |
115 | case SVC_POOL_PERCPU: |
116 | return sysfs_emit(buf, fmt: "percpu\n" ); |
117 | case SVC_POOL_PERNODE: |
118 | return sysfs_emit(buf, fmt: "pernode\n" ); |
119 | default: |
120 | return sysfs_emit(buf, fmt: "%d\n" , *ip); |
121 | } |
122 | } |
123 | |
/*
 * Expose the pool mode as a writable module parameter
 * (e.g. sunrpc.pool_mode); writes fail with -EBUSY while any
 * svc_serv holds a reference to the pool map.
 */
module_param_call(pool_mode, param_set_pool_mode, param_get_pool_mode,
		 &svc_pool_map.mode, 0644);
126 | |
127 | /* |
128 | * Detect best pool mapping mode heuristically, |
129 | * according to the machine's topology. |
130 | */ |
131 | static int |
132 | svc_pool_map_choose_mode(void) |
133 | { |
134 | unsigned int node; |
135 | |
136 | if (nr_online_nodes > 1) { |
137 | /* |
138 | * Actually have multiple NUMA nodes, |
139 | * so split pools on NUMA node boundaries |
140 | */ |
141 | return SVC_POOL_PERNODE; |
142 | } |
143 | |
144 | node = first_online_node; |
145 | if (nr_cpus_node(node) > 2) { |
146 | /* |
147 | * Non-trivial SMP, or CONFIG_NUMA on |
148 | * non-NUMA hardware, e.g. with a generic |
149 | * x86_64 kernel on Xeons. In this case we |
150 | * want to divide the pools on cpu boundaries. |
151 | */ |
152 | return SVC_POOL_PERCPU; |
153 | } |
154 | |
155 | /* default: one global pool */ |
156 | return SVC_POOL_GLOBAL; |
157 | } |
158 | |
159 | /* |
160 | * Allocate the to_pool[] and pool_to[] arrays. |
161 | * Returns 0 on success or an errno. |
162 | */ |
163 | static int |
164 | svc_pool_map_alloc_arrays(struct svc_pool_map *m, unsigned int maxpools) |
165 | { |
166 | m->to_pool = kcalloc(n: maxpools, size: sizeof(unsigned int), GFP_KERNEL); |
167 | if (!m->to_pool) |
168 | goto fail; |
169 | m->pool_to = kcalloc(n: maxpools, size: sizeof(unsigned int), GFP_KERNEL); |
170 | if (!m->pool_to) |
171 | goto fail_free; |
172 | |
173 | return 0; |
174 | |
175 | fail_free: |
176 | kfree(objp: m->to_pool); |
177 | m->to_pool = NULL; |
178 | fail: |
179 | return -ENOMEM; |
180 | } |
181 | |
182 | /* |
183 | * Initialise the pool map for SVC_POOL_PERCPU mode. |
184 | * Returns number of pools or <0 on error. |
185 | */ |
186 | static int |
187 | svc_pool_map_init_percpu(struct svc_pool_map *m) |
188 | { |
189 | unsigned int maxpools = nr_cpu_ids; |
190 | unsigned int pidx = 0; |
191 | unsigned int cpu; |
192 | int err; |
193 | |
194 | err = svc_pool_map_alloc_arrays(m, maxpools); |
195 | if (err) |
196 | return err; |
197 | |
198 | for_each_online_cpu(cpu) { |
199 | BUG_ON(pidx >= maxpools); |
200 | m->to_pool[cpu] = pidx; |
201 | m->pool_to[pidx] = cpu; |
202 | pidx++; |
203 | } |
204 | /* cpus brought online later all get mapped to pool0, sorry */ |
205 | |
206 | return pidx; |
207 | }; |
208 | |
209 | |
210 | /* |
211 | * Initialise the pool map for SVC_POOL_PERNODE mode. |
212 | * Returns number of pools or <0 on error. |
213 | */ |
214 | static int |
215 | svc_pool_map_init_pernode(struct svc_pool_map *m) |
216 | { |
217 | unsigned int maxpools = nr_node_ids; |
218 | unsigned int pidx = 0; |
219 | unsigned int node; |
220 | int err; |
221 | |
222 | err = svc_pool_map_alloc_arrays(m, maxpools); |
223 | if (err) |
224 | return err; |
225 | |
226 | for_each_node_with_cpus(node) { |
227 | /* some architectures (e.g. SN2) have cpuless nodes */ |
228 | BUG_ON(pidx > maxpools); |
229 | m->to_pool[node] = pidx; |
230 | m->pool_to[pidx] = node; |
231 | pidx++; |
232 | } |
233 | /* nodes brought online later all get mapped to pool0, sorry */ |
234 | |
235 | return pidx; |
236 | } |
237 | |
238 | |
239 | /* |
240 | * Add a reference to the global map of cpus to pools (and |
241 | * vice versa) if pools are in use. |
242 | * Initialise the map if we're the first user. |
243 | * Returns the number of pools. If this is '1', no reference |
244 | * was taken. |
245 | */ |
246 | static unsigned int |
247 | svc_pool_map_get(void) |
248 | { |
249 | struct svc_pool_map *m = &svc_pool_map; |
250 | int npools = -1; |
251 | |
252 | mutex_lock(&svc_pool_map_mutex); |
253 | |
254 | if (m->count++) { |
255 | mutex_unlock(lock: &svc_pool_map_mutex); |
256 | WARN_ON_ONCE(m->npools <= 1); |
257 | return m->npools; |
258 | } |
259 | |
260 | if (m->mode == SVC_POOL_AUTO) |
261 | m->mode = svc_pool_map_choose_mode(); |
262 | |
263 | switch (m->mode) { |
264 | case SVC_POOL_PERCPU: |
265 | npools = svc_pool_map_init_percpu(m); |
266 | break; |
267 | case SVC_POOL_PERNODE: |
268 | npools = svc_pool_map_init_pernode(m); |
269 | break; |
270 | } |
271 | |
272 | if (npools <= 0) { |
273 | /* default, or memory allocation failure */ |
274 | npools = 1; |
275 | m->mode = SVC_POOL_GLOBAL; |
276 | } |
277 | m->npools = npools; |
278 | |
279 | if (npools == 1) |
280 | /* service is unpooled, so doesn't hold a reference */ |
281 | m->count--; |
282 | |
283 | mutex_unlock(lock: &svc_pool_map_mutex); |
284 | return npools; |
285 | } |
286 | |
287 | /* |
288 | * Drop a reference to the global map of cpus to pools, if |
289 | * pools were in use, i.e. if npools > 1. |
290 | * When the last reference is dropped, the map data is |
291 | * freed; this allows the sysadmin to change the pool |
292 | * mode using the pool_mode module option without |
293 | * rebooting or re-loading sunrpc.ko. |
294 | */ |
295 | static void |
296 | svc_pool_map_put(int npools) |
297 | { |
298 | struct svc_pool_map *m = &svc_pool_map; |
299 | |
300 | if (npools <= 1) |
301 | return; |
302 | mutex_lock(&svc_pool_map_mutex); |
303 | |
304 | if (!--m->count) { |
305 | kfree(objp: m->to_pool); |
306 | m->to_pool = NULL; |
307 | kfree(objp: m->pool_to); |
308 | m->pool_to = NULL; |
309 | m->npools = 0; |
310 | } |
311 | |
312 | mutex_unlock(lock: &svc_pool_map_mutex); |
313 | } |
314 | |
315 | static int svc_pool_map_get_node(unsigned int pidx) |
316 | { |
317 | const struct svc_pool_map *m = &svc_pool_map; |
318 | |
319 | if (m->count) { |
320 | if (m->mode == SVC_POOL_PERCPU) |
321 | return cpu_to_node(cpu: m->pool_to[pidx]); |
322 | if (m->mode == SVC_POOL_PERNODE) |
323 | return m->pool_to[pidx]; |
324 | } |
325 | return NUMA_NO_NODE; |
326 | } |
327 | /* |
328 | * Set the given thread's cpus_allowed mask so that it |
329 | * will only run on cpus in the given pool. |
330 | */ |
331 | static inline void |
332 | svc_pool_map_set_cpumask(struct task_struct *task, unsigned int pidx) |
333 | { |
334 | struct svc_pool_map *m = &svc_pool_map; |
335 | unsigned int node = m->pool_to[pidx]; |
336 | |
337 | /* |
338 | * The caller checks for sv_nrpools > 1, which |
339 | * implies that we've been initialized. |
340 | */ |
341 | WARN_ON_ONCE(m->count == 0); |
342 | if (m->count == 0) |
343 | return; |
344 | |
345 | switch (m->mode) { |
346 | case SVC_POOL_PERCPU: |
347 | { |
348 | set_cpus_allowed_ptr(p: task, cpumask_of(node)); |
349 | break; |
350 | } |
351 | case SVC_POOL_PERNODE: |
352 | { |
353 | set_cpus_allowed_ptr(p: task, new_mask: cpumask_of_node(node)); |
354 | break; |
355 | } |
356 | } |
357 | } |
358 | |
359 | /** |
360 | * svc_pool_for_cpu - Select pool to run a thread on this cpu |
361 | * @serv: An RPC service |
362 | * |
363 | * Use the active CPU and the svc_pool_map's mode setting to |
364 | * select the svc thread pool to use. Once initialized, the |
365 | * svc_pool_map does not change. |
366 | * |
367 | * Return value: |
368 | * A pointer to an svc_pool |
369 | */ |
370 | struct svc_pool *svc_pool_for_cpu(struct svc_serv *serv) |
371 | { |
372 | struct svc_pool_map *m = &svc_pool_map; |
373 | int cpu = raw_smp_processor_id(); |
374 | unsigned int pidx = 0; |
375 | |
376 | if (serv->sv_nrpools <= 1) |
377 | return serv->sv_pools; |
378 | |
379 | switch (m->mode) { |
380 | case SVC_POOL_PERCPU: |
381 | pidx = m->to_pool[cpu]; |
382 | break; |
383 | case SVC_POOL_PERNODE: |
384 | pidx = m->to_pool[cpu_to_node(cpu)]; |
385 | break; |
386 | } |
387 | |
388 | return &serv->sv_pools[pidx % serv->sv_nrpools]; |
389 | } |
390 | |
/* Create this net's local rpcbind client and clear stale registrations. */
int svc_rpcb_setup(struct svc_serv *serv, struct net *net)
{
	int err = rpcb_create_local(net);

	if (err)
		return err;

	/* Remove any stale portmap registrations */
	svc_unregister(serv, net);
	return 0;
}
EXPORT_SYMBOL_GPL(svc_rpcb_setup);
404 | |
/* Unregister @serv's programs and release this net's rpcbind client. */
void svc_rpcb_cleanup(struct svc_serv *serv, struct net *net)
{
	svc_unregister(serv, net);
	rpcb_put_local(net);
}
EXPORT_SYMBOL_GPL(svc_rpcb_cleanup);
411 | |
412 | static int svc_uses_rpcbind(struct svc_serv *serv) |
413 | { |
414 | struct svc_program *progp; |
415 | unsigned int i; |
416 | |
417 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { |
418 | for (i = 0; i < progp->pg_nvers; i++) { |
419 | if (progp->pg_vers[i] == NULL) |
420 | continue; |
421 | if (!progp->pg_vers[i]->vs_hidden) |
422 | return 1; |
423 | } |
424 | } |
425 | |
426 | return 0; |
427 | } |
428 | |
/* Prepare rpcbind for @serv in @net, if any of its versions need it. */
int svc_bind(struct svc_serv *serv, struct net *net)
{
	if (svc_uses_rpcbind(serv))
		return svc_rpcb_setup(serv, net);
	return 0;
}
EXPORT_SYMBOL_GPL(svc_bind);
436 | |
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/* Initialise the backchannel callback queue for @serv. */
static void
__svc_init_bc(struct svc_serv *serv)
{
	lwq_init(&serv->sv_cb_list);
}
#else
/* No backchannel support compiled in: nothing to initialise. */
static void
__svc_init_bc(struct svc_serv *serv)
{
}
#endif
449 | |
450 | /* |
451 | * Create an RPC service |
452 | */ |
453 | static struct svc_serv * |
454 | __svc_create(struct svc_program *prog, struct svc_stat *stats, |
455 | unsigned int bufsize, int npools, int (*threadfn)(void *data)) |
456 | { |
457 | struct svc_serv *serv; |
458 | unsigned int vers; |
459 | unsigned int xdrsize; |
460 | unsigned int i; |
461 | |
462 | if (!(serv = kzalloc(size: sizeof(*serv), GFP_KERNEL))) |
463 | return NULL; |
464 | serv->sv_name = prog->pg_name; |
465 | serv->sv_program = prog; |
466 | serv->sv_stats = stats; |
467 | if (bufsize > RPCSVC_MAXPAYLOAD) |
468 | bufsize = RPCSVC_MAXPAYLOAD; |
469 | serv->sv_max_payload = bufsize? bufsize : 4096; |
470 | serv->sv_max_mesg = roundup(serv->sv_max_payload + PAGE_SIZE, PAGE_SIZE); |
471 | serv->sv_threadfn = threadfn; |
472 | xdrsize = 0; |
473 | while (prog) { |
474 | prog->pg_lovers = prog->pg_nvers-1; |
475 | for (vers=0; vers<prog->pg_nvers ; vers++) |
476 | if (prog->pg_vers[vers]) { |
477 | prog->pg_hivers = vers; |
478 | if (prog->pg_lovers > vers) |
479 | prog->pg_lovers = vers; |
480 | if (prog->pg_vers[vers]->vs_xdrsize > xdrsize) |
481 | xdrsize = prog->pg_vers[vers]->vs_xdrsize; |
482 | } |
483 | prog = prog->pg_next; |
484 | } |
485 | serv->sv_xdrsize = xdrsize; |
486 | INIT_LIST_HEAD(list: &serv->sv_tempsocks); |
487 | INIT_LIST_HEAD(list: &serv->sv_permsocks); |
488 | timer_setup(&serv->sv_temptimer, NULL, 0); |
489 | spin_lock_init(&serv->sv_lock); |
490 | |
491 | __svc_init_bc(serv); |
492 | |
493 | serv->sv_nrpools = npools; |
494 | serv->sv_pools = |
495 | kcalloc(n: serv->sv_nrpools, size: sizeof(struct svc_pool), |
496 | GFP_KERNEL); |
497 | if (!serv->sv_pools) { |
498 | kfree(objp: serv); |
499 | return NULL; |
500 | } |
501 | |
502 | for (i = 0; i < serv->sv_nrpools; i++) { |
503 | struct svc_pool *pool = &serv->sv_pools[i]; |
504 | |
505 | dprintk("svc: initialising pool %u for %s\n" , |
506 | i, serv->sv_name); |
507 | |
508 | pool->sp_id = i; |
509 | lwq_init(q: &pool->sp_xprts); |
510 | INIT_LIST_HEAD(list: &pool->sp_all_threads); |
511 | init_llist_head(list: &pool->sp_idle_threads); |
512 | |
513 | percpu_counter_init(&pool->sp_messages_arrived, 0, GFP_KERNEL); |
514 | percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL); |
515 | percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL); |
516 | } |
517 | |
518 | return serv; |
519 | } |
520 | |
521 | /** |
522 | * svc_create - Create an RPC service |
523 | * @prog: the RPC program the new service will handle |
524 | * @bufsize: maximum message size for @prog |
525 | * @threadfn: a function to service RPC requests for @prog |
526 | * |
527 | * Returns an instantiated struct svc_serv object or NULL. |
528 | */ |
529 | struct svc_serv *svc_create(struct svc_program *prog, unsigned int bufsize, |
530 | int (*threadfn)(void *data)) |
531 | { |
532 | return __svc_create(prog, NULL, bufsize, npools: 1, threadfn); |
533 | } |
534 | EXPORT_SYMBOL_GPL(svc_create); |
535 | |
536 | /** |
537 | * svc_create_pooled - Create an RPC service with pooled threads |
538 | * @prog: the RPC program the new service will handle |
539 | * @stats: the stats struct if desired |
540 | * @bufsize: maximum message size for @prog |
541 | * @threadfn: a function to service RPC requests for @prog |
542 | * |
543 | * Returns an instantiated struct svc_serv object or NULL. |
544 | */ |
545 | struct svc_serv *svc_create_pooled(struct svc_program *prog, |
546 | struct svc_stat *stats, |
547 | unsigned int bufsize, |
548 | int (*threadfn)(void *data)) |
549 | { |
550 | struct svc_serv *serv; |
551 | unsigned int npools = svc_pool_map_get(); |
552 | |
553 | serv = __svc_create(prog, stats, bufsize, npools, threadfn); |
554 | if (!serv) |
555 | goto out_err; |
556 | return serv; |
557 | out_err: |
558 | svc_pool_map_put(npools); |
559 | return NULL; |
560 | } |
561 | EXPORT_SYMBOL_GPL(svc_create_pooled); |
562 | |
563 | /* |
564 | * Destroy an RPC service. Should be called with appropriate locking to |
565 | * protect sv_permsocks and sv_tempsocks. |
566 | */ |
567 | void |
568 | svc_destroy(struct svc_serv **servp) |
569 | { |
570 | struct svc_serv *serv = *servp; |
571 | unsigned int i; |
572 | |
573 | *servp = NULL; |
574 | |
575 | dprintk("svc: svc_destroy(%s)\n" , serv->sv_program->pg_name); |
576 | timer_shutdown_sync(timer: &serv->sv_temptimer); |
577 | |
578 | /* |
579 | * Remaining transports at this point are not expected. |
580 | */ |
581 | WARN_ONCE(!list_empty(&serv->sv_permsocks), |
582 | "SVC: permsocks remain for %s\n" , serv->sv_program->pg_name); |
583 | WARN_ONCE(!list_empty(&serv->sv_tempsocks), |
584 | "SVC: tempsocks remain for %s\n" , serv->sv_program->pg_name); |
585 | |
586 | cache_clean_deferred(owner: serv); |
587 | |
588 | svc_pool_map_put(npools: serv->sv_nrpools); |
589 | |
590 | for (i = 0; i < serv->sv_nrpools; i++) { |
591 | struct svc_pool *pool = &serv->sv_pools[i]; |
592 | |
593 | percpu_counter_destroy(fbc: &pool->sp_messages_arrived); |
594 | percpu_counter_destroy(fbc: &pool->sp_sockets_queued); |
595 | percpu_counter_destroy(fbc: &pool->sp_threads_woken); |
596 | } |
597 | kfree(objp: serv->sv_pools); |
598 | kfree(objp: serv); |
599 | } |
600 | EXPORT_SYMBOL_GPL(svc_destroy); |
601 | |
602 | static bool |
603 | svc_init_buffer(struct svc_rqst *rqstp, unsigned int size, int node) |
604 | { |
605 | unsigned long pages, ret; |
606 | |
607 | /* bc_xprt uses fore channel allocated buffers */ |
608 | if (svc_is_backchannel(rqstp)) |
609 | return true; |
610 | |
611 | pages = size / PAGE_SIZE + 1; /* extra page as we hold both request and reply. |
612 | * We assume one is at most one page |
613 | */ |
614 | WARN_ON_ONCE(pages > RPCSVC_MAXPAGES); |
615 | if (pages > RPCSVC_MAXPAGES) |
616 | pages = RPCSVC_MAXPAGES; |
617 | |
618 | ret = alloc_pages_bulk_array_node(GFP_KERNEL, nid: node, nr_pages: pages, |
619 | page_array: rqstp->rq_pages); |
620 | return ret == pages; |
621 | } |
622 | |
623 | /* |
624 | * Release an RPC server buffer |
625 | */ |
626 | static void |
627 | svc_release_buffer(struct svc_rqst *rqstp) |
628 | { |
629 | unsigned int i; |
630 | |
631 | for (i = 0; i < ARRAY_SIZE(rqstp->rq_pages); i++) |
632 | if (rqstp->rq_pages[i]) |
633 | put_page(page: rqstp->rq_pages[i]); |
634 | } |
635 | |
636 | struct svc_rqst * |
637 | svc_rqst_alloc(struct svc_serv *serv, struct svc_pool *pool, int node) |
638 | { |
639 | struct svc_rqst *rqstp; |
640 | |
641 | rqstp = kzalloc_node(size: sizeof(*rqstp), GFP_KERNEL, node); |
642 | if (!rqstp) |
643 | return rqstp; |
644 | |
645 | folio_batch_init(fbatch: &rqstp->rq_fbatch); |
646 | |
647 | rqstp->rq_server = serv; |
648 | rqstp->rq_pool = pool; |
649 | |
650 | rqstp->rq_scratch_page = alloc_pages_node(nid: node, GFP_KERNEL, order: 0); |
651 | if (!rqstp->rq_scratch_page) |
652 | goto out_enomem; |
653 | |
654 | rqstp->rq_argp = kmalloc_node(size: serv->sv_xdrsize, GFP_KERNEL, node); |
655 | if (!rqstp->rq_argp) |
656 | goto out_enomem; |
657 | |
658 | rqstp->rq_resp = kmalloc_node(size: serv->sv_xdrsize, GFP_KERNEL, node); |
659 | if (!rqstp->rq_resp) |
660 | goto out_enomem; |
661 | |
662 | if (!svc_init_buffer(rqstp, size: serv->sv_max_mesg, node)) |
663 | goto out_enomem; |
664 | |
665 | return rqstp; |
666 | out_enomem: |
667 | svc_rqst_free(rqstp); |
668 | return NULL; |
669 | } |
670 | EXPORT_SYMBOL_GPL(svc_rqst_alloc); |
671 | |
672 | static struct svc_rqst * |
673 | svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) |
674 | { |
675 | struct svc_rqst *rqstp; |
676 | |
677 | rqstp = svc_rqst_alloc(serv, pool, node); |
678 | if (!rqstp) |
679 | return ERR_PTR(error: -ENOMEM); |
680 | |
681 | spin_lock_bh(lock: &serv->sv_lock); |
682 | serv->sv_nrthreads += 1; |
683 | spin_unlock_bh(lock: &serv->sv_lock); |
684 | |
685 | atomic_inc(v: &pool->sp_nrthreads); |
686 | |
687 | /* Protected by whatever lock the service uses when calling |
688 | * svc_set_num_threads() |
689 | */ |
690 | list_add_rcu(new: &rqstp->rq_all, head: &pool->sp_all_threads); |
691 | |
692 | return rqstp; |
693 | } |
694 | |
695 | /** |
696 | * svc_pool_wake_idle_thread - Awaken an idle thread in @pool |
697 | * @pool: service thread pool |
698 | * |
699 | * Can be called from soft IRQ or process context. Finding an idle |
700 | * service thread and marking it BUSY is atomic with respect to |
701 | * other calls to svc_pool_wake_idle_thread(). |
702 | * |
703 | */ |
704 | void svc_pool_wake_idle_thread(struct svc_pool *pool) |
705 | { |
706 | struct svc_rqst *rqstp; |
707 | struct llist_node *ln; |
708 | |
709 | rcu_read_lock(); |
710 | ln = READ_ONCE(pool->sp_idle_threads.first); |
711 | if (ln) { |
712 | rqstp = llist_entry(ln, struct svc_rqst, rq_idle); |
713 | WRITE_ONCE(rqstp->rq_qtime, ktime_get()); |
714 | if (!task_is_running(rqstp->rq_task)) { |
715 | wake_up_process(tsk: rqstp->rq_task); |
716 | trace_svc_wake_up(pid: rqstp->rq_task->pid); |
717 | percpu_counter_inc(fbc: &pool->sp_threads_woken); |
718 | } |
719 | rcu_read_unlock(); |
720 | return; |
721 | } |
722 | rcu_read_unlock(); |
723 | |
724 | } |
725 | EXPORT_SYMBOL_GPL(svc_pool_wake_idle_thread); |
726 | |
727 | static struct svc_pool * |
728 | svc_pool_next(struct svc_serv *serv, struct svc_pool *pool, unsigned int *state) |
729 | { |
730 | return pool ? pool : &serv->sv_pools[(*state)++ % serv->sv_nrpools]; |
731 | } |
732 | |
733 | static struct svc_pool * |
734 | svc_pool_victim(struct svc_serv *serv, struct svc_pool *target_pool, |
735 | unsigned int *state) |
736 | { |
737 | struct svc_pool *pool; |
738 | unsigned int i; |
739 | |
740 | retry: |
741 | pool = target_pool; |
742 | |
743 | if (pool != NULL) { |
744 | if (atomic_inc_not_zero(v: &pool->sp_nrthreads)) |
745 | goto found_pool; |
746 | return NULL; |
747 | } else { |
748 | for (i = 0; i < serv->sv_nrpools; i++) { |
749 | pool = &serv->sv_pools[--(*state) % serv->sv_nrpools]; |
750 | if (atomic_inc_not_zero(v: &pool->sp_nrthreads)) |
751 | goto found_pool; |
752 | } |
753 | return NULL; |
754 | } |
755 | |
756 | found_pool: |
757 | set_bit(nr: SP_VICTIM_REMAINS, addr: &pool->sp_flags); |
758 | set_bit(nr: SP_NEED_VICTIM, addr: &pool->sp_flags); |
759 | if (!atomic_dec_and_test(v: &pool->sp_nrthreads)) |
760 | return pool; |
761 | /* Nothing left in this pool any more */ |
762 | clear_bit(nr: SP_NEED_VICTIM, addr: &pool->sp_flags); |
763 | clear_bit(nr: SP_VICTIM_REMAINS, addr: &pool->sp_flags); |
764 | goto retry; |
765 | } |
766 | |
767 | static int |
768 | svc_start_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) |
769 | { |
770 | struct svc_rqst *rqstp; |
771 | struct task_struct *task; |
772 | struct svc_pool *chosen_pool; |
773 | unsigned int state = serv->sv_nrthreads-1; |
774 | int node; |
775 | |
776 | do { |
777 | nrservs--; |
778 | chosen_pool = svc_pool_next(serv, pool, state: &state); |
779 | node = svc_pool_map_get_node(pidx: chosen_pool->sp_id); |
780 | |
781 | rqstp = svc_prepare_thread(serv, pool: chosen_pool, node); |
782 | if (IS_ERR(ptr: rqstp)) |
783 | return PTR_ERR(ptr: rqstp); |
784 | task = kthread_create_on_node(threadfn: serv->sv_threadfn, data: rqstp, |
785 | node, namefmt: "%s" , serv->sv_name); |
786 | if (IS_ERR(ptr: task)) { |
787 | svc_exit_thread(rqstp); |
788 | return PTR_ERR(ptr: task); |
789 | } |
790 | |
791 | rqstp->rq_task = task; |
792 | if (serv->sv_nrpools > 1) |
793 | svc_pool_map_set_cpumask(task, pidx: chosen_pool->sp_id); |
794 | |
795 | svc_sock_update_bufs(serv); |
796 | wake_up_process(tsk: task); |
797 | } while (nrservs > 0); |
798 | |
799 | return 0; |
800 | } |
801 | |
802 | static int |
803 | svc_stop_kthreads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) |
804 | { |
805 | unsigned int state = serv->sv_nrthreads-1; |
806 | struct svc_pool *victim; |
807 | |
808 | do { |
809 | victim = svc_pool_victim(serv, target_pool: pool, state: &state); |
810 | if (!victim) |
811 | break; |
812 | svc_pool_wake_idle_thread(victim); |
813 | wait_on_bit(word: &victim->sp_flags, bit: SP_VICTIM_REMAINS, |
814 | TASK_IDLE); |
815 | nrservs++; |
816 | } while (nrservs < 0); |
817 | return 0; |
818 | } |
819 | |
820 | /** |
821 | * svc_set_num_threads - adjust number of threads per RPC service |
822 | * @serv: RPC service to adjust |
823 | * @pool: Specific pool from which to choose threads, or NULL |
824 | * @nrservs: New number of threads for @serv (0 or less means kill all threads) |
825 | * |
826 | * Create or destroy threads to make the number of threads for @serv the |
827 | * given number. If @pool is non-NULL, change only threads in that pool; |
828 | * otherwise, round-robin between all pools for @serv. @serv's |
829 | * sv_nrthreads is adjusted for each thread created or destroyed. |
830 | * |
831 | * Caller must ensure mutual exclusion between this and server startup or |
832 | * shutdown. |
833 | * |
834 | * Returns zero on success or a negative errno if an error occurred while |
835 | * starting a thread. |
836 | */ |
837 | int |
838 | svc_set_num_threads(struct svc_serv *serv, struct svc_pool *pool, int nrservs) |
839 | { |
840 | if (!pool) |
841 | nrservs -= serv->sv_nrthreads; |
842 | else |
843 | nrservs -= atomic_read(v: &pool->sp_nrthreads); |
844 | |
845 | if (nrservs > 0) |
846 | return svc_start_kthreads(serv, pool, nrservs); |
847 | if (nrservs < 0) |
848 | return svc_stop_kthreads(serv, pool, nrservs); |
849 | return 0; |
850 | } |
851 | EXPORT_SYMBOL_GPL(svc_set_num_threads); |
852 | |
853 | /** |
854 | * svc_rqst_replace_page - Replace one page in rq_pages[] |
855 | * @rqstp: svc_rqst with pages to replace |
856 | * @page: replacement page |
857 | * |
858 | * When replacing a page in rq_pages, batch the release of the |
859 | * replaced pages to avoid hammering the page allocator. |
860 | * |
861 | * Return values: |
862 | * %true: page replaced |
863 | * %false: array bounds checking failed |
864 | */ |
865 | bool svc_rqst_replace_page(struct svc_rqst *rqstp, struct page *page) |
866 | { |
867 | struct page **begin = rqstp->rq_pages; |
868 | struct page **end = &rqstp->rq_pages[RPCSVC_MAXPAGES]; |
869 | |
870 | if (unlikely(rqstp->rq_next_page < begin || rqstp->rq_next_page > end)) { |
871 | trace_svc_replace_page_err(rqst: rqstp); |
872 | return false; |
873 | } |
874 | |
875 | if (*rqstp->rq_next_page) { |
876 | if (!folio_batch_add(fbatch: &rqstp->rq_fbatch, |
877 | page_folio(*rqstp->rq_next_page))) |
878 | __folio_batch_release(pvec: &rqstp->rq_fbatch); |
879 | } |
880 | |
881 | get_page(page); |
882 | *(rqstp->rq_next_page++) = page; |
883 | return true; |
884 | } |
885 | EXPORT_SYMBOL_GPL(svc_rqst_replace_page); |
886 | |
887 | /** |
888 | * svc_rqst_release_pages - Release Reply buffer pages |
889 | * @rqstp: RPC transaction context |
890 | * |
891 | * Release response pages that might still be in flight after |
892 | * svc_send, and any spliced filesystem-owned pages. |
893 | */ |
894 | void svc_rqst_release_pages(struct svc_rqst *rqstp) |
895 | { |
896 | int i, count = rqstp->rq_next_page - rqstp->rq_respages; |
897 | |
898 | if (count) { |
899 | release_pages(rqstp->rq_respages, nr: count); |
900 | for (i = 0; i < count; i++) |
901 | rqstp->rq_respages[i] = NULL; |
902 | } |
903 | } |
904 | |
905 | /* |
906 | * Called from a server thread as it's exiting. Caller must hold the "service |
907 | * mutex" for the service. |
908 | */ |
909 | void |
910 | svc_rqst_free(struct svc_rqst *rqstp) |
911 | { |
912 | folio_batch_release(fbatch: &rqstp->rq_fbatch); |
913 | svc_release_buffer(rqstp); |
914 | if (rqstp->rq_scratch_page) |
915 | put_page(page: rqstp->rq_scratch_page); |
916 | kfree(objp: rqstp->rq_resp); |
917 | kfree(objp: rqstp->rq_argp); |
918 | kfree(objp: rqstp->rq_auth_data); |
919 | kfree_rcu(rqstp, rq_rcu_head); |
920 | } |
921 | EXPORT_SYMBOL_GPL(svc_rqst_free); |
922 | |
923 | void |
924 | svc_exit_thread(struct svc_rqst *rqstp) |
925 | { |
926 | struct svc_serv *serv = rqstp->rq_server; |
927 | struct svc_pool *pool = rqstp->rq_pool; |
928 | |
929 | list_del_rcu(entry: &rqstp->rq_all); |
930 | |
931 | atomic_dec(v: &pool->sp_nrthreads); |
932 | |
933 | spin_lock_bh(lock: &serv->sv_lock); |
934 | serv->sv_nrthreads -= 1; |
935 | spin_unlock_bh(lock: &serv->sv_lock); |
936 | svc_sock_update_bufs(serv); |
937 | |
938 | svc_rqst_free(rqstp); |
939 | |
940 | clear_and_wake_up_bit(bit: SP_VICTIM_REMAINS, word: &pool->sp_flags); |
941 | } |
942 | EXPORT_SYMBOL_GPL(svc_exit_thread); |
943 | |
944 | /* |
945 | * Register an "inet" protocol family netid with the local |
946 | * rpcbind daemon via an rpcbind v4 SET request. |
947 | * |
948 | * No netconfig infrastructure is available in the kernel, so |
949 | * we map IP_ protocol numbers to netids by hand. |
950 | * |
951 | * Returns zero on success; a negative errno value is returned |
952 | * if any error occurs. |
953 | */ |
954 | static int __svc_rpcb_register4(struct net *net, const u32 program, |
955 | const u32 version, |
956 | const unsigned short protocol, |
957 | const unsigned short port) |
958 | { |
959 | const struct sockaddr_in sin = { |
960 | .sin_family = AF_INET, |
961 | .sin_addr.s_addr = htonl(INADDR_ANY), |
962 | .sin_port = htons(port), |
963 | }; |
964 | const char *netid; |
965 | int error; |
966 | |
967 | switch (protocol) { |
968 | case IPPROTO_UDP: |
969 | netid = RPCBIND_NETID_UDP; |
970 | break; |
971 | case IPPROTO_TCP: |
972 | netid = RPCBIND_NETID_TCP; |
973 | break; |
974 | default: |
975 | return -ENOPROTOOPT; |
976 | } |
977 | |
978 | error = rpcb_v4_register(net, program, version, |
979 | address: (const struct sockaddr *)&sin, netid); |
980 | |
981 | /* |
982 | * User space didn't support rpcbind v4, so retry this |
983 | * registration request with the legacy rpcbind v2 protocol. |
984 | */ |
985 | if (error == -EPROTONOSUPPORT) |
986 | error = rpcb_register(net, program, version, protocol, port); |
987 | |
988 | return error; |
989 | } |
990 | |
991 | #if IS_ENABLED(CONFIG_IPV6) |
992 | /* |
993 | * Register an "inet6" protocol family netid with the local |
994 | * rpcbind daemon via an rpcbind v4 SET request. |
995 | * |
996 | * No netconfig infrastructure is available in the kernel, so |
997 | * we map IP_ protocol numbers to netids by hand. |
998 | * |
999 | * Returns zero on success; a negative errno value is returned |
1000 | * if any error occurs. |
1001 | */ |
1002 | static int __svc_rpcb_register6(struct net *net, const u32 program, |
1003 | const u32 version, |
1004 | const unsigned short protocol, |
1005 | const unsigned short port) |
1006 | { |
1007 | const struct sockaddr_in6 sin6 = { |
1008 | .sin6_family = AF_INET6, |
1009 | .sin6_addr = IN6ADDR_ANY_INIT, |
1010 | .sin6_port = htons(port), |
1011 | }; |
1012 | const char *netid; |
1013 | int error; |
1014 | |
1015 | switch (protocol) { |
1016 | case IPPROTO_UDP: |
1017 | netid = RPCBIND_NETID_UDP6; |
1018 | break; |
1019 | case IPPROTO_TCP: |
1020 | netid = RPCBIND_NETID_TCP6; |
1021 | break; |
1022 | default: |
1023 | return -ENOPROTOOPT; |
1024 | } |
1025 | |
1026 | error = rpcb_v4_register(net, program, version, |
1027 | address: (const struct sockaddr *)&sin6, netid); |
1028 | |
1029 | /* |
1030 | * User space didn't support rpcbind version 4, so we won't |
1031 | * use a PF_INET6 listener. |
1032 | */ |
1033 | if (error == -EPROTONOSUPPORT) |
1034 | error = -EAFNOSUPPORT; |
1035 | |
1036 | return error; |
1037 | } |
1038 | #endif /* IS_ENABLED(CONFIG_IPV6) */ |
1039 | |
1040 | /* |
1041 | * Register a kernel RPC service via rpcbind version 4. |
1042 | * |
1043 | * Returns zero on success; a negative errno value is returned |
1044 | * if any error occurs. |
1045 | */ |
1046 | static int __svc_register(struct net *net, const char *progname, |
1047 | const u32 program, const u32 version, |
1048 | const int family, |
1049 | const unsigned short protocol, |
1050 | const unsigned short port) |
1051 | { |
1052 | int error = -EAFNOSUPPORT; |
1053 | |
1054 | switch (family) { |
1055 | case PF_INET: |
1056 | error = __svc_rpcb_register4(net, program, version, |
1057 | protocol, port); |
1058 | break; |
1059 | #if IS_ENABLED(CONFIG_IPV6) |
1060 | case PF_INET6: |
1061 | error = __svc_rpcb_register6(net, program, version, |
1062 | protocol, port); |
1063 | #endif |
1064 | } |
1065 | |
1066 | trace_svc_register(program: progname, version, family, protocol, port, error); |
1067 | return error; |
1068 | } |
1069 | |
1070 | int svc_rpcbind_set_version(struct net *net, |
1071 | const struct svc_program *progp, |
1072 | u32 version, int family, |
1073 | unsigned short proto, |
1074 | unsigned short port) |
1075 | { |
1076 | return __svc_register(net, progname: progp->pg_name, program: progp->pg_prog, |
1077 | version, family, protocol: proto, port); |
1078 | |
1079 | } |
1080 | EXPORT_SYMBOL_GPL(svc_rpcbind_set_version); |
1081 | |
1082 | int svc_generic_rpcbind_set(struct net *net, |
1083 | const struct svc_program *progp, |
1084 | u32 version, int family, |
1085 | unsigned short proto, |
1086 | unsigned short port) |
1087 | { |
1088 | const struct svc_version *vers = progp->pg_vers[version]; |
1089 | int error; |
1090 | |
1091 | if (vers == NULL) |
1092 | return 0; |
1093 | |
1094 | if (vers->vs_hidden) { |
1095 | trace_svc_noregister(program: progp->pg_name, version, family: proto, |
1096 | protocol: port, port: family, error: 0); |
1097 | return 0; |
1098 | } |
1099 | |
1100 | /* |
1101 | * Don't register a UDP port if we need congestion |
1102 | * control. |
1103 | */ |
1104 | if (vers->vs_need_cong_ctrl && proto == IPPROTO_UDP) |
1105 | return 0; |
1106 | |
1107 | error = svc_rpcbind_set_version(net, progp, version, |
1108 | family, proto, port); |
1109 | |
1110 | return (vers->vs_rpcb_optnl) ? 0 : error; |
1111 | } |
1112 | EXPORT_SYMBOL_GPL(svc_generic_rpcbind_set); |
1113 | |
1114 | /** |
1115 | * svc_register - register an RPC service with the local portmapper |
1116 | * @serv: svc_serv struct for the service to register |
1117 | * @net: net namespace for the service to register |
1118 | * @family: protocol family of service's listener socket |
1119 | * @proto: transport protocol number to advertise |
1120 | * @port: port to advertise |
1121 | * |
1122 | * Service is registered for any address in the passed-in protocol family |
1123 | */ |
1124 | int svc_register(const struct svc_serv *serv, struct net *net, |
1125 | const int family, const unsigned short proto, |
1126 | const unsigned short port) |
1127 | { |
1128 | struct svc_program *progp; |
1129 | unsigned int i; |
1130 | int error = 0; |
1131 | |
1132 | WARN_ON_ONCE(proto == 0 && port == 0); |
1133 | if (proto == 0 && port == 0) |
1134 | return -EINVAL; |
1135 | |
1136 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { |
1137 | for (i = 0; i < progp->pg_nvers; i++) { |
1138 | |
1139 | error = progp->pg_rpcbind_set(net, progp, i, |
1140 | family, proto, port); |
1141 | if (error < 0) { |
1142 | printk(KERN_WARNING "svc: failed to register " |
1143 | "%sv%u RPC service (errno %d).\n" , |
1144 | progp->pg_name, i, -error); |
1145 | break; |
1146 | } |
1147 | } |
1148 | } |
1149 | |
1150 | return error; |
1151 | } |
1152 | |
1153 | /* |
1154 | * If user space is running rpcbind, it should take the v4 UNSET |
1155 | * and clear everything for this [program, version]. If user space |
1156 | * is running portmap, it will reject the v4 UNSET, but won't have |
1157 | * any "inet6" entries anyway. So a PMAP_UNSET should be sufficient |
1158 | * in this case to clear all existing entries for [program, version]. |
1159 | */ |
1160 | static void __svc_unregister(struct net *net, const u32 program, const u32 version, |
1161 | const char *progname) |
1162 | { |
1163 | int error; |
1164 | |
1165 | error = rpcb_v4_register(net, program, version, NULL, netid: "" ); |
1166 | |
1167 | /* |
1168 | * User space didn't support rpcbind v4, so retry this |
1169 | * request with the legacy rpcbind v2 protocol. |
1170 | */ |
1171 | if (error == -EPROTONOSUPPORT) |
1172 | error = rpcb_register(net, program, version, 0, 0); |
1173 | |
1174 | trace_svc_unregister(program: progname, version, error); |
1175 | } |
1176 | |
1177 | /* |
1178 | * All netids, bind addresses and ports registered for [program, version] |
1179 | * are removed from the local rpcbind database (if the service is not |
1180 | * hidden) to make way for a new instance of the service. |
1181 | * |
1182 | * The result of unregistration is reported via dprintk for those who want |
1183 | * verification of the result, but is otherwise not important. |
1184 | */ |
1185 | static void svc_unregister(const struct svc_serv *serv, struct net *net) |
1186 | { |
1187 | struct sighand_struct *sighand; |
1188 | struct svc_program *progp; |
1189 | unsigned long flags; |
1190 | unsigned int i; |
1191 | |
1192 | clear_thread_flag(TIF_SIGPENDING); |
1193 | |
1194 | for (progp = serv->sv_program; progp; progp = progp->pg_next) { |
1195 | for (i = 0; i < progp->pg_nvers; i++) { |
1196 | if (progp->pg_vers[i] == NULL) |
1197 | continue; |
1198 | if (progp->pg_vers[i]->vs_hidden) |
1199 | continue; |
1200 | __svc_unregister(net, program: progp->pg_prog, version: i, progname: progp->pg_name); |
1201 | } |
1202 | } |
1203 | |
1204 | rcu_read_lock(); |
1205 | sighand = rcu_dereference(current->sighand); |
1206 | spin_lock_irqsave(&sighand->siglock, flags); |
1207 | recalc_sigpending(); |
1208 | spin_unlock_irqrestore(lock: &sighand->siglock, flags); |
1209 | rcu_read_unlock(); |
1210 | } |
1211 | |
1212 | /* |
1213 | * dprintk the given error with the address of the client that caused it. |
1214 | */ |
1215 | #if IS_ENABLED(CONFIG_SUNRPC_DEBUG) |
1216 | static __printf(2, 3) |
1217 | void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) |
1218 | { |
1219 | struct va_format vaf; |
1220 | va_list args; |
1221 | char buf[RPC_MAX_ADDRBUFLEN]; |
1222 | |
1223 | va_start(args, fmt); |
1224 | |
1225 | vaf.fmt = fmt; |
1226 | vaf.va = &args; |
1227 | |
1228 | dprintk("svc: %s: %pV" , svc_print_addr(rqstp, buf, sizeof(buf)), &vaf); |
1229 | |
1230 | va_end(args); |
1231 | } |
1232 | #else |
1233 | static __printf(2,3) void svc_printk(struct svc_rqst *rqstp, const char *fmt, ...) {} |
1234 | #endif |
1235 | |
1236 | __be32 |
1237 | svc_generic_init_request(struct svc_rqst *rqstp, |
1238 | const struct svc_program *progp, |
1239 | struct svc_process_info *ret) |
1240 | { |
1241 | const struct svc_version *versp = NULL; /* compiler food */ |
1242 | const struct svc_procedure *procp = NULL; |
1243 | |
1244 | if (rqstp->rq_vers >= progp->pg_nvers ) |
1245 | goto err_bad_vers; |
1246 | versp = progp->pg_vers[rqstp->rq_vers]; |
1247 | if (!versp) |
1248 | goto err_bad_vers; |
1249 | |
1250 | /* |
1251 | * Some protocol versions (namely NFSv4) require some form of |
1252 | * congestion control. (See RFC 7530 section 3.1 paragraph 2) |
1253 | * In other words, UDP is not allowed. We mark those when setting |
1254 | * up the svc_xprt, and verify that here. |
1255 | * |
1256 | * The spec is not very clear about what error should be returned |
1257 | * when someone tries to access a server that is listening on UDP |
1258 | * for lower versions. RPC_PROG_MISMATCH seems to be the closest |
1259 | * fit. |
1260 | */ |
1261 | if (versp->vs_need_cong_ctrl && rqstp->rq_xprt && |
1262 | !test_bit(XPT_CONG_CTRL, &rqstp->rq_xprt->xpt_flags)) |
1263 | goto err_bad_vers; |
1264 | |
1265 | if (rqstp->rq_proc >= versp->vs_nproc) |
1266 | goto err_bad_proc; |
1267 | rqstp->rq_procinfo = procp = &versp->vs_proc[rqstp->rq_proc]; |
1268 | if (!procp) |
1269 | goto err_bad_proc; |
1270 | |
1271 | /* Initialize storage for argp and resp */ |
1272 | memset(rqstp->rq_argp, 0, procp->pc_argzero); |
1273 | memset(rqstp->rq_resp, 0, procp->pc_ressize); |
1274 | |
1275 | /* Bump per-procedure stats counter */ |
1276 | this_cpu_inc(versp->vs_count[rqstp->rq_proc]); |
1277 | |
1278 | ret->dispatch = versp->vs_dispatch; |
1279 | return rpc_success; |
1280 | err_bad_vers: |
1281 | ret->mismatch.lovers = progp->pg_lovers; |
1282 | ret->mismatch.hivers = progp->pg_hivers; |
1283 | return rpc_prog_mismatch; |
1284 | err_bad_proc: |
1285 | return rpc_proc_unavail; |
1286 | } |
1287 | EXPORT_SYMBOL_GPL(svc_generic_init_request); |
1288 | |
1289 | /* |
1290 | * Common routine for processing the RPC request. |
1291 | */ |
1292 | static int |
1293 | svc_process_common(struct svc_rqst *rqstp) |
1294 | { |
1295 | struct xdr_stream *xdr = &rqstp->rq_res_stream; |
1296 | struct svc_program *progp; |
1297 | const struct svc_procedure *procp = NULL; |
1298 | struct svc_serv *serv = rqstp->rq_server; |
1299 | struct svc_process_info process; |
1300 | enum svc_auth_status auth_res; |
1301 | unsigned int aoffset; |
1302 | int rc; |
1303 | __be32 *p; |
1304 | |
1305 | /* Will be turned off only when NFSv4 Sessions are used */ |
1306 | set_bit(nr: RQ_USEDEFERRAL, addr: &rqstp->rq_flags); |
1307 | clear_bit(nr: RQ_DROPME, addr: &rqstp->rq_flags); |
1308 | |
1309 | /* Construct the first words of the reply: */ |
1310 | svcxdr_init_encode(rqstp); |
1311 | xdr_stream_encode_be32(xdr, n: rqstp->rq_xid); |
1312 | xdr_stream_encode_be32(xdr, rpc_reply); |
1313 | |
1314 | p = xdr_inline_decode(xdr: &rqstp->rq_arg_stream, XDR_UNIT * 4); |
1315 | if (unlikely(!p)) |
1316 | goto err_short_len; |
1317 | if (*p++ != cpu_to_be32(RPC_VERSION)) |
1318 | goto err_bad_rpc; |
1319 | |
1320 | xdr_stream_encode_be32(xdr, rpc_msg_accepted); |
1321 | |
1322 | rqstp->rq_prog = be32_to_cpup(p: p++); |
1323 | rqstp->rq_vers = be32_to_cpup(p: p++); |
1324 | rqstp->rq_proc = be32_to_cpup(p); |
1325 | |
1326 | for (progp = serv->sv_program; progp; progp = progp->pg_next) |
1327 | if (rqstp->rq_prog == progp->pg_prog) |
1328 | break; |
1329 | |
1330 | /* |
1331 | * Decode auth data, and add verifier to reply buffer. |
1332 | * We do this before anything else in order to get a decent |
1333 | * auth verifier. |
1334 | */ |
1335 | auth_res = svc_authenticate(rqstp); |
1336 | /* Also give the program a chance to reject this call: */ |
1337 | if (auth_res == SVC_OK && progp) |
1338 | auth_res = progp->pg_authenticate(rqstp); |
1339 | trace_svc_authenticate(rqst: rqstp, auth_res); |
1340 | switch (auth_res) { |
1341 | case SVC_OK: |
1342 | break; |
1343 | case SVC_GARBAGE: |
1344 | goto err_garbage_args; |
1345 | case SVC_SYSERR: |
1346 | goto err_system_err; |
1347 | case SVC_DENIED: |
1348 | goto err_bad_auth; |
1349 | case SVC_CLOSE: |
1350 | goto close; |
1351 | case SVC_DROP: |
1352 | goto dropit; |
1353 | case SVC_COMPLETE: |
1354 | goto sendit; |
1355 | default: |
1356 | pr_warn_once("Unexpected svc_auth_status (%d)\n" , auth_res); |
1357 | goto err_system_err; |
1358 | } |
1359 | |
1360 | if (progp == NULL) |
1361 | goto err_bad_prog; |
1362 | |
1363 | switch (progp->pg_init_request(rqstp, progp, &process)) { |
1364 | case rpc_success: |
1365 | break; |
1366 | case rpc_prog_unavail: |
1367 | goto err_bad_prog; |
1368 | case rpc_prog_mismatch: |
1369 | goto err_bad_vers; |
1370 | case rpc_proc_unavail: |
1371 | goto err_bad_proc; |
1372 | } |
1373 | |
1374 | procp = rqstp->rq_procinfo; |
1375 | /* Should this check go into the dispatcher? */ |
1376 | if (!procp || !procp->pc_func) |
1377 | goto err_bad_proc; |
1378 | |
1379 | /* Syntactic check complete */ |
1380 | if (serv->sv_stats) |
1381 | serv->sv_stats->rpccnt++; |
1382 | trace_svc_process(rqst: rqstp, name: progp->pg_name); |
1383 | |
1384 | aoffset = xdr_stream_pos(xdr); |
1385 | |
1386 | /* un-reserve some of the out-queue now that we have a |
1387 | * better idea of reply size |
1388 | */ |
1389 | if (procp->pc_xdrressize) |
1390 | svc_reserve_auth(rqstp, space: procp->pc_xdrressize<<2); |
1391 | |
1392 | /* Call the function that processes the request. */ |
1393 | rc = process.dispatch(rqstp); |
1394 | if (procp->pc_release) |
1395 | procp->pc_release(rqstp); |
1396 | xdr_finish_decode(xdr); |
1397 | |
1398 | if (!rc) |
1399 | goto dropit; |
1400 | if (rqstp->rq_auth_stat != rpc_auth_ok) |
1401 | goto err_bad_auth; |
1402 | |
1403 | if (*rqstp->rq_accept_statp != rpc_success) |
1404 | xdr_truncate_encode(xdr, len: aoffset); |
1405 | |
1406 | if (procp->pc_encode == NULL) |
1407 | goto dropit; |
1408 | |
1409 | sendit: |
1410 | if (svc_authorise(rqstp)) |
1411 | goto close_xprt; |
1412 | return 1; /* Caller can now send it */ |
1413 | |
1414 | dropit: |
1415 | svc_authorise(rqstp); /* doesn't hurt to call this twice */ |
1416 | dprintk("svc: svc_process dropit\n" ); |
1417 | return 0; |
1418 | |
1419 | close: |
1420 | svc_authorise(rqstp); |
1421 | close_xprt: |
1422 | if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags)) |
1423 | svc_xprt_close(xprt: rqstp->rq_xprt); |
1424 | dprintk("svc: svc_process close\n" ); |
1425 | return 0; |
1426 | |
1427 | err_short_len: |
1428 | svc_printk(rqstp, fmt: "short len %u, dropping request\n" , |
1429 | rqstp->rq_arg.len); |
1430 | goto close_xprt; |
1431 | |
1432 | err_bad_rpc: |
1433 | if (serv->sv_stats) |
1434 | serv->sv_stats->rpcbadfmt++; |
1435 | xdr_stream_encode_u32(xdr, n: RPC_MSG_DENIED); |
1436 | xdr_stream_encode_u32(xdr, n: RPC_MISMATCH); |
1437 | /* Only RPCv2 supported */ |
1438 | xdr_stream_encode_u32(xdr, RPC_VERSION); |
1439 | xdr_stream_encode_u32(xdr, RPC_VERSION); |
1440 | return 1; /* don't wrap */ |
1441 | |
1442 | err_bad_auth: |
1443 | dprintk("svc: authentication failed (%d)\n" , |
1444 | be32_to_cpu(rqstp->rq_auth_stat)); |
1445 | if (serv->sv_stats) |
1446 | serv->sv_stats->rpcbadauth++; |
1447 | /* Restore write pointer to location of reply status: */ |
1448 | xdr_truncate_encode(xdr, XDR_UNIT * 2); |
1449 | xdr_stream_encode_u32(xdr, n: RPC_MSG_DENIED); |
1450 | xdr_stream_encode_u32(xdr, n: RPC_AUTH_ERROR); |
1451 | xdr_stream_encode_be32(xdr, n: rqstp->rq_auth_stat); |
1452 | goto sendit; |
1453 | |
1454 | err_bad_prog: |
1455 | dprintk("svc: unknown program %d\n" , rqstp->rq_prog); |
1456 | if (serv->sv_stats) |
1457 | serv->sv_stats->rpcbadfmt++; |
1458 | *rqstp->rq_accept_statp = rpc_prog_unavail; |
1459 | goto sendit; |
1460 | |
1461 | err_bad_vers: |
1462 | svc_printk(rqstp, fmt: "unknown version (%d for prog %d, %s)\n" , |
1463 | rqstp->rq_vers, rqstp->rq_prog, progp->pg_name); |
1464 | |
1465 | if (serv->sv_stats) |
1466 | serv->sv_stats->rpcbadfmt++; |
1467 | *rqstp->rq_accept_statp = rpc_prog_mismatch; |
1468 | |
1469 | /* |
1470 | * svc_authenticate() has already added the verifier and |
1471 | * advanced the stream just past rq_accept_statp. |
1472 | */ |
1473 | xdr_stream_encode_u32(xdr, n: process.mismatch.lovers); |
1474 | xdr_stream_encode_u32(xdr, n: process.mismatch.hivers); |
1475 | goto sendit; |
1476 | |
1477 | err_bad_proc: |
1478 | svc_printk(rqstp, fmt: "unknown procedure (%d)\n" , rqstp->rq_proc); |
1479 | |
1480 | if (serv->sv_stats) |
1481 | serv->sv_stats->rpcbadfmt++; |
1482 | *rqstp->rq_accept_statp = rpc_proc_unavail; |
1483 | goto sendit; |
1484 | |
1485 | err_garbage_args: |
1486 | svc_printk(rqstp, fmt: "failed to decode RPC header\n" ); |
1487 | |
1488 | if (serv->sv_stats) |
1489 | serv->sv_stats->rpcbadfmt++; |
1490 | *rqstp->rq_accept_statp = rpc_garbage_args; |
1491 | goto sendit; |
1492 | |
1493 | err_system_err: |
1494 | if (serv->sv_stats) |
1495 | serv->sv_stats->rpcbadfmt++; |
1496 | *rqstp->rq_accept_statp = rpc_system_err; |
1497 | goto sendit; |
1498 | } |
1499 | |
1500 | /** |
1501 | * svc_process - Execute one RPC transaction |
1502 | * @rqstp: RPC transaction context |
1503 | * |
1504 | */ |
1505 | void svc_process(struct svc_rqst *rqstp) |
1506 | { |
1507 | struct kvec *resv = &rqstp->rq_res.head[0]; |
1508 | __be32 *p; |
1509 | |
1510 | #if IS_ENABLED(CONFIG_FAIL_SUNRPC) |
1511 | if (!fail_sunrpc.ignore_server_disconnect && |
1512 | should_fail(attr: &fail_sunrpc.attr, size: 1)) |
1513 | svc_xprt_deferred_close(xprt: rqstp->rq_xprt); |
1514 | #endif |
1515 | |
1516 | /* |
1517 | * Setup response xdr_buf. |
1518 | * Initially it has just one page |
1519 | */ |
1520 | rqstp->rq_next_page = &rqstp->rq_respages[1]; |
1521 | resv->iov_base = page_address(rqstp->rq_respages[0]); |
1522 | resv->iov_len = 0; |
1523 | rqstp->rq_res.pages = rqstp->rq_next_page; |
1524 | rqstp->rq_res.len = 0; |
1525 | rqstp->rq_res.page_base = 0; |
1526 | rqstp->rq_res.page_len = 0; |
1527 | rqstp->rq_res.buflen = PAGE_SIZE; |
1528 | rqstp->rq_res.tail[0].iov_base = NULL; |
1529 | rqstp->rq_res.tail[0].iov_len = 0; |
1530 | |
1531 | svcxdr_init_decode(rqstp); |
1532 | p = xdr_inline_decode(xdr: &rqstp->rq_arg_stream, XDR_UNIT * 2); |
1533 | if (unlikely(!p)) |
1534 | goto out_drop; |
1535 | rqstp->rq_xid = *p++; |
1536 | if (unlikely(*p != rpc_call)) |
1537 | goto out_baddir; |
1538 | |
1539 | if (!svc_process_common(rqstp)) |
1540 | goto out_drop; |
1541 | svc_send(rqstp); |
1542 | return; |
1543 | |
1544 | out_baddir: |
1545 | svc_printk(rqstp, fmt: "bad direction 0x%08x, dropping request\n" , |
1546 | be32_to_cpu(*p)); |
1547 | if (rqstp->rq_server->sv_stats) |
1548 | rqstp->rq_server->sv_stats->rpcbadfmt++; |
1549 | out_drop: |
1550 | svc_drop(rqstp); |
1551 | } |
1552 | |
#if defined(CONFIG_SUNRPC_BACKCHANNEL)
/**
 * svc_process_bc - process a reverse-direction RPC request
 * @req: RPC request to be used for client-side processing
 * @rqstp: server-side execution context
 *
 */
void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
{
	struct rpc_task *task;
	int proc_error;
	struct rpc_timeout timeout;

	/* Build the svc_rqst used by the common processing routine */
	rqstp->rq_xid = req->rq_xid;
	rqstp->rq_prot = req->rq_xprt->prot;
	rqstp->rq_bc_net = req->rq_xprt->xprt_net;

	rqstp->rq_addrlen = sizeof(req->rq_xprt->addr);
	memcpy(&rqstp->rq_addr, &req->rq_xprt->addr, rqstp->rq_addrlen);
	memcpy(&rqstp->rq_arg, &req->rq_rcv_buf, sizeof(rqstp->rq_arg));
	memcpy(&rqstp->rq_res, &req->rq_snd_buf, sizeof(rqstp->rq_res));

	/* Adjust the argument buffer length */
	rqstp->rq_arg.len = req->rq_private_buf.len;
	if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len) {
		/* Whole message fits in the head iovec */
		rqstp->rq_arg.head[0].iov_len = rqstp->rq_arg.len;
		rqstp->rq_arg.page_len = 0;
	} else if (rqstp->rq_arg.len <= rqstp->rq_arg.head[0].iov_len +
			rqstp->rq_arg.page_len)
		rqstp->rq_arg.page_len = rqstp->rq_arg.len -
			rqstp->rq_arg.head[0].iov_len;
	else
		/* Clamp to head + pages; anything beyond is ignored */
		rqstp->rq_arg.len = rqstp->rq_arg.head[0].iov_len +
			rqstp->rq_arg.page_len;

	/* Reset the response buffer */
	rqstp->rq_res.head[0].iov_len = 0;

	/*
	 * Skip the XID and calldir fields because they've already
	 * been processed by the caller.
	 */
	svcxdr_init_decode(rqstp);
	if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2))
		return;

	/* Parse and execute the bc call */
	proc_error = svc_process_common(rqstp);

	atomic_dec(&req->rq_xprt->bc_slot_count);
	if (!proc_error) {
		/* Processing error: drop the request */
		xprt_free_bc_request(req);
		return;
	}
	/* Finally, send the reply synchronously */
	if (rqstp->bc_to_initval > 0) {
		/* Caller supplied explicit backchannel timeout values */
		timeout.to_initval = rqstp->bc_to_initval;
		timeout.to_retries = rqstp->bc_to_retries;
	} else {
		timeout.to_initval = req->rq_xprt->timeout->to_initval;
		timeout.to_retries = req->rq_xprt->timeout->to_retries;
	}
	memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
	task = rpc_run_bc_task(req, &timeout);

	if (IS_ERR(task))
		return;

	WARN_ON_ONCE(atomic_read(&task->tk_count) != 1);
	rpc_put_task(task);
}
#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1627 | |
1628 | /** |
1629 | * svc_max_payload - Return transport-specific limit on the RPC payload |
1630 | * @rqstp: RPC transaction context |
1631 | * |
1632 | * Returns the maximum number of payload bytes the current transport |
1633 | * allows. |
1634 | */ |
1635 | u32 svc_max_payload(const struct svc_rqst *rqstp) |
1636 | { |
1637 | u32 max = rqstp->rq_xprt->xpt_class->xcl_max_payload; |
1638 | |
1639 | if (rqstp->rq_server->sv_max_payload < max) |
1640 | max = rqstp->rq_server->sv_max_payload; |
1641 | return max; |
1642 | } |
1643 | EXPORT_SYMBOL_GPL(svc_max_payload); |
1644 | |
1645 | /** |
1646 | * svc_proc_name - Return RPC procedure name in string form |
1647 | * @rqstp: svc_rqst to operate on |
1648 | * |
1649 | * Return value: |
1650 | * Pointer to a NUL-terminated string |
1651 | */ |
1652 | const char *svc_proc_name(const struct svc_rqst *rqstp) |
1653 | { |
1654 | if (rqstp && rqstp->rq_procinfo) |
1655 | return rqstp->rq_procinfo->pc_name; |
1656 | return "unknown" ; |
1657 | } |
1658 | |
1659 | |
1660 | /** |
1661 | * svc_encode_result_payload - mark a range of bytes as a result payload |
1662 | * @rqstp: svc_rqst to operate on |
1663 | * @offset: payload's byte offset in rqstp->rq_res |
1664 | * @length: size of payload, in bytes |
1665 | * |
1666 | * Returns zero on success, or a negative errno if a permanent |
1667 | * error occurred. |
1668 | */ |
1669 | int svc_encode_result_payload(struct svc_rqst *rqstp, unsigned int offset, |
1670 | unsigned int length) |
1671 | { |
1672 | return rqstp->rq_xprt->xpt_ops->xpo_result_payload(rqstp, offset, |
1673 | length); |
1674 | } |
1675 | EXPORT_SYMBOL_GPL(svc_encode_result_payload); |
1676 | |
1677 | /** |
1678 | * svc_fill_write_vector - Construct data argument for VFS write call |
1679 | * @rqstp: svc_rqst to operate on |
1680 | * @payload: xdr_buf containing only the write data payload |
1681 | * |
1682 | * Fills in rqstp::rq_vec, and returns the number of elements. |
1683 | */ |
1684 | unsigned int svc_fill_write_vector(struct svc_rqst *rqstp, |
1685 | struct xdr_buf *payload) |
1686 | { |
1687 | struct page **pages = payload->pages; |
1688 | struct kvec *first = payload->head; |
1689 | struct kvec *vec = rqstp->rq_vec; |
1690 | size_t total = payload->len; |
1691 | unsigned int i; |
1692 | |
1693 | /* Some types of transport can present the write payload |
1694 | * entirely in rq_arg.pages. In this case, @first is empty. |
1695 | */ |
1696 | i = 0; |
1697 | if (first->iov_len) { |
1698 | vec[i].iov_base = first->iov_base; |
1699 | vec[i].iov_len = min_t(size_t, total, first->iov_len); |
1700 | total -= vec[i].iov_len; |
1701 | ++i; |
1702 | } |
1703 | |
1704 | while (total) { |
1705 | vec[i].iov_base = page_address(*pages); |
1706 | vec[i].iov_len = min_t(size_t, total, PAGE_SIZE); |
1707 | total -= vec[i].iov_len; |
1708 | ++i; |
1709 | ++pages; |
1710 | } |
1711 | |
1712 | WARN_ON_ONCE(i > ARRAY_SIZE(rqstp->rq_vec)); |
1713 | return i; |
1714 | } |
1715 | EXPORT_SYMBOL_GPL(svc_fill_write_vector); |
1716 | |
1717 | /** |
1718 | * svc_fill_symlink_pathname - Construct pathname argument for VFS symlink call |
1719 | * @rqstp: svc_rqst to operate on |
1720 | * @first: buffer containing first section of pathname |
1721 | * @p: buffer containing remaining section of pathname |
1722 | * @total: total length of the pathname argument |
1723 | * |
1724 | * The VFS symlink API demands a NUL-terminated pathname in mapped memory. |
1725 | * Returns pointer to a NUL-terminated string, or an ERR_PTR. Caller must free |
1726 | * the returned string. |
1727 | */ |
1728 | char *svc_fill_symlink_pathname(struct svc_rqst *rqstp, struct kvec *first, |
1729 | void *p, size_t total) |
1730 | { |
1731 | size_t len, remaining; |
1732 | char *result, *dst; |
1733 | |
1734 | result = kmalloc(size: total + 1, GFP_KERNEL); |
1735 | if (!result) |
1736 | return ERR_PTR(error: -ESERVERFAULT); |
1737 | |
1738 | dst = result; |
1739 | remaining = total; |
1740 | |
1741 | len = min_t(size_t, total, first->iov_len); |
1742 | if (len) { |
1743 | memcpy(dst, first->iov_base, len); |
1744 | dst += len; |
1745 | remaining -= len; |
1746 | } |
1747 | |
1748 | if (remaining) { |
1749 | len = min_t(size_t, remaining, PAGE_SIZE); |
1750 | memcpy(dst, p, len); |
1751 | dst += len; |
1752 | } |
1753 | |
1754 | *dst = '\0'; |
1755 | |
1756 | /* Sanity check: Linux doesn't allow the pathname argument to |
1757 | * contain a NUL byte. |
1758 | */ |
1759 | if (strlen(result) != total) { |
1760 | kfree(objp: result); |
1761 | return ERR_PTR(error: -EINVAL); |
1762 | } |
1763 | return result; |
1764 | } |
1765 | EXPORT_SYMBOL_GPL(svc_fill_symlink_pathname); |
1766 | |