// SPDX-License-Identifier: GPL-2.0
/* XDP user-space packet buffer
 * Copyright(c) 2018 Intel Corporation.
 */

#include <linux/init.h>
#include <linux/sched/mm.h>
#include <linux/sched/signal.h>
#include <linux/sched/task.h>
#include <linux/uaccess.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/rtnetlink.h>
#include <linux/idr.h>
#include <linux/vmalloc.h>

#include "xdp_umem.h"
#include "xsk_queue.h"

static DEFINE_IDA(umem_ida);

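/* Drop the long-term pins on the user pages backing the umem. The pages are
 * marked dirty on release since user space may have written frame data into
 * them while they were pinned.
 */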
static void xdp_umem_unpin_pages(struct xdp_umem *umem)
{
	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);

	kvfree(umem->pgs);
	umem->pgs = NULL;
}

static void xdp_umem_unaccount_pages(struct xdp_umem *umem)
{
	if (umem->user) {
		atomic_long_sub(umem->npgs, &umem->user->locked_vm);
		free_uid(umem->user);
	}
}

static void xdp_umem_addr_unmap(struct xdp_umem *umem)
{
	vunmap(umem->addrs);
	umem->addrs = NULL;
}

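/* Map the pinned pages into one contiguous kernel virtual address range so
 * that umem chunks can be reached with plain pointer arithmetic from
 * umem->addrs.
 */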
static int xdp_umem_addr_map(struct xdp_umem *umem, struct page **pages,
			     u32 nr_pages)
{
	umem->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
	if (!umem->addrs)
		return -ENOMEM;
	return 0;
}

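/* Final teardown: give back the id, undo the vmap and the page pins, drop
 * the memlock accounting and free the umem itself.
 */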
static void xdp_umem_release(struct xdp_umem *umem)
{
	umem->zc = false;
	ida_free(&umem_ida, umem->id);

	xdp_umem_addr_unmap(umem);
	xdp_umem_unpin_pages(umem);

	xdp_umem_unaccount_pages(umem);
	kfree(umem);
}

static void xdp_umem_release_deferred(struct work_struct *work)
{
	struct xdp_umem *umem = container_of(work, struct xdp_umem, work);

	xdp_umem_release(umem);
}

void xdp_get_umem(struct xdp_umem *umem)
{
	refcount_inc(&umem->users);
}

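/* Drop a reference to the umem. When the last reference goes away the umem
 * is released, either inline or from a workqueue when the caller asks for
 * deferred cleanup.
 */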
void xdp_put_umem(struct xdp_umem *umem, bool defer_cleanup)
{
	if (!umem)
		return;

	if (refcount_dec_and_test(&umem->users)) {
		if (defer_cleanup) {
			INIT_WORK(&umem->work, xdp_umem_release_deferred);
			schedule_work(&umem->work);
		} else {
			xdp_umem_release(umem);
		}
	}
}

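/* Pin the user pages for the lifetime of the umem. FOLL_LONGTERM is used
 * because the pins persist until the umem is released; a partial pin is
 * treated as a failure and unwound.
 */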
static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
{
	unsigned int gup_flags = FOLL_WRITE;
	long npgs;
	int err;

	umem->pgs = kvcalloc(umem->npgs, sizeof(*umem->pgs), GFP_KERNEL | __GFP_NOWARN);
	if (!umem->pgs)
		return -ENOMEM;

	mmap_read_lock(current->mm);
	npgs = pin_user_pages(address, umem->npgs,
			      gup_flags | FOLL_LONGTERM, &umem->pgs[0]);
	mmap_read_unlock(current->mm);

	if (npgs != umem->npgs) {
		if (npgs >= 0) {
			umem->npgs = npgs;
			err = -ENOMEM;
			goto out_pin;
		}
		err = npgs;
		goto out_pgs;
	}
	return 0;

out_pin:
	xdp_umem_unpin_pages(umem);
out_pgs:
	kvfree(umem->pgs);
	umem->pgs = NULL;
	return err;
}

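/* Charge the pinned pages against the owner's RLIMIT_MEMLOCK. locked_vm is
 * updated with a cmpxchg loop rather than a lock; CAP_IPC_LOCK bypasses the
 * limit entirely.
 */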
static int xdp_umem_account_pages(struct xdp_umem *umem)
{
	unsigned long lock_limit, new_npgs, old_npgs;

	if (capable(CAP_IPC_LOCK))
		return 0;

	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	umem->user = get_uid(current_user());

	do {
		old_npgs = atomic_long_read(&umem->user->locked_vm);
		new_npgs = old_npgs + umem->npgs;
		if (new_npgs > lock_limit) {
			free_uid(umem->user);
			umem->user = NULL;
			return -ENOBUFS;
		}
	} while (atomic_long_cmpxchg(&umem->user->locked_vm, old_npgs,
				     new_npgs) != old_npgs);
	return 0;
}

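/* Validate a registration request and initialize the umem: check chunk size,
 * alignment, headroom and flags, derive the number of chunks and pages, then
 * account, pin and vmap the user memory.
 */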
static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
{
	bool unaligned_chunks = mr->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG;
	u32 chunk_size = mr->chunk_size, headroom = mr->headroom;
	u64 addr = mr->addr, size = mr->len;
	u32 chunks_rem, npgs_rem;
	u64 chunks, npgs;
	int err;

	if (chunk_size < XDP_UMEM_MIN_CHUNK_SIZE || chunk_size > PAGE_SIZE) {
		/* Strictly speaking we could support this, if:
		 * - huge pages, or
		 * - using an IOMMU, or
		 * - making sure the memory area is consecutive
		 * but for now, we simply say "computer says no".
		 */
		return -EINVAL;
	}

	if (mr->flags & ~XDP_UMEM_UNALIGNED_CHUNK_FLAG)
		return -EINVAL;

	if (!unaligned_chunks && !is_power_of_2(chunk_size))
		return -EINVAL;

	if (!PAGE_ALIGNED(addr)) {
		/* Memory area has to be page size aligned. For
		 * simplicity, this might change.
		 */
		return -EINVAL;
	}

	if ((addr + size) < addr)
		return -EINVAL;

	npgs = div_u64_rem(size, PAGE_SIZE, &npgs_rem);
	if (npgs_rem)
		npgs++;
	if (npgs > U32_MAX)
		return -EINVAL;

	chunks = div_u64_rem(size, chunk_size, &chunks_rem);
	if (!chunks || chunks > U32_MAX)
		return -EINVAL;

	if (!unaligned_chunks && chunks_rem)
		return -EINVAL;

	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
		return -EINVAL;

	umem->size = size;
	umem->headroom = headroom;
	umem->chunk_size = chunk_size;
	umem->chunks = chunks;
	umem->npgs = npgs;
	umem->pgs = NULL;
	umem->user = NULL;
	umem->flags = mr->flags;

	INIT_LIST_HEAD(&umem->xsk_dma_list);
	refcount_set(&umem->users, 1);

	err = xdp_umem_account_pages(umem);
	if (err)
		return err;

	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
	if (err)
		goto out_account;

	err = xdp_umem_addr_map(umem, umem->pgs, umem->npgs);
	if (err)
		goto out_unpin;

	return 0;

out_unpin:
	xdp_umem_unpin_pages(umem);
out_account:
	xdp_umem_unaccount_pages(umem);
	return err;
}

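/* Allocate a umem, assign it an id and register the user memory described
 * by @mr. Returns the new umem on success or an ERR_PTR() on failure.
 *
 * Rough usage sketch (illustrative only; the values below are made up, and
 * in practice this is driven by the XDP_UMEM_REG setsockopt path). Assume
 * buf points at a page-aligned user buffer of 16 * 4096 bytes:
 *
 *	struct xdp_umem_reg mr = {
 *		.addr = (__u64)(unsigned long)buf,
 *		.len = 16 * 4096,
 *		.chunk_size = 4096,
 *		.headroom = 0,
 *		.flags = 0,
 *	};
 *	struct xdp_umem *umem = xdp_umem_create(&mr);
 *
 *	if (IS_ERR(umem))
 *		return PTR_ERR(umem);
 */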
struct xdp_umem *xdp_umem_create(struct xdp_umem_reg *mr)
{
	struct xdp_umem *umem;
	int err;

	umem = kzalloc(sizeof(*umem), GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	err = ida_alloc(&umem_ida, GFP_KERNEL);
	if (err < 0) {
		kfree(umem);
		return ERR_PTR(err);
	}
	umem->id = err;

	err = xdp_umem_reg(umem, mr);
	if (err) {
		ida_free(&umem_ida, umem->id);
		kfree(umem);
		return ERR_PTR(err);
	}

	return umem;
}