// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level write support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "internal.h"

/**
 * netfs_create_write_request - Create a write operation.
 * @wreq: The write request that this operation will store data from
 * @dest: The destination type
 * @start: Start of the region this write will modify
 * @len: Length of the modification
 * @worker: The worker function to handle the write(s)
 *
 * Allocate a write operation, set it up and add it to the list on a write
 * request.
 */
struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
						       enum netfs_io_source dest,
						       loff_t start, size_t len,
						       work_func_t worker)
{
	struct netfs_io_subrequest *subreq;

	subreq = netfs_alloc_subrequest(wreq);
	if (subreq) {
		INIT_WORK(&subreq->work, worker);
		subreq->source = dest;
		subreq->start = start;
		subreq->len = len;
		subreq->debug_index = wreq->subreq_counter++;

		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			netfs_stat(&netfs_n_wh_upload);
			break;
		case NETFS_WRITE_TO_CACHE:
			netfs_stat(&netfs_n_wh_write);
			break;
		default:
			BUG();
		}

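		/* Give the subrequest a window onto the request's buffer:
		 * the same iterator, advanced to this subrequest's start and
		 * truncated to its length.
		 */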
		subreq->io_iter = wreq->io_iter;
		iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
		iov_iter_truncate(&subreq->io_iter, subreq->len);

		trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
				     refcount_read(&subreq->ref),
				     netfs_sreq_trace_new);
		atomic_inc(&wreq->nr_outstanding);
		list_add_tail(&subreq->rreq_link, &wreq->subrequests);
		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	}

	return subreq;
}
EXPORT_SYMBOL(netfs_create_write_request);

/*
 * Process a completed write request once all the component operations have
 * been completed.
 */
static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
{
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	size_t transferred = 0;

	_enter("R=%x[]", wreq->debug_id);

	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);

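	/* Tally the longest contiguous run of data transferred, stopping at
	 * the first error or short write.
	 */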
	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (subreq->error || subreq->transferred == 0)
			break;
		transferred += subreq->transferred;
		if (subreq->transferred < subreq->len)
			break;
	}
	wreq->transferred = transferred;

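	/* Collate the errors: a failed upload fails the whole request,
	 * whereas a failed cache write just invalidates the cached copy.
	 */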
	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (!subreq->error)
			continue;
		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			/* Depending on the type of failure, this may prevent
			 * writeback completion unless we're in disconnected
			 * mode.
			 */
			if (!wreq->error)
				wreq->error = subreq->error;
			break;

		case NETFS_WRITE_TO_CACHE:
			/* Failure doesn't prevent writeback completion unless
			 * we're in disconnected mode.
			 */
			if (subreq->error != -ENOBUFS)
				ctx->ops->invalidate_cache(wreq);
			break;

		default:
			WARN_ON_ONCE(1);
			if (!wreq->error)
				wreq->error = -EIO;
			return;
		}
	}

	wreq->cleanup(wreq);

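	/* For a DIO write, evict any pagecache pages overlapping the region
	 * just written so that later buffered reads don't see stale data.
	 */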
	if (wreq->origin == NETFS_DIO_WRITE &&
	    wreq->mapping->nrpages) {
		pgoff_t first = wreq->start >> PAGE_SHIFT;
		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(wreq->mapping, first, last);
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_end(wreq->inode);

	_debug("finished");
	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (wreq->iocb) {
		wreq->iocb->ki_pos += transferred;
		if (wreq->iocb->ki_complete)
			wreq->iocb->ki_complete(
				wreq->iocb, wreq->error ? wreq->error : transferred);
	}

	netfs_clear_subrequests(wreq, was_async);
	netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
}

/*
 * Handle the termination of a write subrequest, whether it was an upload to
 * the server or a write to the cache, recording the outcome and terminating
 * the overall request if this was the last outstanding operation.
 */
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = _op;
	struct netfs_io_request *wreq = subreq->rreq;
	unsigned int u;

	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);

	switch (subreq->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_done);
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_done);
		break;
	case NETFS_INVALID_WRITE:
		break;
	default:
		BUG();
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(wreq, subreq, transferred_or_error,
				    netfs_fail_write);
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq excess write: R%x[%x] %zd > %zu - %zu",
		 wreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;

	if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
		pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
			wreq->debug_id, subreq->debug_index,
			iov_iter_count(&subreq->io_iter), subreq->len,
			subreq->transferred, subreq->io_iter.iter_type);

	if (subreq->transferred < subreq->len)
		goto incomplete;

	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
	u = atomic_dec_return(&wreq->nr_outstanding);
	if (u == 0)
		netfs_write_terminated(wreq, was_async);
	else if (u == 1)
		wake_up_var(&wreq->nr_outstanding);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
	return;

incomplete:
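	/* A second consecutive pass with no progress is abandoned with
	 * -ENODATA rather than being retried indefinitely.
	 */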
	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
	goto out;

failed:
	switch (subreq->source) {
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
		break;
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_failed);
		set_bit(NETFS_RREQ_FAILED, &wreq->flags);
		wreq->error = subreq->error;
		break;
	default:
		break;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);

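/*
 * Write the data in a subrequest's iterator to the cache, with completion
 * reported asynchronously to netfs_write_subrequest_terminated().
 */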
static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);

	cres->ops->write(cres, subreq->start, &subreq->io_iter,
			 netfs_write_subrequest_terminated, subreq);
}

static void netfs_write_to_cache_op_worker(struct work_struct *work)
{
	struct netfs_io_subrequest *subreq =
		container_of(work, struct netfs_io_subrequest, work);

	netfs_write_to_cache_op(subreq);
}

/**
 * netfs_queue_write_request - Queue a write request for attention
 * @subreq: The write request to be queued
 *
 * Queue the specified write request for processing by a worker thread. We
 * pass the caller's ref on the request to the worker thread.
 */
void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
{
	if (!queue_work(system_unbound_wq, &subreq->work))
		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
}
EXPORT_SYMBOL(netfs_queue_write_request);

/*
 * Set up an op for writing to the cache.
 */
static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
{
	struct netfs_cache_resources *cres = &wreq->cache_resources;
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	struct fscache_cookie *cookie = netfs_i_cookie(ctx);
	loff_t start = wreq->start;
	size_t len = wreq->len;
	int ret;

	if (!fscache_cookie_enabled(cookie)) {
		clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
		return;
	}

	_debug("write to cache");
	ret = fscache_begin_write_operation(cres, cookie);
	if (ret < 0)
		return;

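	/* Let the cache shape the write: it may adjust the start and length,
	 * e.g. to match its block size, or decline the write entirely.
	 */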
	ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len,
				       i_size_read(wreq->inode), true);
	if (ret < 0)
		return;

	subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
					    netfs_write_to_cache_op_worker);
	if (!subreq)
		return;

	netfs_write_to_cache_op(subreq);
}

/*
 * Begin the process of writing out a chunk of data.
 *
 * We are given a write request that holds a series of dirty regions and
 * (partially) covers a sequence of folios, all of which are present. The
 * pages must have been marked as writeback as appropriate.
 *
 * We need to perform the following steps:
 *
 * (1) If encrypting, create an output buffer and encrypt each block of the
 *     data into it, otherwise the output buffer will point to the original
 *     folios.
 *
 * (2) If the data is to be cached, set up a write op for the entire output
 *     buffer to the cache, if the cache wants to accept it.
 *
 * (3) If the data is to be uploaded (ie. not merely cached):
 *
 *     (a) If the data is to be compressed, create a compression buffer and
 *         compress the data into it.
 *
 *     (b) For each destination we want to upload to, set up write ops to write
 *         to that destination. We may need multiple writes if the data is not
 *         contiguous or the span exceeds wsize for a server.
 */
int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
		      enum netfs_write_trace what)
{
	struct netfs_inode *ctx = netfs_inode(wreq->inode);

	_enter("R=%x %llx-%llx f=%lx",
	       wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
	       wreq->flags);

	trace_netfs_write(wreq, what);
	if (wreq->len == 0 || wreq->iter.count == 0) {
		pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
		return -EIO;
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_begin(wreq->inode);

	wreq->io_iter = wreq->iter;

	/* ->outstanding > 0 carries a ref */
	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&wreq->nr_outstanding, 1);

	/* Start the encryption/compression going. We can do that in the
	 * background whilst we generate a list of write ops that we want to
	 * perform.
	 */
	// TODO: Encrypt or compress the region as appropriate

	/* We need to write all of the region to the cache */
	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
		netfs_set_up_write_to_cache(wreq);

	/* However, we don't necessarily write all of the region to the
	 * server; caching of reads is also managed this way.
	 */
	if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);

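	/* Drop the initial count on nr_outstanding that was set above; if it
	 * hits zero, all the subrequests have already completed.
	 */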
	if (atomic_dec_and_test(&wreq->nr_outstanding))
		netfs_write_terminated(wreq, false);

	if (!may_wait)
		return -EIOCBQUEUED;

	wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
		    TASK_UNINTERRUPTIBLE);
	return wreq->error;
}

/*
 * Begin a write operation for writing through the pagecache.
 */
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
{
	struct netfs_io_request *wreq;
	struct file *file = iocb->ki_filp;

	wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
				   NETFS_WRITETHROUGH);
	if (IS_ERR(wreq))
		return wreq;

	trace_netfs_write(wreq, netfs_write_trace_writethrough);

	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
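	/* Set up an iterator over the pagecache with a count of zero; the
	 * count is grown by netfs_advance_writethrough() as data is copied
	 * in.
	 */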
	iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
	wreq->io_iter = wreq->iter;

	/* ->outstanding > 0 carries a ref */
	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&wreq->nr_outstanding, 1);
	return wreq;
}

static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
{
	struct netfs_inode *ictx = netfs_inode(wreq->inode);
	unsigned long long start;
	size_t len;

	if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		return;

	start = wreq->start + wreq->submitted;
	len = wreq->iter.count - wreq->submitted;
	if (!final) {
		len /= wreq->wsize; /* Round down to a whole number of maximum-size writes */
		len *= wreq->wsize;
	}

	ictx->ops->create_write_requests(wreq, start, len);
	wreq->submitted += len;
}

/*
 * Advance the state of the write operation used when writing through the
 * pagecache. Data has been copied into the pagecache that we need to append
 * to the request. If we've added more than wsize then we need to create a new
 * subrequest.
 */
int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
{
	_enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
	       wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);

	wreq->iter.count += copied;
	wreq->io_iter.count += copied;
	if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize)
		netfs_submit_writethrough(wreq, false);

	return wreq->error;
}

/*
 * End a write operation used when writing through the pagecache.
 */
int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb)
{
	int ret = -EIOCBQUEUED;

	_enter("ic=%zu sb=%zu ws=%u",
	       wreq->iter.count, wreq->submitted, wreq->wsize);

	if (wreq->submitted < wreq->io_iter.count)
		netfs_submit_writethrough(wreq, true);

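	/* Drop the initial count on nr_outstanding; if it hits zero, all the
	 * subrequests have already completed.
	 */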
	if (atomic_dec_and_test(&wreq->nr_outstanding))
		netfs_write_terminated(wreq, false);

	if (is_sync_kiocb(iocb)) {
		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
			    TASK_UNINTERRUPTIBLE);
		ret = wreq->error;
	}

	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	return ret;
}