// SPDX-License-Identifier: GPL-2.0-only
/* Network filesystem high-level write support.
 *
 * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
 * Written by David Howells (dhowells@redhat.com)
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/slab.h>
#include <linux/writeback.h>
#include <linux/pagevec.h>
#include "internal.h"

/**
 * netfs_create_write_request - Create a write operation.
 * @wreq: The write request this is storing from.
 * @dest: The destination type
 * @start: Start of the region this write will modify
 * @len: Length of the modification
 * @worker: The worker function to handle the write(s)
 *
 * Allocate a write operation, set it up and add it to the list on a write
 * request.
 */
struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
						       enum netfs_io_source dest,
						       loff_t start, size_t len,
						       work_func_t worker)
{
	struct netfs_io_subrequest *subreq;

	subreq = netfs_alloc_subrequest(wreq);
	if (subreq) {
		INIT_WORK(&subreq->work, worker);
		subreq->source = dest;
		subreq->start = start;
		subreq->len = len;
		subreq->debug_index = wreq->subreq_counter++;

		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			netfs_stat(&netfs_n_wh_upload);
			break;
		case NETFS_WRITE_TO_CACHE:
			netfs_stat(&netfs_n_wh_write);
			break;
		default:
			BUG();
		}

		subreq->io_iter = wreq->io_iter;
		iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
		iov_iter_truncate(&subreq->io_iter, subreq->len);

		trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
				     refcount_read(&subreq->ref),
				     netfs_sreq_trace_new);
		atomic_inc(&wreq->nr_outstanding);
		list_add_tail(&subreq->rreq_link, &wreq->subrequests);
		trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
	}

	return subreq;
}
EXPORT_SYMBOL(netfs_create_write_request);
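
/*
 * Illustrative sketch (not part of this file): a filesystem's
 * ->create_write_requests() hook might pair this with
 * netfs_queue_write_request() (defined below) roughly as follows.  The
 * myfs_* names are hypothetical stand-ins; a real implementation would point
 * @worker at the routine that actually performs the upload.
 *
 *	static void myfs_create_write_requests(struct netfs_io_request *wreq,
 *					       loff_t start, size_t len)
 *	{
 *		struct netfs_io_subrequest *subreq;
 *
 *		subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
 *						    start, len,
 *						    myfs_upload_worker);
 *		if (subreq)
 *			netfs_queue_write_request(subreq);
 *	}
 */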

/*
 * Process a completed write request once all the component operations have
 * been completed.
 */
static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
{
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	size_t transferred = 0;

	_enter("R=%x[]", wreq->debug_id);

	trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);

	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (subreq->error || subreq->transferred == 0)
			break;
		transferred += subreq->transferred;
		if (subreq->transferred < subreq->len)
			break;
	}
	wreq->transferred = transferred;

	list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
		if (!subreq->error)
			continue;
		switch (subreq->source) {
		case NETFS_UPLOAD_TO_SERVER:
			/* Depending on the type of failure, this may prevent
			 * writeback completion unless we're in disconnected
			 * mode.
			 */
			if (!wreq->error)
				wreq->error = subreq->error;
			break;

		case NETFS_WRITE_TO_CACHE:
			/* Failure doesn't prevent writeback completion unless
			 * we're in disconnected mode.
			 */
			if (subreq->error != -ENOBUFS)
				ctx->ops->invalidate_cache(wreq);
			break;

		default:
			WARN_ON_ONCE(1);
			if (!wreq->error)
				wreq->error = -EIO;
			return;
		}
	}

	wreq->cleanup(wreq);

	if (wreq->origin == NETFS_DIO_WRITE &&
	    wreq->mapping->nrpages) {
		pgoff_t first = wreq->start >> PAGE_SHIFT;
		pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
		invalidate_inode_pages2_range(wreq->mapping, first, last);
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_end(wreq->inode);

	_debug("finished");
	trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
	clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
	wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);

	if (wreq->iocb) {
		wreq->iocb->ki_pos += transferred;
		if (wreq->iocb->ki_complete)
			wreq->iocb->ki_complete(
				wreq->iocb, wreq->error ? wreq->error : transferred);
	}

	netfs_clear_subrequests(wreq, was_async);
	netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
}

/**
 * netfs_write_subrequest_terminated - Note the termination of a write operation
 * @_op: The I/O subrequest that has terminated
 * @transferred_or_error: The amount of data transferred or a negative error code
 * @was_async: True if the termination was asynchronous (e.g. from a completion
 *	       handler that cannot sleep)
 *
 * Tell the library that a contributory write I/O operation, to the server or
 * to the cache, has terminated, one way or another, and that it should record
 * the result on the subrequest and, once all outstanding operations have
 * completed, finalise the write request.
 */
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
				       bool was_async)
{
	struct netfs_io_subrequest *subreq = _op;
	struct netfs_io_request *wreq = subreq->rreq;
	unsigned int u;

	_enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);

	switch (subreq->source) {
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_done);
		break;
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_done);
		break;
	case NETFS_INVALID_WRITE:
		break;
	default:
		BUG();
	}

	if (IS_ERR_VALUE(transferred_or_error)) {
		subreq->error = transferred_or_error;
		trace_netfs_failure(wreq, subreq, transferred_or_error,
				    netfs_fail_write);
		goto failed;
	}

	if (WARN(transferred_or_error > subreq->len - subreq->transferred,
		 "Subreq excess write: R%x[%x] %zd > %zu - %zu",
		 wreq->debug_id, subreq->debug_index,
		 transferred_or_error, subreq->len, subreq->transferred))
		transferred_or_error = subreq->len - subreq->transferred;

	subreq->error = 0;
	subreq->transferred += transferred_or_error;

	if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
		pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
			wreq->debug_id, subreq->debug_index,
			iov_iter_count(&subreq->io_iter), subreq->len,
			subreq->transferred, subreq->io_iter.iter_type);

	if (subreq->transferred < subreq->len)
		goto incomplete;

	__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
out:
	trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);

	/* If we decrement nr_outstanding to 0, the ref belongs to us. */
	u = atomic_dec_return(&wreq->nr_outstanding);
	if (u == 0)
		netfs_write_terminated(wreq, was_async);
	else if (u == 1)
		wake_up_var(&wreq->nr_outstanding);

	netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
	return;

incomplete:
	if (transferred_or_error == 0) {
		if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
			subreq->error = -ENODATA;
			goto failed;
		}
	} else {
		__clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
	}

	__set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
	set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
	goto out;

failed:
	switch (subreq->source) {
	case NETFS_WRITE_TO_CACHE:
		netfs_stat(&netfs_n_wh_write_failed);
		set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
		break;
	case NETFS_UPLOAD_TO_SERVER:
		netfs_stat(&netfs_n_wh_upload_failed);
		set_bit(NETFS_RREQ_FAILED, &wreq->flags);
		wreq->error = subreq->error;
		break;
	default:
		break;
	}
	goto out;
}
EXPORT_SYMBOL(netfs_write_subrequest_terminated);
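
/*
 * Illustrative sketch (not part of this file): the worker a filesystem passed
 * to netfs_create_write_request() would report its outcome through the
 * function above, supplying either the byte count written or a negative error.
 * myfs_upload_worker() and myfs_do_upload() are hypothetical stand-ins for
 * the filesystem's actual network write path.
 *
 *	static void myfs_upload_worker(struct work_struct *work)
 *	{
 *		struct netfs_io_subrequest *subreq =
 *			container_of(work, struct netfs_io_subrequest, work);
 *		ssize_t ret;
 *
 *		ret = myfs_do_upload(subreq);
 *		netfs_write_subrequest_terminated(subreq,
 *						  ret < 0 ? ret : subreq->len,
 *						  false);
 *	}
 */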
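
/*
 * Write the data in a subrequest to the cache.  The cache backend will call
 * netfs_write_subrequest_terminated() when the write completes.
 */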
static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
{
	struct netfs_io_request *wreq = subreq->rreq;
	struct netfs_cache_resources *cres = &wreq->cache_resources;

	trace_netfs_sreq(subreq, netfs_sreq_trace_submit);

	cres->ops->write(cres, subreq->start, &subreq->io_iter,
			 netfs_write_subrequest_terminated, subreq);
}

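/*
 * Work item handler that writes a subrequest's data to the cache.
 */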
static void netfs_write_to_cache_op_worker(struct work_struct *work)
{
	struct netfs_io_subrequest *subreq =
		container_of(work, struct netfs_io_subrequest, work);

	netfs_write_to_cache_op(subreq);
}

/**
 * netfs_queue_write_request - Queue a write request for attention
 * @subreq: The write request to be queued
 *
 * Queue the specified write request for processing by a worker thread. We
 * pass the caller's ref on the request to the worker thread.
 */
void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
{
	if (!queue_work(system_unbound_wq, &subreq->work))
		netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
}
EXPORT_SYMBOL(netfs_queue_write_request);

/*
 * Set up an op for writing to the cache.
 */
static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
{
	struct netfs_cache_resources *cres = &wreq->cache_resources;
	struct netfs_io_subrequest *subreq;
	struct netfs_inode *ctx = netfs_inode(wreq->inode);
	struct fscache_cookie *cookie = netfs_i_cookie(ctx);
	loff_t start = wreq->start;
	size_t len = wreq->len;
	int ret;

	if (!fscache_cookie_enabled(cookie)) {
		clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
		return;
	}

	_debug("write to cache");
	ret = fscache_begin_write_operation(cres, cookie);
	if (ret < 0)
		return;

	ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len,
				       i_size_read(wreq->inode), true);
	if (ret < 0)
		return;

	subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
					    netfs_write_to_cache_op_worker);
	if (!subreq)
		return;

	netfs_write_to_cache_op(subreq);
}

/*
 * Begin the process of writing out a chunk of data.
 *
 * We are given a write request that holds a series of dirty regions and
 * (partially) covers a sequence of folios, all of which are present. The
 * pages must have been marked as writeback as appropriate.
 *
 * We need to perform the following steps:
 *
 * (1) If encrypting, create an output buffer and encrypt each block of the
 *     data into it, otherwise the output buffer will point to the original
 *     folios.
 *
 * (2) If the data is to be cached, set up a write op for the entire output
 *     buffer to the cache, if the cache wants to accept it.
 *
 * (3) If the data is to be uploaded (ie. not merely cached):
 *
 *     (a) If the data is to be compressed, create a compression buffer and
 *         compress the data into it.
 *
 *     (b) For each destination we want to upload to, set up write ops to write
 *         to that destination. We may need multiple writes if the data is not
 *         contiguous or the span exceeds wsize for a server.
 */
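/*
 * Note on waiting: if @may_wait is false, this returns -EIOCBQUEUED once the
 * write ops have been generated and the caller is expected to wait for
 * completion itself; otherwise we wait here for NETFS_RREQ_IN_PROGRESS to be
 * cleared by netfs_write_terminated() and return the final error.
 */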
int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
		      enum netfs_write_trace what)
{
	struct netfs_inode *ctx = netfs_inode(wreq->inode);

	_enter("R=%x %llx-%llx f=%lx",
	       wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
	       wreq->flags);

	trace_netfs_write(wreq, what);
	if (wreq->len == 0 || wreq->iter.count == 0) {
		pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
		return -EIO;
	}

	if (wreq->origin == NETFS_DIO_WRITE)
		inode_dio_begin(wreq->inode);

	wreq->io_iter = wreq->iter;

	/* ->outstanding > 0 carries a ref */
	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&wreq->nr_outstanding, 1);

	/* Start the encryption/compression going. We can do that in the
	 * background whilst we generate a list of write ops that we want to
	 * perform.
	 */
	// TODO: Encrypt or compress the region as appropriate

	/* We need to write all of the region to the cache */
	if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
		netfs_set_up_write_to_cache(wreq);

	/* However, we don't necessarily write all of the region to the server.
	 * Caching of reads is being managed this way also.
	 */
	if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);

	if (atomic_dec_and_test(&wreq->nr_outstanding))
		netfs_write_terminated(wreq, false);

	if (!may_wait)
		return -EIOCBQUEUED;

	wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
		    TASK_UNINTERRUPTIBLE);
	return wreq->error;
}

/*
 * Begin a write operation for writing through the pagecache.
 */
struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
{
	struct netfs_io_request *wreq;
	struct file *file = iocb->ki_filp;

	wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
				   NETFS_WRITETHROUGH);
	if (IS_ERR(wreq))
		return wreq;

	trace_netfs_write(wreq, netfs_write_trace_writethrough);

	__set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
	iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
	wreq->io_iter = wreq->iter;

	/* ->outstanding > 0 carries a ref */
	netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
	atomic_set(&wreq->nr_outstanding, 1);
	return wreq;
}

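/*
 * Push the part of a writethrough request that hasn't yet been submitted to
 * the server.  Unless this is the final submission, the length is rounded
 * down to a whole number of wsize-sized chunks.
 */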
static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
{
	struct netfs_inode *ictx = netfs_inode(wreq->inode);
	unsigned long long start;
	size_t len;

	if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
		return;

	start = wreq->start + wreq->submitted;
	len = wreq->iter.count - wreq->submitted;
	if (!final) {
		len /= wreq->wsize; /* Round to number of maximum packets */
		len *= wreq->wsize;
	}

	ictx->ops->create_write_requests(wreq, start, len);
	wreq->submitted += len;
}

/*
 * Advance the state of the write operation used when writing through the
 * pagecache. Data has been copied into the pagecache that we need to append
 * to the request. If we've added more than wsize then we need to create a new
 * subrequest.
 */
int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
{
	_enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
	       wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);

	wreq->iter.count += copied;
	wreq->io_iter.count += copied;
	if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize)
		netfs_submit_writethrough(wreq, false);

	return wreq->error;
}

/*
 * End a write operation used when writing through the pagecache.
 */
int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb)
{
	int ret = -EIOCBQUEUED;

	_enter("ic=%zu sb=%zu ws=%u",
	       wreq->iter.count, wreq->submitted, wreq->wsize);

	if (wreq->submitted < wreq->io_iter.count)
		netfs_submit_writethrough(wreq, true);

	if (atomic_dec_and_test(&wreq->nr_outstanding))
		netfs_write_terminated(wreq, false);

	if (is_sync_kiocb(iocb)) {
		wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
			    TASK_UNINTERRUPTIBLE);
		ret = wreq->error;
	}

	netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
	return ret;
}
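
/*
 * Illustrative sketch (not part of this file) of how the three writethrough
 * helpers above are intended to fit together in a buffered write loop; the
 * pagecache copy step is elided and the error handling is simplified:
 *
 *	wreq = netfs_begin_writethrough(iocb, iov_iter_count(from));
 *	if (IS_ERR(wreq))
 *		return PTR_ERR(wreq);
 *
 *	while (iov_iter_count(from)) {
 *		// ... copy data from @from into a pagecache folio ...
 *		ret = netfs_advance_writethrough(wreq, copied, to_page_end);
 *		if (ret < 0)
 *			break;
 *	}
 *
 *	ret = netfs_end_writethrough(wreq, iocb);
 */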