verity.c source code [linux/fs/btrfs/verity.c]

1	// SPDX-License-Identifier: GPL-2.0
2
3	#include <linux/init.h>
4	#include <linux/fs.h>
5	#include <linux/slab.h>
6	#include <linux/rwsem.h>
7	#include <linux/xattr.h>
8	#include <linux/security.h>
9	#include <linux/posix_acl_xattr.h>
10	#include <linux/iversion.h>
11	#include <linux/fsverity.h>
12	#include <linux/sched/mm.h>
13	#include "messages.h"
14	#include "ctree.h"
15	#include "btrfs_inode.h"
16	#include "transaction.h"
17	#include "locking.h"
18	#include "fs.h"
19	#include "accessors.h"
20	#include "ioctl.h"
21	#include "verity.h"
22	#include "orphan.h"
23
24	/*
25	* Implementation of the interface defined in struct fsverity_operations.
26	*
27	* The main question is how and where to store the verity descriptor and the
28	* Merkle tree. We store both in dedicated btree items in the filesystem tree,
29	* together with the rest of the inode metadata. This means we'll need to do
30	* extra work to encrypt them once encryption is supported in btrfs, but btrfs
31	* has a lot of careful code around i_size and it seems better to make a new key
32	* type than try and adjust all of our expectations for i_size.
33	*
34	* Note that this differs from the implementation in ext4 and f2fs, where
35	* this data is stored as if it were in the file, but past EOF. However, btrfs
36	* does not have a widespread mechanism for caching opaque metadata pages, so we
37	* do pretend that the Merkle tree pages themselves are past EOF for the
38	* purposes of caching them (as opposed to creating a virtual inode).
39	*
40	* fs verity items are stored under two different key types on disk.
41	* The descriptor items:
42	* [ inode objectid, BTRFS_VERITY_DESC_ITEM_KEY, offset ]
43	*
44	* At offset 0, we store a btrfs_verity_descriptor_item which tracks the
45	* size of the descriptor item and some extra data for encryption.
46	* Starting at offset 1, these hold the generic fs verity descriptor.
47	* The latter are opaque to btrfs, we just read and write them as a blob for
48	* the higher level verity code. The most common descriptor size is 256 bytes.
49	*
50	* The merkle tree items:
51	* [ inode objectid, BTRFS_VERITY_MERKLE_ITEM_KEY, offset ]
52	*
53	* These also start at offset 0, and correspond to the merkle tree bytes.
54	* So when fsverity asks for page 0 of the merkle tree, we pull up one page
55	* starting at offset 0 for this key type. These are also opaque to btrfs,
56	* we're blindly storing whatever fsverity sends down.
57	*
58	* Another important consideration is the fact that the Merkle tree data scales
59	* linearly with the size of the file (with 4K pages/blocks and SHA-256, it's
60	* ~1/127th the size) so for large files, writing the tree can be a lengthy
61	* operation. For that reason, we guard the whole enable verity operation
62	* (between begin_enable_verity and end_enable_verity) with an orphan item.
63	* Again, because the data can be pretty large, it's quite possible that we
64	* could run out of space writing it, so we try our best to handle errors by
65	* stopping and rolling back rather than aborting the victim transaction.
66	*/
67
68	#define MERKLE_START_ALIGN 65536
69
70	/*
71	* Compute the logical file offset where we cache the Merkle tree.
72	*
73	* @inode: inode of the verity file
74	*
75	* For the purposes of caching the Merkle tree pages, as required by
76	* fs-verity, it is convenient to do size computations in terms of a file
77	* offset, rather than in terms of page indices.
78	*
79	* Use 64K to be sure it's past the last page in the file, even with 64K pages.
80	* That rounding operation itself can overflow loff_t, so we do it in u64 and
81	* check.
82	*
83	* Returns the file offset on success, negative error code on failure.
84	*/
85	static loff_t merkle_file_pos(const struct inode *inode)
86	{
87	u64 sz = inode->i_size;
88	u64 rounded = round_up(sz, MERKLE_START_ALIGN);
89
90	if (rounded > inode->i_sb->s_maxbytes)
91	return -EFBIG;
92
93	return rounded;
94	}
95
96	/*
97	* Drop all the items for this inode with this key_type.
98	*
99	* @inode: inode to drop items for
100	* @key_type: type of items to drop (BTRFS_VERITY_DESC_ITEM or
101	* BTRFS_VERITY_MERKLE_ITEM)
102	*
103	* Before doing a verity enable we cleanup any existing verity items.
104	* This is also used to clean up if a verity enable failed half way through.
105	*
106	* Returns number of dropped items on success, negative error code on failure.
107	*/
108	static int drop_verity_items(struct btrfs_inode *inode, u8 key_type)
109	{
110	struct btrfs_trans_handle *trans;
111	struct btrfs_root *root = inode->root;
112	struct btrfs_path *path;
113	struct btrfs_key key;
114	int count = `0`;
115	int ret;
116
117	path = btrfs_alloc_path();
118	if (!path)
119	return -ENOMEM;
120
121	while (`1`) {
122	/ 1 for the item being dropped /
123	trans = btrfs_start_transaction(root, num_items: `1`);
124	if (IS_ERR(ptr: trans)) {
125	ret = PTR_ERR(ptr: trans);
126	goto out;
127	}
128
129	/*
130	* Walk backwards through all the items until we find one that
131	* isn't from our key type or objectid
132	*/
133	key.objectid = btrfs_ino(inode);
134	key.type = key_type;
135	key.offset = (u64)-`1`;
136
137	ret = btrfs_search_slot(trans, root, key: &key, p: path, ins_len: -`1`, cow: `1`);
138	if (ret > `0`) {
139	ret = `0`;
140	/ No more keys of this type, we're done /
141	if (path->slots[`0`] == `0`)
142	break;
143	path->slots[`0`]--;
144	} else if (ret < `0`) {
145	btrfs_end_transaction(trans);
146	goto out;
147	}
148
149	btrfs_item_key_to_cpu(eb: path->nodes[`0`], cpu_key: &key, nr: path->slots[`0`]);
150
151	/ No more keys of this type, we're done /
152	if (key.objectid != btrfs_ino(inode) \|\| key.type != key_type)
153	break;
154
155	/*
156	* This shouldn't be a performance sensitive function because
157	* it's not used as part of truncate. If it ever becomes
158	* perf sensitive, change this to walk forward and bulk delete
159	* items
160	*/
161	ret = btrfs_del_items(trans, root, path, slot: path->slots[`0`], nr: `1`);
162	if (ret) {
163	btrfs_end_transaction(trans);
164	goto out;
165	}
166	count++;
167	btrfs_release_path(p: path);
168	btrfs_end_transaction(trans);
169	}
170	ret = count;
171	btrfs_end_transaction(trans);
172	out:
173	btrfs_free_path(p: path);
174	return ret;
175	}
176
177	/*
178	* Drop all verity items
179	*
180	* @inode: inode to drop verity items for
181	*
182	* In most contexts where we are dropping verity items, we want to do it for all
183	* the types of verity items, not a particular one.
184	*
185	* Returns: 0 on success, negative error code on failure.
186	*/
187	int btrfs_drop_verity_items(struct btrfs_inode *inode)
188	{
189	int ret;
190
191	ret = drop_verity_items(inode, BTRFS_VERITY_DESC_ITEM_KEY);
192	if (ret < `0`)
193	return ret;
194	ret = drop_verity_items(inode, BTRFS_VERITY_MERKLE_ITEM_KEY);
195	if (ret < `0`)
196	return ret;
197
198	return `0`;
199	}
200
201	/*
202	* Insert and write inode items with a given key type and offset.
203	*
204	* @inode: inode to insert for
205	* @key_type: key type to insert
206	* @offset: item offset to insert at
207	* @src: source data to write
208	* @len: length of source data to write
209	*
210	* Write len bytes from src into items of up to 2K length.
211	* The inserted items will have key (ino, key_type, offset + off) where off is
212	* consecutively increasing from 0 up to the last item ending at offset + len.
213	*
214	* Returns 0 on success and a negative error code on failure.
215	*/
216	static int write_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
217	const char *src, u64 len)
218	{
219	struct btrfs_trans_handle *trans;
220	struct btrfs_path *path;
221	struct btrfs_root *root = inode->root;
222	struct extent_buffer *leaf;
223	struct btrfs_key key;
224	unsigned long copy_bytes;
225	unsigned long src_offset = `0`;
226	void *data;
227	int ret = `0`;
228
229	path = btrfs_alloc_path();
230	if (!path)
231	return -ENOMEM;
232
233	while (len > `0`) {
234	/ 1 for the new item being inserted /
235	trans = btrfs_start_transaction(root, num_items: `1`);
236	if (IS_ERR(ptr: trans)) {
237	ret = PTR_ERR(ptr: trans);
238	break;
239	}
240
241	key.objectid = btrfs_ino(inode);
242	key.type = key_type;
243	key.offset = offset;
244
245	/*
246	* Insert 2K at a time mostly to be friendly for smaller leaf
247	* size filesystems
248	*/
249	copy_bytes = min_t(u64, len, `2048`);
250
251	ret = btrfs_insert_empty_item(trans, root, path, key: &key, data_size: copy_bytes);
252	if (ret) {
253	btrfs_end_transaction(trans);
254	break;
255	}
256
257	leaf = path->nodes[`0`];
258
259	data = btrfs_item_ptr(leaf, path->slots[`0`], void);
260	write_extent_buffer(eb: leaf, src: src + src_offset,
261	start: (unsigned long)data, len: copy_bytes);
262	offset += copy_bytes;
263	src_offset += copy_bytes;
264	len -= copy_bytes;
265
266	btrfs_release_path(p: path);
267	btrfs_end_transaction(trans);
268	}
269
270	btrfs_free_path(p: path);
271	return ret;
272	}
273
274	/*
275	* Read inode items of the given key type and offset from the btree.
276	*
277	* @inode: inode to read items of
278	* @key_type: key type to read
279	* @offset: item offset to read from
280	* @dest: Buffer to read into. This parameter has slightly tricky
281	* semantics. If it is NULL, the function will not do any copying
282	* and will just return the size of all the items up to len bytes.
283	* If dest_page is passed, then the function will kmap_local the
284	* page and ignore dest, but it must still be non-NULL to avoid the
285	* counting-only behavior.
286	* @len: length in bytes to read
287	* @dest_page: copy into this page instead of the dest buffer
288	*
289	* Helper function to read items from the btree. This returns the number of
290	* bytes read or < 0 for errors. We can return short reads if the items don't
291	* exist on disk or aren't big enough to fill the desired length. Supports
292	* reading into a provided buffer (dest) or into the page cache
293	*
294	* Returns number of bytes read or a negative error code on failure.
295	*/
296	static int read_key_bytes(struct btrfs_inode *inode, u8 key_type, u64 offset,
297	char dest, u64 len, struct* page *dest_page)
298	{
299	struct btrfs_path *path;
300	struct btrfs_root *root = inode->root;
301	struct extent_buffer *leaf;
302	struct btrfs_key key;
303	u64 item_end;
304	u64 copy_end;
305	int copied = `0`;
306	u32 copy_offset;
307	unsigned long copy_bytes;
308	unsigned long dest_offset = `0`;
309	void *data;
310	char *kaddr = dest;
311	int ret;
312
313	path = btrfs_alloc_path();
314	if (!path)
315	return -ENOMEM;
316
317	if (dest_page)
318	path->reada = READA_FORWARD;
319
320	key.objectid = btrfs_ino(inode);
321	key.type = key_type;
322	key.offset = offset;
323
324	ret = btrfs_search_slot(NULL, root, key: &key, p: path, ins_len: `0`, cow: `0`);
325	if (ret < `0`) {
326	goto out;
327	} else if (ret > `0`) {
328	ret = `0`;
329	if (path->slots[`0`] == `0`)
330	goto out;
331	path->slots[`0`]--;
332	}
333
334	while (len > `0`) {
335	leaf = path->nodes[`0`];
336	btrfs_item_key_to_cpu(eb: leaf, cpu_key: &key, nr: path->slots[`0`]);
337
338	if (key.objectid != btrfs_ino(inode) \|\| key.type != key_type)
339	break;
340
341	item_end = btrfs_item_size(eb: leaf, slot: path->slots[`0`]) + key.offset;
342
343	if (copied > `0`) {
344	/*
345	* Once we've copied something, we want all of the items
346	* to be sequential
347	*/
348	if (key.offset != offset)
349	break;
350	} else {
351	/*
352	* Our initial offset might be in the middle of an
353	* item. Make sure it all makes sense.
354	*/
355	if (key.offset > offset)
356	break;
357	if (item_end <= offset)
358	break;
359	}
360
361	/ desc = NULL to just sum all the item lengths /
362	if (!dest)
363	copy_end = item_end;
364	else
365	copy_end = min(offset + len, item_end);
366
367	/ Number of bytes in this item we want to copy /
368	copy_bytes = copy_end - offset;
369
370	/ Offset from the start of item for copying /
371	copy_offset = offset - key.offset;
372
373	if (dest) {
374	if (dest_page)
375	kaddr = kmap_local_page(page: dest_page);
376
377	data = btrfs_item_ptr(leaf, path->slots[`0`], void);
378	read_extent_buffer(eb: leaf, dst: kaddr + dest_offset,
379	start: (unsigned long)data + copy_offset,
380	len: copy_bytes);
381
382	if (dest_page)
383	kunmap_local(kaddr);
384	}
385
386	offset += copy_bytes;
387	dest_offset += copy_bytes;
388	len -= copy_bytes;
389	copied += copy_bytes;
390
391	path->slots[`0`]++;
392	if (path->slots[`0`] >= btrfs_header_nritems(eb: path->nodes[`0`])) {
393	/*
394	* We've reached the last slot in this leaf and we need
395	* to go to the next leaf.
396	*/
397	ret = btrfs_next_leaf(root, path);
398	if (ret < `0`) {
399	break;
400	} else if (ret > `0`) {
401	ret = `0`;
402	break;
403	}
404	}
405	}
406	out:
407	btrfs_free_path(p: path);
408	if (!ret)
409	ret = copied;
410	return ret;
411	}
412
413	/*
414	* Delete an fsverity orphan
415	*
416	* @trans: transaction to do the delete in
417	* @inode: inode to orphan
418	*
419	* Capture verity orphan specific logic that is repeated in the couple places
420	* we delete verity orphans. Specifically, handling ENOENT and ignoring inodes
421	* with 0 links.
422	*
423	* Returns zero on success or a negative error code on failure.
424	*/
425	static int del_orphan(struct btrfs_trans_handle trans, struct* btrfs_inode *inode)
426	{
427	struct btrfs_root *root = inode->root;
428	int ret;
429
430	/*
431	* If the inode has no links, it is either already unlinked, or was
432	* created with O_TMPFILE. In either case, it should have an orphan from
433	* that other operation. Rather than reference count the orphans, we
434	* simply ignore them here, because we only invoke the verity path in
435	* the orphan logic when i_nlink is 1.
436	*/
437	if (!inode->vfs_inode.i_nlink)
438	return `0`;
439
440	ret = btrfs_del_orphan_item(trans, root, offset: btrfs_ino(inode));
441	if (ret == -ENOENT)
442	ret = `0`;
443	return ret;
444	}
445
446	/*
447	* Rollback in-progress verity if we encounter an error.
448	*
449	* @inode: inode verity had an error for
450	*
451	* We try to handle recoverable errors while enabling verity by rolling it back
452	* and just failing the operation, rather than having an fs level error no
453	* matter what. However, any error in rollback is unrecoverable.
454	*
455	* Returns 0 on success, negative error code on failure.
456	*/
457	static int rollback_verity(struct btrfs_inode *inode)
458	{
459	struct btrfs_trans_handle *trans = NULL;
460	struct btrfs_root *root = inode->root;
461	int ret;
462
463	ASSERT(inode_is_locked(&inode->vfs_inode));
464	truncate_inode_pages(inode->vfs_inode.i_mapping, inode->vfs_inode.i_size);
465	clear_bit(nr: BTRFS_INODE_VERITY_IN_PROGRESS, addr: &inode->runtime_flags);
466	ret = btrfs_drop_verity_items(inode);
467	if (ret) {
468	btrfs_handle_fs_error(root->fs_info, ret,
469	"failed to drop verity items in rollback %llu",
470	(u64)inode->vfs_inode.i_ino);
471	goto out;
472	}
473
474	/*
475	* 1 for updating the inode flag
476	* 1 for deleting the orphan
477	*/
478	trans = btrfs_start_transaction(root, num_items: `2`);
479	if (IS_ERR(ptr: trans)) {
480	ret = PTR_ERR(ptr: trans);
481	trans = NULL;
482	btrfs_handle_fs_error(root->fs_info, ret,
483	"failed to start transaction in verity rollback %llu",
484	(u64)inode->vfs_inode.i_ino);
485	goto out;
486	}
487	inode->ro_flags &= ~BTRFS_INODE_RO_VERITY;
488	btrfs_sync_inode_flags_to_i_flags(inode: &inode->vfs_inode);
489	ret = btrfs_update_inode(trans, inode);
490	if (ret) {
491	btrfs_abort_transaction(trans, ret);
492	goto out;
493	}
494	ret = del_orphan(trans, inode);
495	if (ret) {
496	btrfs_abort_transaction(trans, ret);
497	goto out;
498	}
499	out:
500	if (trans)
501	btrfs_end_transaction(trans);
502	return ret;
503	}
504
505	/*
506	* Finalize making the file a valid verity file
507	*
508	* @inode: inode to be marked as verity
509	* @desc: contents of the verity descriptor to write (not NULL)
510	* @desc_size: size of the verity descriptor
511	*
512	* Do the actual work of finalizing verity after successfully writing the Merkle
513	* tree:
514	*
515	* - write out the descriptor items
516	* - mark the inode with the verity flag
517	* - delete the orphan item
518	* - mark the ro compat bit
519	* - clear the in progress bit
520	*
521	* Returns 0 on success, negative error code on failure.
522	*/
523	static int finish_verity(struct btrfs_inode inode, const* void *desc,
524	size_t desc_size)
525	{
526	struct btrfs_trans_handle *trans = NULL;
527	struct btrfs_root *root = inode->root;
528	struct btrfs_verity_descriptor_item item;
529	int ret;
530
531	/ Write out the descriptor item /
532	memset(&item, `0`, sizeof(item));
533	btrfs_set_stack_verity_descriptor_size(s: &item, val: desc_size);
534	ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, offset: `0`,
535	src: (const char )&item, len: sizeof*(item));
536	if (ret)
537	goto out;
538
539	/ Write out the descriptor itself /
540	ret = write_key_bytes(inode, BTRFS_VERITY_DESC_ITEM_KEY, offset: `1`,
541	src: desc, len: desc_size);
542	if (ret)
543	goto out;
544
545	/*
546	* 1 for updating the inode flag
547	* 1 for deleting the orphan
548	*/
549	trans = btrfs_start_transaction(root, num_items: `2`);
550	if (IS_ERR(ptr: trans)) {
551	ret = PTR_ERR(ptr: trans);
552	goto out;
553	}
554	inode->ro_flags \|= BTRFS_INODE_RO_VERITY;
555	btrfs_sync_inode_flags_to_i_flags(inode: &inode->vfs_inode);
556	ret = btrfs_update_inode(trans, inode);
557	if (ret)
558	goto end_trans;
559	ret = del_orphan(trans, inode);
560	if (ret)
561	goto end_trans;
562	clear_bit(nr: BTRFS_INODE_VERITY_IN_PROGRESS, addr: &inode->runtime_flags);
563	btrfs_set_fs_compat_ro(root->fs_info, VERITY);
564	end_trans:
565	btrfs_end_transaction(trans);
566	out:
567	return ret;
568
569	}
570
571	/*
572	* fsverity op that begins enabling verity.
573	*
574	* @filp: file to enable verity on
575	*
576	* Begin enabling fsverity for the file. We drop any existing verity items, add
577	* an orphan and set the in progress bit.
578	*
579	* Returns 0 on success, negative error code on failure.
580	*/
581	static int btrfs_begin_enable_verity(struct file *filp)
582	{
583	struct btrfs_inode *inode = BTRFS_I(inode: file_inode(f: filp));
584	struct btrfs_root *root = inode->root;
585	struct btrfs_trans_handle *trans;
586	int ret;
587
588	ASSERT(inode_is_locked(file_inode(filp)));
589
590	if (test_bit(BTRFS_INODE_VERITY_IN_PROGRESS, &inode->runtime_flags))
591	return -EBUSY;
592
593	/*
594	* This should almost never do anything, but theoretically, it's
595	* possible that we failed to enable verity on a file, then were
596	* interrupted or failed while rolling back, failed to cleanup the
597	* orphan, and finally attempt to enable verity again.
598	*/
599	ret = btrfs_drop_verity_items(inode);
600	if (ret)
601	return ret;
602
603	/ 1 for the orphan item /
604	trans = btrfs_start_transaction(root, num_items: `1`);
605	if (IS_ERR(ptr: trans))
606	return PTR_ERR(ptr: trans);
607
608	ret = btrfs_orphan_add(trans, inode);
609	if (!ret)
610	set_bit(nr: BTRFS_INODE_VERITY_IN_PROGRESS, addr: &inode->runtime_flags);
611	btrfs_end_transaction(trans);
612
613	return `0`;
614	}
615
616	/*
617	* fsverity op that ends enabling verity.
618	*
619	* @filp: file we are finishing enabling verity on
620	* @desc: verity descriptor to write out (NULL in error conditions)
621	* @desc_size: size of the verity descriptor (variable with signatures)
622	* @merkle_tree_size: size of the merkle tree in bytes
623	*
624	* If desc is null, then VFS is signaling an error occurred during verity
625	* enable, and we should try to rollback. Otherwise, attempt to finish verity.
626	*
627	* Returns 0 on success, negative error code on error.
628	*/
629	static int btrfs_end_enable_verity(struct file filp, const* void *desc,
630	size_t desc_size, u64 merkle_tree_size)
631	{
632	struct btrfs_inode *inode = BTRFS_I(inode: file_inode(f: filp));
633	int ret = `0`;
634	int rollback_ret;
635
636	ASSERT(inode_is_locked(file_inode(filp)));
637
638	if (desc == NULL)
639	goto rollback;
640
641	ret = finish_verity(inode, desc, desc_size);
642	if (ret)
643	goto rollback;
644	return ret;
645
646	rollback:
647	rollback_ret = rollback_verity(inode);
648	if (rollback_ret)
649	btrfs_err(inode->root->fs_info,
650	"failed to rollback verity items: %d", rollback_ret);
651	return ret;
652	}
653
654	/*
655	* fsverity op that gets the struct fsverity_descriptor.
656	*
657	* @inode: inode to get the descriptor of
658	* @buf: output buffer for the descriptor contents
659	* @buf_size: size of the output buffer. 0 to query the size
660	*
661	* fsverity does a two pass setup for reading the descriptor, in the first pass
662	* it calls with buf_size = 0 to query the size of the descriptor, and then in
663	* the second pass it actually reads the descriptor off disk.
664	*
665	* Returns the size on success or a negative error code on failure.
666	*/
667	int btrfs_get_verity_descriptor(struct inode inode, void* *buf, size_t buf_size)
668	{
669	u64 true_size;
670	int ret = `0`;
671	struct btrfs_verity_descriptor_item item;
672
673	memset(&item, `0`, sizeof(item));
674	ret = read_key_bytes(inode: BTRFS_I(inode), BTRFS_VERITY_DESC_ITEM_KEY, offset: `0`,
675	dest: (char )&item, len: sizeof*(item), NULL);
676	if (ret < `0`)
677	return ret;
678
679	if (item.reserved[`0`] != `0` \|\| item.reserved[`1`] != `0`)
680	return -EUCLEAN;
681
682	true_size = btrfs_stack_verity_descriptor_size(s: &item);
683	if (true_size > INT_MAX)
684	return -EUCLEAN;
685
686	if (buf_size == `0`)
687	return true_size;
688	if (buf_size < true_size)
689	return -ERANGE;
690
691	ret = read_key_bytes(inode: BTRFS_I(inode), BTRFS_VERITY_DESC_ITEM_KEY, offset: `1`,
692	dest: buf, len: buf_size, NULL);
693	if (ret < `0`)
694	return ret;
695	if (ret != true_size)
696	return -EIO;
697
698	return true_size;
699	}
700
701	/*
702	* fsverity op that reads and caches a merkle tree page.
703	*
704	* @inode: inode to read a merkle tree page for
705	* @index: page index relative to the start of the merkle tree
706	* @num_ra_pages: number of pages to readahead. Optional, we ignore it
707	*
708	* The Merkle tree is stored in the filesystem btree, but its pages are cached
709	* with a logical position past EOF in the inode's mapping.
710	*
711	* Returns the page we read, or an ERR_PTR on error.
712	*/
713	static struct page btrfs_read_merkle_tree_page(struct* inode *inode,
714	pgoff_t index,
715	unsigned long num_ra_pages)
716	{
717	struct folio *folio;
718	u64 off = (u64)index << PAGE_SHIFT;
719	loff_t merkle_pos = merkle_file_pos(inode);
720	int ret;
721
722	if (merkle_pos < `0`)
723	return ERR_PTR(error: merkle_pos);
724	if (merkle_pos > inode->i_sb->s_maxbytes - off - PAGE_SIZE)
725	return ERR_PTR(error: -EFBIG);
726	index += merkle_pos >> PAGE_SHIFT;
727	again:
728	folio = __filemap_get_folio(mapping: inode->i_mapping, index, FGP_ACCESSED, gfp: `0`);
729	if (!IS_ERR(ptr: folio)) {
730	if (folio_test_uptodate(folio))
731	goto out;
732
733	folio_lock(folio);
734	/ If it's not uptodate after we have the lock, we got a read error. /
735	if (!folio_test_uptodate(folio)) {
736	folio_unlock(folio);
737	folio_put(folio);
738	return ERR_PTR(error: -EIO);
739	}
740	folio_unlock(folio);
741	goto out;
742	}
743
744	folio = filemap_alloc_folio(gfp: mapping_gfp_constraint(mapping: inode->i_mapping, gfp_mask: ~__GFP_FS),
745	order: `0`);
746	if (!folio)
747	return ERR_PTR(error: -ENOMEM);
748
749	ret = filemap_add_folio(mapping: inode->i_mapping, folio, index, GFP_NOFS);
750	if (ret) {
751	folio_put(folio);
752	/ Did someone else insert a folio here? /
753	if (ret == -EEXIST)
754	goto again;
755	return ERR_PTR(error: ret);
756	}
757
758	/*
759	* Merkle item keys are indexed from byte 0 in the merkle tree.
760	* They have the form:
761	*
762	* [ inode objectid, BTRFS_MERKLE_ITEM_KEY, offset in bytes ]
763	*/
764	ret = read_key_bytes(inode: BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY, offset: off,
765	dest: folio_address(folio), PAGE_SIZE, dest_page: &folio->page);
766	if (ret < `0`) {
767	folio_put(folio);
768	return ERR_PTR(error: ret);
769	}
770	if (ret < PAGE_SIZE)
771	folio_zero_segment(folio, start: ret, PAGE_SIZE);
772
773	folio_mark_uptodate(folio);
774	folio_unlock(folio);
775
776	out:
777	return folio_file_page(folio, index);
778	}
779
780	/*
781	* fsverity op that writes a Merkle tree block into the btree.
782	*
783	* @inode: inode to write a Merkle tree block for
784	* @buf: Merkle tree block to write
785	* @pos: the position of the block in the Merkle tree (in bytes)
786	* @size: the Merkle tree block size (in bytes)
787	*
788	* Returns 0 on success or negative error code on failure
789	*/
790	static int btrfs_write_merkle_tree_block(struct inode inode, const* void *buf,
791	u64 pos, unsigned int size)
792	{
793	loff_t merkle_pos = merkle_file_pos(inode);
794
795	if (merkle_pos < `0`)
796	return merkle_pos;
797	if (merkle_pos > inode->i_sb->s_maxbytes - pos - size)
798	return -EFBIG;
799
800	return write_key_bytes(inode: BTRFS_I(inode), BTRFS_VERITY_MERKLE_ITEM_KEY,
801	offset: pos, src: buf, len: size);
802	}
803
804	const struct fsverity_operations btrfs_verityops = {
805	.begin_enable_verity = btrfs_begin_enable_verity,
806	.end_enable_verity = btrfs_end_enable_verity,
807	.get_verity_descriptor = btrfs_get_verity_descriptor,
808	.read_merkle_tree_page = btrfs_read_merkle_tree_page,
809	.write_merkle_tree_block = btrfs_write_merkle_tree_block,
810	};
811

source code of linux/fs/btrfs/verity.c