dir.c source code [linux/fs/ext4/dir.c]

// SPDX-License-Identifier: GPL-2.0
/*
 *  linux/fs/ext4/dir.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/dir.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4 directory handling functions
 *
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 *
 * Hash Tree Directory indexing (c) 2001  Daniel Phillips
 *
 */
24
25	#include <linux/fs.h>
26	#include <linux/buffer_head.h>
27	#include <linux/slab.h>
28	#include <linux/iversion.h>
29	#include <linux/unicode.h>
30	#include "ext4.h"
31	#include "xattr.h"
32
/* Forward declaration: ext4_readdir() calls this before it is defined. */
static int ext4_dx_readdir(struct file *, struct dir_context *);
34
35	/**
36	* is_dx_dir() - check if a directory is using htree indexing
37	* @inode: directory inode
38	*
39	* Check if the given dir-inode refers to an htree-indexed directory
40	* (or a directory which could potentially get converted to use htree
41	* indexing).
42	*
43	* Return 1 if it is a dx dir, 0 if not
44	*/
45	static int is_dx_dir(struct inode *inode)
46	{
47	struct super_block *sb = inode->i_sb;
48
49	if (ext4_has_feature_dir_index(sb: inode->i_sb) &&
50	((ext4_test_inode_flag(inode, bit: EXT4_INODE_INDEX)) \|\|
51	((inode->i_size >> sb->s_blocksize_bits) == `1`) \|\|
52	ext4_has_inline_data(inode)))
53	return `1`;
54
55	return `0`;
56	}
57
58	static bool is_fake_dir_entry(struct ext4_dir_entry_2 *de)
59	{
60	/ Check if . or .. , or skip if namelen is 0 /
61	if ((de->name_len > `0`) && (de->name_len <= `2`) && (de->name[`0`] == `'.'`) &&
62	(de->name[`1`] == `'.'` \|\| de->name[`1`] == `'\0'`))
63	return true;
64	/ Check if this is a csum entry /
65	if (de->file_type == EXT4_FT_DIR_CSUM)
66	return true;
67	return false;
68	}
69
70	/*
71	* Return 0 if the directory entry is OK, and 1 if there is a problem
72	*
73	* Note: this is the opposite of what ext2 and ext3 historically returned...
74	*
75	* bh passed here can be an inode block or a dir data block, depending
76	* on the inode inline data flag.
77	*/
78	int __ext4_check_dir_entry(const char function, unsigned* int line,
79	struct inode dir, struct* file *filp,
80	struct ext4_dir_entry_2 *de,
81	struct buffer_head bh, char* buf, int* size,
82	unsigned int offset)
83	{
84	const char *error_msg = NULL;
85	const int rlen = ext4_rec_len_from_disk(dlen: de->rec_len,
86	blocksize: dir->i_sb->s_blocksize);
87	const int next_offset = ((char *) de - buf) + rlen;
88	bool fake = is_fake_dir_entry(de);
89	bool has_csum = ext4_has_metadata_csum(sb: dir->i_sb);
90
91	if (unlikely(rlen < ext4_dir_rec_len(`1`, fake ? NULL : dir)))
92	error_msg = "rec_len is smaller than minimal";
93	else if (unlikely(rlen % `4` != `0`))
94	error_msg = "rec_len % 4 != 0";
95	else if (unlikely(rlen < ext4_dir_rec_len(de->name_len,
96	fake ? NULL : dir)))
97	error_msg = "rec_len is too small for name_len";
98	else if (unlikely(next_offset > size))
99	error_msg = "directory entry overrun";
100	else if (unlikely(next_offset > size - ext4_dir_rec_len(`1`,
101	has_csum ? NULL : dir) &&
102	next_offset != size))
103	error_msg = "directory entry too close to block end";
104	else if (unlikely(le32_to_cpu(de->inode) >
105	le32_to_cpu(EXT4_SB(dir->i_sb)->s_es->s_inodes_count)))
106	error_msg = "inode out of bounds";
107	else
108	return `0`;
109
110	if (filp)
111	ext4_error_file(filp, function, line, bh->b_blocknr,
112	"bad entry in directory: %s - offset=%u, "
113	"inode=%u, rec_len=%d, size=%d fake=%d",
114	error_msg, offset, le32_to_cpu(de->inode),
115	rlen, size, fake);
116	else
117	ext4_error_inode(dir, function, line, bh->b_blocknr,
118	"bad entry in directory: %s - offset=%u, "
119	"inode=%u, rec_len=%d, size=%d fake=%d",
120	error_msg, offset, le32_to_cpu(de->inode),
121	rlen, size, fake);
122
123	return `1`;
124	}
125
126	static int ext4_readdir(struct file file, struct* dir_context *ctx)
127	{
128	unsigned int offset;
129	int i;
130	struct ext4_dir_entry_2 *de;
131	int err;
132	struct inode *inode = file_inode(f: file);
133	struct super_block *sb = inode->i_sb;
134	struct buffer_head *bh = NULL;
135	struct fscrypt_str fstr = FSTR_INIT(NULL, `0`);
136
137	err = fscrypt_prepare_readdir(dir: inode);
138	if (err)
139	return err;
140
141	if (is_dx_dir(inode)) {
142	err = ext4_dx_readdir(file, ctx);
143	if (err != ERR_BAD_DX_DIR)
144	return err;
145
146	/ Can we just clear INDEX flag to ignore htree information? /
147	if (!ext4_has_metadata_csum(sb)) {
148	/*
149	* We don't set the inode dirty flag since it's not
150	* critical that it gets flushed back to the disk.
151	*/
152	ext4_clear_inode_flag(inode, bit: EXT4_INODE_INDEX);
153	}
154	}
155
156	if (ext4_has_inline_data(inode)) {
157	int has_inline_data = `1`;
158	err = ext4_read_inline_dir(filp: file, ctx,
159	has_inline_data: &has_inline_data);
160	if (has_inline_data)
161	return err;
162	}
163
164	if (IS_ENCRYPTED(inode)) {
165	err = fscrypt_fname_alloc_buffer(EXT4_NAME_LEN, crypto_str: &fstr);
166	if (err < `0`)
167	return err;
168	}
169
170	while (ctx->pos < inode->i_size) {
171	struct ext4_map_blocks map;
172
173	if (fatal_signal_pending(current)) {
174	err = -ERESTARTSYS;
175	goto errout;
176	}
177	cond_resched();
178	offset = ctx->pos & (sb->s_blocksize - `1`);
179	map.m_lblk = ctx->pos >> EXT4_BLOCK_SIZE_BITS(sb);
180	map.m_len = `1`;
181	err = ext4_map_blocks(NULL, inode, map: &map, flags: `0`);
182	if (err == `0`) {
183	/ m_len should never be zero but let's avoid*
184	* an infinite loop if it somehow is */
185	if (map.m_len == `0`)
186	map.m_len = `1`;
187	ctx->pos += map.m_len * sb->s_blocksize;
188	continue;
189	}
190	if (err > `0`) {
191	pgoff_t index = map.m_pblk >>
192	(PAGE_SHIFT - inode->i_blkbits);
193	if (!ra_has_index(ra: &file->f_ra, index))
194	page_cache_sync_readahead(
195	mapping: sb->s_bdev->bd_inode->i_mapping,
196	ra: &file->f_ra, file,
197	index, req_count: `1`);
198	file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
199	bh = ext4_bread(NULL, inode, map.m_lblk, `0`);
200	if (IS_ERR(ptr: bh)) {
201	err = PTR_ERR(ptr: bh);
202	bh = NULL;
203	goto errout;
204	}
205	}
206
207	if (!bh) {
208	/ corrupt size? Maybe no more blocks to read /
209	if (ctx->pos > inode->i_blocks << `9`)
210	break;
211	ctx->pos += sb->s_blocksize - offset;
212	continue;
213	}
214
215	/ Check the checksum /
216	if (!buffer_verified(bh) &&
217	!ext4_dirblock_csum_verify(inode, bh)) {
218	EXT4_ERROR_FILE(file, `0`, "directory fails checksum "
219	"at offset %llu",
220	(unsigned long long)ctx->pos);
221	ctx->pos += sb->s_blocksize - offset;
222	brelse(bh);
223	bh = NULL;
224	continue;
225	}
226	set_buffer_verified(bh);
227
228	/ If the dir block has changed since the last call to*
229	* readdir(2), then we might be pointing to an invalid
230	* dirent right now. Scan from the start of the block
231	* to make sure. */
232	if (!inode_eq_iversion(inode, old: file->f_version)) {
233	for (i = `0`; i < sb->s_blocksize && i < offset; ) {
234	de = (struct ext4_dir_entry_2 *)
235	(bh->b_data + i);
236	/ It's too expensive to do a full*
237	* dirent test each time round this
238	* loop, but we do have to test at
239	* least that it is non-zero. A
240	* failure will be detected in the
241	* dirent test below. */
242	if (ext4_rec_len_from_disk(dlen: de->rec_len,
243	blocksize: sb->s_blocksize) < ext4_dir_rec_len(name_len: `1`,
244	dir: inode))
245	break;
246	i += ext4_rec_len_from_disk(dlen: de->rec_len,
247	blocksize: sb->s_blocksize);
248	}
249	offset = i;
250	ctx->pos = (ctx->pos & ~(sb->s_blocksize - `1`))
251	\| offset;
252	file->f_version = inode_query_iversion(inode);
253	}
254
255	while (ctx->pos < inode->i_size
256	&& offset < sb->s_blocksize) {
257	de = (struct ext4_dir_entry_2 *) (bh->b_data + offset);
258	if (ext4_check_dir_entry(inode, file, de, bh,
259	bh->b_data, bh->b_size,
260	offset)) {
261	/*
262	* On error, skip to the next block
263	*/
264	ctx->pos = (ctx->pos \|
265	(sb->s_blocksize - `1`)) + `1`;
266	break;
267	}
268	offset += ext4_rec_len_from_disk(dlen: de->rec_len,
269	blocksize: sb->s_blocksize);
270	if (le32_to_cpu(de->inode)) {
271	if (!IS_ENCRYPTED(inode)) {
272	if (!dir_emit(ctx, name: de->name,
273	namelen: de->name_len,
274	le32_to_cpu(de->inode),
275	type: get_dtype(sb, filetype: de->file_type)))
276	goto done;
277	} else {
278	int save_len = fstr.len;
279	struct fscrypt_str de_name =
280	FSTR_INIT(de->name,
281	de->name_len);
282
283	/ Directory is encrypted /
284	err = fscrypt_fname_disk_to_usr(inode,
285	EXT4_DIRENT_HASH(de),
286	EXT4_DIRENT_MINOR_HASH(de),
287	iname: &de_name, oname: &fstr);
288	de_name = fstr;
289	fstr.len = save_len;
290	if (err)
291	goto errout;
292	if (!dir_emit(ctx,
293	name: de_name.name, namelen: de_name.len,
294	le32_to_cpu(de->inode),
295	type: get_dtype(sb, filetype: de->file_type)))
296	goto done;
297	}
298	}
299	ctx->pos += ext4_rec_len_from_disk(dlen: de->rec_len,
300	blocksize: sb->s_blocksize);
301	}
302	if ((ctx->pos < inode->i_size) && !dir_relax_shared(inode))
303	goto done;
304	brelse(bh);
305	bh = NULL;
306	}
307	done:
308	err = `0`;
309	errout:
310	fscrypt_fname_free_buffer(crypto_str: &fstr);
311	brelse(bh);
312	return err;
313	}
314
315	static inline int is_32bit_api(void)
316	{
317	#ifdef CONFIG_COMPAT
318	return in_compat_syscall();
319	#else
320	return (BITS_PER_LONG == `32`);
321	#endif
322	}
323
324	/*
325	* These functions convert from the major/minor hash to an f_pos
326	* value for dx directories
327	*
328	* Upper layer (for example NFS) should specify FMODE_32BITHASH or
329	* FMODE_64BITHASH explicitly. On the other hand, we allow ext4 to be mounted
330	* directly on both 32-bit and 64-bit nodes, under such case, neither
331	* FMODE_32BITHASH nor FMODE_64BITHASH is specified.
332	*/
333	static inline loff_t hash2pos(struct file *filp, __u32 major, __u32 minor)
334	{
335	if ((filp->f_mode & FMODE_32BITHASH) \|\|
336	(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
337	return major >> `1`;
338	else
339	return ((__u64)(major >> `1`) << `32`) \| (__u64)minor;
340	}
341
342	static inline __u32 pos2maj_hash(struct file *filp, loff_t pos)
343	{
344	if ((filp->f_mode & FMODE_32BITHASH) \|\|
345	(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
346	return (pos << `1`) & `0xffffffff`;
347	else
348	return ((pos >> `32`) << `1`) & `0xffffffff`;
349	}
350
351	static inline __u32 pos2min_hash(struct file *filp, loff_t pos)
352	{
353	if ((filp->f_mode & FMODE_32BITHASH) \|\|
354	(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
355	return `0`;
356	else
357	return pos & `0xffffffff`;
358	}
359
360	/*
361	* Return 32- or 64-bit end-of-file for dx directories
362	*/
363	static inline loff_t ext4_get_htree_eof(struct file *filp)
364	{
365	if ((filp->f_mode & FMODE_32BITHASH) \|\|
366	(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
367	return EXT4_HTREE_EOF_32BIT;
368	else
369	return EXT4_HTREE_EOF_64BIT;
370	}
371
372
373	/*
374	* ext4_dir_llseek() calls generic_file_llseek_size to handle htree
375	* directories, where the "offset" is in terms of the filename hash
376	* value instead of the byte offset.
377	*
378	* Because we may return a 64-bit hash that is well beyond offset limits,
379	* we need to pass the max hash as the maximum allowable offset in
380	* the htree directory case.
381	*
382	* For non-htree, ext4_llseek already chooses the proper max offset.
383	*/
384	static loff_t ext4_dir_llseek(struct file file, loff_t offset, int* whence)
385	{
386	struct inode *inode = file->f_mapping->host;
387	int dx_dir = is_dx_dir(inode);
388	loff_t ret, htree_max = ext4_get_htree_eof(filp: file);
389
390	if (likely(dx_dir))
391	ret = generic_file_llseek_size(file, offset, whence,
392	maxsize: htree_max, eof: htree_max);
393	else
394	ret = ext4_llseek(file, offset, origin: whence);
395	file->f_version = inode_peek_iversion(inode) - `1`;
396	return ret;
397	}
398
399	/*
400	* This structure holds the nodes of the red-black tree used to store
401	* the directory entry in hash order.
402	*/
403	struct fname {
404	__u32 hash;
405	__u32 minor_hash;
406	struct rb_node rb_hash;
407	struct fname *next;
408	__u32 inode;
409	__u8 name_len;
410	__u8 file_type;
411	char name[];
412	};
413
414	/*
415	* This function implements a non-recursive way of freeing all of the
416	* nodes in the red-black tree.
417	*/
418	static void free_rb_tree_fname(struct rb_root *root)
419	{
420	struct fname fname, next;
421
422	rbtree_postorder_for_each_entry_safe(fname, next, root, rb_hash)
423	while (fname) {
424	struct fname *old = fname;
425	fname = fname->next;
426	kfree(objp: old);
427	}
428
429	*root = RB_ROOT;
430	}
431
432
433	static struct dir_private_info ext4_htree_create_dir_info(struct* file *filp,
434	loff_t pos)
435	{
436	struct dir_private_info *p;
437
438	p = kzalloc(size: sizeof(*p), GFP_KERNEL);
439	if (!p)
440	return NULL;
441	p->curr_hash = pos2maj_hash(filp, pos);
442	p->curr_minor_hash = pos2min_hash(filp, pos);
443	return p;
444	}
445
446	void ext4_htree_free_dir_info(struct dir_private_info *p)
447	{
448	free_rb_tree_fname(root: &p->root);
449	kfree(objp: p);
450	}
451
452	/*
453	* Given a directory entry, enter it into the fname rb tree.
454	*
455	* When filename encryption is enabled, the dirent will hold the
456	* encrypted filename, while the htree will hold decrypted filename.
457	* The decrypted filename is passed in via ent_name. parameter.
458	*/
459	int ext4_htree_store_dirent(struct file *dir_file, __u32 hash,
460	__u32 minor_hash,
461	struct ext4_dir_entry_2 *dirent,
462	struct fscrypt_str *ent_name)
463	{
464	struct rb_node *p, parent = NULL;
465	struct fname fname, new_fn;
466	struct dir_private_info *info;
467	int len;
468
469	info = dir_file->private_data;
470	p = &info->root.rb_node;
471
472	/ Create and allocate the fname structure /
473	len = sizeof(struct fname) + ent_name->len + `1`;
474	new_fn = kzalloc(size: len, GFP_KERNEL);
475	if (!new_fn)
476	return -ENOMEM;
477	new_fn->hash = hash;
478	new_fn->minor_hash = minor_hash;
479	new_fn->inode = le32_to_cpu(dirent->inode);
480	new_fn->name_len = ent_name->len;
481	new_fn->file_type = dirent->file_type;
482	memcpy(new_fn->name, ent_name->name, ent_name->len);
483
484	while (*p) {
485	parent = *p;
486	fname = rb_entry(parent, struct fname, rb_hash);
487
488	/*
489	* If the hash and minor hash match up, then we put
490	* them on a linked list. This rarely happens...
491	*/
492	if ((new_fn->hash == fname->hash) &&
493	(new_fn->minor_hash == fname->minor_hash)) {
494	new_fn->next = fname->next;
495	fname->next = new_fn;
496	return `0`;
497	}
498
499	if (new_fn->hash < fname->hash)
500	p = &(*p)->rb_left;
501	else if (new_fn->hash > fname->hash)
502	p = &(*p)->rb_right;
503	else if (new_fn->minor_hash < fname->minor_hash)
504	p = &(*p)->rb_left;
505	else / if (new_fn->minor_hash > fname->minor_hash) /
506	p = &(*p)->rb_right;
507	}
508
509	rb_link_node(node: &new_fn->rb_hash, parent, rb_link: p);
510	rb_insert_color(&new_fn->rb_hash, &info->root);
511	return `0`;
512	}
513
514
515
516	/*
517	* This is a helper function for ext4_dx_readdir. It calls filldir
518	* for all entries on the fname linked list. (Normally there is only
519	* one entry on the linked list, unless there are 62 bit hash collisions.)
520	*/
521	static int call_filldir(struct file file, struct* dir_context *ctx,
522	struct fname *fname)
523	{
524	struct dir_private_info *info = file->private_data;
525	struct inode *inode = file_inode(f: file);
526	struct super_block *sb = inode->i_sb;
527
528	if (!fname) {
529	ext4_msg(sb, KERN_ERR, "%s:%d: inode #%lu: comm %s: "
530	"called with null fname?!?", __func__, __LINE__,
531	inode->i_ino, current->comm);
532	return `0`;
533	}
534	ctx->pos = hash2pos(filp: file, major: fname->hash, minor: fname->minor_hash);
535	while (fname) {
536	if (!dir_emit(ctx, name: fname->name,
537	namelen: fname->name_len,
538	ino: fname->inode,
539	type: get_dtype(sb, filetype: fname->file_type))) {
540	info->extra_fname = fname;
541	return `1`;
542	}
543	fname = fname->next;
544	}
545	return `0`;
546	}
547
548	static int ext4_dx_readdir(struct file file, struct* dir_context *ctx)
549	{
550	struct dir_private_info *info = file->private_data;
551	struct inode *inode = file_inode(f: file);
552	struct fname *fname;
553	int ret = `0`;
554
555	if (!info) {
556	info = ext4_htree_create_dir_info(filp: file, pos: ctx->pos);
557	if (!info)
558	return -ENOMEM;
559	file->private_data = info;
560	}
561
562	if (ctx->pos == ext4_get_htree_eof(filp: file))
563	return `0`; / EOF /
564
565	/ Some one has messed with f_pos; reset the world /
566	if (info->last_pos != ctx->pos) {
567	free_rb_tree_fname(root: &info->root);
568	info->curr_node = NULL;
569	info->extra_fname = NULL;
570	info->curr_hash = pos2maj_hash(filp: file, pos: ctx->pos);
571	info->curr_minor_hash = pos2min_hash(filp: file, pos: ctx->pos);
572	}
573
574	/*
575	* If there are any leftover names on the hash collision
576	* chain, return them first.
577	*/
578	if (info->extra_fname) {
579	if (call_filldir(file, ctx, fname: info->extra_fname))
580	goto finished;
581	info->extra_fname = NULL;
582	goto next_node;
583	} else if (!info->curr_node)
584	info->curr_node = rb_first(&info->root);
585
586	while (`1`) {
587	/*
588	* Fill the rbtree if we have no more entries,
589	* or the inode has changed since we last read in the
590	* cached entries.
591	*/
592	if ((!info->curr_node) \|\|
593	!inode_eq_iversion(inode, old: file->f_version)) {
594	info->curr_node = NULL;
595	free_rb_tree_fname(root: &info->root);
596	file->f_version = inode_query_iversion(inode);
597	ret = ext4_htree_fill_tree(dir_file: file, start_hash: info->curr_hash,
598	start_minor_hash: info->curr_minor_hash,
599	next_hash: &info->next_hash);
600	if (ret < `0`)
601	goto finished;
602	if (ret == `0`) {
603	ctx->pos = ext4_get_htree_eof(filp: file);
604	break;
605	}
606	info->curr_node = rb_first(&info->root);
607	}
608
609	fname = rb_entry(info->curr_node, struct fname, rb_hash);
610	info->curr_hash = fname->hash;
611	info->curr_minor_hash = fname->minor_hash;
612	if (call_filldir(file, ctx, fname))
613	break;
614	next_node:
615	info->curr_node = rb_next(info->curr_node);
616	if (info->curr_node) {
617	fname = rb_entry(info->curr_node, struct fname,
618	rb_hash);
619	info->curr_hash = fname->hash;
620	info->curr_minor_hash = fname->minor_hash;
621	} else {
622	if (info->next_hash == ~`0`) {
623	ctx->pos = ext4_get_htree_eof(filp: file);
624	break;
625	}
626	info->curr_hash = info->next_hash;
627	info->curr_minor_hash = `0`;
628	}
629	}
630	finished:
631	info->last_pos = ctx->pos;
632	return ret < `0` ? ret : `0`;
633	}
634
635	static int ext4_release_dir(struct inode inode, struct* file *filp)
636	{
637	if (filp->private_data)
638	ext4_htree_free_dir_info(p: filp->private_data);
639
640	return `0`;
641	}
642
643	int ext4_check_all_de(struct inode dir, struct* buffer_head bh, void* *buf,
644	int buf_size)
645	{
646	struct ext4_dir_entry_2 *de;
647	int rlen;
648	unsigned int offset = `0`;
649	char *top;
650
651	de = buf;
652	top = buf + buf_size;
653	while ((char *) de < top) {
654	if (ext4_check_dir_entry(dir, NULL, de, bh,
655	buf, buf_size, offset))
656	return -EFSCORRUPTED;
657	rlen = ext4_rec_len_from_disk(dlen: de->rec_len, blocksize: buf_size);
658	de = (struct ext4_dir_entry_2 )((char* *)de + rlen);
659	offset += rlen;
660	}
661	if ((char *) de > top)
662	return -EFSCORRUPTED;
663
664	return `0`;
665	}
666
667	const struct file_operations ext4_dir_operations = {
668	.llseek = ext4_dir_llseek,
669	.read = generic_read_dir,
670	.iterate_shared = ext4_readdir,
671	.unlocked_ioctl = ext4_ioctl,
672	#ifdef CONFIG_COMPAT
673	.compat_ioctl = ext4_compat_ioctl,
674	#endif
675	.fsync = ext4_sync_file,
676	.release = ext4_release_dir,
677	};
678

source code of linux/fs/ext4/dir.c