inode.c source code [linux/fs/ntfs/inode.c]

1	// SPDX-License-Identifier: GPL-2.0-or-later
2	/*
3	* inode.c - NTFS kernel inode handling.
4	*
5	* Copyright (c) 2001-2014 Anton Altaparmakov and Tuxera Inc.
6	*/
7
8	#include <linux/buffer_head.h>
9	#include <linux/fs.h>
10	#include <linux/mm.h>
11	#include <linux/mount.h>
12	#include <linux/mutex.h>
13	#include <linux/pagemap.h>
14	#include <linux/quotaops.h>
15	#include <linux/slab.h>
16	#include <linux/log2.h>
17
18	#include "aops.h"
19	#include "attrib.h"
20	#include "bitmap.h"
21	#include "dir.h"
22	#include "debug.h"
23	#include "inode.h"
24	#include "lcnalloc.h"
25	#include "malloc.h"
26	#include "mft.h"
27	#include "time.h"
28	#include "ntfs.h"
29
30	/**
31	* ntfs_test_inode - compare two (possibly fake) inodes for equality
32	* @vi: vfs inode which to test
33	* @data: data which is being tested with
34	*
35	* Compare the ntfs attribute embedded in the ntfs specific part of the vfs
36	* inode @vi for equality with the ntfs attribute @data.
37	*
38	* If searching for the normal file/directory inode, set @na->type to AT_UNUSED.
39	* @na->name and @na->name_len are then ignored.
40	*
41	* Return 1 if the attributes match and 0 if not.
42	*
43	* NOTE: This function runs with the inode_hash_lock spin lock held so it is not
44	* allowed to sleep.
45	*/
46	int ntfs_test_inode(struct inode vi, void* *data)
47	{
48	ntfs_attr na = (ntfs_attr )data;
49	ntfs_inode *ni;
50
51	if (vi->i_ino != na->mft_no)
52	return `0`;
53	ni = NTFS_I(inode: vi);
54	/ If !NInoAttr(ni), @vi is a normal file or directory inode. /
55	if (likely(!NInoAttr(ni))) {
56	/ If not looking for a normal inode this is a mismatch. /
57	if (unlikely(na->type != AT_UNUSED))
58	return `0`;
59	} else {
60	/ A fake inode describing an attribute. /
61	if (ni->type != na->type)
62	return `0`;
63	if (ni->name_len != na->name_len)
64	return `0`;
65	if (na->name_len && memcmp(p: ni->name, q: na->name,
66	size: na->name_len * sizeof(ntfschar)))
67	return `0`;
68	}
69	/ Match! /
70	return `1`;
71	}
72
73	/**
74	* ntfs_init_locked_inode - initialize an inode
75	* @vi: vfs inode to initialize
76	* @data: data which to initialize @vi to
77	*
78	* Initialize the vfs inode @vi with the values from the ntfs attribute @data in
79	* order to enable ntfs_test_inode() to do its work.
80	*
81	* If initializing the normal file/directory inode, set @na->type to AT_UNUSED.
82	* In that case, @na->name and @na->name_len should be set to NULL and 0,
83	* respectively. Although that is not strictly necessary as
84	* ntfs_read_locked_inode() will fill them in later.
85	*
86	* Return 0 on success and -errno on error.
87	*
88	* NOTE: This function runs with the inode->i_lock spin lock held so it is not
89	* allowed to sleep. (Hence the GFP_ATOMIC allocation.)
90	*/
91	static int ntfs_init_locked_inode(struct inode vi, void* *data)
92	{
93	ntfs_attr na = (ntfs_attr )data;
94	ntfs_inode *ni = NTFS_I(inode: vi);
95
96	vi->i_ino = na->mft_no;
97
98	ni->type = na->type;
99	if (na->type == AT_INDEX_ALLOCATION)
100	NInoSetMstProtected(ni);
101
102	ni->name = na->name;
103	ni->name_len = na->name_len;
104
105	/ If initializing a normal inode, we are done. /
106	if (likely(na->type == AT_UNUSED)) {
107	BUG_ON(na->name);
108	BUG_ON(na->name_len);
109	return `0`;
110	}
111
112	/ It is a fake inode. /
113	NInoSetAttr(ni);
114
115	/*
116	* We have I30 global constant as an optimization as it is the name
117	* in >99.9% of named attributes! The other <0.1% incur a GFP_ATOMIC
118	* allocation but that is ok. And most attributes are unnamed anyway,
119	* thus the fraction of named attributes with name != I30 is actually
120	* absolutely tiny.
121	*/
122	if (na->name_len && na->name != I30) {
123	unsigned int i;
124
125	BUG_ON(!na->name);
126	i = na->name_len * sizeof(ntfschar);
127	ni->name = kmalloc(size: i + sizeof(ntfschar), GFP_ATOMIC);
128	if (!ni->name)
129	return -ENOMEM;
130	memcpy(ni->name, na->name, i);
131	ni->name[na->name_len] = `0`;
132	}
133	return `0`;
134	}
135
/* Forward declarations of the inode readers used by the *_iget() helpers. */
static int ntfs_read_locked_inode(struct inode *vi);
static int ntfs_read_locked_attr_inode(struct inode *base_vi, struct inode *vi);
static int ntfs_read_locked_index_inode(struct inode *base_vi,
		struct inode *vi);
140
141	/**
142	* ntfs_iget - obtain a struct inode corresponding to a specific normal inode
143	* @sb: super block of mounted volume
144	* @mft_no: mft record number / inode number to obtain
145	*
146	* Obtain the struct inode corresponding to a specific normal inode (i.e. a
147	* file or directory).
148	*
149	* If the inode is in the cache, it is just returned with an increased
150	* reference count. Otherwise, a new struct inode is allocated and initialized,
151	* and finally ntfs_read_locked_inode() is called to read in the inode and
152	* fill in the remainder of the inode structure.
153	*
154	* Return the struct inode on success. Check the return value with IS_ERR() and
155	* if true, the function failed and the error code is obtained from PTR_ERR().
156	*/
157	struct inode ntfs_iget(struct* super_block sb, unsigned* long mft_no)
158	{
159	struct inode *vi;
160	int err;
161	ntfs_attr na;
162
163	na.mft_no = mft_no;
164	na.type = AT_UNUSED;
165	na.name = NULL;
166	na.name_len = `0`;
167
168	vi = iget5_locked(sb, mft_no, test: ntfs_test_inode,
169	set: ntfs_init_locked_inode, &na);
170	if (unlikely(!vi))
171	return ERR_PTR(error: -ENOMEM);
172
173	err = `0`;
174
175	/ If this is a freshly allocated inode, need to read it now. /
176	if (vi->i_state & I_NEW) {
177	err = ntfs_read_locked_inode(vi);
178	unlock_new_inode(vi);
179	}
180	/*
181	* There is no point in keeping bad inodes around if the failure was
182	* due to ENOMEM. We want to be able to retry again later.
183	*/
184	if (unlikely(err == -ENOMEM)) {
185	iput(vi);
186	vi = ERR_PTR(error: err);
187	}
188	return vi;
189	}
190
191	/**
192	* ntfs_attr_iget - obtain a struct inode corresponding to an attribute
193	* @base_vi: vfs base inode containing the attribute
194	* @type: attribute type
195	* @name: Unicode name of the attribute (NULL if unnamed)
196	* @name_len: length of @name in Unicode characters (0 if unnamed)
197	*
198	* Obtain the (fake) struct inode corresponding to the attribute specified by
199	* @type, @name, and @name_len, which is present in the base mft record
200	* specified by the vfs inode @base_vi.
201	*
202	* If the attribute inode is in the cache, it is just returned with an
203	* increased reference count. Otherwise, a new struct inode is allocated and
204	* initialized, and finally ntfs_read_locked_attr_inode() is called to read the
205	* attribute and fill in the inode structure.
206	*
207	* Note, for index allocation attributes, you need to use ntfs_index_iget()
208	* instead of ntfs_attr_iget() as working with indices is a lot more complex.
209	*
210	* Return the struct inode of the attribute inode on success. Check the return
211	* value with IS_ERR() and if true, the function failed and the error code is
212	* obtained from PTR_ERR().
213	*/
214	struct inode ntfs_attr_iget(struct* inode *base_vi, ATTR_TYPE type,
215	ntfschar *name, u32 name_len)
216	{
217	struct inode *vi;
218	int err;
219	ntfs_attr na;
220
221	/ Make sure no one calls ntfs_attr_iget() for indices. /
222	BUG_ON(type == AT_INDEX_ALLOCATION);
223
224	na.mft_no = base_vi->i_ino;
225	na.type = type;
226	na.name = name;
227	na.name_len = name_len;
228
229	vi = iget5_locked(base_vi->i_sb, na.mft_no, test: ntfs_test_inode,
230	set: ntfs_init_locked_inode, &na);
231	if (unlikely(!vi))
232	return ERR_PTR(error: -ENOMEM);
233
234	err = `0`;
235
236	/ If this is a freshly allocated inode, need to read it now. /
237	if (vi->i_state & I_NEW) {
238	err = ntfs_read_locked_attr_inode(base_vi, vi);
239	unlock_new_inode(vi);
240	}
241	/*
242	* There is no point in keeping bad attribute inodes around. This also
243	* simplifies things in that we never need to check for bad attribute
244	* inodes elsewhere.
245	*/
246	if (unlikely(err)) {
247	iput(vi);
248	vi = ERR_PTR(error: err);
249	}
250	return vi;
251	}
252
253	/**
254	* ntfs_index_iget - obtain a struct inode corresponding to an index
255	* @base_vi: vfs base inode containing the index related attributes
256	* @name: Unicode name of the index
257	* @name_len: length of @name in Unicode characters
258	*
259	* Obtain the (fake) struct inode corresponding to the index specified by @name
260	* and @name_len, which is present in the base mft record specified by the vfs
261	* inode @base_vi.
262	*
263	* If the index inode is in the cache, it is just returned with an increased
264	* reference count. Otherwise, a new struct inode is allocated and
265	* initialized, and finally ntfs_read_locked_index_inode() is called to read
266	* the index related attributes and fill in the inode structure.
267	*
268	* Return the struct inode of the index inode on success. Check the return
269	* value with IS_ERR() and if true, the function failed and the error code is
270	* obtained from PTR_ERR().
271	*/
272	struct inode ntfs_index_iget(struct* inode base_vi, ntfschar name,
273	u32 name_len)
274	{
275	struct inode *vi;
276	int err;
277	ntfs_attr na;
278
279	na.mft_no = base_vi->i_ino;
280	na.type = AT_INDEX_ALLOCATION;
281	na.name = name;
282	na.name_len = name_len;
283
284	vi = iget5_locked(base_vi->i_sb, na.mft_no, test: ntfs_test_inode,
285	set: ntfs_init_locked_inode, &na);
286	if (unlikely(!vi))
287	return ERR_PTR(error: -ENOMEM);
288
289	err = `0`;
290
291	/ If this is a freshly allocated inode, need to read it now. /
292	if (vi->i_state & I_NEW) {
293	err = ntfs_read_locked_index_inode(base_vi, vi);
294	unlock_new_inode(vi);
295	}
296	/*
297	* There is no point in keeping bad index inodes around. This also
298	* simplifies things in that we never need to check for bad index
299	* inodes elsewhere.
300	*/
301	if (unlikely(err)) {
302	iput(vi);
303	vi = ERR_PTR(error: err);
304	}
305	return vi;
306	}
307
308	struct inode ntfs_alloc_big_inode(struct* super_block *sb)
309	{
310	ntfs_inode *ni;
311
312	ntfs_debug("Entering.");
313	ni = alloc_inode_sb(sb, cache: ntfs_big_inode_cache, GFP_NOFS);
314	if (likely(ni != NULL)) {
315	ni->state = `0`;
316	return VFS_I(ni);
317	}
318	ntfs_error(sb, "Allocation of NTFS big inode structure failed.");
319	return NULL;
320	}
321
322	void ntfs_free_big_inode(struct inode *inode)
323	{
324	kmem_cache_free(s: ntfs_big_inode_cache, objp: NTFS_I(inode));
325	}
326
327	static inline ntfs_inode ntfs_alloc_extent_inode(void*)
328	{
329	ntfs_inode *ni;
330
331	ntfs_debug("Entering.");
332	ni = kmem_cache_alloc(cachep: ntfs_inode_cache, GFP_NOFS);
333	if (likely(ni != NULL)) {
334	ni->state = `0`;
335	return ni;
336	}
337	ntfs_error(NULL, "Allocation of NTFS inode structure failed.");
338	return NULL;
339	}
340
341	static void ntfs_destroy_extent_inode(ntfs_inode *ni)
342	{
343	ntfs_debug("Entering.");
344	BUG_ON(ni->page);
345	if (!atomic_dec_and_test(v: &ni->count))
346	BUG();
347	kmem_cache_free(s: ntfs_inode_cache, objp: ni);
348	}
349
350	/*
351	* The attribute runlist lock has separate locking rules from the
352	* normal runlist lock, so split the two lock-classes:
353	*/
354	static struct lock_class_key attr_list_rl_lock_class;
355
356	/**
357	* __ntfs_init_inode - initialize ntfs specific part of an inode
358	* @sb: super block of mounted volume
359	* @ni: freshly allocated ntfs inode which to initialize
360	*
361	* Initialize an ntfs inode to defaults.
362	*
363	* NOTE: ni->mft_no, ni->state, ni->type, ni->name, and ni->name_len are left
364	* untouched. Make sure to initialize them elsewhere.
365	*
366	* Return zero on success and -ENOMEM on error.
367	*/
368	void __ntfs_init_inode(struct super_block sb, ntfs_inode ni)
369	{
370	ntfs_debug("Entering.");
371	rwlock_init(&ni->size_lock);
372	ni->initialized_size = ni->allocated_size = `0`;
373	ni->seq_no = `0`;
374	atomic_set(v: &ni->count, i: `1`);
375	ni->vol = NTFS_SB(sb);
376	ntfs_init_runlist(rl: &ni->runlist);
377	mutex_init(&ni->mrec_lock);
378	ni->page = NULL;
379	ni->page_ofs = `0`;
380	ni->attr_list_size = `0`;
381	ni->attr_list = NULL;
382	ntfs_init_runlist(rl: &ni->attr_list_rl);
383	lockdep_set_class(&ni->attr_list_rl.lock,
384	&attr_list_rl_lock_class);
385	ni->itype.index.block_size = `0`;
386	ni->itype.index.vcn_size = `0`;
387	ni->itype.index.collation_rule = `0`;
388	ni->itype.index.block_size_bits = `0`;
389	ni->itype.index.vcn_size_bits = `0`;
390	mutex_init(&ni->extent_lock);
391	ni->nr_extents = `0`;
392	ni->ext.base_ntfs_ino = NULL;
393	}
394
395	/*
396	* Extent inodes get MFT-mapped in a nested way, while the base inode
397	* is still mapped. Teach this nesting to the lock validator by creating
398	* a separate class for nested inode's mrec_lock's:
399	*/
400	static struct lock_class_key extent_inode_mrec_lock_key;
401
402	inline ntfs_inode ntfs_new_extent_inode(struct* super_block *sb,
403	unsigned long mft_no)
404	{
405	ntfs_inode *ni = ntfs_alloc_extent_inode();
406
407	ntfs_debug("Entering.");
408	if (likely(ni != NULL)) {
409	__ntfs_init_inode(sb, ni);
410	lockdep_set_class(&ni->mrec_lock, &extent_inode_mrec_lock_key);
411	ni->mft_no = mft_no;
412	ni->type = AT_UNUSED;
413	ni->name = NULL;
414	ni->name_len = `0`;
415	}
416	return ni;
417	}
418
419	/**
420	* ntfs_is_extended_system_file - check if a file is in the $Extend directory
421	* @ctx: initialized attribute search context
422	*
423	* Search all file name attributes in the inode described by the attribute
424	* search context @ctx and check if any of the names are in the $Extend system
425	* directory.
426	*
427	* Return values:
428	* 1: file is in $Extend directory
429	* 0: file is not in $Extend directory
430	* -errno: failed to determine if the file is in the $Extend directory
431	*/
432	static int ntfs_is_extended_system_file(ntfs_attr_search_ctx *ctx)
433	{
434	int nr_links, err;
435
436	/ Restart search. /
437	ntfs_attr_reinit_search_ctx(ctx);
438
439	/ Get number of hard links. /
440	nr_links = le16_to_cpu(ctx->mrec->link_count);
441
442	/ Loop through all hard links. /
443	while (!(err = ntfs_attr_lookup(type: AT_FILE_NAME, NULL, name_len: `0`, ic: `0`, lowest_vcn: `0`, NULL, val_len: `0`,
444	ctx))) {
445	FILE_NAME_ATTR *file_name_attr;
446	ATTR_RECORD *attr = ctx->attr;
447	u8 p, p2;
448
449	nr_links--;
450	/*
451	* Maximum sanity checking as we are called on an inode that
452	* we suspect might be corrupt.
453	*/
454	p = (u8*)attr + le32_to_cpu(attr->length);
455	if (p < (u8)ctx->mrec \|\| (u8)p > (u8*)ctx->mrec +
456	le32_to_cpu(ctx->mrec->bytes_in_use)) {
457	err_corrupt_attr:
458	ntfs_error(ctx->ntfs_ino->vol->sb, "Corrupt file name "
459	"attribute. You should run chkdsk.");
460	return -EIO;
461	}
462	if (attr->non_resident) {
463	ntfs_error(ctx->ntfs_ino->vol->sb, "Non-resident file "
464	"name. You should run chkdsk.");
465	return -EIO;
466	}
467	if (attr->flags) {
468	ntfs_error(ctx->ntfs_ino->vol->sb, "File name with "
469	"invalid flags. You should run "
470	"chkdsk.");
471	return -EIO;
472	}
473	if (!(attr->data.resident.flags & RESIDENT_ATTR_IS_INDEXED)) {
474	ntfs_error(ctx->ntfs_ino->vol->sb, "Unindexed file "
475	"name. You should run chkdsk.");
476	return -EIO;
477	}
478	file_name_attr = (FILE_NAME_ATTR)((u8)attr +
479	le16_to_cpu(attr->data.resident.value_offset));
480	p2 = (u8 *)file_name_attr + le32_to_cpu(attr->data.resident.value_length);
481	if (p2 < (u8*)attr \|\| p2 > p)
482	goto err_corrupt_attr;
483	/ This attribute is ok, but is it in the $Extend directory? /
484	if (MREF_LE(file_name_attr->parent_directory) == FILE_Extend)
485	return `1`; / YES, it's an extended system file. /
486	}
487	if (unlikely(err != -ENOENT))
488	return err;
489	if (unlikely(nr_links)) {
490	ntfs_error(ctx->ntfs_ino->vol->sb, "Inode hard link count "
491	"doesn't match number of name attributes. You "
492	"should run chkdsk.");
493	return -EIO;
494	}
495	return `0`; / NO, it is not an extended system file. /
496	}
497
498	/**
499	* ntfs_read_locked_inode - read an inode from its device
500	* @vi: inode to read
501	*
502	* ntfs_read_locked_inode() is called from ntfs_iget() to read the inode
503	* described by @vi into memory from the device.
504	*
505	* The only fields in @vi that we need to/can look at when the function is
506	* called are i_sb, pointing to the mounted device's super block, and i_ino,
507	* the number of the inode to load.
508	*
509	* ntfs_read_locked_inode() maps, pins and locks the mft record number i_ino
510	* for reading and sets up the necessary @vi fields as well as initializing
511	* the ntfs inode.
512	*
513	* Q: What locks are held when the function is called?
514	* A: i_state has I_NEW set, hence the inode is locked, also
515	* i_count is set to 1, so it is not going to go away
516	* i_flags is set to 0 and we have no business touching it. Only an ioctl()
517	* is allowed to write to them. We should of course be honouring them but
518	* we need to do that using the IS_* macros defined in include/linux/fs.h.
519	* In any case ntfs_read_locked_inode() has nothing to do with i_flags.
520	*
521	* Return 0 on success and -errno on error. In the error case, the inode will
522	* have had make_bad_inode() executed on it.
523	*/
524	static int ntfs_read_locked_inode(struct inode *vi)
525	{
526	ntfs_volume *vol = NTFS_SB(sb: vi->i_sb);
527	ntfs_inode *ni;
528	struct inode *bvi;
529	MFT_RECORD *m;
530	ATTR_RECORD *a;
531	STANDARD_INFORMATION *si;
532	ntfs_attr_search_ctx *ctx;
533	int err = `0`;
534
535	ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
536
537	/ Setup the generic vfs inode parts now. /
538	vi->i_uid = vol->uid;
539	vi->i_gid = vol->gid;
540	vi->i_mode = `0`;
541
542	/*
543	* Initialize the ntfs specific part of @vi special casing
544	* FILE_MFT which we need to do at mount time.
545	*/
546	if (vi->i_ino != FILE_MFT)
547	ntfs_init_big_inode(vi);
548	ni = NTFS_I(inode: vi);
549
550	m = map_mft_record(ni);
551	if (IS_ERR(ptr: m)) {
552	err = PTR_ERR(ptr: m);
553	goto err_out;
554	}
555	ctx = ntfs_attr_get_search_ctx(ni, mrec: m);
556	if (!ctx) {
557	err = -ENOMEM;
558	goto unm_err_out;
559	}
560
561	if (!(m->flags & MFT_RECORD_IN_USE)) {
562	ntfs_error(vi->i_sb, "Inode is not in use!");
563	goto unm_err_out;
564	}
565	if (m->base_mft_record) {
566	ntfs_error(vi->i_sb, "Inode is an extent inode!");
567	goto unm_err_out;
568	}
569
570	/ Transfer information from mft record into vfs and ntfs inodes. /
571	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
572
573	/*
574	* FIXME: Keep in mind that link_count is two for files which have both
575	* a long file name and a short file name as separate entries, so if
576	* we are hiding short file names this will be too high. Either we need
577	* to account for the short file names by subtracting them or we need
578	* to make sure we delete files even though i_nlink is not zero which
579	* might be tricky due to vfs interactions. Need to think about this
580	* some more when implementing the unlink command.
581	*/
582	set_nlink(inode: vi, le16_to_cpu(m->link_count));
583	/*
584	* FIXME: Reparse points can have the directory bit set even though
585	* they would be S_IFLNK. Need to deal with this further below when we
586	* implement reparse points / symbolic links but it will do for now.
587	* Also if not a directory, it could be something else, rather than
588	* a regular file. But again, will do for now.
589	*/
590	/ Everyone gets all permissions. /
591	vi->i_mode \|= S_IRWXUGO;
592	/ If read-only, no one gets write permissions. /
593	if (IS_RDONLY(vi))
594	vi->i_mode &= ~S_IWUGO;
595	if (m->flags & MFT_RECORD_IS_DIRECTORY) {
596	vi->i_mode \|= S_IFDIR;
597	/*
598	* Apply the directory permissions mask set in the mount
599	* options.
600	*/
601	vi->i_mode &= ~vol->dmask;
602	/ Things break without this kludge! /
603	if (vi->i_nlink > `1`)
604	set_nlink(inode: vi, nlink: `1`);
605	} else {
606	vi->i_mode \|= S_IFREG;
607	/ Apply the file permissions mask set in the mount options. /
608	vi->i_mode &= ~vol->fmask;
609	}
610	/*
611	* Find the standard information attribute in the mft record. At this
612	* stage we haven't setup the attribute list stuff yet, so this could
613	* in fact fail if the standard information is in an extent record, but
614	* I don't think this actually ever happens.
615	*/
616	err = ntfs_attr_lookup(type: AT_STANDARD_INFORMATION, NULL, name_len: `0`, ic: `0`, lowest_vcn: `0`, NULL, val_len: `0`,
617	ctx);
618	if (unlikely(err)) {
619	if (err == -ENOENT) {
620	/*
621	* TODO: We should be performing a hot fix here (if the
622	* recover mount option is set) by creating a new
623	* attribute.
624	*/
625	ntfs_error(vi->i_sb, "$STANDARD_INFORMATION attribute "
626	"is missing.");
627	}
628	goto unm_err_out;
629	}
630	a = ctx->attr;
631	/ Get the standard information attribute value. /
632	if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
633	+ le32_to_cpu(a->data.resident.value_length) >
634	(u8 *)ctx->mrec + vol->mft_record_size) {
635	ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
636	goto unm_err_out;
637	}
638	si = (STANDARD_INFORMATION)((u8)a +
639	le16_to_cpu(a->data.resident.value_offset));
640
641	/ Transfer information from the standard information into vi. /
642	/*
643	* Note: The i_?times do not quite map perfectly onto the NTFS times,
644	* but they are close enough, and in the end it doesn't really matter
645	* that much...
646	*/
647	/*
648	* mtime is the last change of the data within the file. Not changed
649	* when only metadata is changed, e.g. a rename doesn't affect mtime.
650	*/
651	inode_set_mtime_to_ts(inode: vi, ts: ntfs2utc(time: si->last_data_change_time));
652	/*
653	* ctime is the last change of the metadata of the file. This obviously
654	* always changes, when mtime is changed. ctime can be changed on its
655	* own, mtime is then not changed, e.g. when a file is renamed.
656	*/
657	inode_set_ctime_to_ts(inode: vi, ts: ntfs2utc(time: si->last_mft_change_time));
658	/*
659	* Last access to the data within the file. Not changed during a rename
660	* for example but changed whenever the file is written to.
661	*/
662	inode_set_atime_to_ts(inode: vi, ts: ntfs2utc(time: si->last_access_time));
663
664	/ Find the attribute list attribute if present. /
665	ntfs_attr_reinit_search_ctx(ctx);
666	err = ntfs_attr_lookup(type: AT_ATTRIBUTE_LIST, NULL, name_len: `0`, ic: `0`, lowest_vcn: `0`, NULL, val_len: `0`, ctx);
667	if (err) {
668	if (unlikely(err != -ENOENT)) {
669	ntfs_error(vi->i_sb, "Failed to lookup attribute list "
670	"attribute.");
671	goto unm_err_out;
672	}
673	} else / if (!err) / {
674	if (vi->i_ino == FILE_MFT)
675	goto skip_attr_list_load;
676	ntfs_debug("Attribute list found in inode 0x%lx.", vi->i_ino);
677	NInoSetAttrList(ni);
678	a = ctx->attr;
679	if (a->flags & ATTR_COMPRESSION_MASK) {
680	ntfs_error(vi->i_sb, "Attribute list attribute is "
681	"compressed.");
682	goto unm_err_out;
683	}
684	if (a->flags & ATTR_IS_ENCRYPTED \|\|
685	a->flags & ATTR_IS_SPARSE) {
686	if (a->non_resident) {
687	ntfs_error(vi->i_sb, "Non-resident attribute "
688	"list attribute is encrypted/"
689	"sparse.");
690	goto unm_err_out;
691	}
692	ntfs_warning(vi->i_sb, "Resident attribute list "
693	"attribute in inode 0x%lx is marked "
694	"encrypted/sparse which is not true. "
695	"However, Windows allows this and "
696	"chkdsk does not detect or correct it "
697	"so we will just ignore the invalid "
698	"flags and pretend they are not set.",
699	vi->i_ino);
700	}
701	/ Now allocate memory for the attribute list. /
702	ni->attr_list_size = (u32)ntfs_attr_size(a);
703	ni->attr_list = ntfs_malloc_nofs(size: ni->attr_list_size);
704	if (!ni->attr_list) {
705	ntfs_error(vi->i_sb, "Not enough memory to allocate "
706	"buffer for attribute list.");
707	err = -ENOMEM;
708	goto unm_err_out;
709	}
710	if (a->non_resident) {
711	NInoSetAttrListNonResident(ni);
712	if (a->data.non_resident.lowest_vcn) {
713	ntfs_error(vi->i_sb, "Attribute list has non "
714	"zero lowest_vcn.");
715	goto unm_err_out;
716	}
717	/*
718	* Setup the runlist. No need for locking as we have
719	* exclusive access to the inode at this time.
720	*/
721	ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
722	attr: a, NULL);
723	if (IS_ERR(ptr: ni->attr_list_rl.rl)) {
724	err = PTR_ERR(ptr: ni->attr_list_rl.rl);
725	ni->attr_list_rl.rl = NULL;
726	ntfs_error(vi->i_sb, "Mapping pairs "
727	"decompression failed.");
728	goto unm_err_out;
729	}
730	/ Now load the attribute list. /
731	if ((err = load_attribute_list(vol, rl: &ni->attr_list_rl,
732	al_start: ni->attr_list, size: ni->attr_list_size,
733	initialized_size: sle64_to_cpu(x: a->data.non_resident.
734	initialized_size)))) {
735	ntfs_error(vi->i_sb, "Failed to load "
736	"attribute list attribute.");
737	goto unm_err_out;
738	}
739	} else / if (!a->non_resident) / {
740	if ((u8*)a + le16_to_cpu(a->data.resident.value_offset)
741	+ le32_to_cpu(
742	a->data.resident.value_length) >
743	(u8*)ctx->mrec + vol->mft_record_size) {
744	ntfs_error(vi->i_sb, "Corrupt attribute list "
745	"in inode.");
746	goto unm_err_out;
747	}
748	/ Now copy the attribute list. /
749	memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
750	a->data.resident.value_offset),
751	le32_to_cpu(
752	a->data.resident.value_length));
753	}
754	}
755	skip_attr_list_load:
756	/*
757	* If an attribute list is present we now have the attribute list value
758	* in ntfs_ino->attr_list and it is ntfs_ino->attr_list_size bytes.
759	*/
760	if (S_ISDIR(vi->i_mode)) {
761	loff_t bvi_size;
762	ntfs_inode *bni;
763	INDEX_ROOT *ir;
764	u8 ir_end, index_end;
765
766	/ It is a directory, find index root attribute. /
767	ntfs_attr_reinit_search_ctx(ctx);
768	err = ntfs_attr_lookup(type: AT_INDEX_ROOT, name: I30, name_len: `4`, ic: CASE_SENSITIVE,
769	lowest_vcn: `0`, NULL, val_len: `0`, ctx);
770	if (unlikely(err)) {
771	if (err == -ENOENT) {
772	// FIXME: File is corrupt! Hot-fix with empty
773	// index root attribute if recovery option is
774	// set.
775	ntfs_error(vi->i_sb, "$INDEX_ROOT attribute "
776	"is missing.");
777	}
778	goto unm_err_out;
779	}
780	a = ctx->attr;
781	/ Set up the state. /
782	if (unlikely(a->non_resident)) {
783	ntfs_error(vol->sb, "$INDEX_ROOT attribute is not "
784	"resident.");
785	goto unm_err_out;
786	}
787	/ Ensure the attribute name is placed before the value. /
788	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
789	le16_to_cpu(a->data.resident.value_offset)))) {
790	ntfs_error(vol->sb, "$INDEX_ROOT attribute name is "
791	"placed after the attribute value.");
792	goto unm_err_out;
793	}
794	/*
795	* Compressed/encrypted index root just means that the newly
796	* created files in that directory should be created compressed/
797	* encrypted. However index root cannot be both compressed and
798	* encrypted.
799	*/
800	if (a->flags & ATTR_COMPRESSION_MASK)
801	NInoSetCompressed(ni);
802	if (a->flags & ATTR_IS_ENCRYPTED) {
803	if (a->flags & ATTR_COMPRESSION_MASK) {
804	ntfs_error(vi->i_sb, "Found encrypted and "
805	"compressed attribute.");
806	goto unm_err_out;
807	}
808	NInoSetEncrypted(ni);
809	}
810	if (a->flags & ATTR_IS_SPARSE)
811	NInoSetSparse(ni);
812	ir = (INDEX_ROOT)((u8)a +
813	le16_to_cpu(a->data.resident.value_offset));
814	ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
815	if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
816	ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
817	"corrupt.");
818	goto unm_err_out;
819	}
820	index_end = (u8*)&ir->index +
821	le32_to_cpu(ir->index.index_length);
822	if (index_end > ir_end) {
823	ntfs_error(vi->i_sb, "Directory index is corrupt.");
824	goto unm_err_out;
825	}
826	if (ir->type != AT_FILE_NAME) {
827	ntfs_error(vi->i_sb, "Indexed attribute is not "
828	"$FILE_NAME.");
829	goto unm_err_out;
830	}
831	if (ir->collation_rule != COLLATION_FILE_NAME) {
832	ntfs_error(vi->i_sb, "Index collation rule is not "
833	"COLLATION_FILE_NAME.");
834	goto unm_err_out;
835	}
836	ni->itype.index.collation_rule = ir->collation_rule;
837	ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
838	if (ni->itype.index.block_size &
839	(ni->itype.index.block_size - `1`)) {
840	ntfs_error(vi->i_sb, "Index block size (%u) is not a "
841	"power of two.",
842	ni->itype.index.block_size);
843	goto unm_err_out;
844	}
845	if (ni->itype.index.block_size > PAGE_SIZE) {
846	ntfs_error(vi->i_sb, "Index block size (%u) > "
847	"PAGE_SIZE (%ld) is not "
848	"supported. Sorry.",
849	ni->itype.index.block_size,
850	PAGE_SIZE);
851	err = -EOPNOTSUPP;
852	goto unm_err_out;
853	}
854	if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
855	ntfs_error(vi->i_sb, "Index block size (%u) < "
856	"NTFS_BLOCK_SIZE (%i) is not "
857	"supported. Sorry.",
858	ni->itype.index.block_size,
859	NTFS_BLOCK_SIZE);
860	err = -EOPNOTSUPP;
861	goto unm_err_out;
862	}
863	ni->itype.index.block_size_bits =
864	ffs(ni->itype.index.block_size) - `1`;
865	/ Determine the size of a vcn in the directory index. /
866	if (vol->cluster_size <= ni->itype.index.block_size) {
867	ni->itype.index.vcn_size = vol->cluster_size;
868	ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
869	} else {
870	ni->itype.index.vcn_size = vol->sector_size;
871	ni->itype.index.vcn_size_bits = vol->sector_size_bits;
872	}
873
874	/ Setup the index allocation attribute, even if not present. /
875	NInoSetMstProtected(ni);
876	ni->type = AT_INDEX_ALLOCATION;
877	ni->name = I30;
878	ni->name_len = `4`;
879
880	if (!(ir->index.flags & LARGE_INDEX)) {
881	/ No index allocation. /
882	vi->i_size = ni->initialized_size =
883	ni->allocated_size = `0`;
884	/ We are done with the mft record, so we release it. /
885	ntfs_attr_put_search_ctx(ctx);
886	unmap_mft_record(ni);
887	m = NULL;
888	ctx = NULL;
889	goto skip_large_dir_stuff;
890	} / LARGE_INDEX: Index allocation present. Setup state. /
891	NInoSetIndexAllocPresent(ni);
892	/ Find index allocation attribute. /
893	ntfs_attr_reinit_search_ctx(ctx);
894	err = ntfs_attr_lookup(type: AT_INDEX_ALLOCATION, name: I30, name_len: `4`,
895	ic: CASE_SENSITIVE, lowest_vcn: `0`, NULL, val_len: `0`, ctx);
896	if (unlikely(err)) {
897	if (err == -ENOENT)
898	ntfs_error(vi->i_sb, "$INDEX_ALLOCATION "
899	"attribute is not present but "
900	"$INDEX_ROOT indicated it is.");
901	else
902	ntfs_error(vi->i_sb, "Failed to lookup "
903	"$INDEX_ALLOCATION "
904	"attribute.");
905	goto unm_err_out;
906	}
907	a = ctx->attr;
908	if (!a->non_resident) {
909	ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
910	"is resident.");
911	goto unm_err_out;
912	}
913	/*
914	* Ensure the attribute name is placed before the mapping pairs
915	* array.
916	*/
917	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
918	le16_to_cpu(
919	a->data.non_resident.mapping_pairs_offset)))) {
920	ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name "
921	"is placed after the mapping pairs "
922	"array.");
923	goto unm_err_out;
924	}
925	if (a->flags & ATTR_IS_ENCRYPTED) {
926	ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
927	"is encrypted.");
928	goto unm_err_out;
929	}
930	if (a->flags & ATTR_IS_SPARSE) {
931	ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
932	"is sparse.");
933	goto unm_err_out;
934	}
935	if (a->flags & ATTR_COMPRESSION_MASK) {
936	ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute "
937	"is compressed.");
938	goto unm_err_out;
939	}
940	if (a->data.non_resident.lowest_vcn) {
941	ntfs_error(vi->i_sb, "First extent of "
942	"$INDEX_ALLOCATION attribute has non "
943	"zero lowest_vcn.");
944	goto unm_err_out;
945	}
946	vi->i_size = sle64_to_cpu(x: a->data.non_resident.data_size);
947	ni->initialized_size = sle64_to_cpu(
948	x: a->data.non_resident.initialized_size);
949	ni->allocated_size = sle64_to_cpu(
950	x: a->data.non_resident.allocated_size);
951	/*
952	* We are done with the mft record, so we release it. Otherwise
953	* we would deadlock in ntfs_attr_iget().
954	*/
955	ntfs_attr_put_search_ctx(ctx);
956	unmap_mft_record(ni);
957	m = NULL;
958	ctx = NULL;
959	/ Get the index bitmap attribute inode. /
960	bvi = ntfs_attr_iget(base_vi: vi, type: AT_BITMAP, name: I30, name_len: `4`);
961	if (IS_ERR(ptr: bvi)) {
962	ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
963	err = PTR_ERR(ptr: bvi);
964	goto unm_err_out;
965	}
966	bni = NTFS_I(inode: bvi);
967	if (NInoCompressed(ni: bni) \|\| NInoEncrypted(ni: bni) \|\|
968	NInoSparse(ni: bni)) {
969	ntfs_error(vi->i_sb, "$BITMAP attribute is compressed "
970	"and/or encrypted and/or sparse.");
971	goto iput_unm_err_out;
972	}
973	/ Consistency check bitmap size vs. index allocation size. /
974	bvi_size = i_size_read(inode: bvi);
975	if ((bvi_size << `3`) < (vi->i_size >>
976	ni->itype.index.block_size_bits)) {
977	ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) "
978	"for index allocation (0x%llx).",
979	bvi_size << `3`, vi->i_size);
980	goto iput_unm_err_out;
981	}
982	/ No longer need the bitmap attribute inode. /
983	iput(bvi);
984	skip_large_dir_stuff:
985	/ Setup the operations for this inode. /
986	vi->i_op = &ntfs_dir_inode_ops;
987	vi->i_fop = &ntfs_dir_ops;
988	vi->i_mapping->a_ops = &ntfs_mst_aops;
989	} else {
990	/ It is a file. /
991	ntfs_attr_reinit_search_ctx(ctx);
992
993	/ Setup the data attribute, even if not present. /
994	ni->type = AT_DATA;
995	ni->name = NULL;
996	ni->name_len = `0`;
997
998	/ Find first extent of the unnamed data attribute. /
999	err = ntfs_attr_lookup(type: AT_DATA, NULL, name_len: `0`, ic: `0`, lowest_vcn: `0`, NULL, val_len: `0`, ctx);
1000	if (unlikely(err)) {
1001	vi->i_size = ni->initialized_size =
1002	ni->allocated_size = `0`;
1003	if (err != -ENOENT) {
1004	ntfs_error(vi->i_sb, "Failed to lookup $DATA "
1005	"attribute.");
1006	goto unm_err_out;
1007	}
1008	/*
1009	* FILE_Secure does not have an unnamed $DATA
1010	* attribute, so we special case it here.
1011	*/
1012	if (vi->i_ino == FILE_Secure)
1013	goto no_data_attr_special_case;
1014	/*
1015	* Most if not all the system files in the $Extend
1016	* system directory do not have unnamed data
1017	* attributes so we need to check if the parent
1018	* directory of the file is FILE_Extend and if it is
1019	* ignore this error. To do this we need to get the
1020	* name of this inode from the mft record as the name
1021	* contains the back reference to the parent directory.
1022	*/
1023	if (ntfs_is_extended_system_file(ctx) > `0`)
1024	goto no_data_attr_special_case;
1025	// FIXME: File is corrupt! Hot-fix with empty data
1026	// attribute if recovery option is set.
1027	ntfs_error(vi->i_sb, "$DATA attribute is missing.");
1028	goto unm_err_out;
1029	}
1030	a = ctx->attr;
1031	/ Setup the state. /
1032	if (a->flags & (ATTR_COMPRESSION_MASK \| ATTR_IS_SPARSE)) {
1033	if (a->flags & ATTR_COMPRESSION_MASK) {
1034	NInoSetCompressed(ni);
1035	if (vol->cluster_size > `4096`) {
1036	ntfs_error(vi->i_sb, "Found "
1037	"compressed data but "
1038	"compression is "
1039	"disabled due to "
1040	"cluster size (%i) > "
1041	"4kiB.",
1042	vol->cluster_size);
1043	goto unm_err_out;
1044	}
1045	if ((a->flags & ATTR_COMPRESSION_MASK)
1046	!= ATTR_IS_COMPRESSED) {
1047	ntfs_error(vi->i_sb, "Found unknown "
1048	"compression method "
1049	"or corrupt file.");
1050	goto unm_err_out;
1051	}
1052	}
1053	if (a->flags & ATTR_IS_SPARSE)
1054	NInoSetSparse(ni);
1055	}
1056	if (a->flags & ATTR_IS_ENCRYPTED) {
1057	if (NInoCompressed(ni)) {
1058	ntfs_error(vi->i_sb, "Found encrypted and "
1059	"compressed data.");
1060	goto unm_err_out;
1061	}
1062	NInoSetEncrypted(ni);
1063	}
1064	if (a->non_resident) {
1065	NInoSetNonResident(ni);
1066	if (NInoCompressed(ni) \|\| NInoSparse(ni)) {
1067	if (NInoCompressed(ni) && a->data.non_resident.
1068	compression_unit != `4`) {
1069	ntfs_error(vi->i_sb, "Found "
1070	"non-standard "
1071	"compression unit (%u "
1072	"instead of 4). "
1073	"Cannot handle this.",
1074	a->data.non_resident.
1075	compression_unit);
1076	err = -EOPNOTSUPP;
1077	goto unm_err_out;
1078	}
1079	if (a->data.non_resident.compression_unit) {
1080	ni->itype.compressed.block_size = `1U` <<
1081	(a->data.non_resident.
1082	compression_unit +
1083	vol->cluster_size_bits);
1084	ni->itype.compressed.block_size_bits =
1085	ffs(ni->itype.
1086	compressed.
1087	block_size) - `1`;
1088	ni->itype.compressed.block_clusters =
1089	`1U` << a->data.
1090	non_resident.
1091	compression_unit;
1092	} else {
1093	ni->itype.compressed.block_size = `0`;
1094	ni->itype.compressed.block_size_bits =
1095	`0`;
1096	ni->itype.compressed.block_clusters =
1097	`0`;
1098	}
1099	ni->itype.compressed.size = sle64_to_cpu(
1100	x: a->data.non_resident.
1101	compressed_size);
1102	}
1103	if (a->data.non_resident.lowest_vcn) {
1104	ntfs_error(vi->i_sb, "First extent of $DATA "
1105	"attribute has non zero "
1106	"lowest_vcn.");
1107	goto unm_err_out;
1108	}
1109	vi->i_size = sle64_to_cpu(
1110	x: a->data.non_resident.data_size);
1111	ni->initialized_size = sle64_to_cpu(
1112	x: a->data.non_resident.initialized_size);
1113	ni->allocated_size = sle64_to_cpu(
1114	x: a->data.non_resident.allocated_size);
1115	} else { / Resident attribute. /
1116	vi->i_size = ni->initialized_size = le32_to_cpu(
1117	a->data.resident.value_length);
1118	ni->allocated_size = le32_to_cpu(a->length) -
1119	le16_to_cpu(
1120	a->data.resident.value_offset);
1121	if (vi->i_size > ni->allocated_size) {
1122	ntfs_error(vi->i_sb, "Resident data attribute "
1123	"is corrupt (size exceeds "
1124	"allocation).");
1125	goto unm_err_out;
1126	}
1127	}
1128	no_data_attr_special_case:
1129	/ We are done with the mft record, so we release it. /
1130	ntfs_attr_put_search_ctx(ctx);
1131	unmap_mft_record(ni);
1132	m = NULL;
1133	ctx = NULL;
1134	/ Setup the operations for this inode. /
1135	vi->i_op = &ntfs_file_inode_ops;
1136	vi->i_fop = &ntfs_file_ops;
1137	vi->i_mapping->a_ops = &ntfs_normal_aops;
1138	if (NInoMstProtected(ni))
1139	vi->i_mapping->a_ops = &ntfs_mst_aops;
1140	else if (NInoCompressed(ni))
1141	vi->i_mapping->a_ops = &ntfs_compressed_aops;
1142	}
1143	/*
1144	* The number of 512-byte blocks used on disk (for stat). This is in so
1145	* far inaccurate as it doesn't account for any named streams or other
1146	* special non-resident attributes, but that is how Windows works, too,
1147	* so we are at least consistent with Windows, if not entirely
1148	* consistent with the Linux Way. Doing it the Linux Way would cause a
1149	* significant slowdown as it would involve iterating over all
1150	* attributes in the mft record and adding the allocated/compressed
1151	* sizes of all non-resident attributes present to give us the Linux
1152	* correct size that should go into i_blocks (after division by 512).
1153	*/
1154	if (S_ISREG(vi->i_mode) && (NInoCompressed(ni) \|\| NInoSparse(ni)))
1155	vi->i_blocks = ni->itype.compressed.size >> `9`;
1156	else
1157	vi->i_blocks = ni->allocated_size >> `9`;
1158	ntfs_debug("Done.");
1159	return `0`;
1160	iput_unm_err_out:
1161	iput(bvi);
1162	unm_err_out:
1163	if (!err)
1164	err = -EIO;
1165	if (ctx)
1166	ntfs_attr_put_search_ctx(ctx);
1167	if (m)
1168	unmap_mft_record(ni);
1169	err_out:
1170	ntfs_error(vol->sb, "Failed with error code %i. Marking corrupt "
1171	"inode 0x%lx as bad. Run chkdsk.", err, vi->i_ino);
1172	make_bad_inode(vi);
1173	if (err != -EOPNOTSUPP && err != -ENOMEM)
1174	NVolSetErrors(vol);
1175	return err;
1176	}
1177
1178	/**
1179	* ntfs_read_locked_attr_inode - read an attribute inode from its base inode
1180	* @base_vi: base inode
1181	* @vi: attribute inode to read
1182	*
1183	* ntfs_read_locked_attr_inode() is called from ntfs_attr_iget() to read the
1184	* attribute inode described by @vi into memory from the base mft record
1185	* described by @base_ni.
1186	*
1187	* ntfs_read_locked_attr_inode() maps, pins and locks the base inode for
1188	* reading and looks up the attribute described by @vi before setting up the
1189	* necessary fields in @vi as well as initializing the ntfs inode.
1190	*
1191	* Q: What locks are held when the function is called?
1192	* A: i_state has I_NEW set, hence the inode is locked, also
1193	* i_count is set to 1, so it is not going to go away
1194	*
1195	* Return 0 on success and -errno on error. In the error case, the inode will
1196	* have had make_bad_inode() executed on it.
1197	*
1198	* Note this cannot be called for AT_INDEX_ALLOCATION.
1199	*/
1200	static int ntfs_read_locked_attr_inode(struct inode base_vi, struct* inode *vi)
1201	{
1202	ntfs_volume *vol = NTFS_SB(sb: vi->i_sb);
1203	ntfs_inode ni, base_ni;
1204	MFT_RECORD *m;
1205	ATTR_RECORD *a;
1206	ntfs_attr_search_ctx *ctx;
1207	int err = `0`;
1208
1209	ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
1210
1211	ntfs_init_big_inode(vi);
1212
1213	ni = NTFS_I(inode: vi);
1214	base_ni = NTFS_I(inode: base_vi);
1215
1216	/ Just mirror the values from the base inode. /
1217	vi->i_uid = base_vi->i_uid;
1218	vi->i_gid = base_vi->i_gid;
1219	set_nlink(inode: vi, nlink: base_vi->i_nlink);
1220	inode_set_mtime_to_ts(inode: vi, ts: inode_get_mtime(inode: base_vi));
1221	inode_set_ctime_to_ts(inode: vi, ts: inode_get_ctime(inode: base_vi));
1222	inode_set_atime_to_ts(inode: vi, ts: inode_get_atime(inode: base_vi));
1223	vi->i_generation = ni->seq_no = base_ni->seq_no;
1224
1225	/ Set inode type to zero but preserve permissions. /
1226	vi->i_mode = base_vi->i_mode & ~S_IFMT;
1227
1228	m = map_mft_record(ni: base_ni);
1229	if (IS_ERR(ptr: m)) {
1230	err = PTR_ERR(ptr: m);
1231	goto err_out;
1232	}
1233	ctx = ntfs_attr_get_search_ctx(ni: base_ni, mrec: m);
1234	if (!ctx) {
1235	err = -ENOMEM;
1236	goto unm_err_out;
1237	}
1238	/ Find the attribute. /
1239	err = ntfs_attr_lookup(type: ni->type, name: ni->name, name_len: ni->name_len,
1240	ic: CASE_SENSITIVE, lowest_vcn: `0`, NULL, val_len: `0`, ctx);
1241	if (unlikely(err))
1242	goto unm_err_out;
1243	a = ctx->attr;
1244	if (a->flags & (ATTR_COMPRESSION_MASK \| ATTR_IS_SPARSE)) {
1245	if (a->flags & ATTR_COMPRESSION_MASK) {
1246	NInoSetCompressed(ni);
1247	if ((ni->type != AT_DATA) \|\| (ni->type == AT_DATA &&
1248	ni->name_len)) {
1249	ntfs_error(vi->i_sb, "Found compressed "
1250	"non-data or named data "
1251	"attribute. Please report "
1252	"you saw this message to "
1253	"linux-ntfs-dev@lists."
1254	"sourceforge.net");
1255	goto unm_err_out;
1256	}
1257	if (vol->cluster_size > `4096`) {
1258	ntfs_error(vi->i_sb, "Found compressed "
1259	"attribute but compression is "
1260	"disabled due to cluster size "
1261	"(%i) > 4kiB.",
1262	vol->cluster_size);
1263	goto unm_err_out;
1264	}
1265	if ((a->flags & ATTR_COMPRESSION_MASK) !=
1266	ATTR_IS_COMPRESSED) {
1267	ntfs_error(vi->i_sb, "Found unknown "
1268	"compression method.");
1269	goto unm_err_out;
1270	}
1271	}
1272	/*
1273	* The compressed/sparse flag set in an index root just means
1274	* to compress all files.
1275	*/
1276	if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1277	ntfs_error(vi->i_sb, "Found mst protected attribute "
1278	"but the attribute is %s. Please "
1279	"report you saw this message to "
1280	"linux-ntfs-dev@lists.sourceforge.net",
1281	NInoCompressed(ni) ? "compressed" :
1282	"sparse");
1283	goto unm_err_out;
1284	}
1285	if (a->flags & ATTR_IS_SPARSE)
1286	NInoSetSparse(ni);
1287	}
1288	if (a->flags & ATTR_IS_ENCRYPTED) {
1289	if (NInoCompressed(ni)) {
1290	ntfs_error(vi->i_sb, "Found encrypted and compressed "
1291	"data.");
1292	goto unm_err_out;
1293	}
1294	/*
1295	* The encryption flag set in an index root just means to
1296	* encrypt all files.
1297	*/
1298	if (NInoMstProtected(ni) && ni->type != AT_INDEX_ROOT) {
1299	ntfs_error(vi->i_sb, "Found mst protected attribute "
1300	"but the attribute is encrypted. "
1301	"Please report you saw this message "
1302	"to linux-ntfs-dev@lists.sourceforge."
1303	"net");
1304	goto unm_err_out;
1305	}
1306	if (ni->type != AT_DATA) {
1307	ntfs_error(vi->i_sb, "Found encrypted non-data "
1308	"attribute.");
1309	goto unm_err_out;
1310	}
1311	NInoSetEncrypted(ni);
1312	}
1313	if (!a->non_resident) {
1314	/ Ensure the attribute name is placed before the value. /
1315	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1316	le16_to_cpu(a->data.resident.value_offset)))) {
1317	ntfs_error(vol->sb, "Attribute name is placed after "
1318	"the attribute value.");
1319	goto unm_err_out;
1320	}
1321	if (NInoMstProtected(ni)) {
1322	ntfs_error(vi->i_sb, "Found mst protected attribute "
1323	"but the attribute is resident. "
1324	"Please report you saw this message to "
1325	"linux-ntfs-dev@lists.sourceforge.net");
1326	goto unm_err_out;
1327	}
1328	vi->i_size = ni->initialized_size = le32_to_cpu(
1329	a->data.resident.value_length);
1330	ni->allocated_size = le32_to_cpu(a->length) -
1331	le16_to_cpu(a->data.resident.value_offset);
1332	if (vi->i_size > ni->allocated_size) {
1333	ntfs_error(vi->i_sb, "Resident attribute is corrupt "
1334	"(size exceeds allocation).");
1335	goto unm_err_out;
1336	}
1337	} else {
1338	NInoSetNonResident(ni);
1339	/*
1340	* Ensure the attribute name is placed before the mapping pairs
1341	* array.
1342	*/
1343	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1344	le16_to_cpu(
1345	a->data.non_resident.mapping_pairs_offset)))) {
1346	ntfs_error(vol->sb, "Attribute name is placed after "
1347	"the mapping pairs array.");
1348	goto unm_err_out;
1349	}
1350	if (NInoCompressed(ni) \|\| NInoSparse(ni)) {
1351	if (NInoCompressed(ni) && a->data.non_resident.
1352	compression_unit != `4`) {
1353	ntfs_error(vi->i_sb, "Found non-standard "
1354	"compression unit (%u instead "
1355	"of 4). Cannot handle this.",
1356	a->data.non_resident.
1357	compression_unit);
1358	err = -EOPNOTSUPP;
1359	goto unm_err_out;
1360	}
1361	if (a->data.non_resident.compression_unit) {
1362	ni->itype.compressed.block_size = `1U` <<
1363	(a->data.non_resident.
1364	compression_unit +
1365	vol->cluster_size_bits);
1366	ni->itype.compressed.block_size_bits =
1367	ffs(ni->itype.compressed.
1368	block_size) - `1`;
1369	ni->itype.compressed.block_clusters = `1U` <<
1370	a->data.non_resident.
1371	compression_unit;
1372	} else {
1373	ni->itype.compressed.block_size = `0`;
1374	ni->itype.compressed.block_size_bits = `0`;
1375	ni->itype.compressed.block_clusters = `0`;
1376	}
1377	ni->itype.compressed.size = sle64_to_cpu(
1378	x: a->data.non_resident.compressed_size);
1379	}
1380	if (a->data.non_resident.lowest_vcn) {
1381	ntfs_error(vi->i_sb, "First extent of attribute has "
1382	"non-zero lowest_vcn.");
1383	goto unm_err_out;
1384	}
1385	vi->i_size = sle64_to_cpu(x: a->data.non_resident.data_size);
1386	ni->initialized_size = sle64_to_cpu(
1387	x: a->data.non_resident.initialized_size);
1388	ni->allocated_size = sle64_to_cpu(
1389	x: a->data.non_resident.allocated_size);
1390	}
1391	vi->i_mapping->a_ops = &ntfs_normal_aops;
1392	if (NInoMstProtected(ni))
1393	vi->i_mapping->a_ops = &ntfs_mst_aops;
1394	else if (NInoCompressed(ni))
1395	vi->i_mapping->a_ops = &ntfs_compressed_aops;
1396	if ((NInoCompressed(ni) \|\| NInoSparse(ni)) && ni->type != AT_INDEX_ROOT)
1397	vi->i_blocks = ni->itype.compressed.size >> `9`;
1398	else
1399	vi->i_blocks = ni->allocated_size >> `9`;
1400	/*
1401	* Make sure the base inode does not go away and attach it to the
1402	* attribute inode.
1403	*/
1404	igrab(base_vi);
1405	ni->ext.base_ntfs_ino = base_ni;
1406	ni->nr_extents = -`1`;
1407
1408	ntfs_attr_put_search_ctx(ctx);
1409	unmap_mft_record(ni: base_ni);
1410
1411	ntfs_debug("Done.");
1412	return `0`;
1413
1414	unm_err_out:
1415	if (!err)
1416	err = -EIO;
1417	if (ctx)
1418	ntfs_attr_put_search_ctx(ctx);
1419	unmap_mft_record(ni: base_ni);
1420	err_out:
1421	ntfs_error(vol->sb, "Failed with error code %i while reading attribute "
1422	"inode (mft_no 0x%lx, type 0x%x, name_len %i). "
1423	"Marking corrupt inode and base inode 0x%lx as bad. "
1424	"Run chkdsk.", err, vi->i_ino, ni->type, ni->name_len,
1425	base_vi->i_ino);
1426	make_bad_inode(vi);
1427	if (err != -ENOMEM)
1428	NVolSetErrors(vol);
1429	return err;
1430	}
1431
1432	/**
1433	* ntfs_read_locked_index_inode - read an index inode from its base inode
1434	* @base_vi: base inode
1435	* @vi: index inode to read
1436	*
1437	* ntfs_read_locked_index_inode() is called from ntfs_index_iget() to read the
1438	* index inode described by @vi into memory from the base mft record described
1439	* by @base_ni.
1440	*
1441	* ntfs_read_locked_index_inode() maps, pins and locks the base inode for
1442	* reading and looks up the attributes relating to the index described by @vi
1443	* before setting up the necessary fields in @vi as well as initializing the
1444	* ntfs inode.
1445	*
1446	* Note, index inodes are essentially attribute inodes (NInoAttr() is true)
1447	* with the attribute type set to AT_INDEX_ALLOCATION. Apart from that, they
1448	* are setup like directory inodes since directories are a special case of
1449	* indices ao they need to be treated in much the same way. Most importantly,
1450	* for small indices the index allocation attribute might not actually exist.
1451	* However, the index root attribute always exists but this does not need to
1452	* have an inode associated with it and this is why we define a new inode type
1453	* index. Also, like for directories, we need to have an attribute inode for
1454	* the bitmap attribute corresponding to the index allocation attribute and we
1455	* can store this in the appropriate field of the inode, just like we do for
1456	* normal directory inodes.
1457	*
1458	* Q: What locks are held when the function is called?
1459	* A: i_state has I_NEW set, hence the inode is locked, also
1460	* i_count is set to 1, so it is not going to go away
1461	*
1462	* Return 0 on success and -errno on error. In the error case, the inode will
1463	* have had make_bad_inode() executed on it.
1464	*/
1465	static int ntfs_read_locked_index_inode(struct inode base_vi, struct* inode *vi)
1466	{
1467	loff_t bvi_size;
1468	ntfs_volume *vol = NTFS_SB(sb: vi->i_sb);
1469	ntfs_inode ni, base_ni, *bni;
1470	struct inode *bvi;
1471	MFT_RECORD *m;
1472	ATTR_RECORD *a;
1473	ntfs_attr_search_ctx *ctx;
1474	INDEX_ROOT *ir;
1475	u8 ir_end, index_end;
1476	int err = `0`;
1477
1478	ntfs_debug("Entering for i_ino 0x%lx.", vi->i_ino);
1479	ntfs_init_big_inode(vi);
1480	ni = NTFS_I(inode: vi);
1481	base_ni = NTFS_I(inode: base_vi);
1482	/ Just mirror the values from the base inode. /
1483	vi->i_uid = base_vi->i_uid;
1484	vi->i_gid = base_vi->i_gid;
1485	set_nlink(inode: vi, nlink: base_vi->i_nlink);
1486	inode_set_mtime_to_ts(inode: vi, ts: inode_get_mtime(inode: base_vi));
1487	inode_set_ctime_to_ts(inode: vi, ts: inode_get_ctime(inode: base_vi));
1488	inode_set_atime_to_ts(inode: vi, ts: inode_get_atime(inode: base_vi));
1489	vi->i_generation = ni->seq_no = base_ni->seq_no;
1490	/ Set inode type to zero but preserve permissions. /
1491	vi->i_mode = base_vi->i_mode & ~S_IFMT;
1492	/ Map the mft record for the base inode. /
1493	m = map_mft_record(ni: base_ni);
1494	if (IS_ERR(ptr: m)) {
1495	err = PTR_ERR(ptr: m);
1496	goto err_out;
1497	}
1498	ctx = ntfs_attr_get_search_ctx(ni: base_ni, mrec: m);
1499	if (!ctx) {
1500	err = -ENOMEM;
1501	goto unm_err_out;
1502	}
1503	/ Find the index root attribute. /
1504	err = ntfs_attr_lookup(type: AT_INDEX_ROOT, name: ni->name, name_len: ni->name_len,
1505	ic: CASE_SENSITIVE, lowest_vcn: `0`, NULL, val_len: `0`, ctx);
1506	if (unlikely(err)) {
1507	if (err == -ENOENT)
1508	ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is "
1509	"missing.");
1510	goto unm_err_out;
1511	}
1512	a = ctx->attr;
1513	/ Set up the state. /
1514	if (unlikely(a->non_resident)) {
1515	ntfs_error(vol->sb, "$INDEX_ROOT attribute is not resident.");
1516	goto unm_err_out;
1517	}
1518	/ Ensure the attribute name is placed before the value. /
1519	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1520	le16_to_cpu(a->data.resident.value_offset)))) {
1521	ntfs_error(vol->sb, "$INDEX_ROOT attribute name is placed "
1522	"after the attribute value.");
1523	goto unm_err_out;
1524	}
1525	/*
1526	* Compressed/encrypted/sparse index root is not allowed, except for
1527	* directories of course but those are not dealt with here.
1528	*/
1529	if (a->flags & (ATTR_COMPRESSION_MASK \| ATTR_IS_ENCRYPTED \|
1530	ATTR_IS_SPARSE)) {
1531	ntfs_error(vi->i_sb, "Found compressed/encrypted/sparse index "
1532	"root attribute.");
1533	goto unm_err_out;
1534	}
1535	ir = (INDEX_ROOT)((u8)a + le16_to_cpu(a->data.resident.value_offset));
1536	ir_end = (u8*)ir + le32_to_cpu(a->data.resident.value_length);
1537	if (ir_end > (u8*)ctx->mrec + vol->mft_record_size) {
1538	ntfs_error(vi->i_sb, "$INDEX_ROOT attribute is corrupt.");
1539	goto unm_err_out;
1540	}
1541	index_end = (u8*)&ir->index + le32_to_cpu(ir->index.index_length);
1542	if (index_end > ir_end) {
1543	ntfs_error(vi->i_sb, "Index is corrupt.");
1544	goto unm_err_out;
1545	}
1546	if (ir->type) {
1547	ntfs_error(vi->i_sb, "Index type is not 0 (type is 0x%x).",
1548	le32_to_cpu(ir->type));
1549	goto unm_err_out;
1550	}
1551	ni->itype.index.collation_rule = ir->collation_rule;
1552	ntfs_debug("Index collation rule is 0x%x.",
1553	le32_to_cpu(ir->collation_rule));
1554	ni->itype.index.block_size = le32_to_cpu(ir->index_block_size);
1555	if (!is_power_of_2(n: ni->itype.index.block_size)) {
1556	ntfs_error(vi->i_sb, "Index block size (%u) is not a power of "
1557	"two.", ni->itype.index.block_size);
1558	goto unm_err_out;
1559	}
1560	if (ni->itype.index.block_size > PAGE_SIZE) {
1561	ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_SIZE "
1562	"(%ld) is not supported. Sorry.",
1563	ni->itype.index.block_size, PAGE_SIZE);
1564	err = -EOPNOTSUPP;
1565	goto unm_err_out;
1566	}
1567	if (ni->itype.index.block_size < NTFS_BLOCK_SIZE) {
1568	ntfs_error(vi->i_sb, "Index block size (%u) < NTFS_BLOCK_SIZE "
1569	"(%i) is not supported. Sorry.",
1570	ni->itype.index.block_size, NTFS_BLOCK_SIZE);
1571	err = -EOPNOTSUPP;
1572	goto unm_err_out;
1573	}
1574	ni->itype.index.block_size_bits = ffs(ni->itype.index.block_size) - `1`;
1575	/ Determine the size of a vcn in the index. /
1576	if (vol->cluster_size <= ni->itype.index.block_size) {
1577	ni->itype.index.vcn_size = vol->cluster_size;
1578	ni->itype.index.vcn_size_bits = vol->cluster_size_bits;
1579	} else {
1580	ni->itype.index.vcn_size = vol->sector_size;
1581	ni->itype.index.vcn_size_bits = vol->sector_size_bits;
1582	}
1583	/ Check for presence of index allocation attribute. /
1584	if (!(ir->index.flags & LARGE_INDEX)) {
1585	/ No index allocation. /
1586	vi->i_size = ni->initialized_size = ni->allocated_size = `0`;
1587	/ We are done with the mft record, so we release it. /
1588	ntfs_attr_put_search_ctx(ctx);
1589	unmap_mft_record(ni: base_ni);
1590	m = NULL;
1591	ctx = NULL;
1592	goto skip_large_index_stuff;
1593	} / LARGE_INDEX: Index allocation present. Setup state. /
1594	NInoSetIndexAllocPresent(ni);
1595	/ Find index allocation attribute. /
1596	ntfs_attr_reinit_search_ctx(ctx);
1597	err = ntfs_attr_lookup(type: AT_INDEX_ALLOCATION, name: ni->name, name_len: ni->name_len,
1598	ic: CASE_SENSITIVE, lowest_vcn: `0`, NULL, val_len: `0`, ctx);
1599	if (unlikely(err)) {
1600	if (err == -ENOENT)
1601	ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1602	"not present but $INDEX_ROOT "
1603	"indicated it is.");
1604	else
1605	ntfs_error(vi->i_sb, "Failed to lookup "
1606	"$INDEX_ALLOCATION attribute.");
1607	goto unm_err_out;
1608	}
1609	a = ctx->attr;
1610	if (!a->non_resident) {
1611	ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1612	"resident.");
1613	goto unm_err_out;
1614	}
1615	/*
1616	* Ensure the attribute name is placed before the mapping pairs array.
1617	*/
1618	if (unlikely(a->name_length && (le16_to_cpu(a->name_offset) >=
1619	le16_to_cpu(
1620	a->data.non_resident.mapping_pairs_offset)))) {
1621	ntfs_error(vol->sb, "$INDEX_ALLOCATION attribute name is "
1622	"placed after the mapping pairs array.");
1623	goto unm_err_out;
1624	}
1625	if (a->flags & ATTR_IS_ENCRYPTED) {
1626	ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1627	"encrypted.");
1628	goto unm_err_out;
1629	}
1630	if (a->flags & ATTR_IS_SPARSE) {
1631	ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is sparse.");
1632	goto unm_err_out;
1633	}
1634	if (a->flags & ATTR_COMPRESSION_MASK) {
1635	ntfs_error(vi->i_sb, "$INDEX_ALLOCATION attribute is "
1636	"compressed.");
1637	goto unm_err_out;
1638	}
1639	if (a->data.non_resident.lowest_vcn) {
1640	ntfs_error(vi->i_sb, "First extent of $INDEX_ALLOCATION "
1641	"attribute has non zero lowest_vcn.");
1642	goto unm_err_out;
1643	}
1644	vi->i_size = sle64_to_cpu(x: a->data.non_resident.data_size);
1645	ni->initialized_size = sle64_to_cpu(
1646	x: a->data.non_resident.initialized_size);
1647	ni->allocated_size = sle64_to_cpu(x: a->data.non_resident.allocated_size);
1648	/*
1649	* We are done with the mft record, so we release it. Otherwise
1650	* we would deadlock in ntfs_attr_iget().
1651	*/
1652	ntfs_attr_put_search_ctx(ctx);
1653	unmap_mft_record(ni: base_ni);
1654	m = NULL;
1655	ctx = NULL;
1656	/ Get the index bitmap attribute inode. /
1657	bvi = ntfs_attr_iget(base_vi, type: AT_BITMAP, name: ni->name, name_len: ni->name_len);
1658	if (IS_ERR(ptr: bvi)) {
1659	ntfs_error(vi->i_sb, "Failed to get bitmap attribute.");
1660	err = PTR_ERR(ptr: bvi);
1661	goto unm_err_out;
1662	}
1663	bni = NTFS_I(inode: bvi);
1664	if (NInoCompressed(ni: bni) \|\| NInoEncrypted(ni: bni) \|\|
1665	NInoSparse(ni: bni)) {
1666	ntfs_error(vi->i_sb, "$BITMAP attribute is compressed and/or "
1667	"encrypted and/or sparse.");
1668	goto iput_unm_err_out;
1669	}
1670	/ Consistency check bitmap size vs. index allocation size. /
1671	bvi_size = i_size_read(inode: bvi);
1672	if ((bvi_size << `3`) < (vi->i_size >> ni->itype.index.block_size_bits)) {
1673	ntfs_error(vi->i_sb, "Index bitmap too small (0x%llx) for "
1674	"index allocation (0x%llx).", bvi_size << `3`,
1675	vi->i_size);
1676	goto iput_unm_err_out;
1677	}
1678	iput(bvi);
1679	skip_large_index_stuff:
1680	/ Setup the operations for this index inode. /
1681	vi->i_mapping->a_ops = &ntfs_mst_aops;
1682	vi->i_blocks = ni->allocated_size >> `9`;
1683	/*
1684	* Make sure the base inode doesn't go away and attach it to the
1685	* index inode.
1686	*/
1687	igrab(base_vi);
1688	ni->ext.base_ntfs_ino = base_ni;
1689	ni->nr_extents = -`1`;
1690
1691	ntfs_debug("Done.");
1692	return `0`;
1693	iput_unm_err_out:
1694	iput(bvi);
1695	unm_err_out:
1696	if (!err)
1697	err = -EIO;
1698	if (ctx)
1699	ntfs_attr_put_search_ctx(ctx);
1700	if (m)
1701	unmap_mft_record(ni: base_ni);
1702	err_out:
1703	ntfs_error(vi->i_sb, "Failed with error code %i while reading index "
1704	"inode (mft_no 0x%lx, name_len %i.", err, vi->i_ino,
1705	ni->name_len);
1706	make_bad_inode(vi);
1707	if (err != -EOPNOTSUPP && err != -ENOMEM)
1708	NVolSetErrors(vol);
1709	return err;
1710	}
1711
1712	/*
1713	* The MFT inode has special locking, so teach the lock validator
1714	* about this by splitting off the locking rules of the MFT from
1715	* the locking rules of other inodes. The MFT inode can never be
1716	* accessed from the VFS side (or even internally), only by the
1717	* map_mft functions.
1718	*/
1719	static struct lock_class_key mft_ni_runlist_lock_key, mft_ni_mrec_lock_key;
1720
1721	/**
1722	* ntfs_read_inode_mount - special read_inode for mount time use only
1723	* @vi: inode to read
1724	*
1725	* Read inode FILE_MFT at mount time, only called with super_block lock
1726	* held from within the read_super() code path.
1727	*
1728	* This function exists because when it is called the page cache for $MFT/$DATA
1729	* is not initialized and hence we cannot get at the contents of mft records
1730	* by calling map_mft_record*().
1731	*
1732	* Further it needs to cope with the circular references problem, i.e. cannot
1733	* load any attributes other than $ATTRIBUTE_LIST until $DATA is loaded, because
1734	* we do not know where the other extent mft records are yet and again, because
1735	* we cannot call map_mft_record*() yet. Obviously this applies only when an
1736	* attribute list is actually present in $MFT inode.
1737	*
1738	* We solve these problems by starting with the $DATA attribute before anything
1739	* else and iterating using ntfs_attr_lookup($DATA) over all extents. As each
1740	* extent is found, we ntfs_mapping_pairs_decompress() including the implied
1741	* ntfs_runlists_merge(). Each step of the iteration necessarily provides
1742	* sufficient information for the next step to complete.
1743	*
1744	* This should work but there are two possible pit falls (see inline comments
1745	* below), but only time will tell if they are real pits or just smoke...
1746	*/
1747	int ntfs_read_inode_mount(struct inode *vi)
1748	{
1749	VCN next_vcn, last_vcn, highest_vcn;
1750	s64 block;
1751	struct super_block *sb = vi->i_sb;
1752	ntfs_volume *vol = NTFS_SB(sb);
1753	struct buffer_head *bh;
1754	ntfs_inode *ni;
1755	MFT_RECORD *m = NULL;
1756	ATTR_RECORD *a;
1757	ntfs_attr_search_ctx *ctx;
1758	unsigned int i, nr_blocks;
1759	int err;
1760
1761	ntfs_debug("Entering.");
1762
1763	/ Initialize the ntfs specific part of @vi. /
1764	ntfs_init_big_inode(vi);
1765
1766	ni = NTFS_I(inode: vi);
1767
1768	/ Setup the data attribute. It is special as it is mst protected. /
1769	NInoSetNonResident(ni);
1770	NInoSetMstProtected(ni);
1771	NInoSetSparseDisabled(ni);
1772	ni->type = AT_DATA;
1773	ni->name = NULL;
1774	ni->name_len = `0`;
1775	/*
1776	* This sets up our little cheat allowing us to reuse the async read io
1777	* completion handler for directories.
1778	*/
1779	ni->itype.index.block_size = vol->mft_record_size;
1780	ni->itype.index.block_size_bits = vol->mft_record_size_bits;
1781
1782	/ Very important! Needed to be able to call map_mft_record(). /*
1783	vol->mft_ino = vi;
1784
1785	/ Allocate enough memory to read the first mft record. /
1786	if (vol->mft_record_size > `64` * `1024`) {
1787	ntfs_error(sb, "Unsupported mft record size %i (max 64kiB).",
1788	vol->mft_record_size);
1789	goto err_out;
1790	}
1791	i = vol->mft_record_size;
1792	if (i < sb->s_blocksize)
1793	i = sb->s_blocksize;
1794	m = (MFT_RECORD*)ntfs_malloc_nofs(size: i);
1795	if (!m) {
1796	ntfs_error(sb, "Failed to allocate buffer for $MFT record 0.");
1797	goto err_out;
1798	}
1799
1800	/ Determine the first block of the $MFT/$DATA attribute. /
1801	block = vol->mft_lcn << vol->cluster_size_bits >>
1802	sb->s_blocksize_bits;
1803	nr_blocks = vol->mft_record_size >> sb->s_blocksize_bits;
1804	if (!nr_blocks)
1805	nr_blocks = `1`;
1806
1807	/ Load $MFT/$DATA's first mft record. /
1808	for (i = `0`; i < nr_blocks; i++) {
1809	bh = sb_bread(sb, block: block++);
1810	if (!bh) {
1811	ntfs_error(sb, "Device read failed.");
1812	goto err_out;
1813	}
1814	memcpy((char*)m + (i << sb->s_blocksize_bits), bh->b_data,
1815	sb->s_blocksize);
1816	brelse(bh);
1817	}
1818
1819	if (le32_to_cpu(m->bytes_allocated) != vol->mft_record_size) {
1820	ntfs_error(sb, "Incorrect mft record size %u in superblock, should be %u.",
1821	le32_to_cpu(m->bytes_allocated), vol->mft_record_size);
1822	goto err_out;
1823	}
1824
1825	/ Apply the mst fixups. /
1826	if (post_read_mst_fixup(b: (NTFS_RECORD*)m, size: vol->mft_record_size)) {
1827	/ FIXME: Try to use the $MFTMirr now. /
1828	ntfs_error(sb, "MST fixup failed. $MFT is corrupt.");
1829	goto err_out;
1830	}
1831
1832	/ Sanity check offset to the first attribute /
1833	if (le16_to_cpu(m->attrs_offset) >= le32_to_cpu(m->bytes_allocated)) {
1834	ntfs_error(sb, "Incorrect mft offset to the first attribute %u in superblock.",
1835	le16_to_cpu(m->attrs_offset));
1836	goto err_out;
1837	}
1838
1839	/ Need this to sanity check attribute list references to $MFT. /
1840	vi->i_generation = ni->seq_no = le16_to_cpu(m->sequence_number);
1841
1842	/ Provides read_folio() for map_mft_record(). /
1843	vi->i_mapping->a_ops = &ntfs_mst_aops;
1844
1845	ctx = ntfs_attr_get_search_ctx(ni, mrec: m);
1846	if (!ctx) {
1847	err = -ENOMEM;
1848	goto err_out;
1849	}
1850
1851	/ Find the attribute list attribute if present. /
1852	err = ntfs_attr_lookup(type: AT_ATTRIBUTE_LIST, NULL, name_len: `0`, ic: `0`, lowest_vcn: `0`, NULL, val_len: `0`, ctx);
1853	if (err) {
1854	if (unlikely(err != -ENOENT)) {
1855	ntfs_error(sb, "Failed to lookup attribute list "
1856	"attribute. You should run chkdsk.");
1857	goto put_err_out;
1858	}
1859	} else / if (!err) / {
1860	ATTR_LIST_ENTRY al_entry, next_al_entry;
1861	u8 *al_end;
1862	static const char *es = " Not allowed. $MFT is corrupt. "
1863	"You should run chkdsk.";
1864
1865	ntfs_debug("Attribute list attribute found in $MFT.");
1866	NInoSetAttrList(ni);
1867	a = ctx->attr;
1868	if (a->flags & ATTR_COMPRESSION_MASK) {
1869	ntfs_error(sb, "Attribute list attribute is "
1870	"compressed.%s", es);
1871	goto put_err_out;
1872	}
1873	if (a->flags & ATTR_IS_ENCRYPTED \|\|
1874	a->flags & ATTR_IS_SPARSE) {
1875	if (a->non_resident) {
1876	ntfs_error(sb, "Non-resident attribute list "
1877	"attribute is encrypted/"
1878	"sparse.%s", es);
1879	goto put_err_out;
1880	}
1881	ntfs_warning(sb, "Resident attribute list attribute "
1882	"in $MFT system file is marked "
1883	"encrypted/sparse which is not true. "
1884	"However, Windows allows this and "
1885	"chkdsk does not detect or correct it "
1886	"so we will just ignore the invalid "
1887	"flags and pretend they are not set.");
1888	}
1889	/ Now allocate memory for the attribute list. /
1890	ni->attr_list_size = (u32)ntfs_attr_size(a);
1891	if (!ni->attr_list_size) {
1892	ntfs_error(sb, "Attr_list_size is zero");
1893	goto put_err_out;
1894	}
1895	ni->attr_list = ntfs_malloc_nofs(size: ni->attr_list_size);
1896	if (!ni->attr_list) {
1897	ntfs_error(sb, "Not enough memory to allocate buffer "
1898	"for attribute list.");
1899	goto put_err_out;
1900	}
1901	if (a->non_resident) {
1902	NInoSetAttrListNonResident(ni);
1903	if (a->data.non_resident.lowest_vcn) {
1904	ntfs_error(sb, "Attribute list has non zero "
1905	"lowest_vcn. $MFT is corrupt. "
1906	"You should run chkdsk.");
1907	goto put_err_out;
1908	}
1909	/ Setup the runlist. /
1910	ni->attr_list_rl.rl = ntfs_mapping_pairs_decompress(vol,
1911	attr: a, NULL);
1912	if (IS_ERR(ptr: ni->attr_list_rl.rl)) {
1913	err = PTR_ERR(ptr: ni->attr_list_rl.rl);
1914	ni->attr_list_rl.rl = NULL;
1915	ntfs_error(sb, "Mapping pairs decompression "
1916	"failed with error code %i.",
1917	-err);
1918	goto put_err_out;
1919	}
1920	/ Now load the attribute list. /
1921	if ((err = load_attribute_list(vol, rl: &ni->attr_list_rl,
1922	al_start: ni->attr_list, size: ni->attr_list_size,
1923	initialized_size: sle64_to_cpu(x: a->data.
1924	non_resident.initialized_size)))) {
1925	ntfs_error(sb, "Failed to load attribute list "
1926	"attribute with error code %i.",
1927	-err);
1928	goto put_err_out;
1929	}
1930	} else / if (!ctx.attr->non_resident) / {
1931	if ((u8*)a + le16_to_cpu(
1932	a->data.resident.value_offset) +
1933	le32_to_cpu(
1934	a->data.resident.value_length) >
1935	(u8*)ctx->mrec + vol->mft_record_size) {
1936	ntfs_error(sb, "Corrupt attribute list "
1937	"attribute.");
1938	goto put_err_out;
1939	}
1940	/ Now copy the attribute list. /
1941	memcpy(ni->attr_list, (u8*)a + le16_to_cpu(
1942	a->data.resident.value_offset),
1943	le32_to_cpu(
1944	a->data.resident.value_length));
1945	}
1946	/ The attribute list is now setup in memory. /
1947	/*
1948	* FIXME: I don't know if this case is actually possible.
1949	* According to logic it is not possible but I have seen too
1950	* many weird things in MS software to rely on logic... Thus we
1951	* perform a manual search and make sure the first $MFT/$DATA
1952	* extent is in the base inode. If it is not we abort with an
1953	* error and if we ever see a report of this error we will need
1954	* to do some magic in order to have the necessary mft record
1955	* loaded and in the right place in the page cache. But
1956	* hopefully logic will prevail and this never happens...
1957	*/
1958	al_entry = (ATTR_LIST_ENTRY*)ni->attr_list;
1959	al_end = (u8*)al_entry + ni->attr_list_size;
1960	for (;; al_entry = next_al_entry) {
1961	/ Out of bounds check. /
1962	if ((u8*)al_entry < ni->attr_list \|\|
1963	(u8*)al_entry > al_end)
1964	goto em_put_err_out;
1965	/ Catch the end of the attribute list. /
1966	if ((u8*)al_entry == al_end)
1967	goto em_put_err_out;
1968	if (!al_entry->length)
1969	goto em_put_err_out;
1970	if ((u8)al_entry + `6` > al_end \|\| (u8)al_entry +
1971	le16_to_cpu(al_entry->length) > al_end)
1972	goto em_put_err_out;
1973	next_al_entry = (ATTR_LIST_ENTRY)((u8)al_entry +
1974	le16_to_cpu(al_entry->length));
1975	if (le32_to_cpu(al_entry->type) > le32_to_cpu(AT_DATA))
1976	goto em_put_err_out;
1977	if (AT_DATA != al_entry->type)
1978	continue;
1979	/ We want an unnamed attribute. /
1980	if (al_entry->name_length)
1981	goto em_put_err_out;
1982	/ Want the first entry, i.e. lowest_vcn == 0. /
1983	if (al_entry->lowest_vcn)
1984	goto em_put_err_out;
1985	/ First entry has to be in the base mft record. /
1986	if (MREF_LE(al_entry->mft_reference) != vi->i_ino) {
1987	/ MFT references do not match, logic fails. /
1988	ntfs_error(sb, "BUG: The first $DATA extent "
1989	"of $MFT is not in the base "
1990	"mft record. Please report "
1991	"you saw this message to "
1992	"linux-ntfs-dev@lists."
1993	"sourceforge.net");
1994	goto put_err_out;
1995	} else {
1996	/ Sequence numbers must match. /
1997	if (MSEQNO_LE(al_entry->mft_reference) !=
1998	ni->seq_no)
1999	goto em_put_err_out;
2000	/ Got it. All is ok. We can stop now. /
2001	break;
2002	}
2003	}
2004	}
2005
2006	ntfs_attr_reinit_search_ctx(ctx);
2007
2008	/ Now load all attribute extents. /
2009	a = NULL;
2010	next_vcn = last_vcn = highest_vcn = `0`;
2011	while (!(err = ntfs_attr_lookup(type: AT_DATA, NULL, name_len: `0`, ic: `0`, lowest_vcn: next_vcn, NULL, val_len: `0`,
2012	ctx))) {
2013	runlist_element *nrl;
2014
2015	/ Cache the current attribute. /
2016	a = ctx->attr;
2017	/ $MFT must be non-resident. /
2018	if (!a->non_resident) {
2019	ntfs_error(sb, "$MFT must be non-resident but a "
2020	"resident extent was found. $MFT is "
2021	"corrupt. Run chkdsk.");
2022	goto put_err_out;
2023	}
2024	/ $MFT must be uncompressed and unencrypted. /
2025	if (a->flags & ATTR_COMPRESSION_MASK \|\|
2026	a->flags & ATTR_IS_ENCRYPTED \|\|
2027	a->flags & ATTR_IS_SPARSE) {
2028	ntfs_error(sb, "$MFT must be uncompressed, "
2029	"non-sparse, and unencrypted but a "
2030	"compressed/sparse/encrypted extent "
2031	"was found. $MFT is corrupt. Run "
2032	"chkdsk.");
2033	goto put_err_out;
2034	}
2035	/*
2036	* Decompress the mapping pairs array of this extent and merge
2037	* the result into the existing runlist. No need for locking
2038	* as we have exclusive access to the inode at this time and we
2039	* are a mount in progress task, too.
2040	*/
2041	nrl = ntfs_mapping_pairs_decompress(vol, attr: a, old_rl: ni->runlist.rl);
2042	if (IS_ERR(ptr: nrl)) {
2043	ntfs_error(sb, "ntfs_mapping_pairs_decompress() "
2044	"failed with error code %ld. $MFT is "
2045	"corrupt.", PTR_ERR(nrl));
2046	goto put_err_out;
2047	}
2048	ni->runlist.rl = nrl;
2049
2050	/ Are we in the first extent? /
2051	if (!next_vcn) {
2052	if (a->data.non_resident.lowest_vcn) {
2053	ntfs_error(sb, "First extent of $DATA "
2054	"attribute has non zero "
2055	"lowest_vcn. $MFT is corrupt. "
2056	"You should run chkdsk.");
2057	goto put_err_out;
2058	}
2059	/ Get the last vcn in the $DATA attribute. /
2060	last_vcn = sle64_to_cpu(
2061	x: a->data.non_resident.allocated_size)
2062	>> vol->cluster_size_bits;
2063	/ Fill in the inode size. /
2064	vi->i_size = sle64_to_cpu(
2065	x: a->data.non_resident.data_size);
2066	ni->initialized_size = sle64_to_cpu(
2067	x: a->data.non_resident.initialized_size);
2068	ni->allocated_size = sle64_to_cpu(
2069	x: a->data.non_resident.allocated_size);
2070	/*
2071	* Verify the number of mft records does not exceed
2072	* 2^32 - 1.
2073	*/
2074	if ((vi->i_size >> vol->mft_record_size_bits) >=
2075	(`1ULL` << `32`)) {
2076	ntfs_error(sb, "$MFT is too big! Aborting.");
2077	goto put_err_out;
2078	}
2079	/*
2080	* We have got the first extent of the runlist for
2081	* $MFT which means it is now relatively safe to call
2082	* the normal ntfs_read_inode() function.
2083	* Complete reading the inode, this will actually
2084	* re-read the mft record for $MFT, this time entering
2085	* it into the page cache with which we complete the
2086	* kick start of the volume. It should be safe to do
2087	* this now as the first extent of $MFT/$DATA is
2088	* already known and we would hope that we don't need
2089	* further extents in order to find the other
2090	* attributes belonging to $MFT. Only time will tell if
2091	* this is really the case. If not we will have to play
2092	* magic at this point, possibly duplicating a lot of
2093	* ntfs_read_inode() at this point. We will need to
2094	* ensure we do enough of its work to be able to call
2095	* ntfs_read_inode() on extents of $MFT/$DATA. But lets
2096	* hope this never happens...
2097	*/
2098	ntfs_read_locked_inode(vi);
2099	if (is_bad_inode(vi)) {
2100	ntfs_error(sb, "ntfs_read_inode() of $MFT "
2101	"failed. BUG or corrupt $MFT. "
2102	"Run chkdsk and if no errors "
2103	"are found, please report you "
2104	"saw this message to "
2105	"linux-ntfs-dev@lists."
2106	"sourceforge.net");
2107	ntfs_attr_put_search_ctx(ctx);
2108	/ Revert to the safe super operations. /
2109	ntfs_free(addr: m);
2110	return -`1`;
2111	}
2112	/*
2113	* Re-initialize some specifics about $MFT's inode as
2114	* ntfs_read_inode() will have set up the default ones.
2115	*/
2116	/ Set uid and gid to root. /
2117	vi->i_uid = GLOBAL_ROOT_UID;
2118	vi->i_gid = GLOBAL_ROOT_GID;
2119	/ Regular file. No access for anyone. /
2120	vi->i_mode = S_IFREG;
2121	/ No VFS initiated operations allowed for $MFT. /
2122	vi->i_op = &ntfs_empty_inode_ops;
2123	vi->i_fop = &ntfs_empty_file_ops;
2124	}
2125
2126	/ Get the lowest vcn for the next extent. /
2127	highest_vcn = sle64_to_cpu(a->data.non_resident.highest_vcn);
2128	next_vcn = highest_vcn + `1`;
2129
2130	/ Only one extent or error, which we catch below. /
2131	if (next_vcn <= `0`)
2132	break;
2133
2134	/ Avoid endless loops due to corruption. /
2135	if (next_vcn < sle64_to_cpu(
2136	a->data.non_resident.lowest_vcn)) {
2137	ntfs_error(sb, "$MFT has corrupt attribute list "
2138	"attribute. Run chkdsk.");
2139	goto put_err_out;
2140	}
2141	}
2142	if (err != -ENOENT) {
2143	ntfs_error(sb, "Failed to lookup $MFT/$DATA attribute extent. "
2144	"$MFT is corrupt. Run chkdsk.");
2145	goto put_err_out;
2146	}
2147	if (!a) {
2148	ntfs_error(sb, "$MFT/$DATA attribute not found. $MFT is "
2149	"corrupt. Run chkdsk.");
2150	goto put_err_out;
2151	}
2152	if (highest_vcn && highest_vcn != last_vcn - `1`) {
2153	ntfs_error(sb, "Failed to load the complete runlist for "
2154	"$MFT/$DATA. Driver bug or corrupt $MFT. "
2155	"Run chkdsk.");
2156	ntfs_debug("highest_vcn = 0x%llx, last_vcn - 1 = 0x%llx",
2157	(unsigned long long)highest_vcn,
2158	(unsigned long long)last_vcn - `1`);
2159	goto put_err_out;
2160	}
2161	ntfs_attr_put_search_ctx(ctx);
2162	ntfs_debug("Done.");
2163	ntfs_free(m);
2164
2165	/*
2166	* Split the locking rules of the MFT inode from the
2167	* locking rules of other inodes:
2168	*/
2169	lockdep_set_class(&ni->runlist.lock, &mft_ni_runlist_lock_key);
2170	lockdep_set_class(&ni->mrec_lock, &mft_ni_mrec_lock_key);
2171
2172	return `0`;
2173
2174	em_put_err_out:
2175	ntfs_error(sb, "Couldn't find first extent of $DATA attribute in "
2176	"attribute list. $MFT is corrupt. Run chkdsk.");
2177	put_err_out:
2178	ntfs_attr_put_search_ctx(ctx);
2179	err_out:
2180	ntfs_error(sb, "Failed. Marking inode as bad.");
2181	make_bad_inode(vi);
2182	ntfs_free(m);
2183	return -`1`;
2184	}
2185
2186	static void __ntfs_clear_inode(ntfs_inode *ni)
2187	{
2188	/ Free all alocated memory. /
2189	down_write(sem: &ni->runlist.lock);
2190	if (ni->runlist.rl) {
2191	ntfs_free(addr: ni->runlist.rl);
2192	ni->runlist.rl = NULL;
2193	}
2194	up_write(sem: &ni->runlist.lock);
2195
2196	if (ni->attr_list) {
2197	ntfs_free(addr: ni->attr_list);
2198	ni->attr_list = NULL;
2199	}
2200
2201	down_write(sem: &ni->attr_list_rl.lock);
2202	if (ni->attr_list_rl.rl) {
2203	ntfs_free(addr: ni->attr_list_rl.rl);
2204	ni->attr_list_rl.rl = NULL;
2205	}
2206	up_write(sem: &ni->attr_list_rl.lock);
2207
2208	if (ni->name_len && ni->name != I30) {
2209	/ Catch bugs... /
2210	BUG_ON(!ni->name);
2211	kfree(objp: ni->name);
2212	}
2213	}
2214
2215	void ntfs_clear_extent_inode(ntfs_inode *ni)
2216	{
2217	ntfs_debug("Entering for inode 0x%lx.", ni->mft_no);
2218
2219	BUG_ON(NInoAttr(ni));
2220	BUG_ON(ni->nr_extents != -`1`);
2221
2222	#ifdef NTFS_RW
2223	if (NInoDirty(ni)) {
2224	if (!is_bad_inode(VFS_I(ni: ni->ext.base_ntfs_ino)))
2225	ntfs_error(ni->vol->sb, "Clearing dirty extent inode! "
2226	"Losing data! This is a BUG!!!");
2227	// FIXME: Do something!!!
2228	}
2229	#endif /* NTFS_RW */
2230
2231	__ntfs_clear_inode(ni);
2232
2233	/ Bye, bye... /
2234	ntfs_destroy_extent_inode(ni);
2235	}
2236
2237	/**
2238	* ntfs_evict_big_inode - clean up the ntfs specific part of an inode
2239	* @vi: vfs inode pending annihilation
2240	*
2241	* When the VFS is going to remove an inode from memory, ntfs_clear_big_inode()
2242	* is called, which deallocates all memory belonging to the NTFS specific part
2243	* of the inode and returns.
2244	*
2245	* If the MFT record is dirty, we commit it before doing anything else.
2246	*/
2247	void ntfs_evict_big_inode(struct inode *vi)
2248	{
2249	ntfs_inode *ni = NTFS_I(inode: vi);
2250
2251	truncate_inode_pages_final(&vi->i_data);
2252	clear_inode(vi);
2253
2254	#ifdef NTFS_RW
2255	if (NInoDirty(ni)) {
2256	bool was_bad = (is_bad_inode(vi));
2257
2258	/ Committing the inode also commits all extent inodes. /
2259	ntfs_commit_inode(vi);
2260
2261	if (!was_bad && (is_bad_inode(vi) \|\| NInoDirty(ni))) {
2262	ntfs_error(vi->i_sb, "Failed to commit dirty inode "
2263	"0x%lx. Losing data!", vi->i_ino);
2264	// FIXME: Do something!!!
2265	}
2266	}
2267	#endif /* NTFS_RW */
2268
2269	/ No need to lock at this stage as no one else has a reference. /
2270	if (ni->nr_extents > `0`) {
2271	int i;
2272
2273	for (i = `0`; i < ni->nr_extents; i++)
2274	ntfs_clear_extent_inode(ni: ni->ext.extent_ntfs_inos[i]);
2275	kfree(objp: ni->ext.extent_ntfs_inos);
2276	}
2277
2278	__ntfs_clear_inode(ni);
2279
2280	if (NInoAttr(ni)) {
2281	/ Release the base inode if we are holding it. /
2282	if (ni->nr_extents == -`1`) {
2283	iput(VFS_I(ni: ni->ext.base_ntfs_ino));
2284	ni->nr_extents = `0`;
2285	ni->ext.base_ntfs_ino = NULL;
2286	}
2287	}
2288	BUG_ON(ni->page);
2289	if (!atomic_dec_and_test(v: &ni->count))
2290	BUG();
2291	return;
2292	}
2293
2294	/**
2295	* ntfs_show_options - show mount options in /proc/mounts
2296	* @sf: seq_file in which to write our mount options
2297	* @root: root of the mounted tree whose mount options to display
2298	*
2299	* Called by the VFS once for each mounted ntfs volume when someone reads
2300	* /proc/mounts in order to display the NTFS specific mount options of each
2301	* mount. The mount options of fs specified by @root are written to the seq file
2302	* @sf and success is returned.
2303	*/
2304	int ntfs_show_options(struct seq_file sf, struct* dentry *root)
2305	{
2306	ntfs_volume *vol = NTFS_SB(sb: root->d_sb);
2307	int i;
2308
2309	seq_printf(m: sf, fmt: ",uid=%i", from_kuid_munged(to: &init_user_ns, uid: vol->uid));
2310	seq_printf(m: sf, fmt: ",gid=%i", from_kgid_munged(to: &init_user_ns, gid: vol->gid));
2311	if (vol->fmask == vol->dmask)
2312	seq_printf(m: sf, fmt: ",umask=0%o", vol->fmask);
2313	else {
2314	seq_printf(m: sf, fmt: ",fmask=0%o", vol->fmask);
2315	seq_printf(m: sf, fmt: ",dmask=0%o", vol->dmask);
2316	}
2317	seq_printf(m: sf, fmt: ",nls=%s", vol->nls_map->charset);
2318	if (NVolCaseSensitive(vol))
2319	seq_printf(m: sf, fmt: ",case_sensitive");
2320	if (NVolShowSystemFiles(vol))
2321	seq_printf(m: sf, fmt: ",show_sys_files");
2322	if (!NVolSparseEnabled(vol))
2323	seq_printf(m: sf, fmt: ",disable_sparse");
2324	for (i = `0`; on_errors_arr[i].val; i++) {
2325	if (on_errors_arr[i].val & vol->on_errors)
2326	seq_printf(m: sf, fmt: ",errors=%s", on_errors_arr[i].str);
2327	}
2328	seq_printf(m: sf, fmt: ",mft_zone_multiplier=%i", vol->mft_zone_multiplier);
2329	return `0`;
2330	}
2331
2332	#ifdef NTFS_RW
2333
/*
 * Common tail for error messages: ntfs_truncate() passes it as the trailing
 * "%s" argument to ntfs_error() on failure paths that leave the metadata in
 * an inconsistent state.
 */
static const char *es = " Leaving inconsistent metadata. Unmount and run "
"chkdsk.";
2336
2337	/**
2338	* ntfs_truncate - called when the i_size of an ntfs inode is changed
2339	* @vi: inode for which the i_size was changed
2340	*
2341	* We only support i_size changes for normal files at present, i.e. not
2342	* compressed and not encrypted. This is enforced in ntfs_setattr(), see
2343	* below.
2344	*
2345	* The kernel guarantees that @vi is a regular file (S_ISREG() is true) and
2346	* that the change is allowed.
2347	*
2348	* This implies for us that @vi is a file inode rather than a directory, index,
2349	* or attribute inode as well as that @vi is a base inode.
2350	*
2351	* Returns 0 on success or -errno on error.
2352	*
2353	* Called with ->i_mutex held.
2354	*/
2355	int ntfs_truncate(struct inode *vi)
2356	{
2357	s64 new_size, old_size, nr_freed, new_alloc_size, old_alloc_size;
2358	VCN highest_vcn;
2359	unsigned long flags;
2360	ntfs_inode base_ni, ni = NTFS_I(inode: vi);
2361	ntfs_volume *vol = ni->vol;
2362	ntfs_attr_search_ctx *ctx;
2363	MFT_RECORD *m;
2364	ATTR_RECORD *a;
2365	const char *te = " Leaving file length out of sync with i_size.";
2366	int err, mp_size, size_change, alloc_change;
2367
2368	ntfs_debug("Entering for inode 0x%lx.", vi->i_ino);
2369	BUG_ON(NInoAttr(ni));
2370	BUG_ON(S_ISDIR(vi->i_mode));
2371	BUG_ON(NInoMstProtected(ni));
2372	BUG_ON(ni->nr_extents < `0`);
2373	retry_truncate:
2374	/*
2375	* Lock the runlist for writing and map the mft record to ensure it is
2376	* safe to mess with the attribute runlist and sizes.
2377	*/
2378	down_write(sem: &ni->runlist.lock);
2379	if (!NInoAttr(ni))
2380	base_ni = ni;
2381	else
2382	base_ni = ni->ext.base_ntfs_ino;
2383	m = map_mft_record(ni: base_ni);
2384	if (IS_ERR(ptr: m)) {
2385	err = PTR_ERR(ptr: m);
2386	ntfs_error(vi->i_sb, "Failed to map mft record for inode 0x%lx "
2387	"(error code %d).%s", vi->i_ino, err, te);
2388	ctx = NULL;
2389	m = NULL;
2390	goto old_bad_out;
2391	}
2392	ctx = ntfs_attr_get_search_ctx(ni: base_ni, mrec: m);
2393	if (unlikely(!ctx)) {
2394	ntfs_error(vi->i_sb, "Failed to allocate a search context for "
2395	"inode 0x%lx (not enough memory).%s",
2396	vi->i_ino, te);
2397	err = -ENOMEM;
2398	goto old_bad_out;
2399	}
2400	err = ntfs_attr_lookup(type: ni->type, name: ni->name, name_len: ni->name_len,
2401	ic: CASE_SENSITIVE, lowest_vcn: `0`, NULL, val_len: `0`, ctx);
2402	if (unlikely(err)) {
2403	if (err == -ENOENT) {
2404	ntfs_error(vi->i_sb, "Open attribute is missing from "
2405	"mft record. Inode 0x%lx is corrupt. "
2406	"Run chkdsk.%s", vi->i_ino, te);
2407	err = -EIO;
2408	} else
2409	ntfs_error(vi->i_sb, "Failed to lookup attribute in "
2410	"inode 0x%lx (error code %d).%s",
2411	vi->i_ino, err, te);
2412	goto old_bad_out;
2413	}
2414	m = ctx->mrec;
2415	a = ctx->attr;
2416	/*
2417	* The i_size of the vfs inode is the new size for the attribute value.
2418	*/
2419	new_size = i_size_read(inode: vi);
2420	/ The current size of the attribute value is the old size. /
2421	old_size = ntfs_attr_size(a);
2422	/ Calculate the new allocated size. /
2423	if (NInoNonResident(ni))
2424	new_alloc_size = (new_size + vol->cluster_size - `1`) &
2425	~(s64)vol->cluster_size_mask;
2426	else
2427	new_alloc_size = (new_size + `7`) & ~`7`;
2428	/ The current allocated size is the old allocated size. /
2429	read_lock_irqsave(&ni->size_lock, flags);
2430	old_alloc_size = ni->allocated_size;
2431	read_unlock_irqrestore(&ni->size_lock, flags);
2432	/*
2433	* The change in the file size. This will be 0 if no change, >0 if the
2434	* size is growing, and <0 if the size is shrinking.
2435	*/
2436	size_change = -`1`;
2437	if (new_size - old_size >= `0`) {
2438	size_change = `1`;
2439	if (new_size == old_size)
2440	size_change = `0`;
2441	}
2442	/ As above for the allocated size. /
2443	alloc_change = -`1`;
2444	if (new_alloc_size - old_alloc_size >= `0`) {
2445	alloc_change = `1`;
2446	if (new_alloc_size == old_alloc_size)
2447	alloc_change = `0`;
2448	}
2449	/*
2450	* If neither the size nor the allocation are being changed there is
2451	* nothing to do.
2452	*/
2453	if (!size_change && !alloc_change)
2454	goto unm_done;
2455	/ If the size is changing, check if new size is allowed in $AttrDef. /
2456	if (size_change) {
2457	err = ntfs_attr_size_bounds_check(vol, type: ni->type, size: new_size);
2458	if (unlikely(err)) {
2459	if (err == -ERANGE) {
2460	ntfs_error(vol->sb, "Truncate would cause the "
2461	"inode 0x%lx to %simum size "
2462	"for its attribute type "
2463	"(0x%x). Aborting truncate.",
2464	vi->i_ino,
2465	new_size > old_size ? "exceed "
2466	"the max" : "go under the min",
2467	le32_to_cpu(ni->type));
2468	err = -EFBIG;
2469	} else {
2470	ntfs_error(vol->sb, "Inode 0x%lx has unknown "
2471	"attribute type 0x%x. "
2472	"Aborting truncate.",
2473	vi->i_ino,
2474	le32_to_cpu(ni->type));
2475	err = -EIO;
2476	}
2477	/ Reset the vfs inode size to the old size. /
2478	i_size_write(inode: vi, i_size: old_size);
2479	goto err_out;
2480	}
2481	}
2482	if (NInoCompressed(ni) \|\| NInoEncrypted(ni)) {
2483	ntfs_warning(vi->i_sb, "Changes in inode size are not "
2484	"supported yet for %s files, ignoring.",
2485	NInoCompressed(ni) ? "compressed" :
2486	"encrypted");
2487	err = -EOPNOTSUPP;
2488	goto bad_out;
2489	}
2490	if (a->non_resident)
2491	goto do_non_resident_truncate;
2492	BUG_ON(NInoNonResident(ni));
2493	/ Resize the attribute record to best fit the new attribute size. /
2494	if (new_size < vol->mft_record_size &&
2495	!ntfs_resident_attr_value_resize(m, a, new_size)) {
2496	/ The resize succeeded! /
2497	flush_dcache_mft_record_page(ni: ctx->ntfs_ino);
2498	mark_mft_record_dirty(ni: ctx->ntfs_ino);
2499	write_lock_irqsave(&ni->size_lock, flags);
2500	/ Update the sizes in the ntfs inode and all is done. /
2501	ni->allocated_size = le32_to_cpu(a->length) -
2502	le16_to_cpu(a->data.resident.value_offset);
2503	/*
2504	* Note ntfs_resident_attr_value_resize() has already done any
2505	* necessary data clearing in the attribute record. When the
2506	* file is being shrunk vmtruncate() will already have cleared
2507	* the top part of the last partial page, i.e. since this is
2508	* the resident case this is the page with index 0. However,
2509	* when the file is being expanded, the page cache page data
2510	* between the old data_size, i.e. old_size, and the new_size
2511	* has not been zeroed. Fortunately, we do not need to zero it
2512	* either since on one hand it will either already be zero due
2513	* to both read_folio and writepage clearing partial page data
2514	* beyond i_size in which case there is nothing to do or in the
2515	* case of the file being mmap()ped at the same time, POSIX
2516	* specifies that the behaviour is unspecified thus we do not
2517	* have to do anything. This means that in our implementation
2518	* in the rare case that the file is mmap()ped and a write
2519	* occurred into the mmap()ped region just beyond the file size
2520	* and writepage has not yet been called to write out the page
2521	* (which would clear the area beyond the file size) and we now
2522	* extend the file size to incorporate this dirty region
2523	* outside the file size, a write of the page would result in
2524	* this data being written to disk instead of being cleared.
2525	* Given both POSIX and the Linux mmap(2) man page specify that
2526	* this corner case is undefined, we choose to leave it like
2527	* that as this is much simpler for us as we cannot lock the
2528	* relevant page now since we are holding too many ntfs locks
2529	* which would result in a lock reversal deadlock.
2530	*/
2531	ni->initialized_size = new_size;
2532	write_unlock_irqrestore(&ni->size_lock, flags);
2533	goto unm_done;
2534	}
2535	/ If the above resize failed, this must be an attribute extension. /
2536	BUG_ON(size_change < `0`);
2537	/*
2538	* We have to drop all the locks so we can call
2539	* ntfs_attr_make_non_resident(). This could be optimised by try-
2540	* locking the first page cache page and only if that fails dropping
2541	* the locks, locking the page, and redoing all the locking and
2542	* lookups. While this would be a huge optimisation, it is not worth
2543	* it as this is definitely a slow code path as it only ever can happen
2544	* once for any given file.
2545	*/
2546	ntfs_attr_put_search_ctx(ctx);
2547	unmap_mft_record(ni: base_ni);
2548	up_write(sem: &ni->runlist.lock);
2549	/*
2550	* Not enough space in the mft record, try to make the attribute
2551	* non-resident and if successful restart the truncation process.
2552	*/
2553	err = ntfs_attr_make_non_resident(ni, data_size: old_size);
2554	if (likely(!err))
2555	goto retry_truncate;
2556	/*
2557	* Could not make non-resident. If this is due to this not being
2558	* permitted for this attribute type or there not being enough space,
2559	* try to make other attributes non-resident. Otherwise fail.
2560	*/
2561	if (unlikely(err != -EPERM && err != -ENOSPC)) {
2562	ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, attribute "
2563	"type 0x%x, because the conversion from "
2564	"resident to non-resident attribute failed "
2565	"with error code %i.", vi->i_ino,
2566	(unsigned)le32_to_cpu(ni->type), err);
2567	if (err != -ENOMEM)
2568	err = -EIO;
2569	goto conv_err_out;
2570	}
2571	/ TODO: Not implemented from here, abort. /
2572	if (err == -ENOSPC)
2573	ntfs_error(vol->sb, "Not enough space in the mft record/on "
2574	"disk for the non-resident attribute value. "
2575	"This case is not implemented yet.");
2576	else / if (err == -EPERM) /
2577	ntfs_error(vol->sb, "This attribute type may not be "
2578	"non-resident. This case is not implemented "
2579	"yet.");
2580	err = -EOPNOTSUPP;
2581	goto conv_err_out;
2582	#if 0
2583	// TODO: Attempt to make other attributes non-resident.
2584	if (!err)
2585	goto do_resident_extend;
2586	/*
2587	* Both the attribute list attribute and the standard information
2588	* attribute must remain in the base inode. Thus, if this is one of
2589	* these attributes, we have to try to move other attributes out into
2590	* extent mft records instead.
2591	*/
2592	if (ni->type == AT_ATTRIBUTE_LIST \|\|
2593	ni->type == AT_STANDARD_INFORMATION) {
2594	// TODO: Attempt to move other attributes into extent mft
2595	// records.
2596	err = -EOPNOTSUPP;
2597	if (!err)
2598	goto do_resident_extend;
2599	goto err_out;
2600	}
2601	// TODO: Attempt to move this attribute to an extent mft record, but
2602	// only if it is not already the only attribute in an mft record in
2603	// which case there would be nothing to gain.
2604	err = -EOPNOTSUPP;
2605	if (!err)
2606	goto do_resident_extend;
2607	/ There is nothing we can do to make enough space. )-: /
2608	goto err_out;
2609	#endif
2610	do_non_resident_truncate:
2611	BUG_ON(!NInoNonResident(ni));
2612	if (alloc_change < `0`) {
2613	highest_vcn = sle64_to_cpu(x: a->data.non_resident.highest_vcn);
2614	if (highest_vcn > `0` &&
2615	old_alloc_size >> vol->cluster_size_bits >
2616	highest_vcn + `1`) {
2617	/*
2618	* This attribute has multiple extents. Not yet
2619	* supported.
2620	*/
2621	ntfs_error(vol->sb, "Cannot truncate inode 0x%lx, "
2622	"attribute type 0x%x, because the "
2623	"attribute is highly fragmented (it "
2624	"consists of multiple extents) and "
2625	"this case is not implemented yet.",
2626	vi->i_ino,
2627	(unsigned)le32_to_cpu(ni->type));
2628	err = -EOPNOTSUPP;
2629	goto bad_out;
2630	}
2631	}
2632	/*
2633	* If the size is shrinking, need to reduce the initialized_size and
2634	* the data_size before reducing the allocation.
2635	*/
2636	if (size_change < `0`) {
2637	/*
2638	* Make the valid size smaller (i_size is already up-to-date).
2639	*/
2640	write_lock_irqsave(&ni->size_lock, flags);
2641	if (new_size < ni->initialized_size) {
2642	ni->initialized_size = new_size;
2643	a->data.non_resident.initialized_size =
2644	cpu_to_sle64(x: new_size);
2645	}
2646	a->data.non_resident.data_size = cpu_to_sle64(x: new_size);
2647	write_unlock_irqrestore(&ni->size_lock, flags);
2648	flush_dcache_mft_record_page(ni: ctx->ntfs_ino);
2649	mark_mft_record_dirty(ni: ctx->ntfs_ino);
2650	/ If the allocated size is not changing, we are done. /
2651	if (!alloc_change)
2652	goto unm_done;
2653	/*
2654	* If the size is shrinking it makes no sense for the
2655	* allocation to be growing.
2656	*/
2657	BUG_ON(alloc_change > `0`);
2658	} else / if (size_change >= 0) / {
2659	/*
2660	* The file size is growing or staying the same but the
2661	* allocation can be shrinking, growing or staying the same.
2662	*/
2663	if (alloc_change > `0`) {
2664	/*
2665	* We need to extend the allocation and possibly update
2666	* the data size. If we are updating the data size,
2667	* since we are not touching the initialized_size we do
2668	* not need to worry about the actual data on disk.
2669	* And as far as the page cache is concerned, there
2670	* will be no pages beyond the old data size and any
2671	* partial region in the last page between the old and
2672	* new data size (or the end of the page if the new
2673	* data size is outside the page) does not need to be
2674	* modified as explained above for the resident
2675	* attribute truncate case. To do this, we simply drop
2676	* the locks we hold and leave all the work to our
2677	* friendly helper ntfs_attr_extend_allocation().
2678	*/
2679	ntfs_attr_put_search_ctx(ctx);
2680	unmap_mft_record(ni: base_ni);
2681	up_write(sem: &ni->runlist.lock);
2682	err = ntfs_attr_extend_allocation(ni, new_alloc_size: new_size,
2683	new_data_size: size_change > `0` ? new_size : -`1`, data_start: -`1`);
2684	/*
2685	* ntfs_attr_extend_allocation() will have done error
2686	* output already.
2687	*/
2688	goto done;
2689	}
2690	if (!alloc_change)
2691	goto alloc_done;
2692	}
2693	/ alloc_change < 0 /
2694	/ Free the clusters. /
2695	nr_freed = ntfs_cluster_free(ni, start_vcn: new_alloc_size >>
2696	vol->cluster_size_bits, count: -`1`, ctx);
2697	m = ctx->mrec;
2698	a = ctx->attr;
2699	if (unlikely(nr_freed < `0`)) {
2700	ntfs_error(vol->sb, "Failed to release cluster(s) (error code "
2701	"%lli). Unmount and run chkdsk to recover "
2702	"the lost cluster(s).", (long long)nr_freed);
2703	NVolSetErrors(vol);
2704	nr_freed = `0`;
2705	}
2706	/ Truncate the runlist. /
2707	err = ntfs_rl_truncate_nolock(vol, runlist: &ni->runlist,
2708	new_length: new_alloc_size >> vol->cluster_size_bits);
2709	/*
2710	* If the runlist truncation failed and/or the search context is no
2711	* longer valid, we cannot resize the attribute record or build the
2712	* mapping pairs array thus we mark the inode bad so that no access to
2713	* the freed clusters can happen.
2714	*/
2715	if (unlikely(err \|\| IS_ERR(m))) {
2716	ntfs_error(vol->sb, "Failed to %s (error code %li).%s",
2717	IS_ERR(m) ?
2718	"restore attribute search context" :
2719	"truncate attribute runlist",
2720	IS_ERR(m) ? PTR_ERR(m) : err, es);
2721	err = -EIO;
2722	goto bad_out;
2723	}
2724	/ Get the size for the shrunk mapping pairs array for the runlist. /
2725	mp_size = ntfs_get_size_for_mapping_pairs(vol, rl: ni->runlist.rl, first_vcn: `0`, last_vcn: -`1`);
2726	if (unlikely(mp_size <= `0`)) {
2727	ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
2728	"attribute type 0x%x, because determining the "
2729	"size for the mapping pairs failed with error "
2730	"code %i.%s", vi->i_ino,
2731	(unsigned)le32_to_cpu(ni->type), mp_size, es);
2732	err = -EIO;
2733	goto bad_out;
2734	}
2735	/*
2736	* Shrink the attribute record for the new mapping pairs array. Note,
2737	* this cannot fail since we are making the attribute smaller thus by
2738	* definition there is enough space to do so.
2739	*/
2740	err = ntfs_attr_record_resize(m, a, new_size: mp_size +
2741	le16_to_cpu(a->data.non_resident.mapping_pairs_offset));
2742	BUG_ON(err);
2743	/*
2744	* Generate the mapping pairs array directly into the attribute record.
2745	*/
2746	err = ntfs_mapping_pairs_build(vol, dst: (u8*)a +
2747	le16_to_cpu(a->data.non_resident.mapping_pairs_offset),
2748	dst_len: mp_size, rl: ni->runlist.rl, first_vcn: `0`, last_vcn: -`1`, NULL);
2749	if (unlikely(err)) {
2750	ntfs_error(vol->sb, "Cannot shrink allocation of inode 0x%lx, "
2751	"attribute type 0x%x, because building the "
2752	"mapping pairs failed with error code %i.%s",
2753	vi->i_ino, (unsigned)le32_to_cpu(ni->type),
2754	err, es);
2755	err = -EIO;
2756	goto bad_out;
2757	}
2758	/ Update the allocated/compressed size as well as the highest vcn. /
2759	a->data.non_resident.highest_vcn = cpu_to_sle64(x: (new_alloc_size >>
2760	vol->cluster_size_bits) - `1`);
2761	write_lock_irqsave(&ni->size_lock, flags);
2762	ni->allocated_size = new_alloc_size;
2763	a->data.non_resident.allocated_size = cpu_to_sle64(x: new_alloc_size);
2764	if (NInoSparse(ni) \|\| NInoCompressed(ni)) {
2765	if (nr_freed) {
2766	ni->itype.compressed.size -= nr_freed <<
2767	vol->cluster_size_bits;
2768	BUG_ON(ni->itype.compressed.size < `0`);
2769	a->data.non_resident.compressed_size = cpu_to_sle64(
2770	x: ni->itype.compressed.size);
2771	vi->i_blocks = ni->itype.compressed.size >> `9`;
2772	}
2773	} else
2774	vi->i_blocks = new_alloc_size >> `9`;
2775	write_unlock_irqrestore(&ni->size_lock, flags);
2776	/*
2777	* We have shrunk the allocation. If this is a shrinking truncate we
2778	* have already dealt with the initialized_size and the data_size above
2779	* and we are done. If the truncate is only changing the allocation
2780	* and not the data_size, we are also done. If this is an extending
2781	* truncate, need to extend the data_size now which is ensured by the
2782	* fact that @size_change is positive.
2783	*/
2784	alloc_done:
2785	/*
2786	* If the size is growing, need to update it now. If it is shrinking,
2787	* we have already updated it above (before the allocation change).
2788	*/
2789	if (size_change > `0`)
2790	a->data.non_resident.data_size = cpu_to_sle64(x: new_size);
2791	/ Ensure the modified mft record is written out. /
2792	flush_dcache_mft_record_page(ni: ctx->ntfs_ino);
2793	mark_mft_record_dirty(ni: ctx->ntfs_ino);
2794	unm_done:
2795	ntfs_attr_put_search_ctx(ctx);
2796	unmap_mft_record(ni: base_ni);
2797	up_write(sem: &ni->runlist.lock);
2798	done:
2799	/ Update the mtime and ctime on the base inode. /
2800	/ normally ->truncate shouldn't update ctime or mtime,*
2801	* but ntfs did before so it got a copy & paste version
2802	* of file_update_time. one day someone should fix this
2803	* for real.
2804	*/
2805	if (!IS_NOCMTIME(VFS_I(base_ni)) && !IS_RDONLY(VFS_I(base_ni))) {
2806	struct timespec64 now = current_time(inode: VFS_I(ni: base_ni));
2807	struct timespec64 ctime = inode_get_ctime(inode: VFS_I(ni: base_ni));
2808	struct timespec64 mtime = inode_get_mtime(inode: VFS_I(ni: base_ni));
2809	int sync_it = `0`;
2810
2811	if (!timespec64_equal(a: &mtime, b: &now) \|\|
2812	!timespec64_equal(a: &ctime, b: &now))
2813	sync_it = `1`;
2814	inode_set_ctime_to_ts(inode: VFS_I(ni: base_ni), ts: now);
2815	inode_set_mtime_to_ts(inode: VFS_I(ni: base_ni), ts: now);
2816
2817	if (sync_it)
2818	mark_inode_dirty_sync(inode: VFS_I(ni: base_ni));
2819	}
2820
2821	if (likely(!err)) {
2822	NInoClearTruncateFailed(ni);
2823	ntfs_debug("Done.");
2824	}
2825	return err;
2826	old_bad_out:
2827	old_size = -`1`;
2828	bad_out:
2829	if (err != -ENOMEM && err != -EOPNOTSUPP)
2830	NVolSetErrors(vol);
2831	if (err != -EOPNOTSUPP)
2832	NInoSetTruncateFailed(ni);
2833	else if (old_size >= `0`)
2834	i_size_write(inode: vi, i_size: old_size);
2835	err_out:
2836	if (ctx)
2837	ntfs_attr_put_search_ctx(ctx);
2838	if (m)
2839	unmap_mft_record(ni: base_ni);
2840	up_write(sem: &ni->runlist.lock);
2841	out:
2842	ntfs_debug("Failed. Returning error code %i.", err);
2843	return err;
2844	conv_err_out:
2845	if (err != -ENOMEM && err != -EOPNOTSUPP)
2846	NVolSetErrors(vol);
2847	if (err != -EOPNOTSUPP)
2848	NInoSetTruncateFailed(ni);
2849	else
2850	i_size_write(inode: vi, i_size: old_size);
2851	goto out;
2852	}
2853
/**
 * ntfs_truncate_vfs - void-returning adapter around ntfs_truncate()
 * @vi:		inode for which the i_size was changed
 *
 * Calls ntfs_truncate() on @vi and deliberately drops whatever it returns, so
 * that the truncate can be invoked from places that expect a function without
 * a return value.
 *
 * See the ntfs_truncate() description for details of the truncate itself.
 */
#ifdef NTFS_RW
void ntfs_truncate_vfs(struct inode *vi)
{
	/* The result is intentionally ignored; this wrapper returns nothing. */
	(void)ntfs_truncate(vi);
}
#endif
2867
2868	/**
2869	* ntfs_setattr - called from notify_change() when an attribute is being changed
2870	* @idmap: idmap of the mount the inode was found from
2871	* @dentry: dentry whose attributes to change
2872	* @attr: structure describing the attributes and the changes
2873	*
2874	* We have to trap VFS attempts to truncate the file described by @dentry as
2875	* soon as possible, because we do not implement changes in i_size yet. So we
2876	* abort all i_size changes here.
2877	*
2878	* We also abort all changes of user, group, and mode as we do not implement
2879	* the NTFS ACLs yet.
2880	*
2881	* Called with ->i_mutex held.
2882	*/
2883	int ntfs_setattr(struct mnt_idmap idmap, struct* dentry *dentry,
2884	struct iattr *attr)
2885	{
2886	struct inode *vi = d_inode(dentry);
2887	int err;
2888	unsigned int ia_valid = attr->ia_valid;
2889
2890	err = setattr_prepare(&nop_mnt_idmap, dentry, attr);
2891	if (err)
2892	goto out;
2893	/ We do not support NTFS ACLs yet. /
2894	if (ia_valid & (ATTR_UID \| ATTR_GID \| ATTR_MODE)) {
2895	ntfs_warning(vi->i_sb, "Changes in user/group/mode are not "
2896	"supported yet, ignoring.");
2897	err = -EOPNOTSUPP;
2898	goto out;
2899	}
2900	if (ia_valid & ATTR_SIZE) {
2901	if (attr->ia_size != i_size_read(inode: vi)) {
2902	ntfs_inode *ni = NTFS_I(inode: vi);
2903	/*
2904	* FIXME: For now we do not support resizing of
2905	* compressed or encrypted files yet.
2906	*/
2907	if (NInoCompressed(ni) \|\| NInoEncrypted(ni)) {
2908	ntfs_warning(vi->i_sb, "Changes in inode size "
2909	"are not supported yet for "
2910	"%s files, ignoring.",
2911	NInoCompressed(ni) ?
2912	"compressed" : "encrypted");
2913	err = -EOPNOTSUPP;
2914	} else {
2915	truncate_setsize(inode: vi, newsize: attr->ia_size);
2916	ntfs_truncate_vfs(vi);
2917	}
2918	if (err \|\| ia_valid == ATTR_SIZE)
2919	goto out;
2920	} else {
2921	/*
2922	* We skipped the truncate but must still update
2923	* timestamps.
2924	*/
2925	ia_valid \|= ATTR_MTIME \| ATTR_CTIME;
2926	}
2927	}
2928	if (ia_valid & ATTR_ATIME)
2929	inode_set_atime_to_ts(inode: vi, ts: attr->ia_atime);
2930	if (ia_valid & ATTR_MTIME)
2931	inode_set_mtime_to_ts(inode: vi, ts: attr->ia_mtime);
2932	if (ia_valid & ATTR_CTIME)
2933	inode_set_ctime_to_ts(inode: vi, ts: attr->ia_ctime);
2934	mark_inode_dirty(inode: vi);
2935	out:
2936	return err;
2937	}
2938
2939	/**
2940	* __ntfs_write_inode - write out a dirty inode
2941	* @vi: inode to write out
2942	* @sync: if true, write out synchronously
2943	*
2944	* Write out a dirty inode to disk including any extent inodes if present.
2945	*
2946	* If @sync is true, commit the inode to disk and wait for io completion. This
2947	* is done using write_mft_record().
2948	*
2949	* If @sync is false, just schedule the write to happen but do not wait for i/o
2950	* completion. In 2.6 kernels, scheduling usually happens just by virtue of
2951	* marking the page (and in this case mft record) dirty but we do not implement
2952	* this yet as write_mft_record() largely ignores the @sync parameter and
2953	* always performs synchronous writes.
2954	*
2955	* Return 0 on success and -errno on error.
2956	*/
2957	int __ntfs_write_inode(struct inode vi, int* sync)
2958	{
2959	sle64 nt;
2960	ntfs_inode *ni = NTFS_I(inode: vi);
2961	ntfs_attr_search_ctx *ctx;
2962	MFT_RECORD *m;
2963	STANDARD_INFORMATION *si;
2964	int err = `0`;
2965	bool modified = false;
2966
2967	ntfs_debug("Entering for %sinode 0x%lx.", NInoAttr(ni) ? "attr " : "",
2968	vi->i_ino);
2969	/*
2970	* Dirty attribute inodes are written via their real inodes so just
2971	* clean them here. Access time updates are taken care off when the
2972	* real inode is written.
2973	*/
2974	if (NInoAttr(ni)) {
2975	NInoClearDirty(ni);
2976	ntfs_debug("Done.");
2977	return `0`;
2978	}
2979	/ Map, pin, and lock the mft record belonging to the inode. /
2980	m = map_mft_record(ni);
2981	if (IS_ERR(ptr: m)) {
2982	err = PTR_ERR(ptr: m);
2983	goto err_out;
2984	}
2985	/ Update the access times in the standard information attribute. /
2986	ctx = ntfs_attr_get_search_ctx(ni, mrec: m);
2987	if (unlikely(!ctx)) {
2988	err = -ENOMEM;
2989	goto unm_err_out;
2990	}
2991	err = ntfs_attr_lookup(type: AT_STANDARD_INFORMATION, NULL, name_len: `0`,
2992	ic: CASE_SENSITIVE, lowest_vcn: `0`, NULL, val_len: `0`, ctx);
2993	if (unlikely(err)) {
2994	ntfs_attr_put_search_ctx(ctx);
2995	goto unm_err_out;
2996	}
2997	si = (STANDARD_INFORMATION)((u8)ctx->attr +
2998	le16_to_cpu(ctx->attr->data.resident.value_offset));
2999	/ Update the access times if they have changed. /
3000	nt = utc2ntfs(ts: inode_get_mtime(inode: vi));
3001	if (si->last_data_change_time != nt) {
3002	ntfs_debug("Updating mtime for inode 0x%lx: old = 0x%llx, "
3003	"new = 0x%llx", vi->i_ino, (long long)
3004	sle64_to_cpu(si->last_data_change_time),
3005	(long long)sle64_to_cpu(nt));
3006	si->last_data_change_time = nt;
3007	modified = true;
3008	}
3009	nt = utc2ntfs(ts: inode_get_ctime(inode: vi));
3010	if (si->last_mft_change_time != nt) {
3011	ntfs_debug("Updating ctime for inode 0x%lx: old = 0x%llx, "
3012	"new = 0x%llx", vi->i_ino, (long long)
3013	sle64_to_cpu(si->last_mft_change_time),
3014	(long long)sle64_to_cpu(nt));
3015	si->last_mft_change_time = nt;
3016	modified = true;
3017	}
3018	nt = utc2ntfs(ts: inode_get_atime(inode: vi));
3019	if (si->last_access_time != nt) {
3020	ntfs_debug("Updating atime for inode 0x%lx: old = 0x%llx, "
3021	"new = 0x%llx", vi->i_ino,
3022	(long long)sle64_to_cpu(si->last_access_time),
3023	(long long)sle64_to_cpu(nt));
3024	si->last_access_time = nt;
3025	modified = true;
3026	}
3027	/*
3028	* If we just modified the standard information attribute we need to
3029	* mark the mft record it is in dirty. We do this manually so that
3030	* mark_inode_dirty() is not called which would redirty the inode and
3031	* hence result in an infinite loop of trying to write the inode.
3032	* There is no need to mark the base inode nor the base mft record
3033	* dirty, since we are going to write this mft record below in any case
3034	* and the base mft record may actually not have been modified so it
3035	* might not need to be written out.
3036	* NOTE: It is not a problem when the inode for $MFT itself is being
3037	* written out as mark_ntfs_record_dirty() will only set I_DIRTY_PAGES
3038	* on the $MFT inode and hence __ntfs_write_inode() will not be
3039	* re-invoked because of it which in turn is ok since the dirtied mft
3040	* record will be cleaned and written out to disk below, i.e. before
3041	* this function returns.
3042	*/
3043	if (modified) {
3044	flush_dcache_mft_record_page(ni: ctx->ntfs_ino);
3045	if (!NInoTestSetDirty(ni: ctx->ntfs_ino))
3046	mark_ntfs_record_dirty(page: ctx->ntfs_ino->page,
3047	ofs: ctx->ntfs_ino->page_ofs);
3048	}
3049	ntfs_attr_put_search_ctx(ctx);
3050	/ Now the access times are updated, write the base mft record. /
3051	if (NInoDirty(ni))
3052	err = write_mft_record(ni, m, sync);
3053	/ Write all attached extent mft records. /
3054	mutex_lock(&ni->extent_lock);
3055	if (ni->nr_extents > `0`) {
3056	ntfs_inode **extent_nis = ni->ext.extent_ntfs_inos;
3057	int i;
3058
3059	ntfs_debug("Writing %i extent inodes.", ni->nr_extents);
3060	for (i = `0`; i < ni->nr_extents; i++) {
3061	ntfs_inode *tni = extent_nis[i];
3062
3063	if (NInoDirty(ni: tni)) {
3064	MFT_RECORD *tm = map_mft_record(ni: tni);
3065	int ret;
3066
3067	if (IS_ERR(ptr: tm)) {
3068	if (!err \|\| err == -ENOMEM)
3069	err = PTR_ERR(ptr: tm);
3070	continue;
3071	}
3072	ret = write_mft_record(ni: tni, m: tm, sync);
3073	unmap_mft_record(ni: tni);
3074	if (unlikely(ret)) {
3075	if (!err \|\| err == -ENOMEM)
3076	err = ret;
3077	}
3078	}
3079	}
3080	}
3081	mutex_unlock(lock: &ni->extent_lock);
3082	unmap_mft_record(ni);
3083	if (unlikely(err))
3084	goto err_out;
3085	ntfs_debug("Done.");
3086	return `0`;
3087	unm_err_out:
3088	unmap_mft_record(ni);
3089	err_out:
3090	if (err == -ENOMEM) {
3091	ntfs_warning(vi->i_sb, "Not enough memory to write inode. "
3092	"Marking the inode dirty again, so the VFS "
3093	"retries later.");
3094	mark_inode_dirty(inode: vi);
3095	} else {
3096	ntfs_error(vi->i_sb, "Failed (error %i): Run chkdsk.", -err);
3097	NVolSetErrors(vol: ni->vol);
3098	}
3099	return err;
3100	}
3101
3102	#endif /* NTFS_RW */
3103

source code of linux/fs/ntfs/inode.c