1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * mmap.c |
4 | * |
5 | * Code to deal with the mess that is clustered mmap. |
6 | * |
7 | * Copyright (C) 2002, 2004 Oracle. All rights reserved. |
8 | */ |
9 | |
10 | #include <linux/fs.h> |
11 | #include <linux/types.h> |
12 | #include <linux/highmem.h> |
13 | #include <linux/pagemap.h> |
14 | #include <linux/uio.h> |
15 | #include <linux/signal.h> |
16 | #include <linux/rbtree.h> |
17 | |
18 | #include <cluster/masklog.h> |
19 | |
20 | #include "ocfs2.h" |
21 | |
22 | #include "aops.h" |
23 | #include "dlmglue.h" |
24 | #include "file.h" |
25 | #include "inode.h" |
26 | #include "mmap.h" |
27 | #include "super.h" |
28 | #include "ocfs2_trace.h" |
29 | |
30 | |
31 | static vm_fault_t ocfs2_fault(struct vm_fault *vmf) |
32 | { |
33 | struct vm_area_struct *vma = vmf->vma; |
34 | sigset_t oldset; |
35 | vm_fault_t ret; |
36 | |
37 | ocfs2_block_signals(oldset: &oldset); |
38 | ret = filemap_fault(vmf); |
39 | ocfs2_unblock_signals(oldset: &oldset); |
40 | |
41 | trace_ocfs2_fault(ino: OCFS2_I(inode: vma->vm_file->f_mapping->host)->ip_blkno, |
42 | area: vma, page: vmf->page, pgoff: vmf->pgoff); |
43 | return ret; |
44 | } |
45 | |
46 | static vm_fault_t __ocfs2_page_mkwrite(struct file *file, |
47 | struct buffer_head *di_bh, struct page *page) |
48 | { |
49 | int err; |
50 | vm_fault_t ret = VM_FAULT_NOPAGE; |
51 | struct inode *inode = file_inode(f: file); |
52 | struct address_space *mapping = inode->i_mapping; |
53 | loff_t pos = page_offset(page); |
54 | unsigned int len = PAGE_SIZE; |
55 | pgoff_t last_index; |
56 | struct page *locked_page = NULL; |
57 | void *fsdata; |
58 | loff_t size = i_size_read(inode); |
59 | |
60 | last_index = (size - 1) >> PAGE_SHIFT; |
61 | |
62 | /* |
63 | * There are cases that lead to the page no longer belonging to the |
64 | * mapping. |
65 | * 1) pagecache truncates locally due to memory pressure. |
66 | * 2) pagecache truncates when another is taking EX lock against |
67 | * inode lock. see ocfs2_data_convert_worker. |
68 | * |
69 | * The i_size check doesn't catch the case where nodes truncated and |
70 | * then re-extended the file. We'll re-check the page mapping after |
71 | * taking the page lock inside of ocfs2_write_begin_nolock(). |
72 | * |
73 | * Let VM retry with these cases. |
74 | */ |
75 | if ((page->mapping != inode->i_mapping) || |
76 | (!PageUptodate(page)) || |
77 | (page_offset(page) >= size)) |
78 | goto out; |
79 | |
80 | /* |
81 | * Call ocfs2_write_begin() and ocfs2_write_end() to take |
82 | * advantage of the allocation code there. We pass a write |
83 | * length of the whole page (chopped to i_size) to make sure |
84 | * the whole thing is allocated. |
85 | * |
86 | * Since we know the page is up to date, we don't have to |
87 | * worry about ocfs2_write_begin() skipping some buffer reads |
88 | * because the "write" would invalidate their data. |
89 | */ |
90 | if (page->index == last_index) |
91 | len = ((size - 1) & ~PAGE_MASK) + 1; |
92 | |
93 | err = ocfs2_write_begin_nolock(mapping, pos, len, type: OCFS2_WRITE_MMAP, |
94 | pagep: &locked_page, fsdata: &fsdata, di_bh, mmap_page: page); |
95 | if (err) { |
96 | if (err != -ENOSPC) |
97 | mlog_errno(err); |
98 | ret = vmf_error(err); |
99 | goto out; |
100 | } |
101 | |
102 | if (!locked_page) { |
103 | ret = VM_FAULT_NOPAGE; |
104 | goto out; |
105 | } |
106 | err = ocfs2_write_end_nolock(mapping, pos, len, copied: len, fsdata); |
107 | BUG_ON(err != len); |
108 | ret = VM_FAULT_LOCKED; |
109 | out: |
110 | return ret; |
111 | } |
112 | |
113 | static vm_fault_t ocfs2_page_mkwrite(struct vm_fault *vmf) |
114 | { |
115 | struct page *page = vmf->page; |
116 | struct inode *inode = file_inode(f: vmf->vma->vm_file); |
117 | struct buffer_head *di_bh = NULL; |
118 | sigset_t oldset; |
119 | int err; |
120 | vm_fault_t ret; |
121 | |
122 | sb_start_pagefault(sb: inode->i_sb); |
123 | ocfs2_block_signals(oldset: &oldset); |
124 | |
125 | /* |
126 | * The cluster locks taken will block a truncate from another |
127 | * node. Taking the data lock will also ensure that we don't |
128 | * attempt page truncation as part of a downconvert. |
129 | */ |
130 | err = ocfs2_inode_lock(inode, &di_bh, 1); |
131 | if (err < 0) { |
132 | mlog_errno(err); |
133 | ret = vmf_error(err); |
134 | goto out; |
135 | } |
136 | |
137 | /* |
138 | * The alloc sem should be enough to serialize with |
139 | * ocfs2_truncate_file() changing i_size as well as any thread |
140 | * modifying the inode btree. |
141 | */ |
142 | down_write(sem: &OCFS2_I(inode)->ip_alloc_sem); |
143 | |
144 | ret = __ocfs2_page_mkwrite(file: vmf->vma->vm_file, di_bh, page); |
145 | |
146 | up_write(sem: &OCFS2_I(inode)->ip_alloc_sem); |
147 | |
148 | brelse(bh: di_bh); |
149 | ocfs2_inode_unlock(inode, ex: 1); |
150 | |
151 | out: |
152 | ocfs2_unblock_signals(oldset: &oldset); |
153 | sb_end_pagefault(sb: inode->i_sb); |
154 | return ret; |
155 | } |
156 | |
157 | static const struct vm_operations_struct ocfs2_file_vm_ops = { |
158 | .fault = ocfs2_fault, |
159 | .page_mkwrite = ocfs2_page_mkwrite, |
160 | }; |
161 | |
162 | int ocfs2_mmap(struct file *file, struct vm_area_struct *vma) |
163 | { |
164 | int ret = 0, lock_level = 0; |
165 | |
166 | ret = ocfs2_inode_lock_atime(inode: file_inode(f: file), |
167 | vfsmnt: file->f_path.mnt, level: &lock_level, wait: 1); |
168 | if (ret < 0) { |
169 | mlog_errno(ret); |
170 | goto out; |
171 | } |
172 | ocfs2_inode_unlock(inode: file_inode(f: file), ex: lock_level); |
173 | out: |
174 | vma->vm_ops = &ocfs2_file_vm_ops; |
175 | return 0; |
176 | } |
177 | |
178 | |