1 | // SPDX-License-Identifier: GPL-2.0 |
2 | /* |
3 | * Copyright (c) 2014-2016 Christoph Hellwig. |
4 | */ |
5 | #include <linux/exportfs.h> |
6 | #include <linux/iomap.h> |
7 | #include <linux/slab.h> |
8 | #include <linux/pr.h> |
9 | |
10 | #include <linux/nfsd/debug.h> |
11 | |
12 | #include "blocklayoutxdr.h" |
13 | #include "pnfs.h" |
14 | #include "filecache.h" |
15 | #include "vfs.h" |
16 | |
/*
 * Debug facility selected for this file; presumably what the dprintk()
 * calls in this file are keyed on.
 */
#define NFSDDBG_FACILITY	NFSDDBG_PNFS
18 | |
19 | |
20 | static __be32 |
21 | nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp, |
22 | struct nfsd4_layoutget *args) |
23 | { |
24 | struct nfsd4_layout_seg *seg = &args->lg_seg; |
25 | struct super_block *sb = inode->i_sb; |
26 | u32 block_size = i_blocksize(node: inode); |
27 | struct pnfs_block_extent *bex; |
28 | struct iomap iomap; |
29 | u32 device_generation = 0; |
30 | int error; |
31 | |
32 | if (seg->offset & (block_size - 1)) { |
33 | dprintk("pnfsd: I/O misaligned\n" ); |
34 | goto out_layoutunavailable; |
35 | } |
36 | |
37 | /* |
38 | * Some clients barf on non-zero block numbers for NONE or INVALID |
39 | * layouts, so make sure to zero the whole structure. |
40 | */ |
41 | error = -ENOMEM; |
42 | bex = kzalloc(size: sizeof(*bex), GFP_KERNEL); |
43 | if (!bex) |
44 | goto out_error; |
45 | args->lg_content = bex; |
46 | |
47 | error = sb->s_export_op->map_blocks(inode, seg->offset, seg->length, |
48 | &iomap, seg->iomode != IOMODE_READ, |
49 | &device_generation); |
50 | if (error) { |
51 | if (error == -ENXIO) |
52 | goto out_layoutunavailable; |
53 | goto out_error; |
54 | } |
55 | |
56 | if (iomap.length < args->lg_minlength) { |
57 | dprintk("pnfsd: extent smaller than minlength\n" ); |
58 | goto out_layoutunavailable; |
59 | } |
60 | |
61 | switch (iomap.type) { |
62 | case IOMAP_MAPPED: |
63 | if (seg->iomode == IOMODE_READ) |
64 | bex->es = PNFS_BLOCK_READ_DATA; |
65 | else |
66 | bex->es = PNFS_BLOCK_READWRITE_DATA; |
67 | bex->soff = iomap.addr; |
68 | break; |
69 | case IOMAP_UNWRITTEN: |
70 | if (seg->iomode & IOMODE_RW) { |
71 | /* |
72 | * Crack monkey special case from section 2.3.1. |
73 | */ |
74 | if (args->lg_minlength == 0) { |
75 | dprintk("pnfsd: no soup for you!\n" ); |
76 | goto out_layoutunavailable; |
77 | } |
78 | |
79 | bex->es = PNFS_BLOCK_INVALID_DATA; |
80 | bex->soff = iomap.addr; |
81 | break; |
82 | } |
83 | fallthrough; |
84 | case IOMAP_HOLE: |
85 | if (seg->iomode == IOMODE_READ) { |
86 | bex->es = PNFS_BLOCK_NONE_DATA; |
87 | break; |
88 | } |
89 | fallthrough; |
90 | case IOMAP_DELALLOC: |
91 | default: |
92 | WARN(1, "pnfsd: filesystem returned %d extent\n" , iomap.type); |
93 | goto out_layoutunavailable; |
94 | } |
95 | |
96 | error = nfsd4_set_deviceid(id: &bex->vol_id, fhp, device_generation); |
97 | if (error) |
98 | goto out_error; |
99 | bex->foff = iomap.offset; |
100 | bex->len = iomap.length; |
101 | |
102 | seg->offset = iomap.offset; |
103 | seg->length = iomap.length; |
104 | |
105 | dprintk("GET: 0x%llx:0x%llx %d\n" , bex->foff, bex->len, bex->es); |
106 | return 0; |
107 | |
108 | out_error: |
109 | seg->length = 0; |
110 | return nfserrno(errno: error); |
111 | out_layoutunavailable: |
112 | seg->length = 0; |
113 | return nfserr_layoutunavailable; |
114 | } |
115 | |
116 | static __be32 |
117 | nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp, |
118 | struct iomap *iomaps, int nr_iomaps) |
119 | { |
120 | struct timespec64 mtime = inode_get_mtime(inode); |
121 | loff_t new_size = lcp->lc_last_wr + 1; |
122 | struct iattr iattr = { .ia_valid = 0 }; |
123 | int error; |
124 | |
125 | if (lcp->lc_mtime.tv_nsec == UTIME_NOW || |
126 | timespec64_compare(lhs: &lcp->lc_mtime, rhs: &mtime) < 0) |
127 | lcp->lc_mtime = current_time(inode); |
128 | iattr.ia_valid |= ATTR_ATIME | ATTR_CTIME | ATTR_MTIME; |
129 | iattr.ia_atime = iattr.ia_ctime = iattr.ia_mtime = lcp->lc_mtime; |
130 | |
131 | if (new_size > i_size_read(inode)) { |
132 | iattr.ia_valid |= ATTR_SIZE; |
133 | iattr.ia_size = new_size; |
134 | } |
135 | |
136 | error = inode->i_sb->s_export_op->commit_blocks(inode, iomaps, |
137 | nr_iomaps, &iattr); |
138 | kfree(objp: iomaps); |
139 | return nfserrno(errno: error); |
140 | } |
141 | |
142 | #ifdef CONFIG_NFSD_BLOCKLAYOUT |
143 | static int |
144 | nfsd4_block_get_device_info_simple(struct super_block *sb, |
145 | struct nfsd4_getdeviceinfo *gdp) |
146 | { |
147 | struct pnfs_block_deviceaddr *dev; |
148 | struct pnfs_block_volume *b; |
149 | |
150 | dev = kzalloc(size: sizeof(struct pnfs_block_deviceaddr) + |
151 | sizeof(struct pnfs_block_volume), GFP_KERNEL); |
152 | if (!dev) |
153 | return -ENOMEM; |
154 | gdp->gd_device = dev; |
155 | |
156 | dev->nr_volumes = 1; |
157 | b = &dev->volumes[0]; |
158 | |
159 | b->type = PNFS_BLOCK_VOLUME_SIMPLE; |
160 | b->simple.sig_len = PNFS_BLOCK_UUID_LEN; |
161 | return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len, |
162 | &b->simple.offset); |
163 | } |
164 | |
165 | static __be32 |
166 | nfsd4_block_proc_getdeviceinfo(struct super_block *sb, |
167 | struct svc_rqst *rqstp, |
168 | struct nfs4_client *clp, |
169 | struct nfsd4_getdeviceinfo *gdp) |
170 | { |
171 | if (bdev_is_partition(bdev: sb->s_bdev)) |
172 | return nfserr_inval; |
173 | return nfserrno(errno: nfsd4_block_get_device_info_simple(sb, gdp)); |
174 | } |
175 | |
176 | static __be32 |
177 | nfsd4_block_proc_layoutcommit(struct inode *inode, |
178 | struct nfsd4_layoutcommit *lcp) |
179 | { |
180 | struct iomap *iomaps; |
181 | int nr_iomaps; |
182 | |
183 | nr_iomaps = nfsd4_block_decode_layoutupdate(p: lcp->lc_up_layout, |
184 | len: lcp->lc_up_len, iomapp: &iomaps, block_size: i_blocksize(node: inode)); |
185 | if (nr_iomaps < 0) |
186 | return nfserrno(errno: nr_iomaps); |
187 | |
188 | return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); |
189 | } |
190 | |
191 | const struct nfsd4_layout_ops bl_layout_ops = { |
192 | /* |
193 | * Pretend that we send notification to the client. This is a blatant |
194 | * lie to force recent Linux clients to cache our device IDs. |
195 | * We rarely ever change the device ID, so the harm of leaking deviceids |
196 | * for a while isn't too bad. Unfortunately RFC5661 is a complete mess |
197 | * in this regard, but I filed errata 4119 for this a while ago, and |
198 | * hopefully the Linux client will eventually start caching deviceids |
199 | * without this again. |
200 | */ |
201 | .notify_types = |
202 | NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, |
203 | .proc_getdeviceinfo = nfsd4_block_proc_getdeviceinfo, |
204 | .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, |
205 | .proc_layoutget = nfsd4_block_proc_layoutget, |
206 | .encode_layoutget = nfsd4_block_encode_layoutget, |
207 | .proc_layoutcommit = nfsd4_block_proc_layoutcommit, |
208 | }; |
209 | #endif /* CONFIG_NFSD_BLOCKLAYOUT */ |
210 | |
211 | #ifdef CONFIG_NFSD_SCSILAYOUT |
/*
 * Persistent reservation key the server uses for itself: it is the key
 * registered and reserved with in the SCSI getdeviceinfo path and the key
 * the server preempts with when fencing a client.
 */
#define NFSD_MDS_PR_KEY		0x0100000000000000ULL
213 | |
214 | /* |
215 | * We use the client ID as a unique key for the reservations. |
216 | * This allows us to easily fence a client when recalls fail. |
217 | */ |
218 | static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp) |
219 | { |
220 | return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id; |
221 | } |
222 | |
223 | static const u8 designator_types[] = { |
224 | PS_DESIGNATOR_EUI64, |
225 | PS_DESIGNATOR_NAA, |
226 | }; |
227 | |
228 | static int |
229 | nfsd4_block_get_unique_id(struct gendisk *disk, struct pnfs_block_volume *b) |
230 | { |
231 | int ret, i; |
232 | |
233 | for (i = 0; i < ARRAY_SIZE(designator_types); i++) { |
234 | u8 type = designator_types[i]; |
235 | |
236 | ret = disk->fops->get_unique_id(disk, b->scsi.designator, type); |
237 | if (ret > 0) { |
238 | b->scsi.code_set = PS_CODE_SET_BINARY; |
239 | b->scsi.designator_type = type; |
240 | b->scsi.designator_len = ret; |
241 | return 0; |
242 | } |
243 | } |
244 | |
245 | return -EINVAL; |
246 | } |
247 | |
248 | static int |
249 | nfsd4_block_get_device_info_scsi(struct super_block *sb, |
250 | struct nfs4_client *clp, |
251 | struct nfsd4_getdeviceinfo *gdp) |
252 | { |
253 | struct pnfs_block_deviceaddr *dev; |
254 | struct pnfs_block_volume *b; |
255 | const struct pr_ops *ops; |
256 | int ret; |
257 | |
258 | dev = kzalloc(size: sizeof(struct pnfs_block_deviceaddr) + |
259 | sizeof(struct pnfs_block_volume), GFP_KERNEL); |
260 | if (!dev) |
261 | return -ENOMEM; |
262 | gdp->gd_device = dev; |
263 | |
264 | dev->nr_volumes = 1; |
265 | b = &dev->volumes[0]; |
266 | |
267 | b->type = PNFS_BLOCK_VOLUME_SCSI; |
268 | b->scsi.pr_key = nfsd4_scsi_pr_key(clp); |
269 | |
270 | ret = nfsd4_block_get_unique_id(disk: sb->s_bdev->bd_disk, b); |
271 | if (ret < 0) |
272 | goto out_free_dev; |
273 | |
274 | ret = -EINVAL; |
275 | ops = sb->s_bdev->bd_disk->fops->pr_ops; |
276 | if (!ops) { |
277 | pr_err("pNFS: device %s does not support PRs.\n" , |
278 | sb->s_id); |
279 | goto out_free_dev; |
280 | } |
281 | |
282 | ret = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true); |
283 | if (ret) { |
284 | pr_err("pNFS: failed to register key for device %s.\n" , |
285 | sb->s_id); |
286 | goto out_free_dev; |
287 | } |
288 | |
289 | ret = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY, |
290 | PR_EXCLUSIVE_ACCESS_REG_ONLY, 0); |
291 | if (ret) { |
292 | pr_err("pNFS: failed to reserve device %s.\n" , |
293 | sb->s_id); |
294 | goto out_free_dev; |
295 | } |
296 | |
297 | return 0; |
298 | |
299 | out_free_dev: |
300 | kfree(objp: dev); |
301 | gdp->gd_device = NULL; |
302 | return ret; |
303 | } |
304 | |
305 | static __be32 |
306 | nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb, |
307 | struct svc_rqst *rqstp, |
308 | struct nfs4_client *clp, |
309 | struct nfsd4_getdeviceinfo *gdp) |
310 | { |
311 | if (bdev_is_partition(bdev: sb->s_bdev)) |
312 | return nfserr_inval; |
313 | return nfserrno(errno: nfsd4_block_get_device_info_scsi(sb, clp, gdp)); |
314 | } |
315 | static __be32 |
316 | nfsd4_scsi_proc_layoutcommit(struct inode *inode, |
317 | struct nfsd4_layoutcommit *lcp) |
318 | { |
319 | struct iomap *iomaps; |
320 | int nr_iomaps; |
321 | |
322 | nr_iomaps = nfsd4_scsi_decode_layoutupdate(p: lcp->lc_up_layout, |
323 | len: lcp->lc_up_len, iomapp: &iomaps, block_size: i_blocksize(node: inode)); |
324 | if (nr_iomaps < 0) |
325 | return nfserrno(errno: nr_iomaps); |
326 | |
327 | return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps); |
328 | } |
329 | |
330 | static void |
331 | nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls) |
332 | { |
333 | struct nfs4_client *clp = ls->ls_stid.sc_client; |
334 | struct block_device *bdev = ls->ls_file->nf_file->f_path.mnt->mnt_sb->s_bdev; |
335 | |
336 | bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY, |
337 | nfsd4_scsi_pr_key(clp), 0, true); |
338 | } |
339 | |
340 | const struct nfsd4_layout_ops scsi_layout_ops = { |
341 | /* |
342 | * Pretend that we send notification to the client. This is a blatant |
343 | * lie to force recent Linux clients to cache our device IDs. |
344 | * We rarely ever change the device ID, so the harm of leaking deviceids |
345 | * for a while isn't too bad. Unfortunately RFC5661 is a complete mess |
346 | * in this regard, but I filed errata 4119 for this a while ago, and |
347 | * hopefully the Linux client will eventually start caching deviceids |
348 | * without this again. |
349 | */ |
350 | .notify_types = |
351 | NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE, |
352 | .proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo, |
353 | .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo, |
354 | .proc_layoutget = nfsd4_block_proc_layoutget, |
355 | .encode_layoutget = nfsd4_block_encode_layoutget, |
356 | .proc_layoutcommit = nfsd4_scsi_proc_layoutcommit, |
357 | .fence_client = nfsd4_scsi_fence_client, |
358 | }; |
359 | #endif /* CONFIG_NFSD_SCSILAYOUT */ |
360 | |