1 | /* |
2 | * Device operations for the pnfs nfs4 file layout driver. |
3 | * |
4 | * Copyright (c) 2002 |
5 | * The Regents of the University of Michigan |
6 | * All Rights Reserved |
7 | * |
8 | * Dean Hildebrand <dhildebz@umich.edu> |
9 | * Garth Goodson <Garth.Goodson@netapp.com> |
10 | * |
11 | * Permission is granted to use, copy, create derivative works, and |
12 | * redistribute this software and such derivative works for any purpose, |
13 | * so long as the name of the University of Michigan is not used in |
14 | * any advertising or publicity pertaining to the use or distribution |
15 | * of this software without specific, written prior authorization. If |
16 | * the above copyright notice or any other identification of the |
17 | * University of Michigan is included in any copy of any portion of |
18 | * this software, then the disclaimer below must also be included. |
19 | * |
20 | * This software is provided as is, without representation or warranty |
21 | * of any kind either express or implied, including without limitation |
22 | * the implied warranties of merchantability, fitness for a particular |
23 | * purpose, or noninfringement. The Regents of the University of |
24 | * Michigan shall not be liable for any damages, including special, |
25 | * indirect, incidental, or consequential damages, with respect to any |
26 | * claim arising out of or in connection with the use of the software, |
27 | * even if it has been or is hereafter advised of the possibility of |
28 | * such damages. |
29 | */ |
30 | |
31 | #include <linux/nfs_fs.h> |
32 | #include <linux/vmalloc.h> |
33 | #include <linux/module.h> |
34 | |
35 | #include "../internal.h" |
36 | #include "../nfs4session.h" |
37 | #include "filelayout.h" |
38 | #include "../nfs4trace.h" |
39 | |
40 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD |
41 | |
42 | static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; |
43 | static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; |
44 | |
45 | void |
46 | nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
47 | { |
48 | struct nfs4_pnfs_ds *ds; |
49 | int i; |
50 | |
51 | nfs4_print_deviceid(dev_id: &dsaddr->id_node.deviceid); |
52 | |
53 | for (i = 0; i < dsaddr->ds_num; i++) { |
54 | ds = dsaddr->ds_list[i]; |
55 | if (ds != NULL) |
56 | nfs4_pnfs_ds_put(ds); |
57 | } |
58 | kfree(objp: dsaddr->stripe_indices); |
59 | kfree_rcu(dsaddr, id_node.rcu); |
60 | } |
61 | |
62 | /* Decode opaque device data and return the result */ |
63 | struct nfs4_file_layout_dsaddr * |
64 | nfs4_fl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, |
65 | gfp_t gfp_flags) |
66 | { |
67 | int i; |
68 | u32 cnt, num; |
69 | u8 *indexp; |
70 | __be32 *p; |
71 | u8 *stripe_indices; |
72 | u8 max_stripe_index; |
73 | struct nfs4_file_layout_dsaddr *dsaddr = NULL; |
74 | struct xdr_stream stream; |
75 | struct xdr_buf buf; |
76 | struct page *scratch; |
77 | struct list_head dsaddrs; |
78 | struct nfs4_pnfs_ds_addr *da; |
79 | |
80 | /* set up xdr stream */ |
81 | scratch = alloc_page(gfp_flags); |
82 | if (!scratch) |
83 | goto out_err; |
84 | |
85 | xdr_init_decode_pages(xdr: &stream, buf: &buf, pages: pdev->pages, len: pdev->pglen); |
86 | xdr_set_scratch_page(xdr: &stream, page: scratch); |
87 | |
88 | /* Get the stripe count (number of stripe index) */ |
89 | p = xdr_inline_decode(xdr: &stream, nbytes: 4); |
90 | if (unlikely(!p)) |
91 | goto out_err_free_scratch; |
92 | |
93 | cnt = be32_to_cpup(p); |
94 | dprintk("%s stripe count %d\n" , __func__, cnt); |
95 | if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { |
96 | printk(KERN_WARNING "NFS: %s: stripe count %d greater than " |
97 | "supported maximum %d\n" , __func__, |
98 | cnt, NFS4_PNFS_MAX_STRIPE_CNT); |
99 | goto out_err_free_scratch; |
100 | } |
101 | |
102 | /* read stripe indices */ |
103 | stripe_indices = kcalloc(n: cnt, size: sizeof(u8), flags: gfp_flags); |
104 | if (!stripe_indices) |
105 | goto out_err_free_scratch; |
106 | |
107 | p = xdr_inline_decode(xdr: &stream, nbytes: cnt << 2); |
108 | if (unlikely(!p)) |
109 | goto out_err_free_stripe_indices; |
110 | |
111 | indexp = &stripe_indices[0]; |
112 | max_stripe_index = 0; |
113 | for (i = 0; i < cnt; i++) { |
114 | *indexp = be32_to_cpup(p: p++); |
115 | max_stripe_index = max(max_stripe_index, *indexp); |
116 | indexp++; |
117 | } |
118 | |
119 | /* Check the multipath list count */ |
120 | p = xdr_inline_decode(xdr: &stream, nbytes: 4); |
121 | if (unlikely(!p)) |
122 | goto out_err_free_stripe_indices; |
123 | |
124 | num = be32_to_cpup(p); |
125 | dprintk("%s ds_num %u\n" , __func__, num); |
126 | if (num > NFS4_PNFS_MAX_MULTI_CNT) { |
127 | printk(KERN_WARNING "NFS: %s: multipath count %d greater than " |
128 | "supported maximum %d\n" , __func__, |
129 | num, NFS4_PNFS_MAX_MULTI_CNT); |
130 | goto out_err_free_stripe_indices; |
131 | } |
132 | |
133 | /* validate stripe indices are all < num */ |
134 | if (max_stripe_index >= num) { |
135 | printk(KERN_WARNING "NFS: %s: stripe index %u >= num ds %u\n" , |
136 | __func__, max_stripe_index, num); |
137 | goto out_err_free_stripe_indices; |
138 | } |
139 | |
140 | dsaddr = kzalloc(struct_size(dsaddr, ds_list, num), flags: gfp_flags); |
141 | if (!dsaddr) |
142 | goto out_err_free_stripe_indices; |
143 | |
144 | dsaddr->stripe_count = cnt; |
145 | dsaddr->stripe_indices = stripe_indices; |
146 | stripe_indices = NULL; |
147 | dsaddr->ds_num = num; |
148 | nfs4_init_deviceid_node(&dsaddr->id_node, server, &pdev->dev_id); |
149 | |
150 | INIT_LIST_HEAD(list: &dsaddrs); |
151 | |
152 | for (i = 0; i < dsaddr->ds_num; i++) { |
153 | int j; |
154 | u32 mp_count; |
155 | |
156 | p = xdr_inline_decode(xdr: &stream, nbytes: 4); |
157 | if (unlikely(!p)) |
158 | goto out_err_free_deviceid; |
159 | |
160 | mp_count = be32_to_cpup(p); /* multipath count */ |
161 | for (j = 0; j < mp_count; j++) { |
162 | da = nfs4_decode_mp_ds_addr(net: server->nfs_client->cl_net, |
163 | xdr: &stream, gfp_flags); |
164 | if (da) |
165 | list_add_tail(new: &da->da_node, head: &dsaddrs); |
166 | } |
167 | if (list_empty(head: &dsaddrs)) { |
168 | dprintk("%s: no suitable DS addresses found\n" , |
169 | __func__); |
170 | goto out_err_free_deviceid; |
171 | } |
172 | |
173 | dsaddr->ds_list[i] = nfs4_pnfs_ds_add(dsaddrs: &dsaddrs, gfp_flags); |
174 | if (!dsaddr->ds_list[i]) |
175 | goto out_err_drain_dsaddrs; |
176 | trace_fl_getdevinfo(server, deviceid: &pdev->dev_id, ds_remotestr: dsaddr->ds_list[i]->ds_remotestr); |
177 | |
178 | /* If DS was already in cache, free ds addrs */ |
179 | while (!list_empty(head: &dsaddrs)) { |
180 | da = list_first_entry(&dsaddrs, |
181 | struct nfs4_pnfs_ds_addr, |
182 | da_node); |
183 | list_del_init(entry: &da->da_node); |
184 | kfree(objp: da->da_remotestr); |
185 | kfree(objp: da); |
186 | } |
187 | } |
188 | |
189 | __free_page(scratch); |
190 | return dsaddr; |
191 | |
192 | out_err_drain_dsaddrs: |
193 | while (!list_empty(head: &dsaddrs)) { |
194 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, |
195 | da_node); |
196 | list_del_init(entry: &da->da_node); |
197 | kfree(objp: da->da_remotestr); |
198 | kfree(objp: da); |
199 | } |
200 | out_err_free_deviceid: |
201 | nfs4_fl_free_deviceid(dsaddr); |
202 | /* stripe_indicies was part of dsaddr */ |
203 | goto out_err_free_scratch; |
204 | out_err_free_stripe_indices: |
205 | kfree(objp: stripe_indices); |
206 | out_err_free_scratch: |
207 | __free_page(scratch); |
208 | out_err: |
209 | dprintk("%s ERROR: returning NULL\n" , __func__); |
210 | return NULL; |
211 | } |
212 | |
213 | void |
214 | nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr) |
215 | { |
216 | nfs4_put_deviceid_node(&dsaddr->id_node); |
217 | } |
218 | |
219 | /* |
220 | * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit |
221 | * Then: ((res + fsi) % dsaddr->stripe_count) |
222 | */ |
223 | u32 |
224 | nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset) |
225 | { |
226 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); |
227 | u64 tmp; |
228 | |
229 | tmp = offset - flseg->pattern_offset; |
230 | do_div(tmp, flseg->stripe_unit); |
231 | tmp += flseg->first_stripe_index; |
232 | return do_div(tmp, flseg->dsaddr->stripe_count); |
233 | } |
234 | |
235 | u32 |
236 | nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j) |
237 | { |
238 | return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j]; |
239 | } |
240 | |
241 | struct nfs_fh * |
242 | nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j) |
243 | { |
244 | struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg); |
245 | u32 i; |
246 | |
247 | if (flseg->stripe_type == STRIPE_SPARSE) { |
248 | if (flseg->num_fh == 1) |
249 | i = 0; |
250 | else if (flseg->num_fh == 0) |
251 | /* Use the MDS OPEN fh set in nfs_read_rpcsetup */ |
252 | return NULL; |
253 | else |
254 | i = nfs4_fl_calc_ds_index(lseg, j); |
255 | } else |
256 | i = j; |
257 | return flseg->fh_array[i]; |
258 | } |
259 | |
260 | /* Upon return, either ds is connected, or ds is NULL */ |
261 | struct nfs4_pnfs_ds * |
262 | nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx) |
263 | { |
264 | struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr; |
265 | struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; |
266 | struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); |
267 | struct nfs4_pnfs_ds *ret = ds; |
268 | struct nfs_server *s = NFS_SERVER(inode: lseg->pls_layout->plh_inode); |
269 | int status; |
270 | |
271 | if (ds == NULL) { |
272 | printk(KERN_ERR "NFS: %s: No data server for offset index %d\n" , |
273 | __func__, ds_idx); |
274 | pnfs_generic_mark_devid_invalid(node: devid); |
275 | goto out; |
276 | } |
277 | smp_rmb(); |
278 | if (ds->ds_clp) |
279 | goto out_test_devid; |
280 | |
281 | status = nfs4_pnfs_ds_connect(mds_srv: s, ds, devid, timeo: dataserver_timeo, |
282 | retrans: dataserver_retrans, version: 4, |
283 | minor_version: s->nfs_client->cl_minorversion); |
284 | if (status) { |
285 | nfs4_mark_deviceid_unavailable(node: devid); |
286 | ret = NULL; |
287 | goto out; |
288 | } |
289 | |
290 | out_test_devid: |
291 | if (ret->ds_clp == NULL || |
292 | filelayout_test_devid_unavailable(node: devid)) |
293 | ret = NULL; |
294 | out: |
295 | return ret; |
296 | } |
297 | |
298 | module_param(dataserver_retrans, uint, 0644); |
299 | MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " |
300 | "retries a request before it attempts further " |
301 | " recovery action." ); |
302 | module_param(dataserver_timeo, uint, 0644); |
303 | MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " |
304 | "NFSv4.1 client waits for a response from a " |
305 | " data server before it retries an NFS request." ); |
306 | |