/* Copyright (c) 2013 Coraid, Inc.  See COPYING for GPL terms. */
/*
 * aoecmd.c
 * Filesystem request handling methods
 */

#include <linux/ata.h>
#include <linux/slab.h>
#include <linux/hdreg.h>
#include <linux/blk-mq.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/moduleparam.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include <net/net_namespace.h>
#include <asm/unaligned.h>
#include <linux/uio.h>
#include "aoe.h"

#define MAXIOC (8192)	/* default meant to avoid most soft lockups */

static void ktcomplete(struct frame *, struct sk_buff *);
static int count_targets(struct aoedev *d, int *untainted);

static struct buf *nextbuf(struct aoedev *);

static int aoe_deadsecs = 60 * 3;
module_param(aoe_deadsecs, int, 0644);
MODULE_PARM_DESC(aoe_deadsecs, "After aoe_deadsecs seconds, give up and fail dev.");

static int aoe_maxout = 64;
module_param(aoe_maxout, int, 0644);
MODULE_PARM_DESC(aoe_maxout,
	"Only aoe_maxout outstanding packets for every MAC on eX.Y.");

/* The number of online cpus during module initialization gives us a
 * convenient heuristic cap on the parallelism used for ktio threads
 * doing I/O completion.  It is not important that the cap equal the
 * actual number of running CPUs at any given time, but because of CPU
 * hotplug, we take care to use ncpus instead of using
 * num_online_cpus() after module initialization.
 */
static int ncpus;

/* mutex lock used for synchronization while thread spawning */
static DEFINE_MUTEX(ktio_spawn_lock);

static wait_queue_head_t *ktiowq;
static struct ktstate *kts;

/* io completion queue */
struct iocq_ktio {
	struct list_head head;
	spinlock_t lock;
};
static struct iocq_ktio *iocq;

static struct page *empty_page;

static struct sk_buff *
new_skb(ulong len)
{
	struct sk_buff *skb;

	skb = alloc_skb(len + MAX_HEADER, GFP_ATOMIC);
	if (skb) {
		skb_reserve(skb, MAX_HEADER);
		skb_reset_mac_header(skb);
		skb_reset_network_header(skb);
		skb->protocol = __constant_htons(ETH_P_AOE);
		skb_checksum_none_assert(skb);
	}
	return skb;
}

static struct frame *
getframe_deferred(struct aoedev *d, u32 tag)
{
	struct list_head *head, *pos, *nx;
	struct frame *f;

	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head) {
		f = list_entry(pos, struct frame, head);
		if (f->tag == tag) {
			list_del(pos);
			return f;
		}
	}
	return NULL;
}

static struct frame *
getframe(struct aoedev *d, u32 tag)
{
	struct frame *f;
	struct list_head *head, *pos, *nx;
	u32 n;

	n = tag % NFACTIVE;
	head = &d->factive[n];
	list_for_each_safe(pos, nx, head) {
		f = list_entry(pos, struct frame, head);
		if (f->tag == tag) {
			list_del(pos);
			return f;
		}
	}
	return NULL;
}

/*
 * Leave the top bit clear so we have tagspace for userland.
 * The bottom 16 bits are the xmit tick for rexmit/rttavg processing.
 * This driver reserves tag -1 to mean "unused frame."
 */
static int
newtag(struct aoedev *d)
{
	register ulong n;

	n = jiffies & 0xffff;
	return n | (++d->lasttag & 0x7fff) << 16;
}

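/* For illustration: with d->lasttag incremented to 0x0012 and the low
 * jiffies word at 0xbeef, newtag() above yields 0x0012beef.  Masking
 * lasttag to 15 bits keeps bit 31 clear, so tags with the top bit set
 * (including -1, "unused frame") never collide with driver tags.
 */
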
static u32
aoehdr_atainit(struct aoedev *d, struct aoetgt *t, struct aoe_hdr *h)
{
	u32 host_tag = newtag(d);

	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);
	memcpy(h->dst, t->addr, sizeof h->dst);
	h->type = __constant_cpu_to_be16(ETH_P_AOE);
	h->verfl = AOE_HVER;
	h->major = cpu_to_be16(d->aoemajor);
	h->minor = d->aoeminor;
	h->cmd = AOECMD_ATA;
	h->tag = cpu_to_be32(host_tag);

	return host_tag;
}

static inline void
put_lba(struct aoe_atahdr *ah, sector_t lba)
{
	ah->lba0 = lba;
	ah->lba1 = lba >>= 8;
	ah->lba2 = lba >>= 8;
	ah->lba3 = lba >>= 8;
	ah->lba4 = lba >>= 8;
	ah->lba5 = lba >>= 8;
}

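/* For illustration: put_lba() above splits the 48-bit LBA into six
 * little-endian bytes, e.g. lba 0x0123456789ab gives lba0=0xab,
 * lba1=0x89, lba2=0x67, lba3=0x45, lba4=0x23, lba5=0x01.
 */
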
static struct aoeif *
ifrotate(struct aoetgt *t)
{
	struct aoeif *ifp;

	ifp = t->ifp;
	ifp++;
	if (ifp >= &t->ifs[NAOEIFS] || ifp->nd == NULL)
		ifp = t->ifs;
	if (ifp->nd == NULL)
		return NULL;
	return t->ifp = ifp;
}

static void
skb_pool_put(struct aoedev *d, struct sk_buff *skb)
{
	__skb_queue_tail(&d->skbpool, skb);
}

static struct sk_buff *
skb_pool_get(struct aoedev *d)
{
	struct sk_buff *skb = skb_peek(&d->skbpool);

	if (skb && atomic_read(&skb_shinfo(skb)->dataref) == 1) {
		__skb_unlink(skb, &d->skbpool);
		return skb;
	}
	if (skb_queue_len(&d->skbpool) < NSKBPOOLMAX &&
	    (skb = new_skb(ETH_ZLEN)))
		return skb;

	return NULL;
}

void
aoe_freetframe(struct frame *f)
{
	struct aoetgt *t;

	t = f->t;
	f->buf = NULL;
	memset(&f->iter, 0, sizeof(f->iter));
	f->r_skb = NULL;
	f->flags = 0;
	list_add(&f->head, &t->ffree);
}

static struct frame *
newtframe(struct aoedev *d, struct aoetgt *t)
{
	struct frame *f;
	struct sk_buff *skb;
	struct list_head *pos;

	if (list_empty(&t->ffree)) {
		if (t->falloc >= NSKBPOOLMAX*2)
			return NULL;
		f = kcalloc(1, sizeof(*f), GFP_ATOMIC);
		if (f == NULL)
			return NULL;
		t->falloc++;
		f->t = t;
	} else {
		pos = t->ffree.next;
		list_del(pos);
		f = list_entry(pos, struct frame, head);
	}

	skb = f->skb;
	if (skb == NULL) {
		f->skb = skb = new_skb(ETH_ZLEN);
		if (!skb) {
bail:			aoe_freetframe(f);
			return NULL;
		}
	}

	if (atomic_read(&skb_shinfo(skb)->dataref) != 1) {
		skb = skb_pool_get(d);
		if (skb == NULL)
			goto bail;
		skb_pool_put(d, f->skb);
		f->skb = skb;
	}

	skb->truesize -= skb->data_len;
	skb_shinfo(skb)->nr_frags = skb->data_len = 0;
	skb_trim(skb, 0);
	return f;
}

static struct frame *
newframe(struct aoedev *d)
{
	struct frame *f;
	struct aoetgt *t, **tt;
	int totout = 0;
	int use_tainted;
	int has_untainted;

	if (!d->targets || !d->targets[0]) {
		printk(KERN_ERR "aoe: NULL TARGETS!\n");
		return NULL;
	}
	tt = d->tgt;	/* last used target */
	for (use_tainted = 0, has_untainted = 0;;) {
		tt++;
		if (tt >= &d->targets[d->ntargets] || !*tt)
			tt = d->targets;
		t = *tt;
		if (!t->taint) {
			has_untainted = 1;
			totout += t->nout;
		}
		if (t->nout < t->maxout
		&& (use_tainted || !t->taint)
		&& t->ifp->nd) {
			f = newtframe(d, t);
			if (f) {
				ifrotate(t);
				d->tgt = tt;
				return f;
			}
		}
		if (tt == d->tgt) {	/* we've looped and found nada */
			if (!use_tainted && !has_untainted)
				use_tainted = 1;
			else
				break;
		}
	}
	if (totout == 0) {
		d->kicked++;
		d->flags |= DEVFL_KICKME;
	}
	return NULL;
}

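/* Attach the bio's pages to the skb as paged fragments: each bio_vec
 * becomes one skb fragment, so write payloads go out zero-copy rather
 * than being memcpy'd into the skb.
 */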
static void
skb_fillup(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter)
{
	int frag = 0;
	struct bio_vec bv;

	__bio_for_each_segment(bv, bio, iter, iter)
		skb_fill_page_desc(skb, frag++, bv.bv_page,
				   bv.bv_offset, bv.bv_len);
}

static void
fhash(struct frame *f)
{
	struct aoedev *d = f->t->d;
	u32 n;

	n = f->tag % NFACTIVE;
	list_add_tail(&f->head, &d->factive[n]);
}

static void
ata_rw_frameinit(struct frame *f)
{
	struct aoetgt *t;
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct sk_buff *skb;
	char writebit, extbit;

	skb = f->skb;
	h = (struct aoe_hdr *) skb_mac_header(skb);
	ah = (struct aoe_atahdr *) (h + 1);
	skb_put(skb, sizeof(*h) + sizeof(*ah));
	memset(h, 0, skb->len);

	writebit = 0x10;
	extbit = 0x4;

	t = f->t;
	f->tag = aoehdr_atainit(t->d, t, h);
	fhash(f);
	t->nout++;
	f->waited = 0;
	f->waited_total = 0;

	/* set up ata header */
	ah->scnt = f->iter.bi_size >> 9;
	put_lba(ah, f->iter.bi_sector);
	if (t->d->flags & DEVFL_EXT) {
		ah->aflags |= AOEAFL_EXT;
	} else {
		extbit = 0;
		ah->lba3 &= 0x0f;
		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
	}
	if (f->buf && bio_data_dir(f->buf->bio) == WRITE) {
		skb_fillup(skb, f->buf->bio, f->iter);
		ah->aflags |= AOEAFL_WRITE;
		skb->len += f->iter.bi_size;
		skb->data_len = f->iter.bi_size;
		skb->truesize += f->iter.bi_size;
		t->wpkts++;
	} else {
		t->rpkts++;
		writebit = 0;
	}

	ah->cmdstat = ATA_CMD_PIO_READ | writebit | extbit;
	skb->dev = t->ifp->nd;
}

static int
aoecmd_ata_rw(struct aoedev *d)
{
	struct frame *f;
	struct buf *buf;
	struct sk_buff *skb;
	struct sk_buff_head queue;

	buf = nextbuf(d);
	if (buf == NULL)
		return 0;
	f = newframe(d);
	if (f == NULL)
		return 0;

	/* initialize the headers & frame */
	f->buf = buf;
	f->iter = buf->iter;
	f->iter.bi_size = min_t(unsigned long,
				d->maxbcnt ?: DEFAULTBCNT,
				f->iter.bi_size);
	bio_advance_iter(buf->bio, &buf->iter, f->iter.bi_size);

	if (!buf->iter.bi_size)
		d->ip.buf = NULL;

	/* mark all tracking fields and load out */
	buf->nframesout += 1;

	ata_rw_frameinit(f);

	skb = skb_clone(f->skb, GFP_ATOMIC);
	if (skb) {
		f->sent = ktime_get();
		__skb_queue_head_init(&queue);
		__skb_queue_tail(&queue, skb);
		aoenet_xmit(&queue);
	}
	return 1;
}

/* Some callers cannot sleep; they can call this function to build the
 * config packets and transmit them later, when interrupts are on.
 */
static void
aoecmd_cfg_pkts(ushort aoemajor, unsigned char aoeminor, struct sk_buff_head *queue)
{
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct sk_buff *skb;
	struct net_device *ifp;

	rcu_read_lock();
	for_each_netdev_rcu(&init_net, ifp) {
		dev_hold(ifp);
		if (!is_aoe_netif(ifp)) {
			dev_put(ifp);
			continue;
		}

		skb = new_skb(sizeof *h + sizeof *ch);
		if (skb == NULL) {
			printk(KERN_INFO "aoe: skb alloc failure\n");
			dev_put(ifp);
			continue;
		}
		skb_put(skb, sizeof *h + sizeof *ch);
		skb->dev = ifp;
		__skb_queue_tail(queue, skb);
		h = (struct aoe_hdr *) skb_mac_header(skb);
		memset(h, 0, sizeof *h + sizeof *ch);

		memset(h->dst, 0xff, sizeof h->dst);
		memcpy(h->src, ifp->dev_addr, sizeof h->src);
		h->type = __constant_cpu_to_be16(ETH_P_AOE);
		h->verfl = AOE_HVER;
		h->major = cpu_to_be16(aoemajor);
		h->minor = aoeminor;
		h->cmd = AOECMD_CFG;
	}
	rcu_read_unlock();
}

static void
resend(struct aoedev *d, struct frame *f)
{
	struct sk_buff *skb;
	struct sk_buff_head queue;
	struct aoe_hdr *h;
	struct aoetgt *t;
	char buf[128];
	u32 n;

	t = f->t;
	n = newtag(d);
	skb = f->skb;
	if (ifrotate(t) == NULL) {
		/* probably can't happen, but set it up to fail anyway */
		pr_info("aoe: resend: no interfaces to rotate to.\n");
		ktcomplete(f, NULL);
		return;
	}
	h = (struct aoe_hdr *) skb_mac_header(skb);

	if (!(f->flags & FFL_PROBE)) {
		snprintf(buf, sizeof(buf),
			 "%15s e%ld.%d oldtag=%08x@%08lx newtag=%08x s=%pm d=%pm nout=%d\n",
			 "retransmit", d->aoemajor, d->aoeminor,
			 f->tag, jiffies, n,
			 h->src, h->dst, t->nout);
		aoechr_error(buf);
	}

	f->tag = n;
	fhash(f);
	h->tag = cpu_to_be32(n);
	memcpy(h->dst, t->addr, sizeof h->dst);
	memcpy(h->src, t->ifp->nd->dev_addr, sizeof h->src);

	skb->dev = t->ifp->nd;
	skb = skb_clone(skb, GFP_ATOMIC);
	if (skb == NULL)
		return;
	f->sent = ktime_get();
	__skb_queue_head_init(&queue);
	__skb_queue_tail(&queue, skb);
	aoenet_xmit(&queue);
}

static int
tsince_hr(struct frame *f)
{
	u64 delta = ktime_to_ns(ktime_sub(ktime_get(), f->sent));

	/* delta is normally under 4.2 seconds, avoid 64-bit division */
	if (likely(delta <= UINT_MAX))
		return (u32)delta / NSEC_PER_USEC;

	/* avoid overflow after 71 minutes */
	if (delta > ((u64)INT_MAX * NSEC_PER_USEC))
		return INT_MAX;

	return div_u64(delta, NSEC_PER_USEC);
}

static int
tsince(u32 tag)
{
	int n;

	n = jiffies & 0xffff;
	n -= tag & 0xffff;
	if (n < 0)
		n += 1<<16;
	return jiffies_to_usecs(n + 1);
}

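/* For illustration: the low 16 bits of a tag record the jiffies tick at
 * transmit time, so tsince() above survives wraparound.  A frame sent
 * at tick 0xfff0 and examined at tick 0x0010 gives n = 0x0010 - 0xfff0
 * + 0x10000 = 32 ticks, and the result is jiffies_to_usecs(33).
 */
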
static struct aoeif *
getif(struct aoetgt *t, struct net_device *nd)
{
	struct aoeif *p, *e;

	p = t->ifs;
	e = p + NAOEIFS;
	for (; p < e; p++)
		if (p->nd == nd)
			return p;
	return NULL;
}

static void
ejectif(struct aoetgt *t, struct aoeif *ifp)
{
	struct aoeif *e;
	struct net_device *nd;
	ulong n;

	nd = ifp->nd;
	e = t->ifs + NAOEIFS - 1;
	n = (e - ifp) * sizeof *ifp;
	memmove(ifp, ifp+1, n);
	e->nd = NULL;
	dev_put(nd);
}

static struct frame *
reassign_frame(struct frame *f)
{
	struct frame *nf;
	struct sk_buff *skb;

	nf = newframe(f->t->d);
	if (!nf)
		return NULL;
	if (nf->t == f->t) {
		aoe_freetframe(nf);
		return NULL;
	}

	skb = nf->skb;
	nf->skb = f->skb;
	nf->buf = f->buf;
	nf->iter = f->iter;
	nf->waited = 0;
	nf->waited_total = f->waited_total;
	nf->sent = f->sent;
	f->skb = skb;

	return nf;
}

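/* Send a max-size read probe to a tainted target.  The outgoing frame
 * is padded to the full data frame size with references to the shared
 * empty_page, so the path is exercised at maximum frame size without
 * touching any real I/O buffer; a reply redeems the target in
 * ktiocomplete().
 */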
static void
probe(struct aoetgt *t)
{
	struct aoedev *d;
	struct frame *f;
	struct sk_buff *skb;
	struct sk_buff_head queue;
	size_t n, m;
	int frag;

	d = t->d;
	f = newtframe(d, t);
	if (!f) {
		pr_err("%s %pm for e%ld.%d: %s\n",
		       "aoe: cannot probe remote address",
		       t->addr,
		       (long) d->aoemajor, d->aoeminor,
		       "no frame available");
		return;
	}
	f->flags |= FFL_PROBE;
	ifrotate(t);
	f->iter.bi_size = t->d->maxbcnt ? t->d->maxbcnt : DEFAULTBCNT;
	ata_rw_frameinit(f);
	skb = f->skb;
	for (frag = 0, n = f->iter.bi_size; n > 0; ++frag, n -= m) {
		if (n < PAGE_SIZE)
			m = n;
		else
			m = PAGE_SIZE;
		skb_fill_page_desc(skb, frag, empty_page, 0, m);
	}
	skb->len += f->iter.bi_size;
	skb->data_len = f->iter.bi_size;
	skb->truesize += f->iter.bi_size;

	skb = skb_clone(f->skb, GFP_ATOMIC);
	if (skb) {
		f->sent = ktime_get();
		__skb_queue_head_init(&queue);
		__skb_queue_tail(&queue, skb);
		aoenet_xmit(&queue);
	}
}

static long
rto(struct aoedev *d)
{
	long t;

	t = 2 * d->rttavg >> RTTSCALE;
	t += 8 * d->rttdev >> RTTDSCALE;
	if (t == 0)
		t = 1;

	return t;
}

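/* For illustration: rto() above is two mean RTTs plus eight mean
 * deviations (after the fixed-point scaling), a more conservative
 * variant of the classic Jacobson srtt + 4*mdev timeout.  With a
 * 750 us average and 250 us deviation, the timeout is
 * 2*750 + 8*250 = 3500 us.
 */
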
static void
rexmit_deferred(struct aoedev *d)
{
	struct aoetgt *t;
	struct frame *f;
	struct frame *nf;
	struct list_head *pos, *nx, *head;
	int since;
	int untainted;

	count_targets(d, &untainted);

	head = &d->rexmitq;
	list_for_each_safe(pos, nx, head) {
		f = list_entry(pos, struct frame, head);
		t = f->t;
		if (t->taint) {
			if (!(f->flags & FFL_PROBE)) {
				nf = reassign_frame(f);
				if (nf) {
					if (t->nout_probes == 0
					&& untainted > 0) {
						probe(t);
						t->nout_probes++;
					}
					list_replace(&f->head, &nf->head);
					pos = &nf->head;
					aoe_freetframe(f);
					f = nf;
					t = f->t;
				}
			} else if (untainted < 1) {
				/* don't probe w/o other untainted aoetgts */
				goto stop_probe;
			} else if (tsince_hr(f) < t->taint * rto(d)) {
				/* reprobe slowly when taint is high */
				continue;
			}
		} else if (f->flags & FFL_PROBE) {
stop_probe:		/* don't probe untainted aoetgts */
			list_del(pos);
			aoe_freetframe(f);
			/* leaving d->kicked, because this is routine */
			f->t->d->flags |= DEVFL_KICKME;
			continue;
		}
		if (t->nout >= t->maxout)
			continue;
		list_del(pos);
		t->nout++;
		if (f->flags & FFL_PROBE)
			t->nout_probes++;
		since = tsince_hr(f);
		f->waited += since;
		f->waited_total += since;
		resend(d, f);
	}
}

/* An aoetgt accumulates demerits quickly, and successful
 * probing redeems the aoetgt slowly.
 */
static void
scorn(struct aoetgt *t)
{
	int n;

	n = t->taint++;
	t->taint += t->taint * 2;
	if (n > t->taint)
		t->taint = n;
	if (t->taint > MAX_TAINT)
		t->taint = MAX_TAINT;
}

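/* For illustration: each call to scorn() roughly triples the taint,
 * 0 -> 3 -> 12 -> 39 -> 120 -> ..., clamped at MAX_TAINT, with the
 * n > t->taint check guarding against overflow.  Redemption is slow by
 * design: each good response decrements taint by one (see
 * ktiocomplete()).
 */
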
static int
count_targets(struct aoedev *d, int *untainted)
{
	int i, good;

	for (i = good = 0; i < d->ntargets && d->targets[i]; ++i)
		if (d->targets[i]->taint == 0)
			good++;

	if (untainted)
		*untainted = good;
	return i;
}

static void
rexmit_timer(struct timer_list *timer)
{
	struct aoedev *d;
	struct aoetgt *t;
	struct aoeif *ifp;
	struct frame *f;
	struct list_head *head, *pos, *nx;
	LIST_HEAD(flist);
	register long timeout;
	ulong flags, n;
	int i;
	int utgts;	/* number of aoetgt descriptors (not slots) */
	int since;

	d = from_timer(d, timer, timer);

	spin_lock_irqsave(&d->lock, flags);

	/* timeout based on observed timings and variations */
	timeout = rto(d);

	utgts = count_targets(d, NULL);

	if (d->flags & DEVFL_TKILL) {
		spin_unlock_irqrestore(&d->lock, flags);
		return;
	}

	/* collect all frames to rexmit into flist */
	for (i = 0; i < NFACTIVE; i++) {
		head = &d->factive[i];
		list_for_each_safe(pos, nx, head) {
			f = list_entry(pos, struct frame, head);
			if (tsince_hr(f) < timeout)
				break;	/* end of expired frames */
			/* move to flist for later processing */
			list_move_tail(pos, &flist);
		}
	}

	/* process expired frames */
	while (!list_empty(&flist)) {
		pos = flist.next;
		f = list_entry(pos, struct frame, head);
		since = tsince_hr(f);
		n = f->waited_total + since;
		n /= USEC_PER_SEC;
		if (aoe_deadsecs
		&& n > aoe_deadsecs
		&& !(f->flags & FFL_PROBE)) {
			/* Waited too long.  Device failure.
			 * Hang all frames on first hash bucket for downdev
			 * to clean up.
			 */
			list_splice(&flist, &d->factive[0]);
			aoedev_downdev(d);
			goto out;
		}

		t = f->t;
		n = f->waited + since;
		n /= USEC_PER_SEC;
		if (aoe_deadsecs && utgts > 0
		&& (n > aoe_deadsecs / utgts || n > HARD_SCORN_SECS))
			scorn(t);	/* avoid this target */

		if (t->maxout != 1) {
			t->ssthresh = t->maxout / 2;
			t->maxout = 1;
		}

		if (f->flags & FFL_PROBE) {
			t->nout_probes--;
		} else {
			ifp = getif(t, f->skb->dev);
			if (ifp && ++ifp->lost > (t->nframes << 1)
			&& (ifp != t->ifs || t->ifs[1].nd)) {
				ejectif(t, ifp);
				ifp = NULL;
			}
		}
		list_move_tail(pos, &d->rexmitq);
		t->nout--;
	}
	rexmit_deferred(d);

out:
	if ((d->flags & DEVFL_KICKME) && d->blkq) {
		d->flags &= ~DEVFL_KICKME;
		blk_mq_run_hw_queues(d->blkq, true);
	}

	d->timer.expires = jiffies + TIMERTICK;
	add_timer(&d->timer);

	spin_unlock_irqrestore(&d->lock, flags);
}

static void
bufinit(struct buf *buf, struct request *rq, struct bio *bio)
{
	memset(buf, 0, sizeof(*buf));
	buf->rq = rq;
	buf->bio = bio;
	buf->iter = bio->bi_iter;
}

static struct buf *
nextbuf(struct aoedev *d)
{
	struct request *rq;
	struct request_queue *q;
	struct aoe_req *req;
	struct buf *buf;
	struct bio *bio;

	q = d->blkq;
	if (q == NULL)
		return NULL;	/* initializing */
	if (d->ip.buf)
		return d->ip.buf;
	rq = d->ip.rq;
	if (rq == NULL) {
		rq = list_first_entry_or_null(&d->rq_list, struct request,
						queuelist);
		if (rq == NULL)
			return NULL;
		list_del_init(&rq->queuelist);
		blk_mq_start_request(rq);
		d->ip.rq = rq;
		d->ip.nxbio = rq->bio;

		req = blk_mq_rq_to_pdu(rq);
		req->nr_bios = 0;
		__rq_for_each_bio(bio, rq)
			req->nr_bios++;
	}
	buf = mempool_alloc(d->bufpool, GFP_ATOMIC);
	if (buf == NULL) {
		pr_err("aoe: nextbuf: unable to mempool_alloc!\n");
		return NULL;
	}
	bio = d->ip.nxbio;
	bufinit(buf, rq, bio);
	bio = bio->bi_next;
	d->ip.nxbio = bio;
	if (bio == NULL)
		d->ip.rq = NULL;
	return d->ip.buf = buf;
}

/* enters with d->lock held */
void
aoecmd_work(struct aoedev *d)
{
	rexmit_deferred(d);
	while (aoecmd_ata_rw(d))
		;
}

/* this function performs work that has been deferred until sleeping is OK
 */
void
aoecmd_sleepwork(struct work_struct *work)
{
	struct aoedev *d = container_of(work, struct aoedev, work);

	if (d->flags & DEVFL_GDALLOC)
		aoeblk_gdalloc(d);

	if (d->flags & DEVFL_NEWSIZE) {
		set_capacity_and_notify(d->gd, d->ssize);

		spin_lock_irq(&d->lock);
		d->flags |= DEVFL_UP;
		d->flags &= ~DEVFL_NEWSIZE;
		spin_unlock_irq(&d->lock);
	}
}

static void
ata_ident_fixstring(u16 *id, int ns)
{
	u16 s;

	while (ns-- > 0) {
		s = *id;
		*id++ = s >> 8 | s << 8;
	}
}

static void
ataid_complete(struct aoedev *d, struct aoetgt *t, unsigned char *id)
{
	u64 ssize;
	u16 n;

	/* word 83: command set supported */
	n = get_unaligned_le16(&id[83 << 1]);

	/* word 86: command set/feature enabled */
	n |= get_unaligned_le16(&id[86 << 1]);

	if (n & (1<<10)) {	/* bit 10: LBA 48 */
		d->flags |= DEVFL_EXT;

		/* word 100: number lba48 sectors */
		ssize = get_unaligned_le64(&id[100 << 1]);

		/* set as in ide-disk.c:init_idedisk_capacity */
		d->geo.cylinders = ssize;
		d->geo.cylinders /= (255 * 63);
		d->geo.heads = 255;
		d->geo.sectors = 63;
	} else {
		d->flags &= ~DEVFL_EXT;

		/* number lba28 sectors */
		ssize = get_unaligned_le32(&id[60 << 1]);

		/* NOTE: obsolete in ATA 6 */
		d->geo.cylinders = get_unaligned_le16(&id[54 << 1]);
		d->geo.heads = get_unaligned_le16(&id[55 << 1]);
		d->geo.sectors = get_unaligned_le16(&id[56 << 1]);
	}

	ata_ident_fixstring((u16 *) &id[10<<1], 10);	/* serial */
	ata_ident_fixstring((u16 *) &id[23<<1], 4);	/* firmware */
	ata_ident_fixstring((u16 *) &id[27<<1], 20);	/* model */
	memcpy(d->ident, id, sizeof(d->ident));

	if (d->ssize != ssize)
		printk(KERN_INFO
			"aoe: %pm e%ld.%d v%04x has %llu sectors\n",
			t->addr,
			d->aoemajor, d->aoeminor,
			d->fw_ver, (long long)ssize);
	d->ssize = ssize;
	d->geo.start = 0;
	if (d->flags & (DEVFL_GDALLOC|DEVFL_NEWSIZE))
		return;
	if (d->gd != NULL)
		d->flags |= DEVFL_NEWSIZE;
	else
		d->flags |= DEVFL_GDALLOC;
	queue_work(aoe_wq, &d->work);
}

static void
calc_rttavg(struct aoedev *d, struct aoetgt *t, int rtt)
{
	register long n;

	n = rtt;

	/* cf. Congestion Avoidance and Control, Jacobson & Karels, 1988 */
	n -= d->rttavg >> RTTSCALE;
	d->rttavg += n;
	if (n < 0)
		n = -n;
	n -= d->rttdev >> RTTDSCALE;
	d->rttdev += n;

	if (!t || t->maxout >= t->nframes)
		return;
	if (t->maxout < t->ssthresh)
		t->maxout += 1;
	else if (t->nout == t->maxout && t->next_cwnd-- == 0) {
		t->maxout += 1;
		t->next_cwnd = t->maxout;
	}
}

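/* For illustration: calc_rttavg() above keeps rttavg and rttdev
 * pre-scaled (by the RTTSCALE/RTTDSCALE shifts from aoe.h) so the
 * fixed-point updates
 *
 *	avg <- avg + (rtt - avg) / 2^RTTSCALE
 *	dev <- dev + (|rtt - avg| - dev) / 2^RTTDSCALE
 *
 * lose no precision.  The window logic mirrors TCP: below ssthresh,
 * maxout grows by one per response (slow start); at the window limit,
 * it grows by one per full window (congestion avoidance).
 */
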
static struct aoetgt *
gettgt(struct aoedev *d, char *addr)
{
	struct aoetgt **t, **e;

	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		if (memcmp((*t)->addr, addr, sizeof((*t)->addr)) == 0)
			return *t;
	return NULL;
}

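/* Copy cnt bytes of response payload from the skb into the bio's
 * pages, one segment at a time, mapping each bio_vec briefly with
 * bvec_kmap_local().
 */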
static void
bvcpy(struct sk_buff *skb, struct bio *bio, struct bvec_iter iter, long cnt)
{
	int soff = 0;
	struct bio_vec bv;

	iter.bi_size = cnt;

	__bio_for_each_segment(bv, bio, iter, iter) {
		char *p = bvec_kmap_local(&bv);
		skb_copy_bits(skb, soff, p, bv.bv_len);
		kunmap_local(p);
		soff += bv.bv_len;
	}
}

void
aoe_end_request(struct aoedev *d, struct request *rq, int fastfail)
{
	struct bio *bio;
	int bok;
	struct request_queue *q;
	blk_status_t err = BLK_STS_OK;

	q = d->blkq;
	if (rq == d->ip.rq)
		d->ip.rq = NULL;
	do {
		bio = rq->bio;
		bok = !fastfail && !bio->bi_status;
		if (!bok)
			err = BLK_STS_IOERR;
	} while (blk_update_request(rq, bok ? BLK_STS_OK : BLK_STS_IOERR, bio->bi_iter.bi_size));

	__blk_mq_end_request(rq, err);

	/* cf. https://lore.kernel.org/lkml/20061031071040.GS14055@kernel.dk/ */
	if (!fastfail)
		blk_mq_run_hw_queues(q, true);
}

static void
aoe_end_buf(struct aoedev *d, struct buf *buf)
{
	struct request *rq = buf->rq;
	struct aoe_req *req = blk_mq_rq_to_pdu(rq);

	if (buf == d->ip.buf)
		d->ip.buf = NULL;
	mempool_free(buf, d->bufpool);
	if (--req->nr_bios == 0)
		aoe_end_request(d, rq, 0);
}

static void
ktiocomplete(struct frame *f)
{
	struct aoe_hdr *hin, *hout;
	struct aoe_atahdr *ahin, *ahout;
	struct buf *buf;
	struct sk_buff *skb;
	struct aoetgt *t;
	struct aoeif *ifp;
	struct aoedev *d;
	long n;
	int untainted;

	if (f == NULL)
		return;

	t = f->t;
	d = t->d;
	skb = f->r_skb;
	buf = f->buf;
	if (f->flags & FFL_PROBE)
		goto out;
	if (!skb)		/* just fail the buf. */
		goto noskb;

	hout = (struct aoe_hdr *) skb_mac_header(f->skb);
	ahout = (struct aoe_atahdr *) (hout+1);

	hin = (struct aoe_hdr *) skb->data;
	skb_pull(skb, sizeof(*hin));
	ahin = (struct aoe_atahdr *) skb->data;
	skb_pull(skb, sizeof(*ahin));
	if (ahin->cmdstat & 0xa9) {	/* these bits cleared on success */
		pr_err("aoe: ata error cmd=%2.2Xh stat=%2.2Xh from e%ld.%d\n",
			ahout->cmdstat, ahin->cmdstat,
			d->aoemajor, d->aoeminor);
noskb:		if (buf)
			buf->bio->bi_status = BLK_STS_IOERR;
		goto out;
	}

	n = ahout->scnt << 9;
	switch (ahout->cmdstat) {
	case ATA_CMD_PIO_READ:
	case ATA_CMD_PIO_READ_EXT:
		if (skb->len < n) {
			pr_err("%s e%ld.%d. skb->len=%d need=%ld\n",
				"aoe: runt data size in read from",
				(long) d->aoemajor, d->aoeminor,
				skb->len, n);
			buf->bio->bi_status = BLK_STS_IOERR;
			break;
		}
		if (n > f->iter.bi_size) {
			pr_err_ratelimited("%s e%ld.%d. bytes=%ld need=%u\n",
				"aoe: too-large data size in read from",
				(long) d->aoemajor, d->aoeminor,
				n, f->iter.bi_size);
			buf->bio->bi_status = BLK_STS_IOERR;
			break;
		}
		bvcpy(skb, f->buf->bio, f->iter, n);
		fallthrough;
	case ATA_CMD_PIO_WRITE:
	case ATA_CMD_PIO_WRITE_EXT:
		spin_lock_irq(&d->lock);
		ifp = getif(t, skb->dev);
		if (ifp)
			ifp->lost = 0;
		spin_unlock_irq(&d->lock);
		break;
	case ATA_CMD_ID_ATA:
		if (skb->len < 512) {
			pr_info("%s e%ld.%d. skb->len=%d need=512\n",
				"aoe: runt data size in ataid from",
				(long) d->aoemajor, d->aoeminor,
				skb->len);
			break;
		}
		if (skb_linearize(skb))
			break;
		spin_lock_irq(&d->lock);
		ataid_complete(d, t, skb->data);
		spin_unlock_irq(&d->lock);
		break;
	default:
		pr_info("aoe: unrecognized ata command %2.2Xh for %d.%d\n",
			ahout->cmdstat,
			be16_to_cpu(get_unaligned(&hin->major)),
			hin->minor);
	}
out:
	spin_lock_irq(&d->lock);
	if (t->taint > 0
	&& --t->taint > 0
	&& t->nout_probes == 0) {
		count_targets(d, &untainted);
		if (untainted > 0) {
			probe(t);
			t->nout_probes++;
		}
	}

	aoe_freetframe(f);

	if (buf && --buf->nframesout == 0 && buf->iter.bi_size == 0)
		aoe_end_buf(d, buf);

	spin_unlock_irq(&d->lock);
	aoedev_put(d);
	dev_kfree_skb(skb);
}

/* Enters with iocq.lock held.
 * Returns true iff responses needing processing remain.
 */
static int
ktio(int id)
{
	struct frame *f;
	struct list_head *pos;
	int i;
	int actual_id;

	for (i = 0; ; ++i) {
		if (i == MAXIOC)
			return 1;
		if (list_empty(&iocq[id].head))
			return 0;
		pos = iocq[id].head.next;
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		spin_unlock_irq(&iocq[id].lock);
		ktiocomplete(f);

		/* Figure out if extra threads are required. */
		actual_id = f->t->d->aoeminor % ncpus;

		if (!kts[actual_id].active) {
			BUG_ON(id != 0);
			mutex_lock(&ktio_spawn_lock);
			if (!kts[actual_id].active
				&& aoe_ktstart(&kts[actual_id]) == 0)
				kts[actual_id].active = 1;
			mutex_unlock(&ktio_spawn_lock);
		}
		spin_lock_irq(&iocq[id].lock);
	}
}

static int
kthread(void *vp)
{
	struct ktstate *k;
	DECLARE_WAITQUEUE(wait, current);
	int more;

	k = vp;
	current->flags |= PF_NOFREEZE;
	set_user_nice(current, -10);
	complete(&k->rendez);	/* tell spawner we're running */
	do {
		spin_lock_irq(k->lock);
		more = k->fn(k->id);
		if (!more) {
			add_wait_queue(k->waitq, &wait);
			__set_current_state(TASK_INTERRUPTIBLE);
		}
		spin_unlock_irq(k->lock);
		if (!more) {
			schedule();
			remove_wait_queue(k->waitq, &wait);
		} else
			cond_resched();
	} while (!kthread_should_stop());
	complete(&k->rendez);	/* tell spawner we're stopping */
	return 0;
}

void
aoe_ktstop(struct ktstate *k)
{
	kthread_stop(k->task);
	wait_for_completion(&k->rendez);
}

int
aoe_ktstart(struct ktstate *k)
{
	struct task_struct *task;

	init_completion(&k->rendez);
	task = kthread_run(kthread, k, "%s", k->name);
	if (task == NULL || IS_ERR(task))
		return -ENOMEM;
	k->task = task;
	wait_for_completion(&k->rendez);	/* allow kthread to start */
	init_completion(&k->rendez);	/* for waiting for exit later */
	return 0;
}

/* pass it off to kthreads for processing */
static void
ktcomplete(struct frame *f, struct sk_buff *skb)
{
	int id;
	ulong flags;

	f->r_skb = skb;
	id = f->t->d->aoeminor % ncpus;
	spin_lock_irqsave(&iocq[id].lock, flags);
	if (!kts[id].active) {
		spin_unlock_irqrestore(&iocq[id].lock, flags);
		/* The thread with id has not been spawned yet,
		 * so delegate the work to the main thread and
		 * try spawning a new thread.
		 */
		id = 0;
		spin_lock_irqsave(&iocq[id].lock, flags);
	}
	list_add_tail(&f->head, &iocq[id].head);
	spin_unlock_irqrestore(&iocq[id].lock, flags);
	wake_up(&ktiowq[id]);
}

struct sk_buff *
aoecmd_ata_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct frame *f;
	u32 n;
	ulong flags;
	char ebuf[128];
	u16 aoemajor;

	h = (struct aoe_hdr *) skb->data;
	aoemajor = be16_to_cpu(get_unaligned(&h->major));
	d = aoedev_by_aoeaddr(aoemajor, h->minor, 0);
	if (d == NULL) {
		snprintf(ebuf, sizeof ebuf, "aoecmd_ata_rsp: ata response "
			"for unknown device %d.%d\n",
			aoemajor, h->minor);
		aoechr_error(ebuf);
		return skb;
	}

	spin_lock_irqsave(&d->lock, flags);

	n = be32_to_cpu(get_unaligned(&h->tag));
	f = getframe(d, n);
	if (f) {
		calc_rttavg(d, f->t, tsince_hr(f));
		f->t->nout--;
		if (f->flags & FFL_PROBE)
			f->t->nout_probes--;
	} else {
		f = getframe_deferred(d, n);
		if (f) {
			calc_rttavg(d, NULL, tsince_hr(f));
		} else {
			calc_rttavg(d, NULL, tsince(n));
			spin_unlock_irqrestore(&d->lock, flags);
			aoedev_put(d);
			snprintf(ebuf, sizeof(ebuf),
				 "%15s e%d.%d tag=%08x@%08lx s=%pm d=%pm\n",
				 "unexpected rsp",
				 get_unaligned_be16(&h->major),
				 h->minor,
				 get_unaligned_be32(&h->tag),
				 jiffies,
				 h->src,
				 h->dst);
			aoechr_error(ebuf);
			return skb;
		}
	}
	aoecmd_work(d);

	spin_unlock_irqrestore(&d->lock, flags);

	ktcomplete(f, skb);

	/*
	 * Note here that we do not perform an aoedev_put, as we are
	 * leaving this reference for the ktio to release.
	 */
	return NULL;
}

void
aoecmd_cfg(ushort aoemajor, unsigned char aoeminor)
{
	struct sk_buff_head queue;

	__skb_queue_head_init(&queue);
	aoecmd_cfg_pkts(aoemajor, aoeminor, &queue);
	aoenet_xmit(&queue);
}

struct sk_buff *
aoecmd_ata_id(struct aoedev *d)
{
	struct aoe_hdr *h;
	struct aoe_atahdr *ah;
	struct frame *f;
	struct sk_buff *skb;
	struct aoetgt *t;

	f = newframe(d);
	if (f == NULL)
		return NULL;

	t = *d->tgt;

	/* initialize the headers & frame */
	skb = f->skb;
	h = (struct aoe_hdr *) skb_mac_header(skb);
	ah = (struct aoe_atahdr *) (h+1);
	skb_put(skb, sizeof *h + sizeof *ah);
	memset(h, 0, skb->len);
	f->tag = aoehdr_atainit(d, t, h);
	fhash(f);
	t->nout++;
	f->waited = 0;
	f->waited_total = 0;

	/* set up ata header */
	ah->scnt = 1;
	ah->cmdstat = ATA_CMD_ID_ATA;
	ah->lba3 = 0xa0;

	skb->dev = t->ifp->nd;

	d->rttavg = RTTAVG_INIT;
	d->rttdev = RTTDEV_INIT;
	d->timer.function = rexmit_timer;

	skb = skb_clone(skb, GFP_ATOMIC);
	if (skb)
		f->sent = ktime_get();

	return skb;
}

static struct aoetgt **
grow_targets(struct aoedev *d)
{
	ulong oldn, newn;
	struct aoetgt **tt;

	oldn = d->ntargets;
	newn = oldn * 2;
	tt = kcalloc(newn, sizeof(*d->targets), GFP_ATOMIC);
	if (!tt)
		return NULL;
	memmove(tt, d->targets, sizeof(*d->targets) * oldn);
	d->tgt = tt + (d->tgt - d->targets);
	kfree(d->targets);
	d->targets = tt;
	d->ntargets = newn;

	return &d->targets[oldn];
}

static struct aoetgt *
addtgt(struct aoedev *d, char *addr, ulong nframes)
{
	struct aoetgt *t, **tt, **te;

	tt = d->targets;
	te = tt + d->ntargets;
	for (; tt < te && *tt; tt++)
		;

	if (tt == te) {
		tt = grow_targets(d);
		if (!tt)
			goto nomem;
	}
	t = kzalloc(sizeof(*t), GFP_ATOMIC);
	if (!t)
		goto nomem;
	t->nframes = nframes;
	t->d = d;
	memcpy(t->addr, addr, sizeof t->addr);
	t->ifp = t->ifs;
	aoecmd_wreset(t);
	t->maxout = t->nframes / 2;
	INIT_LIST_HEAD(&t->ffree);
	return *tt = t;

nomem:
	pr_info("aoe: cannot allocate memory to add target\n");
	return NULL;
}

static void
setdbcnt(struct aoedev *d)
{
	struct aoetgt **t, **e;
	int bcnt = 0;

	t = d->targets;
	e = t + d->ntargets;
	for (; t < e && *t; t++)
		if (bcnt == 0 || bcnt > (*t)->minbcnt)
			bcnt = (*t)->minbcnt;
	if (bcnt != d->maxbcnt) {
		d->maxbcnt = bcnt;
		pr_info("aoe: e%ld.%d: setting %d byte data frames\n",
			d->aoemajor, d->aoeminor, bcnt);
	}
}

static void
setifbcnt(struct aoetgt *t, struct net_device *nd, int bcnt)
{
	struct aoedev *d;
	struct aoeif *p, *e;
	int minbcnt;

	d = t->d;
	minbcnt = bcnt;
	p = t->ifs;
	e = p + NAOEIFS;
	for (; p < e; p++) {
		if (p->nd == NULL)
			break;	/* end of the valid interfaces */
		if (p->nd == nd) {
			p->bcnt = bcnt;	/* we're updating */
			nd = NULL;
		} else if (minbcnt > p->bcnt)
			minbcnt = p->bcnt; /* find the min interface */
	}
	if (nd) {
		if (p == e) {
			pr_err("aoe: device setifbcnt failure; too many interfaces.\n");
			return;
		}
		dev_hold(nd);
		p->nd = nd;
		p->bcnt = bcnt;
	}
	t->minbcnt = minbcnt;
	setdbcnt(d);
}

void
aoecmd_cfg_rsp(struct sk_buff *skb)
{
	struct aoedev *d;
	struct aoe_hdr *h;
	struct aoe_cfghdr *ch;
	struct aoetgt *t;
	ulong flags, aoemajor;
	struct sk_buff *sl;
	struct sk_buff_head queue;
	u16 n;

	sl = NULL;
	h = (struct aoe_hdr *) skb_mac_header(skb);
	ch = (struct aoe_cfghdr *) (h+1);

	/*
	 * Enough people have their dip switches set backwards to
	 * warrant a loud message for this special case.
	 */
	aoemajor = get_unaligned_be16(&h->major);
	if (aoemajor == 0xfff) {
		printk(KERN_ERR "aoe: Warning: shelf address is all ones. "
			"Check shelf dip switches.\n");
		return;
	}
	if (aoemajor == 0xffff) {
		pr_info("aoe: e%ld.%d: broadcast shelf number invalid\n",
			aoemajor, (int) h->minor);
		return;
	}
	if (h->minor == 0xff) {
		pr_info("aoe: e%ld.%d: broadcast slot number invalid\n",
			aoemajor, (int) h->minor);
		return;
	}

	n = be16_to_cpu(ch->bufcnt);
	if (n > aoe_maxout)	/* keep it reasonable */
		n = aoe_maxout;

	d = aoedev_by_aoeaddr(aoemajor, h->minor, 1);
	if (d == NULL) {
		pr_info("aoe: device allocation failure\n");
		return;
	}

	spin_lock_irqsave(&d->lock, flags);

	t = gettgt(d, h->src);
	if (t) {
		t->nframes = n;
		if (n < t->maxout)
			aoecmd_wreset(t);
	} else {
		t = addtgt(d, h->src, n);
		if (!t)
			goto bail;
	}
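	/* Pick the per-interface payload: the sectors that fit in one MTU
	 * after the AoE and ATA headers, capped by the target's advertised
	 * count.  For illustration, assuming the usual 36 header bytes:
	 * a 1500-byte MTU gives (1500 - 36) / 512 = 2 sectors, i.e.
	 * 1024-byte data frames, while a 9000-byte jumbo MTU gives 17
	 * sectors (8704 bytes).
	 */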
	n = skb->dev->mtu;
	n -= sizeof(struct aoe_hdr) + sizeof(struct aoe_atahdr);
	n /= 512;
	if (n > ch->scnt)
		n = ch->scnt;
	n = n ? n * 512 : DEFAULTBCNT;
	setifbcnt(t, skb->dev, n);

	/* don't change users' perspective */
	if (d->nopen == 0) {
		d->fw_ver = be16_to_cpu(ch->fwver);
		sl = aoecmd_ata_id(d);
	}
bail:
	spin_unlock_irqrestore(&d->lock, flags);
	aoedev_put(d);
	if (sl) {
		__skb_queue_head_init(&queue);
		__skb_queue_tail(&queue, sl);
		aoenet_xmit(&queue);
	}
}

void
aoecmd_wreset(struct aoetgt *t)
{
	t->maxout = 1;
	t->ssthresh = t->nframes / 2;
	t->next_cwnd = t->nframes;
}

void
aoecmd_cleanslate(struct aoedev *d)
{
	struct aoetgt **t, **te;

	d->rttavg = RTTAVG_INIT;
	d->rttdev = RTTDEV_INIT;
	d->maxbcnt = 0;

	t = d->targets;
	te = t + d->ntargets;
	for (; t < te && *t; t++)
		aoecmd_wreset(*t);
}

void
aoe_failbuf(struct aoedev *d, struct buf *buf)
{
	if (buf == NULL)
		return;
	buf->iter.bi_size = 0;
	buf->bio->bi_status = BLK_STS_IOERR;
	if (buf->nframesout == 0)
		aoe_end_buf(d, buf);
}

void
aoe_flush_iocq(void)
{
	int i;

	for (i = 0; i < ncpus; i++) {
		if (kts[i].active)
			aoe_flush_iocq_by_index(i);
	}
}

void
aoe_flush_iocq_by_index(int id)
{
	struct frame *f;
	struct aoedev *d;
	LIST_HEAD(flist);
	struct list_head *pos;
	struct sk_buff *skb;
	ulong flags;

	spin_lock_irqsave(&iocq[id].lock, flags);
	list_splice_init(&iocq[id].head, &flist);
	spin_unlock_irqrestore(&iocq[id].lock, flags);
	while (!list_empty(&flist)) {
		pos = flist.next;
		list_del(pos);
		f = list_entry(pos, struct frame, head);
		d = f->t->d;
		skb = f->r_skb;
		spin_lock_irqsave(&d->lock, flags);
		if (f->buf) {
			f->buf->nframesout--;
			aoe_failbuf(d, f->buf);
		}
		aoe_freetframe(f);
		spin_unlock_irqrestore(&d->lock, flags);
		dev_kfree_skb(skb);
		aoedev_put(d);
	}
}

int __init
aoecmd_init(void)
{
	void *p;
	int i;
	int ret;

	/* get_zeroed_page returns page with ref count 1 */
	p = (void *) get_zeroed_page(GFP_KERNEL);
	if (!p)
		return -ENOMEM;
	empty_page = virt_to_page(p);

	ncpus = num_online_cpus();

	iocq = kcalloc(ncpus, sizeof(struct iocq_ktio), GFP_KERNEL);
	if (!iocq)
		return -ENOMEM;

	kts = kcalloc(ncpus, sizeof(struct ktstate), GFP_KERNEL);
	if (!kts) {
		ret = -ENOMEM;
		goto kts_fail;
	}

	ktiowq = kcalloc(ncpus, sizeof(wait_queue_head_t), GFP_KERNEL);
	if (!ktiowq) {
		ret = -ENOMEM;
		goto ktiowq_fail;
	}

	for (i = 0; i < ncpus; i++) {
		INIT_LIST_HEAD(&iocq[i].head);
		spin_lock_init(&iocq[i].lock);
		init_waitqueue_head(&ktiowq[i]);
		snprintf(kts[i].name, sizeof(kts[i].name), "aoe_ktio%d", i);
		kts[i].fn = ktio;
		kts[i].waitq = &ktiowq[i];
		kts[i].lock = &iocq[i].lock;
		kts[i].id = i;
		kts[i].active = 0;
	}
	kts[0].active = 1;
	if (aoe_ktstart(&kts[0])) {
		ret = -ENOMEM;
		goto ktstart_fail;
	}
	return 0;

ktstart_fail:
	kfree(ktiowq);
ktiowq_fail:
	kfree(kts);
kts_fail:
	kfree(iocq);

	return ret;
}

void
aoecmd_exit(void)
{
	int i;

	for (i = 0; i < ncpus; i++)
		if (kts[i].active)
			aoe_ktstop(&kts[i]);

	aoe_flush_iocq();

	/* Free up the iocq and thread-specific configuration
	 * allocated during startup.
	 */
	kfree(iocq);
	kfree(kts);
	kfree(ktiowq);

	free_page((unsigned long) page_address(empty_page));
	empty_page = NULL;
}