1 | /* |
2 | * libcxgb_ppm.c: Chelsio common library for T3/T4/T5 iSCSI PagePod Manager |
3 | * |
4 | * Copyright (c) 2016 Chelsio Communications, Inc. All rights reserved. |
5 | * |
6 | * This software is available to you under a choice of one of two |
7 | * licenses. You may choose to be licensed under the terms of the GNU |
8 | * General Public License (GPL) Version 2, available from the file |
9 | * COPYING in the main directory of this source tree, or the |
10 | * OpenIB.org BSD license below: |
11 | * |
12 | * Redistribution and use in source and binary forms, with or |
13 | * without modification, are permitted provided that the following |
14 | * conditions are met: |
15 | * |
16 | * - Redistributions of source code must retain the above |
17 | * copyright notice, this list of conditions and the following |
18 | * disclaimer. |
19 | * |
20 | * - Redistributions in binary form must reproduce the above |
21 | * copyright notice, this list of conditions and the following |
22 | * disclaimer in the documentation and/or other materials |
23 | * provided with the distribution. |
24 | * |
25 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
26 | * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
27 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND |
28 | * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS |
29 | * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN |
30 | * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN |
31 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
32 | * SOFTWARE. |
33 | * |
34 | * Written by: Karen Xie (kxie@chelsio.com) |
35 | */ |
36 | |
/* Log prefix: pr_fmt() puts "libcxgb: " in front of the format string of
 * the pr_*() messages emitted by this file.
 */
#define DRV_NAME	"libcxgb"
#define pr_fmt(fmt)	DRV_NAME ": " fmt
39 | |
40 | #include <linux/kernel.h> |
41 | #include <linux/module.h> |
42 | #include <linux/errno.h> |
43 | #include <linux/types.h> |
44 | #include <linux/debugfs.h> |
45 | #include <linux/export.h> |
46 | #include <linux/list.h> |
47 | #include <linux/skbuff.h> |
48 | #include <linux/pci.h> |
49 | #include <linux/scatterlist.h> |
50 | |
51 | #include "libcxgb_ppm.h" |
52 | |
53 | /* Direct Data Placement - |
54 | * Directly place the iSCSI Data-In or Data-Out PDU's payload into |
55 | * pre-posted final destination host-memory buffers based on the |
56 | * Initiator Task Tag (ITT) in Data-In or Target Task Tag (TTT) |
57 | * in Data-Out PDUs. The host memory address is programmed into |
58 | * h/w in the format of pagepod entries. The location of the |
59 | * pagepod entry is encoded into ddp tag which is used as the base |
60 | * for ITT/TTT. |
61 | */ |
62 | |
63 | /* Direct-Data Placement page size adjustment |
64 | */ |
65 | int cxgbi_ppm_find_page_index(struct cxgbi_ppm *ppm, unsigned long pgsz) |
66 | { |
67 | struct cxgbi_tag_format *tformat = &ppm->tformat; |
68 | int i; |
69 | |
70 | for (i = 0; i < DDP_PGIDX_MAX; i++) { |
71 | if (pgsz == 1UL << (DDP_PGSZ_BASE_SHIFT + |
72 | tformat->pgsz_order[i])) { |
73 | pr_debug("%s: %s ppm, pgsz %lu -> idx %d.\n" , |
74 | __func__, ppm->ndev->name, pgsz, i); |
75 | return i; |
76 | } |
77 | } |
78 | pr_info("ippm: ddp page size %lu not supported.\n" , pgsz); |
79 | return DDP_PGIDX_MAX; |
80 | } |
81 | |
82 | /* DDP setup & teardown |
83 | */ |
/* Find and claim @nr contiguous free entries in @bmap.
 *
 * @bmap:       allocation bitmap, one bit per entry
 * @max_ppods:  number of valid bits in @bmap
 * @start:      bit index at which the search begins
 * @nr:         number of contiguous entries wanted
 * @align_mask: alignment mask passed to bitmap_find_next_zero_area()
 *
 * The map is searched from @start to its end first.  If nothing is found
 * and the search did not already begin at bit 0, it is repeated from the
 * start of the map.  The bits of the area found are set before returning.
 * Both callers in this file hold a spinlock around this call.
 *
 * Returns the index of the first claimed bit, or -ENOSPC when no free
 * area of @nr bits exists.
 */
static int ppm_find_unused_entries(unsigned long *bmap,
				   unsigned int max_ppods,
				   unsigned int start,
				   unsigned int nr,
				   unsigned int align_mask)
{
	unsigned long i;

	i = bitmap_find_next_zero_area(bmap, max_ppods, start, nr, align_mask);

	/* Wrap around and look for @nr bits from bit 0.  The size requested
	 * must be @nr: asking for "start - 1" bits would reject holes that
	 * are large enough for the request but smaller than the start
	 * offset, and fail with -ENOSPC although space is available.
	 */
	if (unlikely(i >= max_ppods) && start)
		i = bitmap_find_next_zero_area(bmap, max_ppods, 0, nr,
					       align_mask);
	if (unlikely(i >= max_ppods))
		return -ENOSPC;

	bitmap_set(bmap, i, nr);
	return (int)i;
}
103 | |
104 | static void ppm_mark_entries(struct cxgbi_ppm *ppm, int i, int count, |
105 | unsigned long caller_data) |
106 | { |
107 | struct cxgbi_ppod_data *pdata = ppm->ppod_data + i; |
108 | |
109 | pdata->caller_data = caller_data; |
110 | pdata->npods = count; |
111 | |
112 | if (pdata->color == ((1 << PPOD_IDX_SHIFT) - 1)) |
113 | pdata->color = 0; |
114 | else |
115 | pdata->color++; |
116 | } |
117 | |
118 | static int ppm_get_cpu_entries(struct cxgbi_ppm *ppm, unsigned int count, |
119 | unsigned long caller_data) |
120 | { |
121 | struct cxgbi_ppm_pool *pool; |
122 | unsigned int cpu; |
123 | int i; |
124 | |
125 | if (!ppm->pool) |
126 | return -EINVAL; |
127 | |
128 | cpu = get_cpu(); |
129 | pool = per_cpu_ptr(ppm->pool, cpu); |
130 | spin_lock_bh(lock: &pool->lock); |
131 | put_cpu(); |
132 | |
133 | i = ppm_find_unused_entries(bmap: pool->bmap, max_ppods: ppm->pool_index_max, |
134 | start: pool->next, nr: count, align_mask: 0); |
135 | if (i < 0) { |
136 | pool->next = 0; |
137 | spin_unlock_bh(lock: &pool->lock); |
138 | return -ENOSPC; |
139 | } |
140 | |
141 | pool->next = i + count; |
142 | if (pool->next >= ppm->pool_index_max) |
143 | pool->next = 0; |
144 | |
145 | spin_unlock_bh(lock: &pool->lock); |
146 | |
147 | pr_debug("%s: cpu %u, idx %d + %d (%d), next %u.\n" , |
148 | __func__, cpu, i, count, i + cpu * ppm->pool_index_max, |
149 | pool->next); |
150 | |
151 | i += cpu * ppm->pool_index_max; |
152 | ppm_mark_entries(ppm, i, count, caller_data); |
153 | |
154 | return i; |
155 | } |
156 | |
157 | static int ppm_get_entries(struct cxgbi_ppm *ppm, unsigned int count, |
158 | unsigned long caller_data) |
159 | { |
160 | int i; |
161 | |
162 | spin_lock_bh(lock: &ppm->map_lock); |
163 | i = ppm_find_unused_entries(bmap: ppm->ppod_bmap, max_ppods: ppm->bmap_index_max, |
164 | start: ppm->next, nr: count, align_mask: 0); |
165 | if (i < 0) { |
166 | ppm->next = 0; |
167 | spin_unlock_bh(lock: &ppm->map_lock); |
168 | pr_debug("ippm: NO suitable entries %u available.\n" , |
169 | count); |
170 | return -ENOSPC; |
171 | } |
172 | |
173 | ppm->next = i + count; |
174 | if (ppm->max_index_in_edram && (ppm->next >= ppm->max_index_in_edram)) |
175 | ppm->next = 0; |
176 | else if (ppm->next >= ppm->bmap_index_max) |
177 | ppm->next = 0; |
178 | |
179 | spin_unlock_bh(lock: &ppm->map_lock); |
180 | |
181 | pr_debug("%s: idx %d + %d (%d), next %u, caller_data 0x%lx.\n" , |
182 | __func__, i, count, i + ppm->pool_rsvd, ppm->next, |
183 | caller_data); |
184 | |
185 | i += ppm->pool_rsvd; |
186 | ppm_mark_entries(ppm, i, count, caller_data); |
187 | |
188 | return i; |
189 | } |
190 | |
191 | static void ppm_unmark_entries(struct cxgbi_ppm *ppm, int i, int count) |
192 | { |
193 | pr_debug("%s: idx %d + %d.\n" , __func__, i, count); |
194 | |
195 | if (i < ppm->pool_rsvd) { |
196 | unsigned int cpu; |
197 | struct cxgbi_ppm_pool *pool; |
198 | |
199 | cpu = i / ppm->pool_index_max; |
200 | i %= ppm->pool_index_max; |
201 | |
202 | pool = per_cpu_ptr(ppm->pool, cpu); |
203 | spin_lock_bh(lock: &pool->lock); |
204 | bitmap_clear(map: pool->bmap, start: i, nbits: count); |
205 | |
206 | if (i < pool->next) |
207 | pool->next = i; |
208 | spin_unlock_bh(lock: &pool->lock); |
209 | |
210 | pr_debug("%s: cpu %u, idx %d, next %u.\n" , |
211 | __func__, cpu, i, pool->next); |
212 | } else { |
213 | spin_lock_bh(lock: &ppm->map_lock); |
214 | |
215 | i -= ppm->pool_rsvd; |
216 | bitmap_clear(map: ppm->ppod_bmap, start: i, nbits: count); |
217 | |
218 | if (i < ppm->next) |
219 | ppm->next = i; |
220 | spin_unlock_bh(lock: &ppm->map_lock); |
221 | |
222 | pr_debug("%s: idx %d, next %u.\n" , __func__, i, ppm->next); |
223 | } |
224 | } |
225 | |
226 | void cxgbi_ppm_ppod_release(struct cxgbi_ppm *ppm, u32 idx) |
227 | { |
228 | struct cxgbi_ppod_data *pdata; |
229 | |
230 | if (idx >= ppm->ppmax) { |
231 | pr_warn("ippm: idx too big %u > %u.\n" , idx, ppm->ppmax); |
232 | return; |
233 | } |
234 | |
235 | pdata = ppm->ppod_data + idx; |
236 | if (!pdata->npods) { |
237 | pr_warn("ippm: idx %u, npods 0.\n" , idx); |
238 | return; |
239 | } |
240 | |
241 | pr_debug("release idx %u, npods %u.\n" , idx, pdata->npods); |
242 | ppm_unmark_entries(ppm, i: idx, count: pdata->npods); |
243 | } |
244 | EXPORT_SYMBOL(cxgbi_ppm_ppod_release); |
245 | |
246 | int cxgbi_ppm_ppods_reserve(struct cxgbi_ppm *ppm, unsigned short nr_pages, |
247 | u32 per_tag_pg_idx, u32 *ppod_idx, |
248 | u32 *ddp_tag, unsigned long caller_data) |
249 | { |
250 | struct cxgbi_ppod_data *pdata; |
251 | unsigned int npods; |
252 | int idx = -1; |
253 | unsigned int hwidx; |
254 | u32 tag; |
255 | |
256 | npods = (nr_pages + PPOD_PAGES_MAX - 1) >> PPOD_PAGES_SHIFT; |
257 | if (!npods) { |
258 | pr_warn("%s: pages %u -> npods %u, full.\n" , |
259 | __func__, nr_pages, npods); |
260 | return -EINVAL; |
261 | } |
262 | |
263 | /* grab from cpu pool first */ |
264 | idx = ppm_get_cpu_entries(ppm, count: npods, caller_data); |
265 | /* try the general pool */ |
266 | if (idx < 0) |
267 | idx = ppm_get_entries(ppm, count: npods, caller_data); |
268 | if (idx < 0) { |
269 | pr_debug("ippm: pages %u, nospc %u, nxt %u, 0x%lx.\n" , |
270 | nr_pages, npods, ppm->next, caller_data); |
271 | return idx; |
272 | } |
273 | |
274 | pdata = ppm->ppod_data + idx; |
275 | hwidx = ppm->base_idx + idx; |
276 | |
277 | tag = cxgbi_ppm_make_ddp_tag(hw_idx: hwidx, color: pdata->color); |
278 | |
279 | if (per_tag_pg_idx) |
280 | tag |= (per_tag_pg_idx << 30) & 0xC0000000; |
281 | |
282 | *ppod_idx = idx; |
283 | *ddp_tag = tag; |
284 | |
285 | pr_debug("ippm: sg %u, tag 0x%x(%u,%u), data 0x%lx.\n" , |
286 | nr_pages, tag, idx, npods, caller_data); |
287 | |
288 | return npods; |
289 | } |
290 | EXPORT_SYMBOL(cxgbi_ppm_ppods_reserve); |
291 | |
292 | void cxgbi_ppm_make_ppod_hdr(struct cxgbi_ppm *ppm, u32 tag, |
293 | unsigned int tid, unsigned int offset, |
294 | unsigned int length, |
295 | struct cxgbi_pagepod_hdr *hdr) |
296 | { |
297 | /* The ddp tag in pagepod should be with bit 31:30 set to 0. |
298 | * The ddp Tag on the wire should be with non-zero 31:30 to the peer |
299 | */ |
300 | tag &= 0x3FFFFFFF; |
301 | |
302 | hdr->vld_tid = htonl(PPOD_VALID_FLAG | PPOD_TID(tid)); |
303 | |
304 | hdr->rsvd = 0; |
305 | hdr->pgsz_tag_clr = htonl(tag & ppm->tformat.idx_clr_mask); |
306 | hdr->max_offset = htonl(length); |
307 | hdr->page_offset = htonl(offset); |
308 | |
309 | pr_debug("ippm: tag 0x%x, tid 0x%x, xfer %u, off %u.\n" , |
310 | tag, tid, length, offset); |
311 | } |
312 | EXPORT_SYMBOL(cxgbi_ppm_make_ppod_hdr); |
313 | |
/* Free the memory of a manager obtained with vzalloc() in cxgbi_ppm_init().
 * The per-cpu pools are not touched here.
 */
static void ppm_free(struct cxgbi_ppm *ppm)
{
	vfree(ppm);
}
318 | |
319 | static void ppm_destroy(struct kref *kref) |
320 | { |
321 | struct cxgbi_ppm *ppm = container_of(kref, |
322 | struct cxgbi_ppm, |
323 | refcnt); |
324 | pr_info("ippm: kref 0, destroy %s ppm 0x%p.\n" , |
325 | ppm->ndev->name, ppm); |
326 | |
327 | *ppm->ppm_pp = NULL; |
328 | |
329 | free_percpu(pdata: ppm->pool); |
330 | ppm_free(ppm); |
331 | } |
332 | |
333 | int cxgbi_ppm_release(struct cxgbi_ppm *ppm) |
334 | { |
335 | if (ppm) { |
336 | int rv; |
337 | |
338 | rv = kref_put(kref: &ppm->refcnt, release: ppm_destroy); |
339 | return rv; |
340 | } |
341 | return 1; |
342 | } |
343 | EXPORT_SYMBOL(cxgbi_ppm_release); |
344 | |
345 | static struct cxgbi_ppm_pool *ppm_alloc_cpu_pool(unsigned int *total, |
346 | unsigned int *pcpu_ppmax) |
347 | { |
348 | struct cxgbi_ppm_pool *pools; |
349 | unsigned int ppmax = (*total) / num_possible_cpus(); |
350 | unsigned int max = (PCPU_MIN_UNIT_SIZE - sizeof(*pools)) << 3; |
351 | unsigned int bmap; |
352 | unsigned int alloc_sz; |
353 | unsigned int count = 0; |
354 | unsigned int cpu; |
355 | |
356 | /* make sure per cpu pool fits into PCPU_MIN_UNIT_SIZE */ |
357 | if (ppmax > max) |
358 | ppmax = max; |
359 | |
360 | /* pool size must be multiple of unsigned long */ |
361 | bmap = ppmax / BITS_PER_TYPE(unsigned long); |
362 | if (!bmap) |
363 | return NULL; |
364 | |
365 | ppmax = (bmap * sizeof(unsigned long)) << 3; |
366 | |
367 | alloc_sz = sizeof(*pools) + sizeof(unsigned long) * bmap; |
368 | pools = __alloc_percpu(size: alloc_sz, align: __alignof__(struct cxgbi_ppm_pool)); |
369 | |
370 | if (!pools) |
371 | return NULL; |
372 | |
373 | for_each_possible_cpu(cpu) { |
374 | struct cxgbi_ppm_pool *ppool = per_cpu_ptr(pools, cpu); |
375 | |
376 | memset(ppool, 0, alloc_sz); |
377 | spin_lock_init(&ppool->lock); |
378 | count += ppmax; |
379 | } |
380 | |
381 | *total = count; |
382 | *pcpu_ppmax = ppmax; |
383 | |
384 | return pools; |
385 | } |
386 | |
387 | int cxgbi_ppm_init(void **ppm_pp, struct net_device *ndev, |
388 | struct pci_dev *pdev, void *lldev, |
389 | struct cxgbi_tag_format *tformat, unsigned int iscsi_size, |
390 | unsigned int llimit, unsigned int start, |
391 | unsigned int reserve_factor, unsigned int iscsi_edram_start, |
392 | unsigned int iscsi_edram_size) |
393 | { |
394 | struct cxgbi_ppm *ppm = (struct cxgbi_ppm *)(*ppm_pp); |
395 | struct cxgbi_ppm_pool *pool = NULL; |
396 | unsigned int pool_index_max = 0; |
397 | unsigned int ppmax_pool = 0; |
398 | unsigned int ppod_bmap_size; |
399 | unsigned int alloc_sz; |
400 | unsigned int ppmax; |
401 | |
402 | if (!iscsi_edram_start) |
403 | iscsi_edram_size = 0; |
404 | |
405 | if (iscsi_edram_size && |
406 | ((iscsi_edram_start + iscsi_edram_size) != start)) { |
407 | pr_err("iscsi ppod region not contiguous: EDRAM start 0x%x " |
408 | "size 0x%x DDR start 0x%x\n" , |
409 | iscsi_edram_start, iscsi_edram_size, start); |
410 | return -EINVAL; |
411 | } |
412 | |
413 | if (iscsi_edram_size) { |
414 | reserve_factor = 0; |
415 | start = iscsi_edram_start; |
416 | } |
417 | |
418 | ppmax = (iscsi_edram_size + iscsi_size) >> PPOD_SIZE_SHIFT; |
419 | |
420 | if (ppm) { |
421 | pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n" , |
422 | ndev->name, ppm_pp, ppm, ppm->ppmax, ppmax); |
423 | kref_get(kref: &ppm->refcnt); |
424 | return 1; |
425 | } |
426 | |
427 | if (reserve_factor) { |
428 | ppmax_pool = ppmax / reserve_factor; |
429 | pool = ppm_alloc_cpu_pool(total: &ppmax_pool, pcpu_ppmax: &pool_index_max); |
430 | if (!pool) { |
431 | ppmax_pool = 0; |
432 | reserve_factor = 0; |
433 | } |
434 | |
435 | pr_debug("%s: ppmax %u, cpu total %u, per cpu %u.\n" , |
436 | ndev->name, ppmax, ppmax_pool, pool_index_max); |
437 | } |
438 | |
439 | ppod_bmap_size = BITS_TO_LONGS(ppmax - ppmax_pool); |
440 | alloc_sz = sizeof(struct cxgbi_ppm) + |
441 | ppmax * (sizeof(struct cxgbi_ppod_data)) + |
442 | ppod_bmap_size * sizeof(unsigned long); |
443 | |
444 | ppm = vzalloc(size: alloc_sz); |
445 | if (!ppm) |
446 | goto release_ppm_pool; |
447 | |
448 | ppm->ppod_bmap = (unsigned long *)(&ppm->ppod_data[ppmax]); |
449 | |
450 | if ((ppod_bmap_size >> 3) > (ppmax - ppmax_pool)) { |
451 | unsigned int start = ppmax - ppmax_pool; |
452 | unsigned int end = ppod_bmap_size >> 3; |
453 | |
454 | bitmap_set(map: ppm->ppod_bmap, start: ppmax, nbits: end - start); |
455 | pr_info("%s: %u - %u < %u * 8, mask extra bits %u, %u.\n" , |
456 | __func__, ppmax, ppmax_pool, ppod_bmap_size, start, |
457 | end); |
458 | } |
459 | if (iscsi_edram_size) { |
460 | unsigned int first_ddr_idx = |
461 | iscsi_edram_size >> PPOD_SIZE_SHIFT; |
462 | |
463 | ppm->max_index_in_edram = first_ddr_idx - 1; |
464 | bitmap_set(map: ppm->ppod_bmap, start: first_ddr_idx, nbits: 1); |
465 | pr_debug("reserved %u ppod in bitmap\n" , first_ddr_idx); |
466 | } |
467 | |
468 | spin_lock_init(&ppm->map_lock); |
469 | kref_init(kref: &ppm->refcnt); |
470 | |
471 | memcpy(&ppm->tformat, tformat, sizeof(struct cxgbi_tag_format)); |
472 | |
473 | ppm->ppm_pp = ppm_pp; |
474 | ppm->ndev = ndev; |
475 | ppm->pdev = pdev; |
476 | ppm->lldev = lldev; |
477 | ppm->ppmax = ppmax; |
478 | ppm->next = 0; |
479 | ppm->llimit = llimit; |
480 | ppm->base_idx = start > llimit ? |
481 | (start - llimit + 1) >> PPOD_SIZE_SHIFT : 0; |
482 | ppm->bmap_index_max = ppmax - ppmax_pool; |
483 | |
484 | ppm->pool = pool; |
485 | ppm->pool_rsvd = ppmax_pool; |
486 | ppm->pool_index_max = pool_index_max; |
487 | |
488 | /* check one more time */ |
489 | if (*ppm_pp) { |
490 | ppm_free(ppm); |
491 | ppm = (struct cxgbi_ppm *)(*ppm_pp); |
492 | |
493 | pr_info("ippm: %s, ppm 0x%p,0x%p already initialized, %u/%u.\n" , |
494 | ndev->name, ppm_pp, *ppm_pp, ppm->ppmax, ppmax); |
495 | |
496 | kref_get(kref: &ppm->refcnt); |
497 | return 1; |
498 | } |
499 | *ppm_pp = ppm; |
500 | |
501 | ppm->tformat.pgsz_idx_dflt = cxgbi_ppm_find_page_index(ppm, PAGE_SIZE); |
502 | |
503 | pr_info("ippm %s: ppm 0x%p, 0x%p, base %u/%u, pg %lu,%u, rsvd %u,%u.\n" , |
504 | ndev->name, ppm_pp, ppm, ppm->base_idx, ppm->ppmax, PAGE_SIZE, |
505 | ppm->tformat.pgsz_idx_dflt, ppm->pool_rsvd, |
506 | ppm->pool_index_max); |
507 | |
508 | return 0; |
509 | |
510 | release_ppm_pool: |
511 | free_percpu(pdata: pool); |
512 | return -ENOMEM; |
513 | } |
514 | EXPORT_SYMBOL(cxgbi_ppm_init); |
515 | |
516 | unsigned int cxgbi_tagmask_set(unsigned int ppmax) |
517 | { |
518 | unsigned int bits = fls(x: ppmax); |
519 | |
520 | if (bits > PPOD_IDX_MAX_SIZE) |
521 | bits = PPOD_IDX_MAX_SIZE; |
522 | |
523 | pr_info("ippm: ppmax %u/0x%x -> bits %u, tagmask 0x%x.\n" , |
524 | ppmax, ppmax, bits, 1 << (bits + PPOD_IDX_SHIFT)); |
525 | |
526 | return 1 << (bits + PPOD_IDX_SHIFT); |
527 | } |
528 | EXPORT_SYMBOL(cxgbi_tagmask_set); |
529 | |
530 | MODULE_AUTHOR("Chelsio Communications" ); |
531 | MODULE_DESCRIPTION("Chelsio common library" ); |
532 | MODULE_LICENSE("Dual BSD/GPL" ); |
533 | |