1// SPDX-License-Identifier: GPL-2.0+
2// Copyright 2017 IBM Corp.
3#include <asm/pnv-ocxl.h>
4#include <asm/opal.h>
5#include <misc/ocxl-config.h>
6#include "pci.h"
7
/* TL capabilities value handed back by pnv_ocxl_get_tl_cap() */
#define PNV_OCXL_TL_P9_RECV_CAP		0x000000000000000Full
/* Number of actags available per link */
#define PNV_OCXL_ACTAG_MAX		64
/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
#define PNV_OCXL_PASID_BITS		15
#define PNV_OCXL_PASID_MAX		((1 << PNV_OCXL_PASID_BITS) - 1)

/*
 * Fields of the dword read at OCXL_DVSEC_FUNC_OFF_INDEX.
 * AFU_PRESENT is an unsigned constant: shifting a signed 1 into bit 31
 * is undefined behaviour in C.
 */
#define AFU_PRESENT			(1U << 31)
#define AFU_INDEX_MASK			0x3F000000
#define AFU_INDEX_SHIFT			24
/* Mask applied to the word read at OCXL_DVSEC_AFU_CTRL_ACTAG_SUP */
#define ACTAG_MASK			0xFFF
18
19
20struct actag_range {
21 u16 start;
22 u16 count;
23};
24
25struct npu_link {
26 struct list_head list;
27 int domain;
28 int bus;
29 int dev;
30 u16 fn_desired_actags[8];
31 struct actag_range fn_actags[8];
32 bool assignment_done;
33};
34static struct list_head links_list = LIST_HEAD_INIT(links_list);
35static DEFINE_MUTEX(links_list_lock);
36
37
38/*
39 * opencapi actags handling:
40 *
41 * When sending commands, the opencapi device references the memory
42 * context it's targeting with an 'actag', which is really an alias
43 * for a (BDF, pasid) combination. When it receives a command, the NPU
44 * must do a lookup of the actag to identify the memory context. The
45 * hardware supports a finite number of actags per link (64 for
46 * POWER9).
47 *
48 * The device can carry multiple functions, and each function can have
49 * multiple AFUs. Each AFU advertises in its config space the number
50 * of desired actags. The host must configure in the config space of
51 * the AFU how many actags the AFU is really allowed to use (which can
52 * be less than what the AFU desires).
53 *
54 * When a PCI function is probed by the driver, it has no visibility
55 * about the other PCI functions and how many actags they'd like,
56 * which makes it impossible to distribute actags fairly among AFUs.
57 *
 * Unfortunately, the only way to know how many actags a function
 * desires is by looking at the data for each AFU in the config space
 * and adding them up. Similarly, the only way to know how many actags
 * all the functions of the physical device desire is by adding the
 * previously computed function counts. Then we can match that against
 * what the hardware supports.
64 *
 * To get a comprehensive view, we use a 'pci fixup': at the end of
 * PCI enumeration, each function counts how many actags its AFUs
 * desire and we save it in an 'npu_link' structure, shared between all
 * the PCI functions of the same device. Therefore, when the first
 * function is probed by the driver, we can get an idea of the total
 * count of desired actags for the device, and assign the actags to
 * the AFUs, by pro-rating if needed.
72 */
73
74static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
75{
76 int vsec = pos;
77 u16 vendor, id;
78
79 while ((vsec = pci_find_next_ext_capability(dev, vsec,
80 OCXL_EXT_CAP_ID_DVSEC))) {
81 pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
82 &vendor);
83 pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
84 if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
85 return vsec;
86 }
87 return 0;
88}
89
90static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
91{
92 int vsec = 0;
93 u8 idx;
94
95 while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
96 pos: vsec))) {
97 pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
98 &idx);
99 if (idx == afu_idx)
100 return vsec;
101 }
102 return 0;
103}
104
105static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
106{
107 int pos;
108 u32 val;
109
110 pos = pci_find_dvsec_capability(dev, PCI_VENDOR_ID_IBM,
111 OCXL_DVSEC_FUNC_ID);
112 if (!pos)
113 return -ESRCH;
114
115 pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
116 if (val & AFU_PRESENT)
117 *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
118 else
119 *afu_idx = -1;
120 return 0;
121}
122
123static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
124{
125 int pos;
126 u16 actag_sup;
127
128 pos = find_dvsec_afu_ctrl(dev, afu_idx);
129 if (!pos)
130 return -ESRCH;
131
132 pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
133 &actag_sup);
134 *actag = actag_sup & ACTAG_MASK;
135 return 0;
136}
137
138static struct npu_link *find_link(struct pci_dev *dev)
139{
140 struct npu_link *link;
141
142 list_for_each_entry(link, &links_list, list) {
143 /* The functions of a device all share the same link */
144 if (link->domain == pci_domain_nr(dev->bus) &&
145 link->bus == dev->bus->number &&
146 link->dev == PCI_SLOT(dev->devfn)) {
147 return link;
148 }
149 }
150
151 /* link doesn't exist yet. Allocate one */
152 link = kzalloc(size: sizeof(struct npu_link), GFP_KERNEL);
153 if (!link)
154 return NULL;
155 link->domain = pci_domain_nr(dev->bus);
156 link->bus = dev->bus->number;
157 link->dev = PCI_SLOT(dev->devfn);
158 list_add(new: &link->list, head: &links_list);
159 return link;
160}
161
162static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
163{
164 struct pci_controller *hose = pci_bus_to_host(dev->bus);
165 struct pnv_phb *phb = hose->private_data;
166 struct npu_link *link;
167 int rc, afu_idx = -1, i, actag;
168
169 if (!machine_is(powernv))
170 return;
171
172 if (phb->type != PNV_PHB_NPU_OCAPI)
173 return;
174
175 mutex_lock(&links_list_lock);
176
177 link = find_link(dev);
178 if (!link) {
179 dev_warn(&dev->dev, "couldn't update actag information\n");
180 mutex_unlock(lock: &links_list_lock);
181 return;
182 }
183
184 /*
185 * Check how many actags are desired for the AFUs under that
186 * function and add it to the count for the link
187 */
188 rc = get_max_afu_index(dev, afu_idx: &afu_idx);
189 if (rc) {
190 /* Most likely an invalid config space */
191 dev_dbg(&dev->dev, "couldn't find AFU information\n");
192 afu_idx = -1;
193 }
194
195 link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
196 for (i = 0; i <= afu_idx; i++) {
197 /*
198 * AFU index 'holes' are allowed. So don't fail if we
199 * can't read the actag info for an index
200 */
201 rc = get_actag_count(dev, afu_idx: i, actag: &actag);
202 if (rc)
203 continue;
204 link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
205 }
206 dev_dbg(&dev->dev, "total actags for function: %d\n",
207 link->fn_desired_actags[PCI_FUNC(dev->devfn)]);
208
209 mutex_unlock(lock: &links_list_lock);
210}
211DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);
212
213static u16 assign_fn_actags(u16 desired, u16 total)
214{
215 u16 count;
216
217 if (total <= PNV_OCXL_ACTAG_MAX)
218 count = desired;
219 else
220 count = PNV_OCXL_ACTAG_MAX * desired / total;
221
222 return count;
223}
224
225static void assign_actags(struct npu_link *link)
226{
227 u16 actag_count, range_start = 0, total_desired = 0;
228 int i;
229
230 for (i = 0; i < 8; i++)
231 total_desired += link->fn_desired_actags[i];
232
233 for (i = 0; i < 8; i++) {
234 if (link->fn_desired_actags[i]) {
235 actag_count = assign_fn_actags(
236 desired: link->fn_desired_actags[i],
237 total: total_desired);
238 link->fn_actags[i].start = range_start;
239 link->fn_actags[i].count = actag_count;
240 range_start += actag_count;
241 WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
242 }
243 pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
244 link->domain, link->bus, link->dev, i,
245 link->fn_actags[i].start, link->fn_actags[i].count,
246 link->fn_desired_actags[i]);
247 }
248 link->assignment_done = true;
249}
250
251int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
252 u16 *supported)
253{
254 struct npu_link *link;
255
256 mutex_lock(&links_list_lock);
257
258 link = find_link(dev);
259 if (!link) {
260 dev_err(&dev->dev, "actag information not found\n");
261 mutex_unlock(lock: &links_list_lock);
262 return -ENODEV;
263 }
264 /*
265 * On p9, we only have 64 actags per link, so they must be
266 * shared by all the functions of the same adapter. We counted
267 * the desired actag counts during PCI enumeration, so that we
268 * can allocate a pro-rated number of actags to each function.
269 */
270 if (!link->assignment_done)
271 assign_actags(link);
272
273 *base = link->fn_actags[PCI_FUNC(dev->devfn)].start;
274 *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count;
275 *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];
276
277 mutex_unlock(lock: &links_list_lock);
278 return 0;
279}
280EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);
281
282int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
283{
284 struct npu_link *link;
285 int i, rc = -EINVAL;
286
287 /*
288 * The number of PASIDs (process address space ID) which can
289 * be used by a function depends on how many functions exist
290 * on the device. The NPU needs to be configured to know how
291 * many bits are available to PASIDs and how many are to be
292 * used by the function BDF identifier.
293 *
294 * We only support one AFU-carrying function for now.
295 */
296 mutex_lock(&links_list_lock);
297
298 link = find_link(dev);
299 if (!link) {
300 dev_err(&dev->dev, "actag information not found\n");
301 mutex_unlock(lock: &links_list_lock);
302 return -ENODEV;
303 }
304
305 for (i = 0; i < 8; i++)
306 if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
307 *count = PNV_OCXL_PASID_MAX;
308 rc = 0;
309 break;
310 }
311
312 mutex_unlock(lock: &links_list_lock);
313 dev_dbg(&dev->dev, "%d PASIDs available for function\n",
314 rc ? 0 : *count);
315 return rc;
316}
317EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);
318
319static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
320{
321 int shift, idx;
322
323 WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
324 idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
325 shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
326 buf[idx] |= rate << shift;
327}
328
329int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
330 char *rate_buf, int rate_buf_size)
331{
332 if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
333 return -EINVAL;
334 /*
335 * The TL capabilities are a characteristic of the NPU, so
336 * we go with hard-coded values.
337 *
338 * The receiving rate of each template is encoded on 4 bits.
339 *
340 * On P9:
341 * - templates 0 -> 3 are supported
342 * - templates 0, 1 and 3 have a 0 receiving rate
343 * - template 2 has receiving rate of 1 (extra cycle)
344 */
345 memset(rate_buf, 0, rate_buf_size);
346 set_templ_rate(templ: 2, rate: 1, buf: rate_buf);
347 *cap = PNV_OCXL_TL_P9_RECV_CAP;
348 return 0;
349}
350EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);
351
352int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
353 uint64_t rate_buf_phys, int rate_buf_size)
354{
355 struct pci_controller *hose = pci_bus_to_host(dev->bus);
356 struct pnv_phb *phb = hose->private_data;
357 int rc;
358
359 if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
360 return -EINVAL;
361
362 rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
363 rate_buf_phys, rate_buf_size);
364 if (rc) {
365 dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
366 return -EINVAL;
367 }
368 return 0;
369}
370EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);
371
372int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
373{
374 int rc;
375
376 rc = of_property_read_u32(np: dev->dev.of_node, propname: "ibm,opal-xsl-irq", out_value: hwirq);
377 if (rc) {
378 dev_err(&dev->dev,
379 "Can't get translation interrupt for device\n");
380 return rc;
381 }
382 return 0;
383}
384EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);
385
386void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
387 void __iomem *tfc, void __iomem *pe_handle)
388{
389 iounmap(addr: dsisr);
390 iounmap(addr: dar);
391 iounmap(addr: tfc);
392 iounmap(addr: pe_handle);
393}
394EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);
395
396int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
397 void __iomem **dar, void __iomem **tfc,
398 void __iomem **pe_handle)
399{
400 u64 reg;
401 int i, j, rc = 0;
402 void __iomem *regs[4];
403
404 /*
405 * opal stores the mmio addresses of the DSISR, DAR, TFC and
406 * PE_HANDLE registers in a device tree property, in that
407 * order
408 */
409 for (i = 0; i < 4; i++) {
410 rc = of_property_read_u64_index(np: dev->dev.of_node,
411 propname: "ibm,opal-xsl-mmio", index: i, out_value: &reg);
412 if (rc)
413 break;
414 regs[i] = ioremap(offset: reg, size: 8);
415 if (!regs[i]) {
416 rc = -EINVAL;
417 break;
418 }
419 }
420 if (rc) {
421 dev_err(&dev->dev, "Can't map translation mmio registers\n");
422 for (j = i - 1; j >= 0; j--)
423 iounmap(addr: regs[j]);
424 } else {
425 *dsisr = regs[0];
426 *dar = regs[1];
427 *tfc = regs[2];
428 *pe_handle = regs[3];
429 }
430 return rc;
431}
432EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);
433
434struct spa_data {
435 u64 phb_opal_id;
436 u32 bdfn;
437};
438
439int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
440 void **platform_data)
441{
442 struct pci_controller *hose = pci_bus_to_host(dev->bus);
443 struct pnv_phb *phb = hose->private_data;
444 struct spa_data *data;
445 u32 bdfn;
446 int rc;
447
448 data = kzalloc(size: sizeof(*data), GFP_KERNEL);
449 if (!data)
450 return -ENOMEM;
451
452 bdfn = pci_dev_id(dev);
453 rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(address: spa_mem),
454 PE_mask);
455 if (rc) {
456 dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
457 kfree(objp: data);
458 return rc;
459 }
460 data->phb_opal_id = phb->opal_id;
461 data->bdfn = bdfn;
462 *platform_data = (void *) data;
463 return 0;
464}
465EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);
466
467void pnv_ocxl_spa_release(void *platform_data)
468{
469 struct spa_data *data = (struct spa_data *) platform_data;
470 int rc;
471
472 rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
473 WARN_ON(rc);
474 kfree(objp: data);
475}
476EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);
477
478int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
479{
480 struct spa_data *data = (struct spa_data *) platform_data;
481
482 return opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
483}
484EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
485
486int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
487 uint64_t lpcr, void __iomem **arva)
488{
489 struct pci_controller *hose = pci_bus_to_host(dev->bus);
490 struct pnv_phb *phb = hose->private_data;
491 u64 mmio_atsd;
492 int rc;
493
494 /* ATSD physical address.
495 * ATSD LAUNCH register: write access initiates a shoot down to
496 * initiate the TLB Invalidate command.
497 */
498 rc = of_property_read_u64_index(np: hose->dn, propname: "ibm,mmio-atsd",
499 index: 0, out_value: &mmio_atsd);
500 if (rc) {
501 dev_info(&dev->dev, "No available ATSD found\n");
502 return rc;
503 }
504
505 /* Assign a register set to a Logical Partition and MMIO ATSD
506 * LPARID register to the required value.
507 */
508 rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
509 lparid, lpcr);
510 if (rc) {
511 dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
512 return rc;
513 }
514
515 *arva = ioremap(offset: mmio_atsd, size: 24);
516 if (!(*arva)) {
517 dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd);
518 rc = -ENOMEM;
519 }
520
521 return rc;
522}
523EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
524
525void pnv_ocxl_unmap_lpar(void __iomem *arva)
526{
527 iounmap(addr: arva);
528}
529EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
530
531void pnv_ocxl_tlb_invalidate(void __iomem *arva,
532 unsigned long pid,
533 unsigned long addr,
534 unsigned long page_size)
535{
536 unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
537 u64 val = 0ull;
538 int pend;
539 u8 size;
540
541 if (!(arva))
542 return;
543
544 if (addr) {
545 /* load Abbreviated Virtual Address register with
546 * the necessary value
547 */
548 val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
549 out_be64(arva + PNV_OCXL_ATSD_AVA, val);
550 }
551
552 /* Write access initiates a shoot down to initiate the
553 * TLB Invalidate command
554 */
555 val = PNV_OCXL_ATSD_LNCH_R;
556 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
557 if (addr)
558 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
559 else {
560 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
561 val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
562 }
563 val |= PNV_OCXL_ATSD_LNCH_PRS;
564 /* Actual Page Size to be invalidated
565 * 000 4KB
566 * 101 64KB
567 * 001 2MB
568 * 010 1GB
569 */
570 size = 0b101;
571 if (page_size == 0x1000)
572 size = 0b000;
573 if (page_size == 0x200000)
574 size = 0b001;
575 if (page_size == 0x40000000)
576 size = 0b010;
577 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
578 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
579 out_be64(arva + PNV_OCXL_ATSD_LNCH, val);
580
581 /* Poll the ATSD status register to determine when the
582 * TLB Invalidate has been completed.
583 */
584 val = in_be64(arva + PNV_OCXL_ATSD_STAT);
585 pend = val >> 63;
586
587 while (pend) {
588 if (time_after_eq(jiffies, timeout)) {
589 pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
590 __func__, val, pid);
591 return;
592 }
593 cpu_relax();
594 val = in_be64(arva + PNV_OCXL_ATSD_STAT);
595 pend = val >> 63;
596 }
597}
598EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);
599

/* source code of linux/arch/powerpc/platforms/powernv/ocxl.c */