omap_dmm_tiler.c source code [linux/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c]

1	// SPDX-License-Identifier: GPL-2.0-only
2	/*
3	* DMM IOMMU driver support functions for TI OMAP processors.
4	*
5	* Copyright (C) 2011 Texas Instruments Incorporated - https://www.ti.com/
6	* Author: Rob Clark <rob@ti.com>
7	* Andy Gross <andy.gross@ti.com>
8	*/
9
10	#include <linux/completion.h>
11	#include <linux/delay.h>
12	#include <linux/dma-mapping.h>
13	#include <linux/dmaengine.h>
14	#include <linux/errno.h>
15	#include <linux/init.h>
16	#include <linux/interrupt.h>
17	#include <linux/list.h>
18	#include <linux/mm.h>
19	#include <linux/module.h>
20	#include <linux/of.h>
21	#include <linux/platform_device.h> /* platform_device() */
22	#include <linux/sched.h>
23	#include <linux/seq_file.h>
24	#include <linux/slab.h>
25	#include <linux/time.h>
26	#include <linux/vmalloc.h>
27	#include <linux/wait.h>
28
29	#include "omap_dmm_tiler.h"
30	#include "omap_dmm_priv.h"
31
32	#define DMM_DRIVER_NAME "dmm"
33
34	/ mappings for associating views to luts /
35	static struct tcm *containers[TILFMT_NFORMATS];
36	static struct dmm *omap_dmm;
37
38	#if defined(CONFIG_OF)
39	static const struct of_device_id dmm_of_match[];
40	#endif
41
42	/ global spinlock for protecting lists /
43	static DEFINE_SPINLOCK(list_lock);
44
45	/ Geometry table /
46	#define GEOM(xshift, yshift, bytes_per_pixel) { \
47	.x_shft = (xshift), \
48	.y_shft = (yshift), \
49	.cpp = (bytes_per_pixel), \
50	.slot_w = 1 << (SLOT_WIDTH_BITS - (xshift)), \
51	.slot_h = 1 << (SLOT_HEIGHT_BITS - (yshift)), \
52	}
53
54	static const struct {
55	u32 x_shft; / unused X-bits (as part of bpp) /
56	u32 y_shft; / unused Y-bits (as part of bpp) /
57	u32 cpp; / bytes/chars per pixel /
58	u32 slot_w; / width of each slot (in pixels) /
59	u32 slot_h; / height of each slot (in pixels) /
60	} geom[TILFMT_NFORMATS] = {
61	[TILFMT_8BIT] = GEOM(`0`, `0`, `1`),
62	[TILFMT_16BIT] = GEOM(`0`, `1`, `2`),
63	[TILFMT_32BIT] = GEOM(`1`, `1`, `4`),
64	[TILFMT_PAGE] = GEOM(SLOT_WIDTH_BITS, SLOT_HEIGHT_BITS, `1`),
65	};
66
67
68	/ lookup table for registers w/ per-engine instances /
69	static const u32 reg[][`4`] = {
70	[PAT_STATUS] = {DMM_PAT_STATUS__0, DMM_PAT_STATUS__1,
71	DMM_PAT_STATUS__2, DMM_PAT_STATUS__3},
72	[PAT_DESCR] = {DMM_PAT_DESCR__0, DMM_PAT_DESCR__1,
73	DMM_PAT_DESCR__2, DMM_PAT_DESCR__3},
74	};
75
76	static int dmm_dma_copy(struct dmm *dmm, dma_addr_t src, dma_addr_t dst)
77	{
78	struct dma_async_tx_descriptor *tx;
79	enum dma_status status;
80	dma_cookie_t cookie;
81
82	tx = dmaengine_prep_dma_memcpy(chan: dmm->wa_dma_chan, dest: dst, src, len: `4`, flags: `0`);
83	if (!tx) {
84	dev_err(dmm->dev, "Failed to prepare DMA memcpy\n");
85	return -EIO;
86	}
87
88	cookie = tx->tx_submit(tx);
89	if (dma_submit_error(cookie)) {
90	dev_err(dmm->dev, "Failed to do DMA tx_submit\n");
91	return -EIO;
92	}
93
94	status = dma_sync_wait(chan: dmm->wa_dma_chan, cookie);
95	if (status != DMA_COMPLETE)
96	dev_err(dmm->dev, "i878 wa DMA copy failure\n");
97
98	dmaengine_terminate_all(chan: dmm->wa_dma_chan);
99	return `0`;
100	}
101
102	static u32 dmm_read_wa(struct dmm *dmm, u32 reg)
103	{
104	dma_addr_t src, dst;
105	int r;
106
107	src = dmm->phys_base + reg;
108	dst = dmm->wa_dma_handle;
109
110	r = dmm_dma_copy(dmm, src, dst);
111	if (r) {
112	dev_err(dmm->dev, "sDMA read transfer timeout\n");
113	return readl(addr: dmm->base + reg);
114	}
115
116	/*
117	* As per i878 workaround, the DMA is used to access the DMM registers.
118	* Make sure that the readl is not moved by the compiler or the CPU
119	* earlier than the DMA finished writing the value to memory.
120	*/
121	rmb();
122	return readl(addr: dmm->wa_dma_data);
123	}
124
125	static void dmm_write_wa(struct dmm *dmm, u32 val, u32 reg)
126	{
127	dma_addr_t src, dst;
128	int r;
129
130	writel(val, addr: dmm->wa_dma_data);
131	/*
132	* As per i878 workaround, the DMA is used to access the DMM registers.
133	* Make sure that the writel is not moved by the compiler or the CPU, so
134	* the data will be in place before we start the DMA to do the actual
135	* register write.
136	*/
137	wmb();
138
139	src = dmm->wa_dma_handle;
140	dst = dmm->phys_base + reg;
141
142	r = dmm_dma_copy(dmm, src, dst);
143	if (r) {
144	dev_err(dmm->dev, "sDMA write transfer timeout\n");
145	writel(val, addr: dmm->base + reg);
146	}
147	}
148
149	static u32 dmm_read(struct dmm *dmm, u32 reg)
150	{
151	if (dmm->dmm_workaround) {
152	u32 v;
153	unsigned long flags;
154
155	spin_lock_irqsave(&dmm->wa_lock, flags);
156	v = dmm_read_wa(dmm, reg);
157	spin_unlock_irqrestore(lock: &dmm->wa_lock, flags);
158
159	return v;
160	} else {
161	return readl(addr: dmm->base + reg);
162	}
163	}
164
165	static void dmm_write(struct dmm *dmm, u32 val, u32 reg)
166	{
167	if (dmm->dmm_workaround) {
168	unsigned long flags;
169
170	spin_lock_irqsave(&dmm->wa_lock, flags);
171	dmm_write_wa(dmm, val, reg);
172	spin_unlock_irqrestore(lock: &dmm->wa_lock, flags);
173	} else {
174	writel(val, addr: dmm->base + reg);
175	}
176	}
177
178	static int dmm_workaround_init(struct dmm *dmm)
179	{
180	dma_cap_mask_t mask;
181
182	spin_lock_init(&dmm->wa_lock);
183
184	dmm->wa_dma_data = dma_alloc_coherent(dev: dmm->dev, size: sizeof(u32),
185	dma_handle: &dmm->wa_dma_handle, GFP_KERNEL);
186	if (!dmm->wa_dma_data)
187	return -ENOMEM;
188
189	dma_cap_zero(mask);
190	dma_cap_set(DMA_MEMCPY, mask);
191
192	dmm->wa_dma_chan = dma_request_channel(mask, NULL, NULL);
193	if (!dmm->wa_dma_chan) {
194	dma_free_coherent(dev: dmm->dev, size: `4`, cpu_addr: dmm->wa_dma_data, dma_handle: dmm->wa_dma_handle);
195	return -ENODEV;
196	}
197
198	return `0`;
199	}
200
201	static void dmm_workaround_uninit(struct dmm *dmm)
202	{
203	dma_release_channel(chan: dmm->wa_dma_chan);
204
205	dma_free_coherent(dev: dmm->dev, size: `4`, cpu_addr: dmm->wa_dma_data, dma_handle: dmm->wa_dma_handle);
206	}
207
208	/ simple allocator to grab next 16 byte aligned memory from txn /
209	static void alloc_dma(struct* dmm_txn txn, size_t sz, dma_addr_t pa)
210	{
211	void *ptr;
212	struct refill_engine *engine = txn->engine_handle;
213
214	/ dmm programming requires 16 byte aligned addresses /
215	txn->current_pa = round_up(txn->current_pa, `16`);
216	txn->current_va = (void )round_up((long*)txn->current_va, `16`);
217
218	ptr = txn->current_va;
219	*pa = txn->current_pa;
220
221	txn->current_pa += sz;
222	txn->current_va += sz;
223
224	BUG_ON((txn->current_va - engine->refill_va) > REFILL_BUFFER_SIZE);
225
226	return ptr;
227	}
228
229	/ check status and spin until wait_mask comes true /
230	static int wait_status(struct refill_engine *engine, u32 wait_mask)
231	{
232	struct dmm *dmm = engine->dmm;
233	u32 r = `0`, err, i;
234
235	i = DMM_FIXED_RETRY_COUNT;
236	while (true) {
237	r = dmm_read(dmm, reg: reg[PAT_STATUS][engine->id]);
238	err = r & DMM_PATSTATUS_ERR;
239	if (err) {
240	dev_err(dmm->dev,
241	"%s: error (engine%d). PAT_STATUS: 0x%08x\n",
242	__func__, engine->id, r);
243	return -EFAULT;
244	}
245
246	if ((r & wait_mask) == wait_mask)
247	break;
248
249	if (--i == `0`) {
250	dev_err(dmm->dev,
251	"%s: timeout (engine%d). PAT_STATUS: 0x%08x\n",
252	__func__, engine->id, r);
253	return -ETIMEDOUT;
254	}
255
256	udelay(`1`);
257	}
258
259	return `0`;
260	}
261
262	static void release_engine(struct refill_engine *engine)
263	{
264	unsigned long flags;
265
266	spin_lock_irqsave(&list_lock, flags);
267	list_add(new: &engine->idle_node, head: &omap_dmm->idle_head);
268	spin_unlock_irqrestore(lock: &list_lock, flags);
269
270	atomic_inc(v: &omap_dmm->engine_counter);
271	wake_up_interruptible(&omap_dmm->engine_queue);
272	}
273
274	static irqreturn_t omap_dmm_irq_handler(int irq, void *arg)
275	{
276	struct dmm *dmm = arg;
277	u32 status = dmm_read(dmm, DMM_PAT_IRQSTATUS);
278	int i;
279
280	/ ack IRQ /
281	dmm_write(dmm, val: status, DMM_PAT_IRQSTATUS);
282
283	for (i = `0`; i < dmm->num_engines; i++) {
284	if (status & DMM_IRQSTAT_ERR_MASK)
285	dev_err(dmm->dev,
286	"irq error(engine%d): IRQSTAT 0x%02x\n",
287	i, status & `0xff`);
288
289	if (status & DMM_IRQSTAT_LST) {
290	if (dmm->engines[i].async)
291	release_engine(engine: &dmm->engines[i]);
292
293	complete(&dmm->engines[i].compl);
294	}
295
296	status >>= `8`;
297	}
298
299	return IRQ_HANDLED;
300	}
301
302	/*
303	* Get a handle for a DMM transaction
304	*/
305	static struct dmm_txn dmm_txn_init(struct* dmm dmm, struct* tcm *tcm)
306	{
307	struct dmm_txn *txn = NULL;
308	struct refill_engine *engine = NULL;
309	int ret;
310	unsigned long flags;
311
312
313	/ wait until an engine is available /
314	ret = wait_event_interruptible(omap_dmm->engine_queue,
315	atomic_add_unless(&omap_dmm->engine_counter, -`1`, `0`));
316	if (ret)
317	return ERR_PTR(error: ret);
318
319	/ grab an idle engine /
320	spin_lock_irqsave(&list_lock, flags);
321	if (!list_empty(head: &dmm->idle_head)) {
322	engine = list_entry(dmm->idle_head.next, struct refill_engine,
323	idle_node);
324	list_del(entry: &engine->idle_node);
325	}
326	spin_unlock_irqrestore(lock: &list_lock, flags);
327
328	BUG_ON(!engine);
329
330	txn = &engine->txn;
331	engine->tcm = tcm;
332	txn->engine_handle = engine;
333	txn->last_pat = NULL;
334	txn->current_va = engine->refill_va;
335	txn->current_pa = engine->refill_pa;
336
337	return txn;
338	}
339
340	/*
341	* Add region to DMM transaction. If pages or pages[i] is NULL, then the
342	* corresponding slot is cleared (ie. dummy_pa is programmed)
343	*/
344	static void dmm_txn_append(struct dmm_txn txn, struct* pat_area *area,
345	struct page **pages, u32 npages, u32 roll)
346	{
347	dma_addr_t pat_pa = `0`, data_pa = `0`;
348	u32 *data;
349	struct pat *pat;
350	struct refill_engine *engine = txn->engine_handle;
351	int columns = (`1` + area->x1 - area->x0);
352	int rows = (`1` + area->y1 - area->y0);
353	int i = columns*rows;
354
355	pat = alloc_dma(txn, sz: sizeof(*pat), pa: &pat_pa);
356
357	if (txn->last_pat)
358	txn->last_pat->next_pa = (u32)pat_pa;
359
360	pat->area = *area;
361
362	/ adjust Y coordinates based off of container parameters /
363	pat->area.y0 += engine->tcm->y_offset;
364	pat->area.y1 += engine->tcm->y_offset;
365
366	pat->ctrl = (struct pat_ctrl){
367	.start = `1`,
368	.lut_id = engine->tcm->lut_id,
369	};
370
371	data = alloc_dma(txn, sz: `4`*i, pa: &data_pa);
372	/ FIXME: what if data_pa is more than 32-bit ? /
373	pat->data_pa = data_pa;
374
375	while (i--) {
376	int n = i + roll;
377	if (n >= npages)
378	n -= npages;
379	data[i] = (pages && pages[n]) ?
380	page_to_phys(pages[n]) : engine->dmm->dummy_pa;
381	}
382
383	txn->last_pat = pat;
384
385	return;
386	}
387
388	/*
389	* Commit the DMM transaction.
390	*/
391	static int dmm_txn_commit(struct dmm_txn *txn, bool wait)
392	{
393	int ret = `0`;
394	struct refill_engine *engine = txn->engine_handle;
395	struct dmm *dmm = engine->dmm;
396
397	if (!txn->last_pat) {
398	dev_err(engine->dmm->dev, "need at least one txn\n");
399	ret = -EINVAL;
400	goto cleanup;
401	}
402
403	txn->last_pat->next_pa = `0`;
404	/ ensure that the written descriptors are visible to DMM /
405	wmb();
406
407	/*
408	* NOTE: the wmb() above should be enough, but there seems to be a bug
409	* in OMAP's memory barrier implementation, which in some rare cases may
410	* cause the writes not to be observable after wmb().
411	*/
412
413	/ read back to ensure the data is in RAM /
414	readl(addr: &txn->last_pat->next_pa);
415
416	/ write to PAT_DESCR to clear out any pending transaction /
417	dmm_write(dmm, val: `0x0`, reg: reg[PAT_DESCR][engine->id]);
418
419	/ wait for engine ready: /
420	ret = wait_status(engine, DMM_PATSTATUS_READY);
421	if (ret) {
422	ret = -EFAULT;
423	goto cleanup;
424	}
425
426	/ mark whether it is async to denote list management in IRQ handler /
427	engine->async = wait ? false : true;
428	reinit_completion(x: &engine->compl);
429	/ verify that the irq handler sees the 'async' and completion value /
430	smp_mb();
431
432	/ kick reload /
433	dmm_write(dmm, val: engine->refill_pa, reg: reg[PAT_DESCR][engine->id]);
434
435	if (wait) {
436	if (!wait_for_completion_timeout(x: &engine->compl,
437	timeout: msecs_to_jiffies(m: `100`))) {
438	dev_err(dmm->dev, "timed out waiting for done\n");
439	ret = -ETIMEDOUT;
440	goto cleanup;
441	}
442
443	/ Check the engine status before continue /
444	ret = wait_status(engine, DMM_PATSTATUS_READY \|
445	DMM_PATSTATUS_VALID \| DMM_PATSTATUS_DONE);
446	}
447
448	cleanup:
449	/ only place engine back on list if we are done with it /
450	if (ret \|\| wait)
451	release_engine(engine);
452
453	return ret;
454	}
455
456	/*
457	* DMM programming
458	*/
459	static int fill(struct tcm_area area, struct* page **pages,
460	u32 npages, u32 roll, bool wait)
461	{
462	int ret = `0`;
463	struct tcm_area slice, area_s;
464	struct dmm_txn *txn;
465
466	/*
467	* FIXME
468	*
469	* Asynchronous fill does not work reliably, as the driver does not
470	* handle errors in the async code paths. The fill operation may
471	* silently fail, leading to leaking DMM engines, which may eventually
472	* lead to deadlock if we run out of DMM engines.
473	*
474	* For now, always set 'wait' so that we only use sync fills. Async
475	* fills should be fixed, or alternatively we could decide to only
476	* support sync fills and so the whole async code path could be removed.
477	*/
478
479	wait = true;
480
481	txn = dmm_txn_init(dmm: omap_dmm, tcm: area->tcm);
482	if (IS_ERR_OR_NULL(ptr: txn))
483	return -ENOMEM;
484
485	tcm_for_each_slice(slice, *area, area_s) {
486	struct pat_area p_area = {
487	.x0 = slice.p0.x, .y0 = slice.p0.y,
488	.x1 = slice.p1.x, .y1 = slice.p1.y,
489	};
490
491	dmm_txn_append(txn, area: &p_area, pages, npages, roll);
492
493	roll += tcm_sizeof(slice);
494	}
495
496	ret = dmm_txn_commit(txn, wait);
497
498	return ret;
499	}
500
501	/*
502	* Pin/unpin
503	*/
504
505	/ note: slots for which pages[i] == NULL are filled w/ dummy page*
506	*/
507	int tiler_pin(struct tiler_block block, struct* page **pages,
508	u32 npages, u32 roll, bool wait)
509	{
510	int ret;
511
512	ret = fill(area: &block->area, pages, npages, roll, wait);
513
514	if (ret)
515	tiler_unpin(block);
516
517	return ret;
518	}
519
520	int tiler_unpin(struct tiler_block *block)
521	{
522	return fill(area: &block->area, NULL, npages: `0`, roll: `0`, wait: false);
523	}
524
525	/*
526	* Reserve/release
527	*/
528	struct tiler_block tiler_reserve_2d(enum* tiler_fmt fmt, u16 w,
529	u16 h, u16 align)
530	{
531	struct tiler_block *block;
532	u32 min_align = `128`;
533	int ret;
534	unsigned long flags;
535	u32 slot_bytes;
536
537	block = kzalloc(size: sizeof(*block), GFP_KERNEL);
538	if (!block)
539	return ERR_PTR(error: -ENOMEM);
540
541	BUG_ON(!validfmt(fmt));
542
543	/ convert width/height to slots /
544	w = DIV_ROUND_UP(w, geom[fmt].slot_w);
545	h = DIV_ROUND_UP(h, geom[fmt].slot_h);
546
547	/ convert alignment to slots /
548	slot_bytes = geom[fmt].slot_w * geom[fmt].cpp;
549	min_align = max(min_align, slot_bytes);
550	align = (align > min_align) ? ALIGN(align, min_align) : min_align;
551	align /= slot_bytes;
552
553	block->fmt = fmt;
554
555	ret = tcm_reserve_2d(tcm: containers[fmt], width: w, height: h, align, offset: -`1`, slot_bytes,
556	area: &block->area);
557	if (ret) {
558	kfree(objp: block);
559	return ERR_PTR(error: -ENOMEM);
560	}
561
562	/ add to allocation list /
563	spin_lock_irqsave(&list_lock, flags);
564	list_add(new: &block->alloc_node, head: &omap_dmm->alloc_head);
565	spin_unlock_irqrestore(lock: &list_lock, flags);
566
567	return block;
568	}
569
570	struct tiler_block *tiler_reserve_1d(size_t size)
571	{
572	struct tiler_block block = kzalloc(size: sizeof(block), GFP_KERNEL);
573	int num_pages = (size + PAGE_SIZE - `1`) >> PAGE_SHIFT;
574	unsigned long flags;
575
576	if (!block)
577	return ERR_PTR(error: -ENOMEM);
578
579	block->fmt = TILFMT_PAGE;
580
581	if (tcm_reserve_1d(tcm: containers[TILFMT_PAGE], slots: num_pages,
582	area: &block->area)) {
583	kfree(objp: block);
584	return ERR_PTR(error: -ENOMEM);
585	}
586
587	spin_lock_irqsave(&list_lock, flags);
588	list_add(new: &block->alloc_node, head: &omap_dmm->alloc_head);
589	spin_unlock_irqrestore(lock: &list_lock, flags);
590
591	return block;
592	}
593
594	/ note: if you have pin'd pages, you should have already unpin'd first! /
595	int tiler_release(struct tiler_block *block)
596	{
597	int ret = tcm_free(area: &block->area);
598	unsigned long flags;
599
600	if (block->area.tcm)
601	dev_err(omap_dmm->dev, "failed to release block\n");
602
603	spin_lock_irqsave(&list_lock, flags);
604	list_del(entry: &block->alloc_node);
605	spin_unlock_irqrestore(lock: &list_lock, flags);
606
607	kfree(objp: block);
608	return ret;
609	}
610
611	/*
612	* Utils
613	*/
614
615	/ calculate the tiler space address of a pixel in a view orientation...*
616	* below description copied from the display subsystem section of TRM:
617	*
618	* When the TILER is addressed, the bits:
619	* [28:27] = 0x0 for 8-bit tiled
620	* 0x1 for 16-bit tiled
621	* 0x2 for 32-bit tiled
622	* 0x3 for page mode
623	* [31:29] = 0x0 for 0-degree view
624	* 0x1 for 180-degree view + mirroring
625	* 0x2 for 0-degree view + mirroring
626	* 0x3 for 180-degree view
627	* 0x4 for 270-degree view + mirroring
628	* 0x5 for 270-degree view
629	* 0x6 for 90-degree view
630	* 0x7 for 90-degree view + mirroring
631	* Otherwise the bits indicated the corresponding bit address to access
632	* the SDRAM.
633	*/
634	static u32 tiler_get_address(enum tiler_fmt fmt, u32 orient, u32 x, u32 y)
635	{
636	u32 x_bits, y_bits, tmp, x_mask, y_mask, alignment;
637
638	x_bits = CONT_WIDTH_BITS - geom[fmt].x_shft;
639	y_bits = CONT_HEIGHT_BITS - geom[fmt].y_shft;
640	alignment = geom[fmt].x_shft + geom[fmt].y_shft;
641
642	/ validate coordinate /
643	x_mask = MASK(x_bits);
644	y_mask = MASK(y_bits);
645
646	if (x < `0` \|\| x > x_mask \|\| y < `0` \|\| y > y_mask) {
647	DBG("invalid coords: %u < 0 \|\| %u > %u \|\| %u < 0 \|\| %u > %u",
648	x, x, x_mask, y, y, y_mask);
649	return `0`;
650	}
651
652	/ account for mirroring /
653	if (orient & MASK_X_INVERT)
654	x ^= x_mask;
655	if (orient & MASK_Y_INVERT)
656	y ^= y_mask;
657
658	/ get coordinate address /
659	if (orient & MASK_XY_FLIP)
660	tmp = ((x << y_bits) + y);
661	else
662	tmp = ((y << x_bits) + x);
663
664	return TIL_ADDR((tmp << alignment), orient, fmt);
665	}
666
667	dma_addr_t tiler_ssptr(struct tiler_block *block)
668	{
669	BUG_ON(!validfmt(block->fmt));
670
671	return TILVIEW_8BIT + tiler_get_address(fmt: block->fmt, orient: `0`,
672	x: block->area.p0.x * geom[block->fmt].slot_w,
673	y: block->area.p0.y * geom[block->fmt].slot_h);
674	}
675
676	dma_addr_t tiler_tsptr(struct tiler_block *block, u32 orient,
677	u32 x, u32 y)
678	{
679	struct tcm_pt *p = &block->area.p0;
680	BUG_ON(!validfmt(block->fmt));
681
682	return tiler_get_address(fmt: block->fmt, orient,
683	x: (p->x * geom[block->fmt].slot_w) + x,
684	y: (p->y * geom[block->fmt].slot_h) + y);
685	}
686
687	void tiler_align(enum tiler_fmt fmt, u16 w, u16 h)
688	{
689	BUG_ON(!validfmt(fmt));
690	w = round_up(w, geom[fmt].slot_w);
691	h = round_up(h, geom[fmt].slot_h);
692	}
693
694	u32 tiler_stride(enum tiler_fmt fmt, u32 orient)
695	{
696	BUG_ON(!validfmt(fmt));
697
698	if (orient & MASK_XY_FLIP)
699	return `1` << (CONT_HEIGHT_BITS + geom[fmt].x_shft);
700	else
701	return `1` << (CONT_WIDTH_BITS + geom[fmt].y_shft);
702	}
703
704	size_t tiler_size(enum tiler_fmt fmt, u16 w, u16 h)
705	{
706	tiler_align(fmt, w: &w, h: &h);
707	return geom[fmt].cpp * w * h;
708	}
709
710	size_t tiler_vsize(enum tiler_fmt fmt, u16 w, u16 h)
711	{
712	BUG_ON(!validfmt(fmt));
713	return round_up(geom[fmt].cpp * w, PAGE_SIZE) * h;
714	}
715
716	u32 tiler_get_cpu_cache_flags(void)
717	{
718	return omap_dmm->plat_data->cpu_cache_flags;
719	}
720
721	bool dmm_is_available(void)
722	{
723	return omap_dmm ? true : false;
724	}
725
726	static void omap_dmm_remove(struct platform_device *dev)
727	{
728	struct tiler_block block, _block;
729	int i;
730	unsigned long flags;
731
732	if (omap_dmm) {
733	/ Disable all enabled interrupts /
734	dmm_write(dmm: omap_dmm, val: `0x7e7e7e7e`, DMM_PAT_IRQENABLE_CLR);
735	free_irq(omap_dmm->irq, omap_dmm);
736
737	/ free all area regions /
738	spin_lock_irqsave(&list_lock, flags);
739	list_for_each_entry_safe(block, _block, &omap_dmm->alloc_head,
740	alloc_node) {
741	list_del(entry: &block->alloc_node);
742	kfree(objp: block);
743	}
744	spin_unlock_irqrestore(lock: &list_lock, flags);
745
746	for (i = `0`; i < omap_dmm->num_lut; i++)
747	if (omap_dmm->tcm && omap_dmm->tcm[i])
748	omap_dmm->tcm[i]->deinit(omap_dmm->tcm[i]);
749	kfree(objp: omap_dmm->tcm);
750
751	kfree(objp: omap_dmm->engines);
752	if (omap_dmm->refill_va)
753	dma_free_wc(dev: omap_dmm->dev,
754	REFILL_BUFFER_SIZE * omap_dmm->num_engines,
755	cpu_addr: omap_dmm->refill_va, dma_addr: omap_dmm->refill_pa);
756	if (omap_dmm->dummy_page)
757	__free_page(omap_dmm->dummy_page);
758
759	if (omap_dmm->dmm_workaround)
760	dmm_workaround_uninit(dmm: omap_dmm);
761
762	iounmap(addr: omap_dmm->base);
763	kfree(objp: omap_dmm);
764	omap_dmm = NULL;
765	}
766	}
767
768	static int omap_dmm_probe(struct platform_device *dev)
769	{
770	int ret = -EFAULT, i;
771	struct tcm_area area = {`0`};
772	u32 hwinfo, pat_geom;
773	struct resource *mem;
774
775	omap_dmm = kzalloc(size: sizeof(*omap_dmm), GFP_KERNEL);
776	if (!omap_dmm)
777	goto fail;
778
779	/ initialize lists /
780	INIT_LIST_HEAD(list: &omap_dmm->alloc_head);
781	INIT_LIST_HEAD(list: &omap_dmm->idle_head);
782
783	init_waitqueue_head(&omap_dmm->engine_queue);
784
785	if (dev->dev.of_node) {
786	const struct of_device_id *match;
787
788	match = of_match_node(matches: dmm_of_match, node: dev->dev.of_node);
789	if (!match) {
790	dev_err(&dev->dev, "failed to find matching device node\n");
791	ret = -ENODEV;
792	goto fail;
793	}
794
795	omap_dmm->plat_data = match->data;
796	}
797
798	/ lookup hwmod data - base address and irq /
799	mem = platform_get_resource(dev, IORESOURCE_MEM, `0`);
800	if (!mem) {
801	dev_err(&dev->dev, "failed to get base address resource\n");
802	goto fail;
803	}
804
805	omap_dmm->phys_base = mem->start;
806	omap_dmm->base = ioremap(offset: mem->start, SZ_2K);
807
808	if (!omap_dmm->base) {
809	dev_err(&dev->dev, "failed to get dmm base address\n");
810	goto fail;
811	}
812
813	omap_dmm->irq = platform_get_irq(dev, `0`);
814	if (omap_dmm->irq < `0`)
815	goto fail;
816
817	omap_dmm->dev = &dev->dev;
818
819	if (of_machine_is_compatible(compat: "ti,dra7")) {
820	/*
821	* DRA7 Errata i878 says that MPU should not be used to access
822	* RAM and DMM at the same time. As it's not possible to prevent
823	* MPU accessing RAM, we need to access DMM via a proxy.
824	*/
825	if (!dmm_workaround_init(dmm: omap_dmm)) {
826	omap_dmm->dmm_workaround = true;
827	dev_info(&dev->dev,
828	"workaround for errata i878 in use\n");
829	} else {
830	dev_warn(&dev->dev,
831	"failed to initialize work-around for i878\n");
832	}
833	}
834
835	hwinfo = dmm_read(dmm: omap_dmm, DMM_PAT_HWINFO);
836	omap_dmm->num_engines = (hwinfo >> `24`) & `0x1F`;
837	omap_dmm->num_lut = (hwinfo >> `16`) & `0x1F`;
838	omap_dmm->container_width = `256`;
839	omap_dmm->container_height = `128`;
840
841	atomic_set(v: &omap_dmm->engine_counter, i: omap_dmm->num_engines);
842
843	/ read out actual LUT width and height /
844	pat_geom = dmm_read(dmm: omap_dmm, DMM_PAT_GEOMETRY);
845	omap_dmm->lut_width = ((pat_geom >> `16`) & `0xF`) << `5`;
846	omap_dmm->lut_height = ((pat_geom >> `24`) & `0xF`) << `5`;
847
848	/ increment LUT by one if on OMAP5 /
849	/ LUT has twice the height, and is split into a separate container /
850	if (omap_dmm->lut_height != omap_dmm->container_height)
851	omap_dmm->num_lut++;
852
853	/ initialize DMM registers /
854	dmm_write(dmm: omap_dmm, val: `0x88888888`, DMM_PAT_VIEW__0);
855	dmm_write(dmm: omap_dmm, val: `0x88888888`, DMM_PAT_VIEW__1);
856	dmm_write(dmm: omap_dmm, val: `0x80808080`, DMM_PAT_VIEW_MAP__0);
857	dmm_write(dmm: omap_dmm, val: `0x80000000`, DMM_PAT_VIEW_MAP_BASE);
858	dmm_write(dmm: omap_dmm, val: `0x88888888`, DMM_TILER_OR__0);
859	dmm_write(dmm: omap_dmm, val: `0x88888888`, DMM_TILER_OR__1);
860
861	omap_dmm->dummy_page = alloc_page(GFP_KERNEL \| __GFP_DMA32);
862	if (!omap_dmm->dummy_page) {
863	dev_err(&dev->dev, "could not allocate dummy page\n");
864	ret = -ENOMEM;
865	goto fail;
866	}
867
868	/ set dma mask for device /
869	ret = dma_set_coherent_mask(dev: &dev->dev, DMA_BIT_MASK(`32`));
870	if (ret)
871	goto fail;
872
873	omap_dmm->dummy_pa = page_to_phys(omap_dmm->dummy_page);
874
875	/ alloc refill memory /
876	omap_dmm->refill_va = dma_alloc_wc(dev: &dev->dev,
877	REFILL_BUFFER_SIZE * omap_dmm->num_engines,
878	dma_addr: &omap_dmm->refill_pa, GFP_KERNEL);
879	if (!omap_dmm->refill_va) {
880	dev_err(&dev->dev, "could not allocate refill memory\n");
881	ret = -ENOMEM;
882	goto fail;
883	}
884
885	/ alloc engines /
886	omap_dmm->engines = kcalloc(n: omap_dmm->num_engines,
887	size: sizeof(*omap_dmm->engines), GFP_KERNEL);
888	if (!omap_dmm->engines) {
889	ret = -ENOMEM;
890	goto fail;
891	}
892
893	for (i = `0`; i < omap_dmm->num_engines; i++) {
894	omap_dmm->engines[i].id = i;
895	omap_dmm->engines[i].dmm = omap_dmm;
896	omap_dmm->engines[i].refill_va = omap_dmm->refill_va +
897	(REFILL_BUFFER_SIZE * i);
898	omap_dmm->engines[i].refill_pa = omap_dmm->refill_pa +
899	(REFILL_BUFFER_SIZE * i);
900	init_completion(x: &omap_dmm->engines[i].compl);
901
902	list_add(new: &omap_dmm->engines[i].idle_node, head: &omap_dmm->idle_head);
903	}
904
905	omap_dmm->tcm = kcalloc(n: omap_dmm->num_lut, size: sizeof(*omap_dmm->tcm),
906	GFP_KERNEL);
907	if (!omap_dmm->tcm) {
908	ret = -ENOMEM;
909	goto fail;
910	}
911
912	/ init containers /
913	/ Each LUT is associated with a TCM (container manager). We use the*
914	lut_id to denote the lut_id used to identify the correct LUT for
915	programming during reill operations /*
916	for (i = `0`; i < omap_dmm->num_lut; i++) {
917	omap_dmm->tcm[i] = sita_init(width: omap_dmm->container_width,
918	height: omap_dmm->container_height);
919
920	if (!omap_dmm->tcm[i]) {
921	dev_err(&dev->dev, "failed to allocate container\n");
922	ret = -ENOMEM;
923	goto fail;
924	}
925
926	omap_dmm->tcm[i]->lut_id = i;
927	}
928
929	/ assign access mode containers to applicable tcm container /
930	/ OMAP 4 has 1 container for all 4 views /
931	/ OMAP 5 has 2 containers, 1 for 2D and 1 for 1D /
932	containers[TILFMT_8BIT] = omap_dmm->tcm[`0`];
933	containers[TILFMT_16BIT] = omap_dmm->tcm[`0`];
934	containers[TILFMT_32BIT] = omap_dmm->tcm[`0`];
935
936	if (omap_dmm->container_height != omap_dmm->lut_height) {
937	/ second LUT is used for PAGE mode. Programming must use*
938	y offset that is added to all y coordinates. LUT id is still
939	0, because it is the same LUT, just the upper 128 lines /*
940	containers[TILFMT_PAGE] = omap_dmm->tcm[`1`];
941	omap_dmm->tcm[`1`]->y_offset = OMAP5_LUT_OFFSET;
942	omap_dmm->tcm[`1`]->lut_id = `0`;
943	} else {
944	containers[TILFMT_PAGE] = omap_dmm->tcm[`0`];
945	}
946
947	area = (struct tcm_area) {
948	.tcm = NULL,
949	.p1.x = omap_dmm->container_width - `1`,
950	.p1.y = omap_dmm->container_height - `1`,
951	};
952
953	ret = request_irq(irq: omap_dmm->irq, handler: omap_dmm_irq_handler, IRQF_SHARED,
954	name: "omap_dmm_irq_handler", dev: omap_dmm);
955
956	if (ret) {
957	dev_err(&dev->dev, "couldn't register IRQ %d, error %d\n",
958	omap_dmm->irq, ret);
959	omap_dmm->irq = -`1`;
960	goto fail;
961	}
962
963	/ Enable all interrupts for each refill engine except*
964	* ERR_LUT_MISS<n> (which is just advisory, and we don't care
965	* about because we want to be able to refill live scanout
966	* buffers for accelerated pan/scroll) and FILL_DSC<n> which
967	* we just generally don't care about.
968	*/
969	dmm_write(dmm: omap_dmm, val: `0x7e7e7e7e`, DMM_PAT_IRQENABLE_SET);
970
971	/ initialize all LUTs to dummy page entries /
972	for (i = `0`; i < omap_dmm->num_lut; i++) {
973	area.tcm = omap_dmm->tcm[i];
974	if (fill(area: &area, NULL, npages: `0`, roll: `0`, wait: true))
975	dev_err(omap_dmm->dev, "refill failed");
976	}
977
978	dev_info(omap_dmm->dev, "initialized all PAT entries\n");
979
980	return `0`;
981
982	fail:
983	omap_dmm_remove(dev);
984	return ret;
985	}
986
987	/*
988	* debugfs support
989	*/
990
991	#ifdef CONFIG_DEBUG_FS
992
993	static const char *alphabet = "abcdefghijklmnopqrstuvwxyz"
994	"ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
995	static const char *special = ".,:;'\"`~!^-+";
996
997	static void fill_map(char *map, int* xdiv, int ydiv, struct tcm_area *a,
998	char c, bool ovw)
999	{
1000	int x, y;
1001	for (y = a->p0.y / ydiv; y <= a->p1.y / ydiv; y++)
1002	for (x = a->p0.x / xdiv; x <= a->p1.x / xdiv; x++)
1003	if (map[y][x] == `' '` \|\| ovw)
1004	map[y][x] = c;
1005	}
1006
1007	static void fill_map_pt(char *map, int* xdiv, int ydiv, struct tcm_pt *p,
1008	char c)
1009	{
1010	map[p->y / ydiv][p->x / xdiv] = c;
1011	}
1012
1013	static char read_map_pt(char *map, int* xdiv, int ydiv, struct tcm_pt *p)
1014	{
1015	return map[p->y / ydiv][p->x / xdiv];
1016	}
1017
/*
 * Number of map columns covered by the inclusive range [@x0, @x1] when
 * every column stands for @xdiv container columns.
 */
static int map_width(int xdiv, int x0, int x1)
{
	int first_col = x0 / xdiv;
	int last_col = x1 / xdiv;

	return last_col - first_col + 1;
}
1022
/*
 * Write the label @nice, centred, into row @yd of the map between columns
 * @x0 and @x1 (container coordinates, scaled by @xdiv).
 *
 * Nothing is written when the label is wider than the range. The width
 * difference is computed as a signed int: mixing the int width with the
 * unsigned strlen() result would turn a negative difference into a huge
 * positive offset on 32-bit builds and write far outside the row.
 */
static void text_map(char **map, int xdiv, char *nice, int yd, int x0, int x1)
{
	char *p = map[yd] + (x0 / xdiv);
	int slack = map_width(xdiv, x0, x1) - (int)strlen(nice);

	/* label does not fit: leave the map untouched */
	if (slack < 0)
		return;

	p += slack / 2;
	while (*nice)
		*p++ = *nice++;
}
1033
1034	static void map_1d_info(char *map, int* xdiv, int ydiv, char *nice,
1035	struct tcm_area *a)
1036	{
1037	sprintf(buf: nice, fmt: "%dK", tcm_sizeof(a) `4`);
1038	if (a->p0.y + `1` < a->p1.y) {
1039	text_map(map, xdiv, nice, yd: (a->p0.y + a->p1.y) / `2` / ydiv, x0: `0`,
1040	x1: `256` - `1`);
1041	} else if (a->p0.y < a->p1.y) {
1042	if (strlen(nice) < map_width(xdiv, x0: a->p0.x, x1: `256` - `1`))
1043	text_map(map, xdiv, nice, yd: a->p0.y / ydiv,
1044	x0: a->p0.x + xdiv, x1: `256` - `1`);
1045	else if (strlen(nice) < map_width(xdiv, x0: `0`, x1: a->p1.x))
1046	text_map(map, xdiv, nice, yd: a->p1.y / ydiv,
1047	x0: `0`, x1: a->p1.y - xdiv);
1048	} else if (strlen(nice) + `1` < map_width(xdiv, x0: a->p0.x, x1: a->p1.x)) {
1049	text_map(map, xdiv, nice, yd: a->p0.y / ydiv, x0: a->p0.x, x1: a->p1.x);
1050	}
1051	}
1052
1053	static void map_2d_info(char *map, int* xdiv, int ydiv, char *nice,
1054	struct tcm_area *a)
1055	{
1056	sprintf(buf: nice, fmt: "(%d%d)", tcm_awidth(a), tcm_aheight(*a));
1057	if (strlen(nice) + `1` < map_width(xdiv, x0: a->p0.x, x1: a->p1.x))
1058	text_map(map, xdiv, nice, yd: (a->p0.y + a->p1.y) / `2` / ydiv,
1059	x0: a->p0.x, x1: a->p1.x);
1060	}
1061
1062	int tiler_map_show(struct seq_file s, void* *arg)
1063	{
1064	int xdiv = `2`, ydiv = `1`;
1065	char *map = NULL, global_map;
1066	struct tiler_block *block;
1067	struct tcm_area a, p;
1068	int i;
1069	const char *m2d = alphabet;
1070	const char *a2d = special;
1071	const char m2dp = m2d, a2dp = a2d;
1072	char nice[`128`];
1073	int h_adj;
1074	int w_adj;
1075	unsigned long flags;
1076	int lut_idx;
1077
1078
1079	if (!omap_dmm) {
1080	/ early return if dmm/tiler device is not initialized /
1081	return `0`;
1082	}
1083
1084	h_adj = omap_dmm->container_height / ydiv;
1085	w_adj = omap_dmm->container_width / xdiv;
1086
1087	map = kmalloc_array(n: h_adj, size: sizeof(*map), GFP_KERNEL);
1088	global_map = kmalloc_array(n: w_adj + `1`, size: h_adj, GFP_KERNEL);
1089
1090	if (!map \|\| !global_map)
1091	goto error;
1092
1093	for (lut_idx = `0`; lut_idx < omap_dmm->num_lut; lut_idx++) {
1094	memset(map, `0`, h_adj * sizeof(*map));
1095	memset(global_map, `' '`, (w_adj + `1`) * h_adj);
1096
1097	for (i = `0`; i < omap_dmm->container_height; i++) {
1098	map[i] = global_map + i * (w_adj + `1`);
1099	map[i][w_adj] = `0`;
1100	}
1101
1102	spin_lock_irqsave(&list_lock, flags);
1103
1104	list_for_each_entry(block, &omap_dmm->alloc_head, alloc_node) {
1105	if (block->area.tcm == omap_dmm->tcm[lut_idx]) {
1106	if (block->fmt != TILFMT_PAGE) {
1107	fill_map(map, xdiv, ydiv, a: &block->area,
1108	c: *m2dp, ovw: true);
1109	if (!*++a2dp)
1110	a2dp = a2d;
1111	if (!*++m2dp)
1112	m2dp = m2d;
1113	map_2d_info(map, xdiv, ydiv, nice,
1114	a: &block->area);
1115	} else {
1116	bool start = read_map_pt(map, xdiv,
1117	ydiv, p: &block->area.p0) == `' '`;
1118	bool end = read_map_pt(map, xdiv, ydiv,
1119	p: &block->area.p1) == `' '`;
1120
1121	tcm_for_each_slice(a, block->area, p)
1122	fill_map(map, xdiv, ydiv, a: &a,
1123	c: `'='`, ovw: true);
1124	fill_map_pt(map, xdiv, ydiv,
1125	p: &block->area.p0,
1126	c: start ? `'<'` : `'X'`);
1127	fill_map_pt(map, xdiv, ydiv,
1128	p: &block->area.p1,
1129	c: end ? `'>'` : `'X'`);
1130	map_1d_info(map, xdiv, ydiv, nice,
1131	a: &block->area);
1132	}
1133	}
1134	}
1135
1136	spin_unlock_irqrestore(lock: &list_lock, flags);
1137
1138	if (s) {
1139	seq_printf(m: s, fmt: "CONTAINER %d DUMP BEGIN\n", lut_idx);
1140	for (i = `0`; i < `128`; i++)
1141	seq_printf(m: s, fmt: "%03d:%s\n", i, map[i]);
1142	seq_printf(m: s, fmt: "CONTAINER %d DUMP END\n", lut_idx);
1143	} else {
1144	dev_dbg(omap_dmm->dev, "CONTAINER %d DUMP BEGIN\n",
1145	lut_idx);
1146	for (i = `0`; i < `128`; i++)
1147	dev_dbg(omap_dmm->dev, "%03d:%s\n", i, map[i]);
1148	dev_dbg(omap_dmm->dev, "CONTAINER %d DUMP END\n",
1149	lut_idx);
1150	}
1151	}
1152
1153	error:
1154	kfree(objp: map);
1155	kfree(objp: global_map);
1156
1157	return `0`;
1158	}
1159	#endif
1160
#ifdef CONFIG_PM_SLEEP
/*
 * Resume callback: reprogram every LUT with dummy page entries, as probe
 * does. A failing fill is logged but does not fail the resume.
 *
 * Returns -ENODEV when the DMM instance does not exist, 0 otherwise.
 */
static int omap_dmm_resume(struct device *dev)
{
	struct tcm_area whole;
	int lut;

	if (!omap_dmm)
		return -ENODEV;

	/* one area spanning the full container; .tcm is set per LUT below */
	whole = (struct tcm_area) {
		.tcm = NULL,
		.p1.x = omap_dmm->container_width - 1,
		.p1.y = omap_dmm->container_height - 1,
	};

	/* initialize all LUTs to dummy page entries */
	for (lut = 0; lut < omap_dmm->num_lut; lut++) {
		whole.tcm = omap_dmm->tcm[lut];
		if (fill(&whole, NULL, 0, 0, true))
			dev_err(dev, "refill failed");
	}

	return 0;
}
#endif

/* only a resume handler is provided; there is no suspend handler */
static SIMPLE_DEV_PM_OPS(omap_dmm_pm_ops, NULL, omap_dmm_resume);
1188
#if defined(CONFIG_OF)
/* per-SoC data: CPU cache flags reported by tiler_get_cpu_cache_flags() */
static const struct dmm_platform_data dmm_omap4_platform_data = {
	.cpu_cache_flags = OMAP_BO_WC,
};

static const struct dmm_platform_data dmm_omap5_platform_data = {
	.cpu_cache_flags = OMAP_BO_UNCACHED,
};

/* device-tree match table; .data is picked up by omap_dmm_probe() */
static const struct of_device_id dmm_of_match[] = {
	{
		.compatible = "ti,omap4-dmm",
		.data = &dmm_omap4_platform_data,
	},
	{
		.compatible = "ti,omap5-dmm",
		.data = &dmm_omap5_platform_data,
	},
	{},
};
#endif
1210
1211	struct platform_driver omap_dmm_driver = {
1212	.probe = omap_dmm_probe,
1213	.remove_new = omap_dmm_remove,
1214	.driver = {
1215	.owner = THIS_MODULE,
1216	.name = DMM_DRIVER_NAME,
1217	.of_match_table = of_match_ptr(dmm_of_match),
1218	.pm = &omap_dmm_pm_ops,
1219	},
1220	};
1221
1222	MODULE_LICENSE("GPL v2");
1223	MODULE_AUTHOR("Andy Gross <andy.gross@ti.com>");
1224	MODULE_DESCRIPTION("OMAP DMM/Tiler Driver");
1225

source code of linux/drivers/gpu/drm/omapdrm/omap_dmm_tiler.c