1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * Tegra host1x Channel |
4 | * |
5 | * Copyright (c) 2010-2013, NVIDIA Corporation. |
6 | */ |
7 | |
8 | #include <linux/host1x.h> |
9 | #include <linux/iommu.h> |
10 | #include <linux/slab.h> |
11 | |
12 | #include <trace/events/host1x.h> |
13 | |
14 | #include "../channel.h" |
15 | #include "../dev.h" |
16 | #include "../intr.h" |
17 | #include "../job.h" |
18 | |
19 | #define TRACE_MAX_LENGTH 128U |
20 | |
21 | static void trace_write_gather(struct host1x_cdma *cdma, struct host1x_bo *bo, |
22 | u32 offset, u32 words) |
23 | { |
24 | struct device *dev = cdma_to_channel(cdma)->dev; |
25 | void *mem = NULL; |
26 | |
27 | if (host1x_debug_trace_cmdbuf) |
28 | mem = host1x_bo_mmap(bo); |
29 | |
30 | if (mem) { |
31 | u32 i; |
32 | /* |
33 | * Write in batches of 128 as there seems to be a limit |
34 | * of how much you can output to ftrace at once. |
35 | */ |
36 | for (i = 0; i < words; i += TRACE_MAX_LENGTH) { |
37 | u32 num_words = min(words - i, TRACE_MAX_LENGTH); |
38 | |
39 | offset += i * sizeof(u32); |
40 | |
41 | trace_host1x_cdma_push_gather(name: dev_name(dev), bo, |
42 | words: num_words, offset, |
43 | cmdbuf: mem); |
44 | } |
45 | |
46 | host1x_bo_munmap(bo, addr: mem); |
47 | } |
48 | } |
49 | |
50 | static void submit_wait(struct host1x_job *job, u32 id, u32 threshold, |
51 | u32 next_class) |
52 | { |
53 | struct host1x_cdma *cdma = &job->channel->cdma; |
54 | |
55 | #if HOST1X_HW >= 6 |
56 | u32 stream_id; |
57 | |
58 | /* |
59 | * If a memory context has been set, use it. Otherwise |
60 | * (if context isolation is disabled) use the engine's |
61 | * firmware stream ID. |
62 | */ |
63 | if (job->memory_context) |
64 | stream_id = job->memory_context->stream_id; |
65 | else |
66 | stream_id = job->engine_fallback_streamid; |
67 | |
68 | host1x_cdma_push_wide(cdma, |
69 | host1x_opcode_setclass( |
70 | HOST1X_CLASS_HOST1X, |
71 | HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, |
72 | /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ |
73 | BIT(0) | BIT(2) |
74 | ), |
75 | threshold, |
76 | id, |
77 | HOST1X_OPCODE_NOP |
78 | ); |
79 | host1x_cdma_push_wide(&job->channel->cdma, |
80 | host1x_opcode_setclass(job->class, 0, 0), |
81 | host1x_opcode_setpayload(stream_id), |
82 | host1x_opcode_setstreamid(job->engine_streamid_offset / 4), |
83 | HOST1X_OPCODE_NOP); |
84 | #elif HOST1X_HW >= 2 |
85 | host1x_cdma_push_wide(cdma, |
86 | host1x_opcode_setclass( |
87 | HOST1X_CLASS_HOST1X, |
88 | HOST1X_UCLASS_LOAD_SYNCPT_PAYLOAD_32, |
89 | /* WAIT_SYNCPT_32 is at SYNCPT_PAYLOAD_32+2 */ |
90 | BIT(0) | BIT(2) |
91 | ), |
92 | threshold, |
93 | id, |
94 | host1x_opcode_setclass(next_class, 0, 0) |
95 | ); |
96 | #else |
97 | /* TODO add waitchk or use waitbases or other mitigation */ |
98 | host1x_cdma_push(cdma, |
99 | op1: host1x_opcode_setclass( |
100 | HOST1X_CLASS_HOST1X, |
101 | host1x_uclass_wait_syncpt_r(), |
102 | BIT(0) |
103 | ), |
104 | op2: host1x_class_host_wait_syncpt(id, threshold) |
105 | ); |
106 | host1x_cdma_push(cdma, |
107 | op1: host1x_opcode_setclass(next_class, 0, 0), |
108 | op2: HOST1X_OPCODE_NOP |
109 | ); |
110 | #endif |
111 | } |
112 | |
113 | static void submit_gathers(struct host1x_job *job, u32 job_syncpt_base) |
114 | { |
115 | struct host1x_cdma *cdma = &job->channel->cdma; |
116 | #if HOST1X_HW < 6 |
117 | struct device *dev = job->channel->dev; |
118 | #endif |
119 | unsigned int i; |
120 | u32 threshold; |
121 | |
122 | for (i = 0; i < job->num_cmds; i++) { |
123 | struct host1x_job_cmd *cmd = &job->cmds[i]; |
124 | |
125 | if (cmd->is_wait) { |
126 | if (cmd->wait.relative) |
127 | threshold = job_syncpt_base + cmd->wait.threshold; |
128 | else |
129 | threshold = cmd->wait.threshold; |
130 | |
131 | submit_wait(job, id: cmd->wait.id, threshold, next_class: cmd->wait.next_class); |
132 | } else { |
133 | struct host1x_job_gather *g = &cmd->gather; |
134 | |
135 | dma_addr_t addr = g->base + g->offset; |
136 | u32 op2, op3; |
137 | |
138 | op2 = lower_32_bits(addr); |
139 | op3 = upper_32_bits(addr); |
140 | |
141 | trace_write_gather(cdma, bo: g->bo, offset: g->offset, words: g->words); |
142 | |
143 | if (op3 != 0) { |
144 | #if HOST1X_HW >= 6 |
145 | u32 op1 = host1x_opcode_gather_wide(g->words); |
146 | u32 op4 = HOST1X_OPCODE_NOP; |
147 | |
148 | host1x_cdma_push_wide(cdma, op1, op2, op3, op4); |
149 | #else |
150 | dev_err(dev, "invalid gather for push buffer %pad\n" , |
151 | &addr); |
152 | continue; |
153 | #endif |
154 | } else { |
155 | u32 op1 = host1x_opcode_gather(g->words); |
156 | |
157 | host1x_cdma_push(cdma, op1, op2); |
158 | } |
159 | } |
160 | } |
161 | } |
162 | |
163 | static inline void synchronize_syncpt_base(struct host1x_job *job) |
164 | { |
165 | struct host1x_syncpt *sp = job->syncpt; |
166 | unsigned int id; |
167 | u32 value; |
168 | |
169 | value = host1x_syncpt_read_max(sp); |
170 | id = sp->base->id; |
171 | |
172 | host1x_cdma_push(cdma: &job->channel->cdma, |
173 | op1: host1x_opcode_setclass(HOST1X_CLASS_HOST1X, |
174 | HOST1X_UCLASS_LOAD_SYNCPT_BASE, 1), |
175 | op2: HOST1X_UCLASS_LOAD_SYNCPT_BASE_BASE_INDX_F(id) | |
176 | HOST1X_UCLASS_LOAD_SYNCPT_BASE_VALUE_F(value)); |
177 | } |
178 | |
/*
 * host1x_channel_set_streamid() - program the channel's SMMU stream ID
 * @channel: channel to configure
 *
 * On HOST1X_HW >= 6 the stream ID is looked up for the parent of the
 * channel's device. TEGRA_STREAM_ID_BYPASS is written when that lookup
 * reports no stream ID. This is a no-op on older hardware.
 */
static void host1x_channel_set_streamid(struct host1x_channel *channel)
{
#if HOST1X_HW >= 6
	struct device *parent = channel->dev->parent;
	u32 sid;

	if (!tegra_dev_iommu_get_stream_id(parent, &sid))
		sid = TEGRA_STREAM_ID_BYPASS;

	host1x_ch_writel(channel, sid, HOST1X_CHANNEL_SMMU_STREAMID);
#endif
}
190 | |
/*
 * host1x_enable_gather_filter() - turn on the kernel gather filter bit
 * @ch: channel to enable the filter for
 *
 * HOST1X_HW >= 6: read-modify-write of the channel's bit in the hypervisor
 * KERNEL_FILTER_GBUFFER register bank (32 channels per register). Skipped
 * when the hypervisor registers are not available (host->hv_regs is NULL).
 * HOST1X_HW >= 4: written through the per-channel CHANNELCTRL register.
 * Older hardware: no-op.
 */
static void host1x_enable_gather_filter(struct host1x_channel *ch)
{
#if HOST1X_HW >= 6
	struct host1x *host = dev_get_drvdata(ch->dev->parent);
	u32 reg = HOST1X_HV_CH_KERNEL_FILTER_GBUFFER(ch->id / 32);
	u32 val;

	if (!host->hv_regs)
		return;

	val = host1x_hypervisor_readl(host, reg);
	val |= BIT(ch->id % 32);
	host1x_hypervisor_writel(host, val, reg);
#elif HOST1X_HW >= 4
	host1x_ch_writel(ch,
			 HOST1X_CHANNEL_CHANNELCTRL_KERNEL_FILTER_GBUFFER(1),
			 HOST1X_CHANNEL_CHANNELCTRL);
#endif
}
211 | |
212 | static void channel_program_cdma(struct host1x_job *job) |
213 | { |
214 | struct host1x_cdma *cdma = &job->channel->cdma; |
215 | struct host1x_syncpt *sp = job->syncpt; |
216 | |
217 | #if HOST1X_HW >= 6 |
218 | u32 fence; |
219 | |
220 | /* Enter engine class with invalid stream ID. */ |
221 | host1x_cdma_push_wide(cdma, |
222 | host1x_opcode_acquire_mlock(job->class), |
223 | host1x_opcode_setclass(job->class, 0, 0), |
224 | host1x_opcode_setpayload(0), |
225 | host1x_opcode_setstreamid(job->engine_streamid_offset / 4)); |
226 | |
227 | /* Before switching stream ID to real stream ID, ensure engine is idle. */ |
228 | fence = host1x_syncpt_incr_max(sp, 1); |
229 | host1x_cdma_push(&job->channel->cdma, |
230 | host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), |
231 | HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | |
232 | HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); |
233 | submit_wait(job, job->syncpt->id, fence, job->class); |
234 | |
235 | /* Submit work. */ |
236 | job->syncpt_end = host1x_syncpt_incr_max(sp, job->syncpt_incrs); |
237 | submit_gathers(job, job->syncpt_end - job->syncpt_incrs); |
238 | |
239 | /* Before releasing MLOCK, ensure engine is idle again. */ |
240 | fence = host1x_syncpt_incr_max(sp, 1); |
241 | host1x_cdma_push(&job->channel->cdma, |
242 | host1x_opcode_nonincr(HOST1X_UCLASS_INCR_SYNCPT, 1), |
243 | HOST1X_UCLASS_INCR_SYNCPT_INDX_F(job->syncpt->id) | |
244 | HOST1X_UCLASS_INCR_SYNCPT_COND_F(4)); |
245 | submit_wait(job, job->syncpt->id, fence, job->class); |
246 | |
247 | /* Release MLOCK. */ |
248 | host1x_cdma_push(cdma, |
249 | HOST1X_OPCODE_NOP, host1x_opcode_release_mlock(job->class)); |
250 | #else |
251 | if (job->serialize) { |
252 | /* |
253 | * Force serialization by inserting a host wait for the |
254 | * previous job to finish before this one can commence. |
255 | */ |
256 | host1x_cdma_push(cdma, |
257 | op1: host1x_opcode_setclass(HOST1X_CLASS_HOST1X, |
258 | host1x_uclass_wait_syncpt_r(), 1), |
259 | op2: host1x_class_host_wait_syncpt(job->syncpt->id, |
260 | host1x_syncpt_read_max(sp))); |
261 | } |
262 | |
263 | /* Synchronize base register to allow using it for relative waiting */ |
264 | if (sp->base) |
265 | synchronize_syncpt_base(job); |
266 | |
267 | /* add a setclass for modules that require it */ |
268 | if (job->class) |
269 | host1x_cdma_push(cdma, |
270 | op1: host1x_opcode_setclass(job->class, 0, 0), |
271 | op2: HOST1X_OPCODE_NOP); |
272 | |
273 | job->syncpt_end = host1x_syncpt_incr_max(sp, incrs: job->syncpt_incrs); |
274 | |
275 | submit_gathers(job, job_syncpt_base: job->syncpt_end - job->syncpt_incrs); |
276 | #endif |
277 | } |
278 | |
279 | static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb) |
280 | { |
281 | struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb); |
282 | |
283 | /* Schedules CDMA update. */ |
284 | host1x_cdma_update(cdma: &job->channel->cdma); |
285 | } |
286 | |
287 | static int channel_submit(struct host1x_job *job) |
288 | { |
289 | struct host1x_channel *ch = job->channel; |
290 | struct host1x_syncpt *sp = job->syncpt; |
291 | u32 prev_max = 0; |
292 | u32 syncval; |
293 | int err; |
294 | struct host1x *host = dev_get_drvdata(dev: ch->dev->parent); |
295 | |
296 | trace_host1x_channel_submit(name: dev_name(dev: ch->dev), |
297 | cmdbufs: job->num_cmds, relocs: job->num_relocs, |
298 | syncpt_id: job->syncpt->id, syncpt_incrs: job->syncpt_incrs); |
299 | |
300 | /* before error checks, return current max */ |
301 | prev_max = job->syncpt_end = host1x_syncpt_read_max(sp); |
302 | |
303 | /* get submit lock */ |
304 | err = mutex_lock_interruptible(&ch->submitlock); |
305 | if (err) |
306 | return err; |
307 | |
308 | host1x_channel_set_streamid(channel: ch); |
309 | host1x_enable_gather_filter(ch); |
310 | host1x_hw_syncpt_assign_to_channel(host, sp, ch); |
311 | |
312 | /* begin a CDMA submit */ |
313 | err = host1x_cdma_begin(cdma: &ch->cdma, job); |
314 | if (err) { |
315 | mutex_unlock(lock: &ch->submitlock); |
316 | return err; |
317 | } |
318 | |
319 | channel_program_cdma(job); |
320 | syncval = host1x_syncpt_read_max(sp); |
321 | |
322 | /* |
323 | * Create fence before submitting job to HW to avoid job completing |
324 | * before the fence is set up. |
325 | */ |
326 | job->fence = host1x_fence_create(sp, threshold: syncval, timeout: true); |
327 | if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence" )) { |
328 | job->fence = NULL; |
329 | } else { |
330 | err = dma_fence_add_callback(fence: job->fence, cb: &job->fence_cb, |
331 | func: job_complete_callback); |
332 | } |
333 | |
334 | /* end CDMA submit & stash pinned hMems into sync queue */ |
335 | host1x_cdma_end(cdma: &ch->cdma, job); |
336 | |
337 | trace_host1x_channel_submitted(name: dev_name(dev: ch->dev), syncpt_base: prev_max, syncpt_max: syncval); |
338 | |
339 | mutex_unlock(lock: &ch->submitlock); |
340 | |
341 | if (err == -ENOENT) |
342 | host1x_cdma_update(cdma: &ch->cdma); |
343 | else |
344 | WARN(err, "Failed to set submit complete interrupt" ); |
345 | |
346 | return 0; |
347 | } |
348 | |
349 | static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev, |
350 | unsigned int index) |
351 | { |
352 | #if HOST1X_HW < 6 |
353 | ch->regs = dev->regs + index * 0x4000; |
354 | #else |
355 | ch->regs = dev->regs + index * 0x100; |
356 | #endif |
357 | return 0; |
358 | } |
359 | |
360 | static const struct host1x_channel_ops host1x_channel_ops = { |
361 | .init = host1x_channel_init, |
362 | .submit = channel_submit, |
363 | }; |
364 | |