// SPDX-License-Identifier: GPL-2.0 OR MIT
/**************************************************************************
 *
 * Copyright 2019-2023 VMware, Inc., Palo Alto, CA., USA
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
#include "vmwgfx_bo.h"
#include "vmwgfx_drv.h"

/*
 * Different methods for tracking dirty:
 * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
 * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write-
 * accesses in the VM mkwrite() callback
 */
enum vmw_bo_dirty_method {
	VMW_BO_DIRTY_PAGETABLE,
	VMW_BO_DIRTY_MKWRITE,
};

/*
 * A scan that finds no dirtied pages triggers a transition to the _MKWRITE
 * method; similarly, a scan where more than a certain percentage of the
 * pages are dirty triggers a transition to the _PAGETABLE method. How many
 * consecutive triggers should we wait for before changing method?
 */
#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2

/* Percentage of dirty pages that triggers a transition to the _PAGETABLE method */
#define VMW_DIRTY_PERCENTAGE 10
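
/*
 * With the values above, a method change happens on the third consecutive
 * trigger: for instance, three pagetable scans in a row that find no dirty
 * pages switch the buffer object to _MKWRITE, and three mkwrite scans in a
 * row that each write-protect more than 10% of the pages switch it back.
 */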

/**
 * struct vmw_bo_dirty - Dirty information for buffer objects
 * @start: First currently dirty bit
 * @end: Last currently dirty bit + 1
 * @method: The currently used dirty method
 * @change_count: Number of consecutive method change triggers
 * @ref_count: Reference count for this structure
 * @bitmap_size: The size of the bitmap in bits. Typically equal to the
 * number of pages in the bo.
 * @bitmap: A bitmap where each bit represents a page. A set bit means a
 * dirty page.
 */
struct vmw_bo_dirty {
	unsigned long start;
	unsigned long end;
	enum vmw_bo_dirty_method method;
	unsigned int change_count;
	unsigned int ref_count;
	unsigned long bitmap_size;
	unsigned long bitmap[];
};
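
/*
 * The bitmap is a flexible array member trailing the struct. As an
 * illustration (not a call path in this file), a 16 MiB buffer object
 * with 4 KiB pages needs a 4096-bit bitmap, so on a 64-bit system the
 * allocation in vmw_bo_dirty_add() comes out to
 * sizeof(struct vmw_bo_dirty) + 64 * sizeof(long) bytes.
 */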

/**
 * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits
 * @vbo: The buffer object to scan
 *
 * Scans the pagetable for dirty bits, clears those bits and updates the
 * dirty structure with the results. This function may change the
 * dirty-tracking method.
 */
static void vmw_bo_dirty_scan_pagetable(struct vmw_bo *vbo)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;
	pgoff_t offset = drm_vma_node_start(&vbo->tbo.base.vma_node);
	struct address_space *mapping = vbo->tbo.bdev->dev_mapping;
	pgoff_t num_marked;

	num_marked = clean_record_shared_mapping_range
		(mapping,
		 offset, dirty->bitmap_size,
		 offset, &dirty->bitmap[0],
		 &dirty->start, &dirty->end);
	if (num_marked == 0)
		dirty->change_count++;
	else
		dirty->change_count = 0;

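	/*
	 * Repeated scans without a single dirty page suggest the buffer
	 * is mostly read: pagetable scanning is then wasted effort, so
	 * write-protect the range and fall back to recording writes in
	 * the mkwrite() callback instead.
	 */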
	if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
		dirty->change_count = 0;
		dirty->method = VMW_BO_DIRTY_MKWRITE;
		wp_shared_mapping_range(mapping,
					offset, dirty->bitmap_size);
		clean_record_shared_mapping_range(mapping,
						  offset, dirty->bitmap_size,
						  offset, &dirty->bitmap[0],
						  &dirty->start, &dirty->end);
	}
}

/**
 * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method
 * @vbo: The buffer object to scan
 *
 * Write-protect pages written to so that subsequent write accesses will
 * trigger a call to mkwrite.
 *
 * This function may change the dirty-tracking method.
 */
static void vmw_bo_dirty_scan_mkwrite(struct vmw_bo *vbo)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;
	unsigned long offset = drm_vma_node_start(&vbo->tbo.base.vma_node);
	struct address_space *mapping = vbo->tbo.bdev->dev_mapping;
	pgoff_t num_marked;

	if (dirty->end <= dirty->start)
		return;

	num_marked = wp_shared_mapping_range(mapping,
					     dirty->start + offset,
					     dirty->end - dirty->start);

	if (100UL * num_marked / dirty->bitmap_size >
	    VMW_DIRTY_PERCENTAGE)
		dirty->change_count++;
	else
		dirty->change_count = 0;

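	/*
	 * A consistently high write rate means we pay for a write fault
	 * on nearly every page anyway, so switch back to hardware
	 * dirty-bit scanning. The currently recorded dirty range is
	 * preserved in the bitmap across the transition.
	 */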
	if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
		pgoff_t start = 0;
		pgoff_t end = dirty->bitmap_size;

		dirty->method = VMW_BO_DIRTY_PAGETABLE;
		clean_record_shared_mapping_range(mapping, offset, end, offset,
						  &dirty->bitmap[0],
						  &start, &end);
		bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size);
		if (dirty->start < dirty->end)
			bitmap_set(&dirty->bitmap[0], dirty->start,
				   dirty->end - dirty->start);
		dirty->change_count = 0;
	}
}

/**
 * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
 * tracking structure
 * @vbo: The buffer object to scan
 *
 * This function may change the dirty tracking method.
 */
void vmw_bo_dirty_scan(struct vmw_bo *vbo)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;

	if (dirty->method == VMW_BO_DIRTY_PAGETABLE)
		vmw_bo_dirty_scan_pagetable(vbo);
	else
		vmw_bo_dirty_scan_mkwrite(vbo);
}

/**
 * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
 * an unmap_mapping_range operation.
 * @vbo: The buffer object.
 * @start: First page of the range within the buffer object.
 * @end: Last page of the range within the buffer object + 1.
 *
 * If we're using the _PAGETABLE scan method, we may leak dirty pages
 * when calling unmap_mapping_range(). This function makes sure we pick
 * up all dirty pages.
 */
static void vmw_bo_dirty_pre_unmap(struct vmw_bo *vbo,
				   pgoff_t start, pgoff_t end)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;
	unsigned long offset = drm_vma_node_start(&vbo->tbo.base.vma_node);
	struct address_space *mapping = vbo->tbo.bdev->dev_mapping;

	if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
		return;

	wp_shared_mapping_range(mapping, start + offset, end - start);
	clean_record_shared_mapping_range(mapping, start + offset,
					  end - start, offset,
					  &dirty->bitmap[0], &dirty->start,
					  &dirty->end);
}

/**
 * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
 * @vbo: The buffer object.
 * @start: First page of the range within the buffer object.
 * @end: Last page of the range within the buffer object + 1.
 *
 * This is similar to ttm_bo_unmap_virtual() except it takes a subrange.
 */
void vmw_bo_dirty_unmap(struct vmw_bo *vbo,
			pgoff_t start, pgoff_t end)
{
	unsigned long offset = drm_vma_node_start(&vbo->tbo.base.vma_node);
	struct address_space *mapping = vbo->tbo.bdev->dev_mapping;

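	/*
	 * unmap_shared_mapping_range() takes a byte offset and length
	 * (as loff_t), not page indices, hence the PAGE_SHIFT conversion
	 * below.
	 */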
	vmw_bo_dirty_pre_unmap(vbo, start, end);
	unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
				   (loff_t) (end - start) << PAGE_SHIFT);
}

/**
 * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
 * @vbo: The buffer object
 *
 * This function registers a dirty-tracking user with a buffer object.
 * A user can be, for example, a resource or a vma in a special user-space
 * mapping.
 *
 * Return: Zero on success, -ENOMEM on memory allocation failure.
 */
int vmw_bo_dirty_add(struct vmw_bo *vbo)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;
	pgoff_t num_pages = PFN_UP(vbo->tbo.resource->size);
	size_t size;
	int ret;

	if (dirty) {
		dirty->ref_count++;
		return 0;
	}

	size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long);
	dirty = kvzalloc(size, GFP_KERNEL);
	if (!dirty) {
		ret = -ENOMEM;
		goto out_no_dirty;
	}

	dirty->bitmap_size = num_pages;
	dirty->start = dirty->bitmap_size;
	dirty->end = 0;
	dirty->ref_count = 1;
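	/*
	 * Start out with the cheap method for small bos: if the whole bo
	 * spans fewer pages than a single page-table page holds ptes, a
	 * pagetable scan touches very little memory. Larger bos start out
	 * with mkwrite tracking instead.
	 */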
	if (num_pages < PAGE_SIZE / sizeof(pte_t)) {
		dirty->method = VMW_BO_DIRTY_PAGETABLE;
	} else {
		struct address_space *mapping = vbo->tbo.bdev->dev_mapping;
		pgoff_t offset = drm_vma_node_start(&vbo->tbo.base.vma_node);

		dirty->method = VMW_BO_DIRTY_MKWRITE;

		/* Write-protect and then pick up already dirty bits */
		wp_shared_mapping_range(mapping, offset, num_pages);
		clean_record_shared_mapping_range(mapping, offset, num_pages,
						  offset,
						  &dirty->bitmap[0],
						  &dirty->start, &dirty->end);
	}

	vbo->dirty = dirty;

	return 0;

out_no_dirty:
	return ret;
}
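
/*
 * A minimal usage sketch (not a call path in this file), assuming the
 * caller holds the bo reservation, since the dirty-tracking state is not
 * separately locked:
 *
 *	int ret = vmw_bo_dirty_add(vbo);
 *
 *	if (ret)
 *		return ret;
 *	... periodically call vmw_bo_dirty_scan(vbo) ...
 *	vmw_bo_dirty_release(vbo);
 */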

/**
 * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
 * @vbo: The buffer object
 *
 * This function releases a dirty-tracking user from a buffer object.
 * If the reference count reaches zero, then the dirty-tracking object is
 * freed and the pointer to it cleared.
 */
void vmw_bo_dirty_release(struct vmw_bo *vbo)
{
	struct vmw_bo_dirty *dirty = vbo->dirty;

	if (dirty && --dirty->ref_count == 0) {
		kvfree(dirty);
		vbo->dirty = NULL;
	}
}

/**
 * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from
 * its backing mob.
 * @res: The resource
 *
 * This function will pick up all dirty ranges affecting the resource from
 * its backing mob, and call vmw_resource_dirty_update() once for each
 * range. The transferred ranges will be cleared from the backing mob's
 * dirty tracking.
 */
void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res)
{
	struct vmw_bo *vbo = res->guest_memory_bo;
	struct vmw_bo_dirty *dirty = vbo->dirty;
	pgoff_t start, cur, end;
	unsigned long res_start = res->guest_memory_offset;
	unsigned long res_end = res->guest_memory_offset + res->guest_memory_size;

	WARN_ON_ONCE(res_start & ~PAGE_MASK);
	res_start >>= PAGE_SHIFT;
	res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);

	if (res_start >= dirty->end || res_end <= dirty->start)
		return;

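	/*
	 * Walk the bitmap for runs of set bits. As an illustration, with
	 * bitmap bits 0b00111100 the loop below finds start = 2 and
	 * end = 6, clears bits 2..5 and reports the page range [2, 6) to
	 * vmw_resource_dirty_update().
	 */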
	cur = max(res_start, dirty->start);
	res_end = max(res_end, dirty->end);
	while (cur < res_end) {
		unsigned long num;

		start = find_next_bit(&dirty->bitmap[0], res_end, cur);
		if (start >= res_end)
			break;

		end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1);
		cur = end + 1;
		num = end - start;
		bitmap_clear(&dirty->bitmap[0], start, num);
		vmw_resource_dirty_update(res, start, end);
	}

	if (res_start <= dirty->start && res_end > dirty->start)
		dirty->start = res_end;
	if (res_start < dirty->end && res_end >= dirty->end)
		dirty->end = res_start;
}

/**
 * vmw_bo_dirty_clear_res - Clear a resource's dirty region from
 * its backing mob.
 * @res: The resource
 *
 * This function will clear all dirty ranges affecting the resource from
 * its backing mob's dirty tracking.
 */
void vmw_bo_dirty_clear_res(struct vmw_resource *res)
{
	unsigned long res_start = res->guest_memory_offset;
	unsigned long res_end = res->guest_memory_offset + res->guest_memory_size;
	struct vmw_bo *vbo = res->guest_memory_bo;
	struct vmw_bo_dirty *dirty = vbo->dirty;

	res_start >>= PAGE_SHIFT;
	res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);

	if (res_start >= dirty->end || res_end <= dirty->start)
		return;

	res_start = max(res_start, dirty->start);
	res_end = min(res_end, dirty->end);
	bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start);

	if (res_start <= dirty->start && res_end > dirty->start)
		dirty->start = res_end;
	if (res_start < dirty->end && res_end >= dirty->end)
		dirty->end = res_start;
}

vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
		vma->vm_private_data;
	vm_fault_t ret;
	unsigned long page_offset;
	unsigned int save_flags;
	struct vmw_bo *vbo = to_vmw_bo(&bo->base);

	/*
	 * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly,
	 * so temporarily drop FAULT_FLAG_ALLOW_RETRY to make sure the TTM
	 * helpers never return it.
	 */
	save_flags = vmf->flags;
	vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
	ret = ttm_bo_vm_reserve(bo, vmf);
	vmf->flags = save_flags;
	if (ret)
		return ret;

	page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node);
	if (unlikely(page_offset >= PFN_UP(bo->resource->size))) {
		ret = VM_FAULT_SIGBUS;
		goto out_unlock;
	}

	if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE &&
	    !test_bit(page_offset, &vbo->dirty->bitmap[0])) {
		struct vmw_bo_dirty *dirty = vbo->dirty;

		__set_bit(page_offset, &dirty->bitmap[0]);
		dirty->start = min(dirty->start, page_offset);
		dirty->end = max(dirty->end, page_offset + 1);
	}

out_unlock:
	dma_resv_unlock(bo->base.resv);
	return ret;
}

vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
{
	struct vm_area_struct *vma = vmf->vma;
	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
		vma->vm_private_data;
	struct vmw_bo *vbo = to_vmw_bo(&bo->base);
	pgoff_t num_prefault;
	pgprot_t prot;
	vm_fault_t ret;

	ret = ttm_bo_vm_reserve(bo, vmf);
	if (ret)
		return ret;

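	/*
	 * For mappings hinted as random access, prefaulting neighbouring
	 * pages is unlikely to pay off, so fault in a single page only.
	 */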
	num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
		TTM_BO_VM_NUM_PREFAULT;

	if (vbo->dirty) {
		pgoff_t allowed_prefault;
		unsigned long page_offset;

		page_offset = vmf->pgoff -
			drm_vma_node_start(&bo->base.vma_node);
		if (page_offset >= PFN_UP(bo->resource->size) ||
		    vmw_resources_clean(vbo, page_offset,
					page_offset + PAGE_SIZE,
					&allowed_prefault)) {
			ret = VM_FAULT_SIGBUS;
			goto out_unlock;
		}

		num_prefault = min(num_prefault, allowed_prefault);
	}

	/*
	 * If we don't track dirty using the MKWRITE method, make sure
	 * the page protection is write-enabled so we don't get a lot of
	 * unnecessary write faults. With the MKWRITE method, clearing
	 * VM_SHARED yields a write-protected page protection, so the
	 * first write to each page faults into mkwrite().
	 */
	if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
		prot = vm_get_page_prot(vma->vm_flags & ~VM_SHARED);
	else
		prot = vm_get_page_prot(vma->vm_flags);

	ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
		return ret;

out_unlock:
	dma_resv_unlock(bo->base.resv);

	return ret;
}