1 | // SPDX-License-Identifier: GPL-2.0-only |
2 | /* |
3 | * SImple Tiler Allocator (SiTA): 2D and 1D allocation(reservation) algorithm |
4 | * |
5 | * Authors: Ravi Ramachandra <r.ramachandra@ti.com>, |
6 | * Lajos Molnar <molnar@ti.com> |
7 | * Andy Gross <andy.gross@ti.com> |
8 | * |
9 | * Copyright (C) 2012 Texas Instruments Incorporated - https://www.ti.com/ |
10 | */ |
11 | #include <linux/init.h> |
12 | #include <linux/module.h> |
13 | #include <linux/errno.h> |
14 | #include <linux/sched.h> |
15 | #include <linux/wait.h> |
16 | #include <linux/bitmap.h> |
17 | #include <linux/slab.h> |
18 | #include "tcm.h" |
19 | |
20 | static unsigned long mask[8]; |
21 | /* |
22 | * pos position in bitmap |
23 | * w width in slots |
24 | * h height in slots |
25 | * map ptr to bitmap |
26 | * stride slots in a row |
27 | */ |
28 | static void free_slots(unsigned long pos, u16 w, u16 h, |
29 | unsigned long *map, u16 stride) |
30 | { |
31 | int i; |
32 | |
33 | for (i = 0; i < h; i++, pos += stride) |
34 | bitmap_clear(map, start: pos, nbits: w); |
35 | } |
36 | |
37 | /* |
38 | * w width in slots |
39 | * pos ptr to position |
40 | * map ptr to bitmap |
41 | * num_bits number of bits in bitmap |
42 | */ |
43 | static int r2l_b2t_1d(u16 w, unsigned long *pos, unsigned long *map, |
44 | size_t num_bits) |
45 | { |
46 | unsigned long search_count = 0; |
47 | unsigned long bit; |
48 | bool area_found = false; |
49 | |
50 | *pos = num_bits - w; |
51 | |
52 | while (search_count < num_bits) { |
53 | bit = find_next_bit(addr: map, size: num_bits, offset: *pos); |
54 | |
55 | if (bit - *pos >= w) { |
56 | /* found a long enough free area */ |
57 | bitmap_set(map, start: *pos, nbits: w); |
58 | area_found = true; |
59 | break; |
60 | } |
61 | |
62 | search_count = num_bits - bit + w; |
63 | *pos = bit - w; |
64 | } |
65 | |
66 | return (area_found) ? 0 : -ENOMEM; |
67 | } |
68 | |
69 | /* |
70 | * w = width in slots |
71 | * h = height in slots |
72 | * a = align in slots (mask, 2^n-1, 0 is unaligned) |
73 | * offset = offset in bytes from 4KiB |
74 | * pos = position in bitmap for buffer |
75 | * map = bitmap ptr |
76 | * num_bits = size of bitmap |
77 | * stride = bits in one row of container |
78 | */ |
79 | static int l2r_t2b(u16 w, u16 h, u16 a, s16 offset, |
80 | unsigned long *pos, unsigned long slot_bytes, |
81 | unsigned long *map, size_t num_bits, size_t slot_stride) |
82 | { |
83 | int i; |
84 | unsigned long index; |
85 | bool area_free = false; |
86 | unsigned long slots_per_band = PAGE_SIZE / slot_bytes; |
87 | unsigned long bit_offset = (offset > 0) ? offset / slot_bytes : 0; |
88 | unsigned long curr_bit = bit_offset; |
89 | |
90 | /* reset alignment to 1 if we are matching a specific offset */ |
91 | /* adjust alignment - 1 to get to the format expected in bitmaps */ |
92 | a = (offset > 0) ? 0 : a - 1; |
93 | |
94 | /* FIXME Return error if slots_per_band > stride */ |
95 | |
96 | while (curr_bit < num_bits) { |
97 | *pos = bitmap_find_next_zero_area(map, size: num_bits, start: curr_bit, nr: w, |
98 | align_mask: a); |
99 | |
100 | /* skip forward if we are not at right offset */ |
101 | if (bit_offset > 0 && (*pos % slots_per_band != bit_offset)) { |
102 | curr_bit = ALIGN(*pos, slots_per_band) + bit_offset; |
103 | continue; |
104 | } |
105 | |
106 | /* skip forward to next row if we overlap end of row */ |
107 | if ((*pos % slot_stride) + w > slot_stride) { |
108 | curr_bit = ALIGN(*pos, slot_stride) + bit_offset; |
109 | continue; |
110 | } |
111 | |
112 | /* TODO: Handle overlapping 4K boundaries */ |
113 | |
114 | /* break out of look if we will go past end of container */ |
115 | if ((*pos + slot_stride * h) > num_bits) |
116 | break; |
117 | |
118 | /* generate mask that represents out matching pattern */ |
119 | bitmap_clear(map: mask, start: 0, nbits: slot_stride); |
120 | bitmap_set(map: mask, start: (*pos % BITS_PER_LONG), nbits: w); |
121 | |
122 | /* assume the area is free until we find an overlap */ |
123 | area_free = true; |
124 | |
125 | /* check subsequent rows to see if complete area is free */ |
126 | for (i = 1; i < h; i++) { |
127 | index = *pos / BITS_PER_LONG + i * 8; |
128 | if (bitmap_intersects(src1: &map[index], src2: mask, |
129 | nbits: (*pos % BITS_PER_LONG) + w)) { |
130 | area_free = false; |
131 | break; |
132 | } |
133 | } |
134 | |
135 | if (area_free) |
136 | break; |
137 | |
138 | /* go forward past this match */ |
139 | if (bit_offset > 0) |
140 | curr_bit = ALIGN(*pos, slots_per_band) + bit_offset; |
141 | else |
142 | curr_bit = *pos + a + 1; |
143 | } |
144 | |
145 | if (area_free) { |
146 | /* set area as in-use. iterate over rows */ |
147 | for (i = 0, index = *pos; i < h; i++, index += slot_stride) |
148 | bitmap_set(map, start: index, nbits: w); |
149 | } |
150 | |
151 | return (area_free) ? 0 : -ENOMEM; |
152 | } |
153 | |
154 | static s32 sita_reserve_1d(struct tcm *tcm, u32 num_slots, |
155 | struct tcm_area *area) |
156 | { |
157 | unsigned long pos; |
158 | int ret; |
159 | |
160 | spin_lock(lock: &(tcm->lock)); |
161 | ret = r2l_b2t_1d(w: num_slots, pos: &pos, map: tcm->bitmap, num_bits: tcm->map_size); |
162 | if (!ret) { |
163 | area->p0.x = pos % tcm->width; |
164 | area->p0.y = pos / tcm->width; |
165 | area->p1.x = (pos + num_slots - 1) % tcm->width; |
166 | area->p1.y = (pos + num_slots - 1) / tcm->width; |
167 | } |
168 | spin_unlock(lock: &(tcm->lock)); |
169 | |
170 | return ret; |
171 | } |
172 | |
173 | static s32 sita_reserve_2d(struct tcm *tcm, u16 h, u16 w, u16 align, |
174 | s16 offset, u16 slot_bytes, |
175 | struct tcm_area *area) |
176 | { |
177 | unsigned long pos; |
178 | int ret; |
179 | |
180 | spin_lock(lock: &(tcm->lock)); |
181 | ret = l2r_t2b(w, h, a: align, offset, pos: &pos, slot_bytes, map: tcm->bitmap, |
182 | num_bits: tcm->map_size, slot_stride: tcm->width); |
183 | |
184 | if (!ret) { |
185 | area->p0.x = pos % tcm->width; |
186 | area->p0.y = pos / tcm->width; |
187 | area->p1.x = area->p0.x + w - 1; |
188 | area->p1.y = area->p0.y + h - 1; |
189 | } |
190 | spin_unlock(lock: &(tcm->lock)); |
191 | |
192 | return ret; |
193 | } |
194 | |
195 | static void sita_deinit(struct tcm *tcm) |
196 | { |
197 | kfree(objp: tcm); |
198 | } |
199 | |
200 | static s32 sita_free(struct tcm *tcm, struct tcm_area *area) |
201 | { |
202 | unsigned long pos; |
203 | u16 w, h; |
204 | |
205 | pos = area->p0.x + area->p0.y * tcm->width; |
206 | if (area->is2d) { |
207 | w = area->p1.x - area->p0.x + 1; |
208 | h = area->p1.y - area->p0.y + 1; |
209 | } else { |
210 | w = area->p1.x + area->p1.y * tcm->width - pos + 1; |
211 | h = 1; |
212 | } |
213 | |
214 | spin_lock(lock: &(tcm->lock)); |
215 | free_slots(pos, w, h, map: tcm->bitmap, stride: tcm->width); |
216 | spin_unlock(lock: &(tcm->lock)); |
217 | return 0; |
218 | } |
219 | |
220 | struct tcm *sita_init(u16 width, u16 height) |
221 | { |
222 | struct tcm *tcm; |
223 | size_t map_size = BITS_TO_LONGS(width*height) * sizeof(unsigned long); |
224 | |
225 | if (width == 0 || height == 0) |
226 | return NULL; |
227 | |
228 | tcm = kzalloc(size: sizeof(*tcm) + map_size, GFP_KERNEL); |
229 | if (!tcm) |
230 | goto error; |
231 | |
232 | /* Updating the pointers to SiTA implementation APIs */ |
233 | tcm->height = height; |
234 | tcm->width = width; |
235 | tcm->reserve_2d = sita_reserve_2d; |
236 | tcm->reserve_1d = sita_reserve_1d; |
237 | tcm->free = sita_free; |
238 | tcm->deinit = sita_deinit; |
239 | |
240 | spin_lock_init(&tcm->lock); |
241 | tcm->bitmap = (unsigned long *)(tcm + 1); |
242 | bitmap_clear(map: tcm->bitmap, start: 0, nbits: width*height); |
243 | |
244 | tcm->map_size = width*height; |
245 | |
246 | return tcm; |
247 | |
248 | error: |
249 | return NULL; |
250 | } |
251 | |