/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_WORD_AT_A_TIME_H
#define _ASM_WORD_AT_A_TIME_H

#include <linux/bitops.h>
#include <linux/wordpart.h>

/*
 * This is largely generic for little-endian machines, but the
 * optimal byte mask counting is probably going to be something
 * that is architecture-specific. If you have a reliably fast
 * bit count instruction, that might be better than the multiply
 * and shift, for example.
 */
struct word_at_a_time {
	const unsigned long one_bits, high_bits;
};

#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
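
/*
 * On 64-bit kernels REPEAT_BYTE() expands these to
 * one_bits == 0x0101010101010101 and high_bits == 0x8080808080808080
 * (0x01010101 and 0x80808080 on 32-bit): a low "borrow" bit and a
 * high "flag" bit in every byte, used by the zero detection below.
 */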

#ifdef CONFIG_64BIT

/*
 * Jan Achrenius on G+: microoptimized version of
 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
 * that works for the bytemasks without having to
 * mask them first.
 */
static inline long count_masked_bytes(unsigned long mask)
{
	return mask*0x0001020304050608ul >> 56;
}
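
/*
 * Worked example: a bytemask of 0x0000000000ffffff (all-ones bytes
 * below a zero byte at index 3) gives
 *
 *	(0x0000000000ffffff * 0x0001020304050608) >> 56 == 3
 *
 * the same answer as the simpler masked form, without the "& ONEBYTES".
 */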

#else /* 32-bit case */

/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
static inline long count_masked_bytes(long mask)
{
	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
	long a = (0x0ff0001+mask) >> 23;
	/* Fix the 1 for 00 case */
	return a & mask;
}
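
/*
 * Worked example: mask == 0x0000ffff (zero byte at index 2) gives
 *
 *	a = (0x0ff0001 + 0x0000ffff) >> 23 = 0x1000000 >> 23 = 2
 *	2 & 0x0000ffff == 2
 *
 * while mask == 0 gives a == 1, and "a & mask" fixes that up to 0.
 */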

#endif

/* Return nonzero if the word contains a zero byte */
static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
{
	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
	*bits = mask;
	return mask;
}
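
/*
 * Example, 32-bit word a == 0x00636261 ("abc" plus its NUL, loaded
 * little-endian):
 *
 *	(a - 0x01010101) & ~a & 0x80808080
 *	  == 0xff626160 & 0xff9c9d9e & 0x80808080
 *	  == 0x80000000
 *
 * flagging byte 3 as the zero byte. Bytes above the first zero byte
 * can pick up spurious flags from the borrow, but create_zero_mask()
 * below only looks at the lowest flagged byte, so that is harmless.
 */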

/*
 * The mask from has_zero() is already in the right form on
 * little-endian x86, so this is a no-op; the hook exists for the
 * benefit of big-endian implementations, which build the real mask
 * here.
 */
static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
{
	return bits;
}

static inline unsigned long create_zero_mask(unsigned long bits)
{
	bits = (bits - 1) & ~bits;
	return bits >> 7;
}
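
/*
 * Continuing the example: bits == 0x80000000 becomes
 *
 *	(0x80000000 - 1) & ~0x80000000 == 0x7fffffff
 *	0x7fffffff >> 7 == 0x00ffffff
 *
 * i.e. all-ones bytes below the first zero byte and zeroes from it
 * on up, which also discards any spurious higher flags.
 */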

/* The mask we created is directly usable as a bytemask */
#define zero_bytemask(mask) (mask)

static inline unsigned long find_zero(unsigned long mask)
{
	return count_masked_bytes(mask);
}
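
/*
 * Putting it together, a minimal word-at-a-time strlen() sketch
 * (illustrative only; assumes the string may safely be over-read up
 * to a word boundary, as callers like lib/strnlen_user.c arrange):
 *
 *	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 *	unsigned long a, bits, len = 0;
 *
 *	for (;;) {
 *		a = load_unaligned_zeropad(str + len);
 *		if (has_zero(a, &bits, &constants)) {
 *			bits = prep_zero_mask(a, bits, &constants);
 *			bits = create_zero_mask(bits);
 *			return len + find_zero(bits);
 *		}
 *		len += sizeof(unsigned long);
 *	}
 */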

/*
 * Load an unaligned word from kernel space.
 *
 * In the (very unlikely) case of the word being a page-crosser
 * and the next page not being mapped, take the exception and
 * return zeroes in the non-existing part.
 */
static inline unsigned long load_unaligned_zeropad(const void *addr)
{
	unsigned long ret;

	asm volatile(
		"1:	mov %[mem], %[ret]\n"
		"2:\n"
		_ASM_EXTABLE_TYPE(1b, 2b, EX_TYPE_ZEROPAD)
		: [ret] "=r" (ret)
		: [mem] "m" (*(unsigned long *)addr));

	return ret;
}
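
/*
 * For example, if only the first three bytes of the word at @addr are
 * mapped (say 0x61 0x62 0x63) and the rest falls into an unmapped
 * page, the fixup yields 0x0000000000636261 on 64-bit: the mapped
 * bytes at the low end, zeroes padding the missing part.
 */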

#endif /* _ASM_WORD_AT_A_TIME_H */