1 | // SPDX-License-Identifier: GPL-2.0-or-later |
2 | /* |
3 | * |
4 | * Copyright (C) IBM Corporation, 2012 |
5 | * |
6 | * Author: Anton Blanchard <anton@au.ibm.com> |
7 | */ |
8 | |
9 | /* |
10 | * Sparse (as at v0.5.0) gets very, very confused by this file. |
11 | * Make it a bit simpler for it. |
12 | */ |
13 | #if !defined(__CHECKER__) |
14 | #include <altivec.h> |
15 | #else |
16 | #define vec_xor(a, b) a ^ b |
17 | #define vector __attribute__((vector_size(16))) |
18 | #endif |
19 | |
20 | #include "xor_vmx.h" |
21 | |
/* Native 16-byte AltiVec vector type; all loads/stores below use it. */
typedef vector signed char unative_t;

/*
 * Declare a vector-typed alias of the V##_in parameter plus four vector
 * temporaries, so each loop iteration below can process 64 bytes
 * (4 x 16-byte vectors) at a time.
 *
 * NOTE(review): the cast also discards const on the v2_in..v5_in source
 * pointers; the sources are only ever read, so this is benign here.
 */
#define DEFINE(V)				\
	unative_t *V = (unative_t *)V##_in;	\
	unative_t V##_0, V##_1, V##_2, V##_3

/*
 * Load four consecutive vectors from V into the V##_n temporaries.
 * Loads are grouped separately from the XORs to give the compiler
 * latitude to schedule the vector loads ahead of the ALU ops.
 */
#define LOAD(V)			\
	do {			\
		V##_0 = V[0];	\
		V##_1 = V[1];	\
		V##_2 = V[2];	\
		V##_3 = V[3];	\
	} while (0)

/* Store the four V##_n temporaries back to consecutive vectors in V. */
#define STORE(V)		\
	do {			\
		V[0] = V##_0;	\
		V[1] = V##_1;	\
		V[2] = V##_2;	\
		V[3] = V##_3;	\
	} while (0)

/* XOR the four V2##_n temporaries into the four V1##_n temporaries. */
#define XOR(V1, V2)					\
	do {						\
		V1##_0 = vec_xor(V1##_0, V2##_0);	\
		V1##_1 = vec_xor(V1##_1, V2##_1);	\
		V1##_2 = vec_xor(V1##_2, V2##_2);	\
		V1##_3 = vec_xor(V1##_3, V2##_3);	\
	} while (0)
51 | |
52 | void __xor_altivec_2(unsigned long bytes, |
53 | unsigned long * __restrict v1_in, |
54 | const unsigned long * __restrict v2_in) |
55 | { |
56 | DEFINE(v1); |
57 | DEFINE(v2); |
58 | unsigned long lines = bytes / (sizeof(unative_t)) / 4; |
59 | |
60 | do { |
61 | LOAD(v1); |
62 | LOAD(v2); |
63 | XOR(v1, v2); |
64 | STORE(v1); |
65 | |
66 | v1 += 4; |
67 | v2 += 4; |
68 | } while (--lines > 0); |
69 | } |
70 | |
71 | void __xor_altivec_3(unsigned long bytes, |
72 | unsigned long * __restrict v1_in, |
73 | const unsigned long * __restrict v2_in, |
74 | const unsigned long * __restrict v3_in) |
75 | { |
76 | DEFINE(v1); |
77 | DEFINE(v2); |
78 | DEFINE(v3); |
79 | unsigned long lines = bytes / (sizeof(unative_t)) / 4; |
80 | |
81 | do { |
82 | LOAD(v1); |
83 | LOAD(v2); |
84 | LOAD(v3); |
85 | XOR(v1, v2); |
86 | XOR(v1, v3); |
87 | STORE(v1); |
88 | |
89 | v1 += 4; |
90 | v2 += 4; |
91 | v3 += 4; |
92 | } while (--lines > 0); |
93 | } |
94 | |
95 | void __xor_altivec_4(unsigned long bytes, |
96 | unsigned long * __restrict v1_in, |
97 | const unsigned long * __restrict v2_in, |
98 | const unsigned long * __restrict v3_in, |
99 | const unsigned long * __restrict v4_in) |
100 | { |
101 | DEFINE(v1); |
102 | DEFINE(v2); |
103 | DEFINE(v3); |
104 | DEFINE(v4); |
105 | unsigned long lines = bytes / (sizeof(unative_t)) / 4; |
106 | |
107 | do { |
108 | LOAD(v1); |
109 | LOAD(v2); |
110 | LOAD(v3); |
111 | LOAD(v4); |
112 | XOR(v1, v2); |
113 | XOR(v3, v4); |
114 | XOR(v1, v3); |
115 | STORE(v1); |
116 | |
117 | v1 += 4; |
118 | v2 += 4; |
119 | v3 += 4; |
120 | v4 += 4; |
121 | } while (--lines > 0); |
122 | } |
123 | |
124 | void __xor_altivec_5(unsigned long bytes, |
125 | unsigned long * __restrict v1_in, |
126 | const unsigned long * __restrict v2_in, |
127 | const unsigned long * __restrict v3_in, |
128 | const unsigned long * __restrict v4_in, |
129 | const unsigned long * __restrict v5_in) |
130 | { |
131 | DEFINE(v1); |
132 | DEFINE(v2); |
133 | DEFINE(v3); |
134 | DEFINE(v4); |
135 | DEFINE(v5); |
136 | unsigned long lines = bytes / (sizeof(unative_t)) / 4; |
137 | |
138 | do { |
139 | LOAD(v1); |
140 | LOAD(v2); |
141 | LOAD(v3); |
142 | LOAD(v4); |
143 | LOAD(v5); |
144 | XOR(v1, v2); |
145 | XOR(v3, v4); |
146 | XOR(v1, v5); |
147 | XOR(v1, v3); |
148 | STORE(v1); |
149 | |
150 | v1 += 4; |
151 | v2 += 4; |
152 | v3 += 4; |
153 | v4 += 4; |
154 | v5 += 4; |
155 | } while (--lines > 0); |
156 | } |
157 | |