Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0-or-later
2 : // Copyright (C) 2013 Andrea Mazzoleni
3 :
4 : #ifndef __RAID_INTERNAL_H
5 : #define __RAID_INTERNAL_H
6 :
/*
 * Instruction set selection.
 *
 * An architecture may support instructions that an old assembler is not
 * able to encode. The CONFIG_* macros defined here allow the build to
 * leave out whatever the toolchain cannot handle.
 *
 * A project that always builds with a known assembler can define the
 * CONFIG_* macros with fixed values instead of relying on the HAVE_* ones.
 */
#if HAVE_CONFIG_H

/* The project configuration provides the HAVE_* macros tested below */
#include "config.h"

/* Detect the x86 flavour only when the compiler supports assembly */
#if HAVE_ASSEMBLY
#ifdef __i386__
#define CONFIG_X86 1
#define CONFIG_X86_32 1
#endif
#ifdef __x86_64__
#define CONFIG_X86 1
#define CONFIG_X86_64 1
#endif
#endif /* HAVE_ASSEMBLY */

/* Enable SSE2, SSSE3 and AVX2 only if the assembler supports them */
#if HAVE_SSE2
#define CONFIG_SSE2 1
#endif
#if HAVE_SSSE3
#define CONFIG_SSSE3 1
#endif
#if HAVE_AVX2
#define CONFIG_AVX2 1
#endif
/* Enable AVX512BW only if the assembler supports it */
#if HAVE_AVX512BW
#define CONFIG_AVX512BW 1
#endif

#else /* HAVE_CONFIG_H is not set: no HAVE_* macros are available */

/* Assume that assembly is always supported and detect the x86 flavour */
#ifdef __i386__
#define CONFIG_X86 1
#define CONFIG_X86_32 1
#endif

#ifdef __x86_64__
#define CONFIG_X86 1
#define CONFIG_X86_64 1
#endif

/* Assume that the assembler supports every instruction set */
#if defined(CONFIG_X86)
#define CONFIG_SSE2 1
#define CONFIG_SSSE3 1
#define CONFIG_AVX2 1
#endif
#if defined(CONFIG_X86_64)
#define CONFIG_AVX512BW 1
#endif

#endif /* HAVE_CONFIG_H */
72 :
73 : /*
74 : * Includes anything required for compatibility.
75 : */
76 : #include <assert.h>
77 : #include <stdint.h>
78 : #include <stdlib.h>
79 : #include <string.h>
80 :
/*
 * Inverse assert: the assertion fails when the condition is TRUE.
 *
 * It is built on assert(), so the check is compiled out with NDEBUG.
 */
#define BUG_ON(cond) assert(!(cond))

/*
 * Forced inline, unless the environment already provides the macro.
 */
#if !defined(__always_inline)
#define __always_inline inline __attribute__((always_inline))
#endif

/*
 * Forced alignment to @n bytes, unless the environment already
 * provides the macro.
 */
#if !defined(__aligned)
#define __aligned(n) __attribute__((aligned(n)))
#endif
99 :
/*
 * Round a pointer up to the next multiple of @size.
 *
 * The address is returned unchanged when it is already aligned.
 * The masking used here gives a correct result only when @size is
 * a power of two.
 */
static __always_inline void *__align_ptr(void *ptr, uintptr_t size)
{
	const uintptr_t mask = size - 1U;
	const uintptr_t addr = (uintptr_t)ptr;

	/* Step past the boundary, then clear the low bits */
	return (void *)((addr + mask) & ~mask);
}
111 :
112 : /*
113 : * Includes the main interface headers.
114 : */
115 : #include "raid.h"
116 : #include "helper.h"
117 :
/*
 * Internal functions.
 *
 * These are intended to provide external access for testing.
 */

/* Helpers with their own specific signatures */
void raid_gen_ref(int nd, int np, size_t size, void **vv);
void raid_invert(uint8_t *M, uint8_t *V, int n);
void raid_delta_gen(int nr, int *id, int *ip, int nd, size_t size, void **v);
void raid_rec1of1(int *id, int nd, size_t size, void **v);
void raid_rec2of2_int8(int *id, int *ip, int nd, size_t size, void **vv);

/*
 * Functions matching the raid_gen_fn signature.
 *
 * The name suffix presumably identifies the implementation
 * (int32, sse2, avx2, ...) - confirm against the definitions.
 */
void raid_gen1_int32(int nd, size_t size, void **vv);
void raid_gen1_int64(int nd, size_t size, void **vv);
void raid_gen1_sse2(int nd, size_t size, void **vv);
void raid_gen1_avx2(int nd, size_t size, void **vv);
void raid_gen1_avx512bw(int nd, size_t size, void **vv);
void raid_gen2_int32(int nd, size_t size, void **vv);
void raid_gen2_int64(int nd, size_t size, void **vv);
void raid_gen2_sse2(int nd, size_t size, void **vv);
void raid_gen2_avx2(int nd, size_t size, void **vv);
void raid_gen2_sse2ext(int nd, size_t size, void **vv);
void raid_gen2_avx512bw(int nd, size_t size, void **vv);
void raid_genz_int32(int nd, size_t size, void **vv);
void raid_genz_int64(int nd, size_t size, void **vv);
void raid_genz_sse2(int nd, size_t size, void **vv);
void raid_genz_sse2ext(int nd, size_t size, void **vv);
void raid_genz_avx2ext(int nd, size_t size, void **vv);
void raid_gen3_int8(int nd, size_t size, void **vv);
void raid_gen3_ssse3(int nd, size_t size, void **vv);
void raid_gen3_ssse3ext(int nd, size_t size, void **vv);
void raid_gen3_avx2ext(int nd, size_t size, void **vv);
void raid_gen3_avx512bw(int nd, size_t size, void **vv);
void raid_gen4_int8(int nd, size_t size, void **vv);
void raid_gen4_ssse3(int nd, size_t size, void **vv);
void raid_gen4_ssse3ext(int nd, size_t size, void **vv);
void raid_gen4_avx2ext(int nd, size_t size, void **vv);
void raid_gen4_avx512bw(int nd, size_t size, void **vv);
void raid_gen5_int8(int nd, size_t size, void **vv);
void raid_gen5_ssse3(int nd, size_t size, void **vv);
void raid_gen5_ssse3ext(int nd, size_t size, void **vv);
void raid_gen5_avx2ext(int nd, size_t size, void **vv);
void raid_gen5_avx512bw(int nd, size_t size, void **vv);
void raid_gen6_int8(int nd, size_t size, void **vv);
void raid_gen6_ssse3(int nd, size_t size, void **vv);
void raid_gen6_ssse3ext(int nd, size_t size, void **vv);
void raid_gen6_avx2ext(int nd, size_t size, void **vv);
void raid_gen6_avx512bw(int nd, size_t size, void **vv);

/* Functions matching the raid_rec_fn signature */
void raid_rec1_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec2_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_recX_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec1_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec2_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_recX_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec1_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec2_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_recX_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec1_avx512bw(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_rec2_avx512bw(int nr, int *id, int *ip, int nd, size_t size, void **vv);
void raid_recX_avx512bw(int nr, int *id, int *ip, int nd, size_t size, void **vv);
176 :
/*
 * Signature of the parity computation functions.
 *
 * A function of this type computes the parity blocks from the data
 * blocks it receives.
 *
 * How many parities are computed is not an argument: it is implied by
 * the slot the function occupies in the forwarder vector. The function
 * at index #i computes (#i+1) parities.
 *
 * Parities are guaranteed to be written in order: first P, then Q,
 * and so on. The same memory buffer can then be passed for more than
 * one parity, knowing that it ends up holding the latest one written.
 * raid_delta_gen() relies on this to avoid damaging unused parities
 * while recovering.
 *
 * @nd Number of data blocks.
 * @size Size of the blocks pointed by @vv. It must be a multiple of 64.
 * @vv Vector of pointers to the blocks of data and parity.
 *   It has (@nd + #parities) elements. The data blocks come first,
 *   followed by the parity blocks.
 *   Each block has @size bytes.
 */
typedef void raid_gen_fn(int nd, size_t size, void **vv);
200 :
/*
 * Signature of the data recovery functions.
 *
 * A function of this type recomputes the missing data blocks using
 * the specified parities.
 *
 * Note that the format of the vectors @id/@ip is different than in
 * raid_rec(). For example, in @ip the first parity is represented with
 * the value 0 and not with @nd.
 *
 * @nr Number of failed data blocks to recover.
 * @id[] Vector of @nr indexes of the data blocks to recover.
 *   The indexes start from 0. They must be in order.
 * @ip[] Vector of @nr indexes of the parity blocks to use in the recovering.
 *   The indexes start from 0. They must be in order.
 * @nd Number of data blocks.
 * @size Size of the blocks pointed by @vv. It must be a multiple of 64.
 * @vv Vector of pointers to the blocks of data and parity.
 *   It has (@nd + @np) elements, where @np is the number of parity
 *   blocks. @np is not an argument of this function type; it is named
 *   here only to describe the length of @vv.
 *   The data blocks come first, followed by the parity blocks.
 *   Each block has @size bytes.
 */
typedef void raid_rec_fn(int nr, int *id, int *ip, int nd, size_t size, void **vv);
225 :
/**
 * Algorithm indexes.
 *
 * To be used with the register and tag functions.
 *
 * The indexes are consecutive and start from 0. RAID_ALGO_MAX is not
 * an algorithm: it is one more than the highest index defined here.
 */
#define RAID_ALGO_CAUCHY_PAR1 0
#define RAID_ALGO_CAUCHY_PAR2 1
#define RAID_ALGO_CAUCHY_PAR3 2
#define RAID_ALGO_CAUCHY_PAR4 3
#define RAID_ALGO_CAUCHY_PAR5 4
#define RAID_ALGO_CAUCHY_PAR6 5
#define RAID_ALGO_VANDERMONDE_PAR3 6
#define RAID_ALGO_MAX 7
239 :
/**
 * Register functions for parity computation and data recovery.
 *
 * Each call overwrites the previous setting for the same algo code.
 * Thus, call them going from the slowest to the fastest implementation,
 * so that the last one registered is the fastest.
 *
 * @na Algo code of the function. One of RAID_ALGO_*.
 * @tag Descriptive short tag of the implementation, like "sse2", "avx2",...
 * @fn Function to register.
 */
void raid_gen_register(int na, const char *tag, raid_gen_fn *fn);
void raid_rec_register(int na, const char *tag, raid_rec_fn *fn);
252 :
/**
 * Force the parity generation function used in data recovery.
 *
 * Intended only for testing the recovery functions, forcing a specific
 * parity generation function for the delta step.
 *
 * Each call overwrites the previous setting.
 *
 * @np Number of parities.
 * @fn Function to register.
 */
void raid_gen_force(int np, raid_gen_fn *fn);
265 :
/**
 * Register all the functions based on integer variables.
 */
void raid_register_int(void);

/**
 * Register all the functions based on x86 instructions.
 */
void raid_register_x86(void);
275 :
/*
 * Tag functions.
 *
 * Given the specified algo code, return the tag of the registered function.
 *
 * @na Algo code. One of RAID_ALGO_*.
 */
const char *raid_gen_tag(int na);
const char *raid_rec_tag(int na);
283 :
/**
 * Basic functionality self test.
 *
 * Returns 0 on success.
 * NOTE(review): the value returned on failure is not visible from this
 * header - confirm against the definition.
 */
int raid_selftest(void);
290 :
/*
 * Tables.
 *
 * All the tables are constant byte arrays defined elsewhere and declared
 * with a 256 byte alignment. The exception is raid_gfgen, which is a
 * pointer to rows of 256 constant bytes.
 */
extern const uint8_t raid_gfmul[256][256] __aligned(256);
extern const uint8_t raid_gfexp[256] __aligned(256);
extern const uint8_t raid_gfinv[256] __aligned(256);
extern const uint8_t raid_gfvandermonde[3][256] __aligned(256);
extern const uint8_t raid_gfcauchy[6][256] __aligned(256);
extern const uint8_t raid_gfcauchypshufb[251][4][2][16] __aligned(256);
extern const uint8_t raid_gfmulpshufb[256][2][16] __aligned(256);
extern const uint8_t (*raid_gfgen)[256];

/*
 * Short aliases for the tables above.
 *
 * Note that gfgenpshufb is the only alias that is not the table name
 * without the "raid_" prefix: it maps to raid_gfcauchypshufb.
 */
#define gfmul raid_gfmul
#define gfexp raid_gfexp
#define gfinv raid_gfinv
#define gfvandermonde raid_gfvandermonde
#define gfcauchy raid_gfcauchy
#define gfgenpshufb raid_gfcauchypshufb
#define gfmulpshufb raid_gfmulpshufb
#define gfgen raid_gfgen
310 :
311 : /*
312 : * Assembler blocks.
313 : */
314 : #ifdef CONFIG_X86
315 : #ifdef CONFIG_SSE2
/*
 * Marks the start of a block of SSE assembler code.
 *
 * The body is empty: no setup is performed here.
 * Presumably paired with raid_sse_end() - confirm against the callers.
 */
static __always_inline void raid_sse_begin(void)
{
}
319 :
/*
 * Marks the end of a block of SSE assembler code.
 *
 * It issues a store fence and then declares the xmm registers as
 * clobbered, so that the compiler saves and restores the ones it has to.
 *
 * The alternate keywords __asm__/__volatile__ are used instead of
 * asm/volatile because "asm" is a GNU extension that is not recognized
 * when compiling in a strict ISO mode (like -std=c99 or -std=c11).
 */
static __always_inline void raid_sse_end(void)
{
	/*
	 * SSE and AVX code uses non-temporal writes, like MOVNTDQ,
	 * that use a weak memory model. To ensure that other processors
	 * see correctly the data written, we use a store-store memory
	 * barrier at the end of the asm code
	 */
	__asm__ __volatile__ ("sfence" : : : "memory");

	/*
	 * Clobbers registers used in the asm code
	 * this is required because in the Windows ABI,
	 * registers xmm6-xmm15 should be kept by the callee.
	 * this clobber list forces the compiler to save any
	 * register that needs to be saved
	 * we check for __SSE2__ because we require that the
	 * compiler supports SSE2 registers in the clobber list
	 */
#ifdef __SSE2__
	__asm__ __volatile__ ("" : : : "%xmm0", "%xmm1", "%xmm2", "%xmm3");
	__asm__ __volatile__ ("" : : : "%xmm4", "%xmm5", "%xmm6", "%xmm7");
#ifdef CONFIG_X86_64
	/* xmm8-xmm15 are listed only when building for x86_64 */
	__asm__ __volatile__ ("" : : : "%xmm8", "%xmm9", "%xmm10", "%xmm11");
	__asm__ __volatile__ ("" : : : "%xmm12", "%xmm13", "%xmm14", "%xmm15");
#endif
#endif
}
348 : #endif
349 :
350 : #ifdef CONFIG_AVX2
/*
 * Marks the start of a block of AVX assembler code.
 *
 * It only forwards to raid_sse_begin().
 */
static __always_inline void raid_avx_begin(void)
{
	raid_sse_begin();
}
355 :
/*
 * Marks the end of a block of AVX assembler code.
 *
 * It runs the SSE epilogue first and then resets the upper part of
 * the ymm registers.
 *
 * The alternate keywords __asm__/__volatile__ are used instead of
 * asm/volatile because "asm" is a GNU extension that is not recognized
 * when compiling in a strict ISO mode (like -std=c99 or -std=c11).
 */
static __always_inline void raid_avx_end(void)
{
	raid_sse_end();

	/*
	 * Reset the upper part of the ymm registers
	 * to avoid the 70 clocks penalty on the next
	 * xmm register use
	 */
	__asm__ __volatile__ ("vzeroupper" : : : "memory");
}
367 : #endif
368 : #endif /* CONFIG_X86 */
369 :
370 : #endif
371 :
|