LCOV - code coverage report
Current view: top level - raid - internal.h (source / functions) Hit Total Coverage
Test: lcov.info Lines: 13 13 100.0 %
Date: 2026-04-29 15:04:44 Functions: 0 0 -

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-2.0-or-later
       2             : // Copyright (C) 2013 Andrea Mazzoleni
       3             : 
       4             : #ifndef __RAID_INTERNAL_H
       5             : #define __RAID_INTERNAL_H
       6             : 
       7             : /*
       8             :  * Supported instruction sets.
       9             :  *
      10             :  * It may happen that the assembler is too old to support
      11             :  * all instructions, even if the architecture supports them.
      12             :  * These defines allow excluding the unsupported ones from the build.
      13             :  *
      14             :  * If in your project you use a predefined assembler, you can define them
      15             :  * using fixed values, instead of using the HAVE_* defines.
      16             :  */
      17             : #if HAVE_CONFIG_H
      18             : 
      19             : /* Includes the project configuration for HAVE_* defines */
      20             : #include "config.h"
      21             : 
      22             : /* If the compiler supports assembly */
      23             : #if HAVE_ASSEMBLY
      24             : /* Autodetect from the compiler */
      25             : #if defined(__i386__)
      26             : #define CONFIG_X86 1
      27             : #define CONFIG_X86_32 1
      28             : #endif
      29             : #if defined(__x86_64__)
      30             : #define CONFIG_X86 1
      31             : #define CONFIG_X86_64 1
      32             : #endif
      33             : #endif
      34             : 
      35             : /* Enables SSE2, SSSE3, AVX2 only if the assembler supports it */
      36             : #if HAVE_SSE2
      37             : #define CONFIG_SSE2 1
      38             : #endif
      39             : #if HAVE_SSSE3
      40             : #define CONFIG_SSSE3 1
      41             : #endif
      42             : #if HAVE_AVX2
      43             : #define CONFIG_AVX2 1
      44             : #endif
      45             : #if HAVE_AVX512BW /* Enables AVX512BW only if the assembler supports it */
      46             : #define CONFIG_AVX512BW 1
      47             : #endif
      48             : 
      49             : #else /* if HAVE_CONFIG_H is not defined */
      50             : 
      51             : /* Assume that assembly is always supported */
      52             : #if defined(__i386__)
      53             : #define CONFIG_X86 1
      54             : #define CONFIG_X86_32 1
      55             : #endif
      56             : 
      57             : #if defined(__x86_64__)
      58             : #define CONFIG_X86 1
      59             : #define CONFIG_X86_64 1
      60             : #endif
      61             : 
      62             : /* Assumes that the assembler supports everything */
      63             : #ifdef CONFIG_X86
      64             : #define CONFIG_SSE2 1
      65             : #define CONFIG_SSSE3 1
      66             : #define CONFIG_AVX2 1
      67             : #endif
      68             : #ifdef CONFIG_X86_64
      69             : #define CONFIG_AVX512BW 1
      70             : #endif
      71             : #endif
      72             : 
      73             : /*
      74             :  * Includes anything required for compatibility.
      75             :  */
      76             : #include <assert.h>
      77             : #include <stdint.h>
      78             : #include <stdlib.h>
      79             : #include <string.h>
      80             : 
      81             : /*
      82             :  * Inverse assert.
      83             :  */
      84             : #define BUG_ON(a) assert(!(a))
      85             : 
      86             : /*
      87             :  * Forced inline.
      88             :  */
      89             : #ifndef __always_inline
      90             : #define __always_inline inline __attribute__((always_inline))
      91             : #endif
      92             : 
      93             : /*
      94             :  * Forced alignment.
      95             :  */
      96             : #ifndef __aligned
      97             : #define __aligned(a) __attribute__((aligned(a)))
      98             : #endif
      99             : 
     100             : /*
     101             :  * Align a pointer at the specified size.
     102             :  */
     103             : static __always_inline void *__align_ptr(void *ptr, uintptr_t size)
     104             : {
     105      128005 :         uintptr_t offset = (uintptr_t)ptr;
     106             : 
     107      128005 :         offset = (offset + size - 1U) & ~(size - 1U);
     108             : 
     109      128005 :         return (void *)offset;
     110             : }
     111             : 
     112             : /*
     113             :  * Includes the main interface headers.
     114             :  */
     115             : #include "raid.h"
     116             : #include "helper.h"
     117             : 
     118             : /*
     119             :  * Internal functions.
     120             :  *
     121             :  * These are intended to provide external access for testing.
     122             :  */
     123             : void raid_gen_ref(int nd, int np, size_t size, void **vv);
     124             : void raid_invert(uint8_t *M, uint8_t *V, int n);
     125             : void raid_delta_gen(int nr, int *id, int *ip, int nd, size_t size, void **v);
     126             : void raid_rec1of1(int *id, int nd, size_t size, void **v);
     127             : void raid_rec2of2_int8(int *id, int *ip, int nd, size_t size, void **vv);
     128             : void raid_gen1_int32(int nd, size_t size, void **vv);
     129             : void raid_gen1_int64(int nd, size_t size, void **vv);
     130             : void raid_gen1_sse2(int nd, size_t size, void **vv);
     131             : void raid_gen1_avx2(int nd, size_t size, void **vv);
     132             : void raid_gen1_avx512bw(int nd, size_t size, void **vv);
     133             : void raid_gen2_int32(int nd, size_t size, void **vv);
     134             : void raid_gen2_int64(int nd, size_t size, void **vv);
     135             : void raid_gen2_sse2(int nd, size_t size, void **vv);
     136             : void raid_gen2_avx2(int nd, size_t size, void **vv);
     137             : void raid_gen2_sse2ext(int nd, size_t size, void **vv);
     138             : void raid_gen2_avx512bw(int nd, size_t size, void **vv);
     139             : void raid_genz_int32(int nd, size_t size, void **vv);
     140             : void raid_genz_int64(int nd, size_t size, void **vv);
     141             : void raid_genz_sse2(int nd, size_t size, void **vv);
     142             : void raid_genz_sse2ext(int nd, size_t size, void **vv);
     143             : void raid_genz_avx2ext(int nd, size_t size, void **vv);
     144             : void raid_gen3_int8(int nd, size_t size, void **vv);
     145             : void raid_gen3_ssse3(int nd, size_t size, void **vv);
     146             : void raid_gen3_ssse3ext(int nd, size_t size, void **vv);
     147             : void raid_gen3_avx2ext(int nd, size_t size, void **vv);
     148             : void raid_gen3_avx512bw(int nd, size_t size, void **vv);
     149             : void raid_gen4_int8(int nd, size_t size, void **vv);
     150             : void raid_gen4_ssse3(int nd, size_t size, void **vv);
     151             : void raid_gen4_ssse3ext(int nd, size_t size, void **vv);
     152             : void raid_gen4_avx2ext(int nd, size_t size, void **vv);
     153             : void raid_gen4_avx512bw(int nd, size_t size, void **vv);
     154             : void raid_gen5_int8(int nd, size_t size, void **vv);
     155             : void raid_gen5_ssse3(int nd, size_t size, void **vv);
     156             : void raid_gen5_ssse3ext(int nd, size_t size, void **vv);
     157             : void raid_gen5_avx2ext(int nd, size_t size, void **vv);
     158             : void raid_gen5_avx512bw(int nd, size_t size, void **vv);
     159             : void raid_gen6_int8(int nd, size_t size, void **vv);
     160             : void raid_gen6_ssse3(int nd, size_t size, void **vv);
     161             : void raid_gen6_ssse3ext(int nd, size_t size, void **vv);
     162             : void raid_gen6_avx2ext(int nd, size_t size, void **vv);
     163             : void raid_gen6_avx512bw(int nd, size_t size, void **vv);
     164             : void raid_rec1_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     165             : void raid_rec2_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     166             : void raid_recX_int8(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     167             : void raid_rec1_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     168             : void raid_rec2_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     169             : void raid_recX_ssse3(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     170             : void raid_rec1_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     171             : void raid_rec2_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     172             : void raid_recX_avx2(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     173             : void raid_rec1_avx512bw(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     174             : void raid_rec2_avx512bw(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     175             : void raid_recX_avx512bw(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     176             : 
     177             : /*
     178             :  * Functions for parity computation.
     179             :  *
     180             :  * These functions compute the parity blocks from the provided data.
     181             :  *
     182             :  * The number of parities to compute is implicit in the position in the
     183             :  * forwarder vector. Position at index #i, computes (#i+1) parities.
     184             :  *
     185             :  * All these functions give the guarantee that parities are written
     186             :  * in order. First parity P, then parity Q, and so on.
     187             :  * This allows specifying the same memory buffer for multiple parities,
     188             :  * knowing that you'll get the latest written one.
     189             :  * This characteristic is used by the raid_delta_gen() function to
     190             :  * avoid damaging unused parities during recovery.
     191             :  *
     192             :  * @nd Number of data blocks
     193             :  * @size Size of the blocks pointed by @vv. It must be a multiple of 64.
     194             :  * @vv Vector of pointers to the blocks of data and parity.
     195             :  *   It has (@nd + #parities) elements. The starting elements are the blocks
     196             :  *   for data, followed by the parity blocks.
     197             :  *   Each block has @size bytes.
     198             :  */
     199             : typedef void (raid_gen_fn)(int nd, size_t size, void **vv);
     200             : 
     201             : /*
     202             :  * Functions for data recovery.
     203             :  *
     204             :  * These functions recover data blocks using the specified parity
     205             :  * to recompute the missing data.
     206             :  *
     207             :  * Note that the format of vectors @id/@ip is different than raid_rec().
     208             :  * For example, in the vector @ip the first parity is represented with the
     209             :  * value 0 and not @nd.
     210             :  *
     211             :  * @nr Number of failed data blocks to recover.
     212             :  * @id[] Vector of @nr indexes of the data blocks to recover.
     213             :  *   The indexes start from 0. They must be in order.
     214             :  * @ip[] Vector of @nr indexes of the parity blocks to use in the recovering.
     215             :  *   The indexes start from 0. They must be in order.
     216             :  * @nd Number of data blocks.
     217             :  * @np Number of parity blocks.
     218             :  * @size Size of the blocks pointed by @vv. It must be a multiple of 64.
     219             :  * @vv Vector of pointers to the blocks of data and parity.
     220             :  *   It has (@nd + @np) elements. The starting elements are the blocks
     221             :  *   for data, followed by the parity blocks.
     222             :  *   Each block has @size bytes.
     223             :  */
     224             : typedef void (raid_rec_fn)(int nr, int *id, int *ip, int nd, size_t size, void **vv);
     225             : 
     226             : /**
     227             :  * Algorithm indexes
     228             :  *
     229             :  * To be used with the register and tag functions.
     230             :  */
     231             : #define RAID_ALGO_CAUCHY_PAR1 0
     232             : #define RAID_ALGO_CAUCHY_PAR2 1
     233             : #define RAID_ALGO_CAUCHY_PAR3 2
     234             : #define RAID_ALGO_CAUCHY_PAR4 3
     235             : #define RAID_ALGO_CAUCHY_PAR5 4
     236             : #define RAID_ALGO_CAUCHY_PAR6 5
     237             : #define RAID_ALGO_VANDERMONDE_PAR3 6
     238             : #define RAID_ALGO_MAX 7
     239             : 
     240             : /** 
     241             :  * Register functions for parity computation and data recovery.
     242             :  *
     243             :  * Each call overwrites the previous setting. Thus, call it from the
     244             :  * slowest to the fastest.
     245             :  *
     246             :  * @na Algo code of the function. One of RAID_ALGO_*.
     247             :  * @tag Descriptive short tag of the implementation, like "sse2", "avx2",...
     248             :  * @fn Function to register.
     249             :  */
     250             : void raid_gen_register(int na, const char *tag, raid_gen_fn *fn);
     251             : void raid_rec_register(int na, const char *tag, raid_rec_fn *fn);
     252             : 
     253             : /** 
     254             :  * Set functions for data recovery.
     255             :  * 
     256             :  * Intended only for testing the recovery function forcing a specific
     257             :  * parity generation for the delta step.
     258             :  *
     259             :  * Each call overwrites the previous setting.
     260             :  *
     261             :  * @np Number of parities.
     262             :  * @fn Function to register.
     263             :  */
     264             : void raid_gen_force(int np, raid_gen_fn *fn);
     265             : 
     266             : /**
     267             :  * Register all the functions based on integer variables.
     268             :  */
     269             : void raid_register_int(void);
     270             : 
     271             : /**
     272             :  * Register all the functions based on x86 instructions.
     273             :  */
     274             : void raid_register_x86(void);
     275             : 
     276             : /*
     277             :  * Tag functions.
     278             :  * 
     279             :  * Given the specified algo code, return the tag of the registered function.
     280             :  */
     281             : const char *raid_gen_tag(int na);
     282             : const char *raid_rec_tag(int na);
     283             : 
     284             : /**
     285             :  * Basic functionality self test.
     286             :  * 
     287             :  * Returns 0 on success.
     288             :  */
     289             : int raid_selftest(void);
     290             : 
     291             : /*
     292             :  * Tables.
     293             :  */
     294             : extern const uint8_t raid_gfmul[256][256] __aligned(256);
     295             : extern const uint8_t raid_gfexp[256] __aligned(256);
     296             : extern const uint8_t raid_gfinv[256] __aligned(256);
     297             : extern const uint8_t raid_gfvandermonde[3][256] __aligned(256);
     298             : extern const uint8_t raid_gfcauchy[6][256] __aligned(256);
     299             : extern const uint8_t raid_gfcauchypshufb[251][4][2][16] __aligned(256);
     300             : extern const uint8_t raid_gfmulpshufb[256][2][16] __aligned(256);
     301             : extern const uint8_t (*raid_gfgen)[256];
     302             : #define gfmul raid_gfmul
     303             : #define gfexp raid_gfexp
     304             : #define gfinv raid_gfinv
     305             : #define gfvandermonde raid_gfvandermonde
     306             : #define gfcauchy raid_gfcauchy
     307             : #define gfgenpshufb raid_gfcauchypshufb
     308             : #define gfmulpshufb raid_gfmulpshufb
     309             : #define gfgen raid_gfgen
     310             : 
     311             : /*
     312             :  * Assembler blocks.
     313             :  */
     314             : #ifdef CONFIG_X86
     315             : #ifdef CONFIG_SSE2
     316             : static __always_inline void raid_sse_begin(void)
     317             : {
     318     1512807 : }
     319             : 
     320             : static __always_inline void raid_sse_end(void)
     321             : {
     322             :         /*
     323             :          * SSE and AVX code uses non-temporal writes, like MOVNTDQ,
     324             :          * that use a weak memory model. To ensure that other processors
     325             :          * correctly see the data written, we use a store-store memory
     326             :          * barrier at the end of the asm code.
     327             :          */
     328     1512807 :         asm volatile ("sfence" : : : "memory");
     329             : 
     330             :         /*
     331             :          * Clobbers the registers used in the asm code.
     332             :          * This is required because in the Windows ABI,
     333             :          * registers xmm6-xmm15 must be preserved by the callee.
     334             :          * This clobber list forces the compiler to save any
     335             :          * register that needs to be saved.
     336             :          * We check for __SSE2__ because we require that the
     337             :          * compiler supports SSE2 registers in the clobber list.
     338             :          */
     339             : #ifdef __SSE2__
     340     1512807 :         asm volatile ("" : : : "%xmm0", "%xmm1", "%xmm2", "%xmm3");
     341     1512807 :         asm volatile ("" : : : "%xmm4", "%xmm5", "%xmm6", "%xmm7");
     342             : #ifdef CONFIG_X86_64
     343     1512807 :         asm volatile ("" : : : "%xmm8", "%xmm9", "%xmm10", "%xmm11");
     344     1512807 :         asm volatile ("" : : : "%xmm12", "%xmm13", "%xmm14", "%xmm15");
     345             : #endif
     346             : #endif
     347     1512807 : }
     348             : #endif
     349             : 
     350             : #ifdef CONFIG_AVX2
     351             : static __always_inline void raid_avx_begin(void)
     352             : {
     353             :         raid_sse_begin();
     354      114289 : }
     355             : 
     356             : static __always_inline void raid_avx_end(void)
     357             : {
     358             :         raid_sse_end();
     359             : 
     360             :         /*
     361             :          * Reset the upper part of the ymm registers
     362             :          * to avoid the 70 clocks penalty on the next
     363             :          * xmm register use
     364             :          */
     365      114289 :         asm volatile ("vzeroupper" : : : "memory");
     366      114289 : }
     367             : #endif
     368             : #endif /* CONFIG_X86 */
     369             : 
     370             : #endif
     371             : 

Generated by: LCOV version 1.0