LCOV - code coverage report
Current view: top level - cmdline - scrub.c (source / functions) Hit Total Coverage
Test: lcov.info Lines: 304 382 79.6 %
Date: 2026-04-29 15:04:44 Functions: 6 7 85.7 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-3.0-or-later
       2             : // Copyright (C) 2013 Andrea Mazzoleni
       3             : 
       4             : #include "portable.h"
       5             : 
       6             : #include "support.h"
       7             : #include "elem.h"
       8             : #include "state.h"
       9             : #include "parity.h"
      10             : #include "handle.h"
      11             : #include "io.h"
      12             : #include "raid/raid.h"
      13             : 
      14             : /****************************************************************************/
      15             : /* scrub */
      16             : 
      17           0 : static const char* es(int err)
      18             : {
      19           0 :         if (is_hw(err))
      20           0 :                 return "error_io";
      21             :         else
      22           0 :                 return "error";
      23             : }
      24             : 
      25             : /**
      26             :  * Per-disk buffer holding the hash recomputed with the new hash while rehashing.
      27             :  */
      28             : struct snapraid_rehash {
      29             :         unsigned char hash[HASH_MAX]; /**< Hash computed with state->hash and state->hashseed. */
      30             :         struct snapraid_block* block; /**< Block the new hash refers to, or 0 if nothing was rehashed for this disk. */
      31             : };
      32             : 
      33             : /**
      34             :  * Scrub plan to use when selecting the blocks to process.
      35             :  */
      36             : struct snapraid_plan {
      37             :         struct snapraid_state* state; /**< State whose block info array is consulted for each block. */
      38             :         int plan; /**< One of the SCRUB_*. */
      39             :         time_t timelimit; /**< Time limit, blocks with a newer time are skipped. Valid only with SCRUB_AUTO. */
      40             :         block_off_t lastlimit; /**< Number of blocks allowed with time exactly at ::timelimit. Valid only with SCRUB_AUTO. */
      41             : };
      42             : 
      43             : /**
      44             :  * Check if we have to process the specified block index ::i.
      45             :  */
      46       57095 : static int block_is_enabled(struct snapraid_plan* plan, block_off_t* countlast, block_off_t i)
      47             : {
      48             :         time_t blocktime;
      49             :         snapraid_info info;
      50             : 
      51             :         /* don't scrub unused blocks in all plans */
      52       57095 :         info = info_get(&plan->state->infoarr, i);
      53       57095 :         if (info == 0)
      54           0 :                 return 0;
      55             : 
      56             :         /* bad blocks are always scrubbed in all plans */
      57       57095 :         if (info_get_bad(info))
      58        1772 :                 return 1;
      59             : 
      60       55323 :         switch (plan->plan) {
      61        9472 :         case SCRUB_FULL :
      62             :                 /* in 'full' plan everything is scrubbed */
      63        9472 :                 return 1;
      64        9275 :         case SCRUB_EVEN :
      65             :                 /* in 'even' plan, scrub only even blocks */
      66        9275 :                 return i % 2 == 0;
      67        4687 :         case SCRUB_NEW :
      68             :                 /* in 'sync' plan, only blocks never scrubbed */
      69        4687 :                 return info_get_justsynced(info);
      70        3767 :         case SCRUB_BAD :
      71             :                 /* in 'bad' plan, only bad blocks (already reported) */
      72        3767 :                 return 0;
      73             :         }
      74             : 
      75             :         /* if it's too new */
      76       28122 :         blocktime = info_get_time(info);
      77       28122 :         if (blocktime > plan->timelimit) {
      78             :                 /* skip it */
      79        5711 :                 return 0;
      80             :         }
      81             : 
      82             :         /*
      83             :          * If the time is less than the limit, always include
      84             :          * otherwise, check if we reached the last limit count
      85             :          */
      86       22411 :         if (blocktime == plan->timelimit) {
      87             :                 /* if we reached the count limit */
      88       13037 :                 if (*countlast >= plan->lastlimit) {
      89             :                         /* skip it */
      90       11546 :                         return 0;
      91             :                 }
      92             : 
      93        1491 :                 ++*countlast;
      94             :         }
      95             : 
      96       10865 :         return 1;
      97             : }
      98             : 
      99      157632 : static void scrub_data_reader(struct snapraid_worker* worker, struct snapraid_task* task)
     100             : {
     101      157632 :         struct snapraid_io* io = worker->io;
     102      157632 :         struct snapraid_state* state = io->state;
     103      157632 :         struct snapraid_handle* handle = worker->handle;
     104      157632 :         struct snapraid_disk* disk = handle->disk;
     105      157632 :         block_off_t blockcur = task->position;
     106      157632 :         unsigned char* buffer = task->buffer;
     107             :         int ret;
     108             :         char esc_buffer[ESC_MAX];
     109             : 
     110             :         /* if the disk position is not used */
     111      157632 :         if (!disk) {
     112             :                 /* use an empty block */
     113           0 :                 memset(buffer, 0, state->block_size);
     114           0 :                 task->state = TASK_STATE_DONE;
     115        1818 :                 return;
     116             :         }
     117             : 
     118             :         /* get the block */
     119      157632 :         task->block = fs_par2block_find(disk, blockcur);
     120             : 
     121             :         /* if the block is not used */
     122      157632 :         if (!block_has_file(task->block)) {
     123             :                 /* use an empty block */
     124        1818 :                 memset(buffer, 0, state->block_size);
     125        1818 :                 task->state = TASK_STATE_DONE;
     126        1818 :                 return;
     127             :         }
     128             : 
     129             :         /* get the file of this block */
     130      155814 :         task->file = fs_par2file_get(disk, blockcur, &task->file_pos);
     131             : 
     132             :         /* if the file is different than the current one, close it */
     133      155814 :         if (handle->file != 0 && handle->file != task->file) {
     134             :                 /* keep a pointer at the file we are going to close for error reporting */
     135       75677 :                 struct snapraid_file* report = handle->file;
     136       75677 :                 ret = handle_close(handle);
     137       75677 :                 if (ret == -1) {
     138             :                         /* LCOV_EXCL_START */
     139             :                         /*
     140             :                          * This one is really an unexpected error, because we are only reading
     141             :                          * and closing a descriptor should never fail
     142             :                          */
     143             :                         log_tag("%s:%u:%s:%s: Close error. %s.\n", es(errno), blockcur, disk->name, esc_tag(report->sub, esc_buffer), strerror(errno));
     144             :                         log_fatal_errno(errno, disk->name);
     145             :                         log_fatal(errno, "Stopping at block %u\n", blockcur);
     146             : 
     147             :                         if (is_hw(errno)) {
     148             :                                 task->state = TASK_STATE_IOERROR;
     149             :                         } else {
     150             :                                 task->state = TASK_STATE_ERROR;
     151             :                         }
     152             :                         return;
     153             :                         /* LCOV_EXCL_STOP */
     154             :                 }
     155             :         }
     156             : 
     157      155814 :         ret = handle_open(handle, task->file, state->file_mode, log_error, 0); /* for missing file don't output a message */
     158      155814 :         if (ret == -1) {
     159           0 :                 log_tag("%s:%u:%s:%s: Open error. %s.\n", es(errno), blockcur, disk->name, esc_tag(task->file->sub, esc_buffer), strerror(errno));
     160           0 :                 if (is_hw(errno)) {
     161             :                         /* LCOV_EXCL_START */
     162             :                         log_fatal_errno(errno, disk->name);
     163             :                         log_fatal(errno, "Stopping at block %u\n", blockcur);
     164             :                         task->state = TASK_STATE_IOERROR;
     165             :                         return;
     166             :                         /* LCOV_EXCL_STOP */
     167             :                 }
     168             : 
     169           0 :                 task->state = TASK_STATE_ERROR_CONTINUE;
     170           0 :                 return;
     171             :         }
     172             : 
     173             :         /* check if the file is changed */
     174      155814 :         if (handle->st.st_size != task->file->size
     175      155814 :                 || handle->st.st_mtime != task->file->mtime_sec
     176      155814 :                 || STAT_NSEC(&handle->st) != task->file->mtime_nsec
     177             :                 /* don't check the inode to support filesystem without persistent inodes */
     178             :         ) {
     179             :                 /* report that the block and the file are not synced */
     180           0 :                 task->is_timestamp_different = 1;
     181             :                 /* follow */
     182             :         }
     183             : 
     184             :         /*
     185             :          * Note that we intentionally don't abort if the file has different attributes
     186             :          * from the last sync, as we are expected to return errors if running
     187             :          * in an unsynced array. This is just like the check command.
     188             :          */
     189             : 
     190      155814 :         task->read_size = handle_read(handle, task->file_pos, buffer, state->block_size, log_error, 0);
     191      155814 :         if (task->read_size == -1) {
     192           0 :                 log_tag("%s:%u:%s:%s: Read error at position %u. %s.\n", es(errno), blockcur, disk->name, esc_tag(task->file->sub, esc_buffer), task->file_pos, strerror(errno));
     193           0 :                 if (is_hw(errno)) {
     194             :                         /* LCOV_EXCL_START */
     195             :                         log_error_errno(errno, disk->name);
     196             :                         task->state = TASK_STATE_IOERROR_CONTINUE;
     197             :                         return;
     198             :                         /* LCOV_EXCL_STOP */
     199             :                 }
     200             : 
     201           0 :                 task->state = TASK_STATE_ERROR_CONTINUE;
     202           0 :                 return;
     203             :         }
     204             : 
     205             :         /* store the path of the opened file */
     206      155814 :         pathcpy(task->path, sizeof(task->path), handle->path);
     207             : 
     208      155814 :         task->state = TASK_STATE_DONE;
     209             : }
     210             : 
     211      155732 : static void scrub_parity_reader(struct snapraid_worker* worker, struct snapraid_task* task)
     212             : {
     213      155732 :         struct snapraid_io* io = worker->io;
     214      155732 :         struct snapraid_state* state = io->state;
     215      155732 :         struct snapraid_parity_handle* parity_handle = worker->parity_handle;
     216      155732 :         unsigned level = parity_handle->level;
     217      155732 :         block_off_t blockcur = task->position;
     218      155732 :         unsigned char* buffer = task->buffer;
     219             :         int ret;
     220             : 
     221             :         /* read the parity */
     222      155732 :         ret = parity_read(parity_handle, blockcur, buffer, state->block_size, log_error);
     223      155732 :         if (ret == -1) {
     224           0 :                 log_tag("parity_%s:%u:%s: Read error. %s.\n", es(errno), blockcur, lev_config_name(level), strerror(errno));
     225           0 :                 if (is_hw(errno)) {
     226             :                         /* LCOV_EXCL_START */
     227             :                         log_error_errno(errno, lev_config_name(level));
     228             :                         task->state = TASK_STATE_IOERROR_CONTINUE;
     229             :                         return;
     230             :                         /* LCOV_EXCL_STOP */
     231             :                 }
     232             : 
     233           0 :                 task->state = TASK_STATE_ERROR_CONTINUE;
     234           0 :                 return;
     235             :         }
     236             : 
     237      155732 :         task->state = TASK_STATE_DONE;
     238             : }
     239             : 
     240          12 : static int state_scrub_process(struct snapraid_state* state, struct snapraid_parity_handle* parity_handle, block_off_t blockstart, block_off_t blockmax, struct snapraid_plan* plan, time_t now)
     241             : {
     242             :         struct snapraid_io io;
     243             :         struct snapraid_handle* handle;
     244             :         void* rehandle_alloc;
     245             :         struct snapraid_rehash* rehandle;
     246             :         unsigned diskmax;
     247             :         block_off_t blockcur;
     248             :         unsigned j;
     249             :         unsigned buffermax;
     250             :         data_off_t countsize;
     251             :         block_off_t countpos;
     252             :         block_off_t countmax;
     253             :         block_off_t countlast;
     254             :         block_off_t autosavedone;
     255             :         block_off_t autosavelimit;
     256             :         block_off_t autosavemissing;
     257             :         int ret;
     258             :         unsigned soft_error;
     259             :         unsigned silent_error;
     260             :         unsigned io_error;
     261             :         unsigned l;
     262             :         unsigned* waiting_map;
     263             :         unsigned waiting_mac;
     264             :         char esc_buffer[ESC_MAX];
     265             :         bit_vect_t* block_enabled;
     266             : 
     267             :         /* maps the disks to handles */
     268          12 :         handle = handle_mapping(state, &diskmax);
     269             : 
     270             :         /* rehash buffers */
     271          12 :         rehandle = malloc_nofail_align(diskmax * sizeof(struct snapraid_rehash), &rehandle_alloc);
     272             : 
     273             :         /* we need 1 * data + 2 * parity */
     274          12 :         buffermax = diskmax + 2 * state->level;
     275             : 
     276             :         /* initialize the io threads */
     277          12 :         io_init(&io, state, state->opt.io_cache, buffermax, scrub_data_reader, handle, diskmax, scrub_parity_reader, 0, parity_handle, state->level);
     278             : 
     279             :         /* possibly waiting disks */
     280          12 :         waiting_mac = diskmax > RAID_PARITY_MAX ? diskmax : RAID_PARITY_MAX;
     281          12 :         waiting_map = malloc_nofail(waiting_mac * sizeof(unsigned));
     282             : 
     283          12 :         soft_error = 0;
     284          12 :         silent_error = 0;
     285          12 :         io_error = 0;
     286             : 
     287          12 :         msg_progress("Selecting...\n");
     288             : 
     289             :         /* first count the number of blocks to process */
     290          12 :         countmax = 0;
     291          12 :         countlast = 0;
     292          12 :         block_enabled = calloc_nofail(1, bit_vect_size(blockmax)); /* preinitialize to 0 */
     293       57107 :         for (blockcur = blockstart; blockcur < blockmax; ++blockcur) {
     294       57095 :                 if (!block_is_enabled(plan, &countlast, blockcur))
     295       30348 :                         continue;
     296       26747 :                 bit_vect_set(block_enabled, blockcur);
     297       26747 :                 ++countmax;
     298             :         }
     299             : 
     300             :         /*
     301             :          * Compute the autosave size for all disk, even if not read
     302             :          * this makes sense because the speed should be almost the same
     303             :          * if the disks are read in parallel
     304             :          */
     305          12 :         autosavelimit = state->autosave / (diskmax * state->block_size);
     306          12 :         autosavemissing = countmax; /* blocks to do */
     307          12 :         autosavedone = 0; /* blocks done */
     308             : 
     309             :         /* drop until now */
     310          12 :         state_usage_waste(state);
     311             : 
     312          12 :         countsize = 0;
     313          12 :         countpos = 0;
     314             : 
     315          12 :         msg_progress("Scrubbing...\n");
     316             : 
     317             :         /* start all the worker threads */
     318          12 :         io_start(&io, blockstart, blockmax, block_enabled);
     319             : 
     320          12 :         int alert = state_progress_begin(state, blockstart, blockmax, countmax);
     321          12 :         if (alert > 0)
     322           0 :                 goto end;
     323          12 :         if (alert < 0)
     324           0 :                 goto bail;
     325             : 
     326       26747 :         while (1) {
     327             :                 unsigned char* buffer_recov[LEV_MAX];
     328             :                 snapraid_info info;
     329             :                 int error_on_this_block;
     330             :                 int silent_error_on_this_block;
     331             :                 int io_error_on_this_block;
     332             :                 int block_is_unsynced;
     333             :                 int rehash;
     334             :                 void** buffer;
     335             : 
     336             :                 /* go to the next block */
     337       26759 :                 blockcur = io_read_next(&io, &buffer);
     338       26759 :                 if (blockcur >= blockmax)
     339          12 :                         break;
     340             : 
     341             :                 /* until now is scheduling */
     342       26747 :                 state_usage_sched(state);
     343             : 
     344             :                 /* one more block processed for autosave */
     345       26747 :                 ++autosavedone;
     346       26747 :                 --autosavemissing;
     347             : 
     348             :                 /* by default process the block, and skip it if something goes wrong */
     349       26747 :                 error_on_this_block = 0;
     350       26747 :                 silent_error_on_this_block = 0;
     351       26747 :                 io_error_on_this_block = 0;
     352             : 
     353             :                 /*
     354             :                  * If all the blocks at this address are synced
     355             :                  * if not, parity is not even checked
     356             :                  */
     357       26747 :                 block_is_unsynced = 0;
     358             : 
     359             :                 /* get block specific info */
     360       26747 :                 info = info_get(&state->infoarr, blockcur);
     361             : 
     362             :                 /* if we have to use the old hash */
     363       26747 :                 rehash = info_get_rehash(info);
     364             : 
     365             :                 /* for each disk, process the block */
     366      184379 :                 for (j = 0; j < diskmax; ++j) {
     367             :                         struct snapraid_task* task;
     368             :                         int read_size;
     369             :                         unsigned char hash[HASH_MAX];
     370             :                         struct snapraid_block* block;
     371             :                         int file_is_unsynced;
     372             :                         struct snapraid_disk* disk;
     373             :                         struct snapraid_file* file;
     374             :                         block_off_t file_pos;
     375             :                         unsigned diskcur;
     376             : 
     377             :                         /*
     378             :                          * If the file on this disk is synced
     379             :                          * if not, silent errors are assumed as expected error
     380             :                          */
     381      157632 :                         file_is_unsynced = 0;
     382             : 
     383             :                         /* until now is misc */
     384      157632 :                         state_usage_misc(state);
     385             : 
     386             :                         /* get the next task */
     387      157632 :                         task = io_data_read(&io, &diskcur, waiting_map, &waiting_mac);
     388             : 
     389             :                         /* until now is disk */
     390      157632 :                         state_usage_disk(state, handle, waiting_map, waiting_mac);
     391             : 
     392             :                         /* get the task results */
     393      157632 :                         disk = task->disk;
     394      157632 :                         block = task->block;
     395      157632 :                         file = task->file;
     396      157632 :                         file_pos = task->file_pos;
     397      157632 :                         read_size = task->read_size;
     398             : 
     399             :                         /* by default no rehash in case of "continue" */
     400      157632 :                         rehandle[diskcur].block = 0;
     401             : 
     402             :                         /* if the disk position is not used */
     403      157632 :                         if (!disk)
     404        2738 :                                 continue;
     405             : 
     406      157632 :                         state_usage_file(state, disk, file);
     407             : 
     408             :                         /* if the block is unsynced, errors are expected */
     409      157632 :                         if (block_has_invalid_parity(block)) {
     410             :                                 /* report that the block and the file are not synced */
     411           0 :                                 block_is_unsynced = 1;
     412           0 :                                 file_is_unsynced = 1;
     413             :                                 /* follow */
     414             :                         }
     415             : 
     416             :                         /* if the block is not used */
     417      157632 :                         if (!block_has_file(block))
     418        1818 :                                 continue;
     419             : 
     420             :                         /* if the block is unsynced, errors are expected */
     421      155814 :                         if (task->is_timestamp_different) {
     422             :                                 /* report that the block and the file are not synced */
     423           0 :                                 block_is_unsynced = 1;
     424           0 :                                 file_is_unsynced = 1;
     425             :                                 /* follow */
     426             :                         }
     427             : 
     428             :                         /* handle error conditions */
     429      155814 :                         if (task->state == TASK_STATE_IOERROR) {
     430             :                                 /* LCOV_EXCL_START */
     431             :                                 ++io_error;
     432             :                                 goto bail;
     433             :                                 /* LCOV_EXCL_STOP */
     434             :                         }
     435      155814 :                         if (task->state == TASK_STATE_ERROR) {
     436             :                                 /* LCOV_EXCL_START */
     437             :                                 ++soft_error;
     438             :                                 goto bail;
     439             :                                 /* LCOV_EXCL_STOP */
     440             :                         }
     441      155814 :                         if (task->state == TASK_STATE_ERROR_CONTINUE) {
     442           0 :                                 ++soft_error;
     443           0 :                                 error_on_this_block = 1;
     444           0 :                                 continue;
     445             :                         }
     446      155814 :                         if (task->state == TASK_STATE_IOERROR_CONTINUE) {
     447           0 :                                 ++io_error;
     448           0 :                                 if (io_error >= state->opt.io_error_limit) {
     449             :                                         /* LCOV_EXCL_START */
     450             :                                         log_fatal(EIO, "DANGER! Too many input/output errors in the %s disk. It isn't possible to continue.\n", disk->dir);
     451             :                                         log_fatal(EIO, "Stopping at block %u\n", blockcur);
     452             :                                         goto bail;
     453             :                                         /* LCOV_EXCL_STOP */
     454             :                                 }
     455             : 
     456             :                                 /* otherwise continue */
     457           0 :                                 io_error_on_this_block = 1;
     458           0 :                                 continue;
     459             :                         }
     460      155814 :                         if (task->state != TASK_STATE_DONE) {
     461             :                                 /* LCOV_EXCL_START */
     462             :                                 log_fatal(EINTERNAL, "Internal inconsistency in task state\n");
     463             :                                 os_abort();
     464             :                                 /* LCOV_EXCL_STOP */
     465             :                         }
     466             : 
     467      155814 :                         countsize += read_size;
     468             : 
     469             :                         /* now compute the hash */
     470      155814 :                         if (rehash) {
     471       27203 :                                 memhash(state->prevhash, state->prevhashseed, hash, buffer[diskcur], read_size);
     472             : 
     473             :                                 /* compute the new hash, and store it */
     474       27203 :                                 rehandle[diskcur].block = block;
     475       27203 :                                 memhash(state->hash, state->hashseed, rehandle[diskcur].hash, buffer[diskcur], read_size);
     476             :                         } else {
     477      128611 :                                 memhash(state->hash, state->hashseed, hash, buffer[diskcur], read_size);
     478             :                         }
     479             : 
     480             :                         /* until now is hash */
     481      155814 :                         state_usage_hash(state);
     482             : 
     483      155814 :                         if (block_has_updated_hash(block)) {
     484             :                                 /* compare the hash */
     485      155814 :                                 if (memcmp(hash, block->hash, BLOCK_HASH_SIZE) != 0) {
     486         920 :                                         unsigned diff = memdiff(hash, block->hash, BLOCK_HASH_SIZE);
     487             : 
     488             :                                         /* it's a silent error only if we are dealing with synced files */
     489         920 :                                         if (file_is_unsynced) {
     490           0 :                                                 log_tag("error:%u:%s:%s: Data error at position %u, diff hash bits %u/%u\n", blockcur, disk->name, esc_tag(file->sub, esc_buffer), file_pos, diff, BLOCK_HASH_SIZE * 8);
     491           0 :                                                 ++soft_error;
     492           0 :                                                 error_on_this_block = 1;
     493             :                                         } else {
     494         920 :                                                 log_tag("error_data:%u:%s:%s: Data error at position %u, diff hash bits %u/%u\n", blockcur, disk->name, esc_tag(file->sub, esc_buffer), file_pos, diff, BLOCK_HASH_SIZE * 8);
     495         920 :                                                 log_error(EDATA, "Data error in file '%s' at position '%u', diff hash bits %u/%u\n", task->path, file_pos, diff, BLOCK_HASH_SIZE * 8);
     496         920 :                                                 ++silent_error;
     497         920 :                                                 silent_error_on_this_block = 1;
     498             :                                         }
     499         920 :                                         continue;
     500             :                                 }
     501             :                         }
     502             :                 }
     503             : 
     504             :                 /* buffers for parity read and not computed */
     505      182479 :                 for (l = 0; l < state->level; ++l)
     506      155732 :                         buffer_recov[l] = buffer[diskmax + state->level + l];
     507       31497 :                 for (; l < LEV_MAX; ++l)
     508        4750 :                         buffer_recov[l] = 0;
     509             : 
     510             :                 /* until now is misc */
     511       26747 :                 state_usage_misc(state);
     512             : 
     513             :                 /* read the parity */
     514      182479 :                 for (l = 0; l < state->level; ++l) {
     515             :                         struct snapraid_task* task;
     516             :                         unsigned levcur;
     517             : 
     518      155732 :                         task = io_parity_read(&io, &levcur, waiting_map, &waiting_mac);
     519             : 
     520             :                         /* until now is parity */
     521      155732 :                         state_usage_parity(state, waiting_map, waiting_mac);
     522             : 
     523             :                         /* handle error conditions */
     524      155732 :                         if (task->state == TASK_STATE_IOERROR) {
     525             :                                 /* LCOV_EXCL_START */
     526             :                                 ++io_error;
     527             :                                 goto bail;
     528             :                                 /* LCOV_EXCL_STOP */
     529             :                         }
     530      155732 :                         if (task->state == TASK_STATE_ERROR) {
     531             :                                 /* LCOV_EXCL_START */
     532             :                                 ++soft_error;
     533             :                                 goto bail;
     534             :                                 /* LCOV_EXCL_STOP */
     535             :                         }
     536      155732 :                         if (task->state == TASK_STATE_ERROR_CONTINUE) {
     537           0 :                                 ++soft_error;
     538           0 :                                 error_on_this_block = 1;
     539             : 
     540             :                                 /* if continuing on error, clear the missing buffer */
     541           0 :                                 buffer_recov[levcur] = 0;
     542           0 :                                 continue;
     543             :                         }
     544      155732 :                         if (task->state == TASK_STATE_IOERROR_CONTINUE) {
     545           0 :                                 ++io_error;
     546           0 :                                 if (io_error >= state->opt.io_error_limit) {
     547             :                                         /* LCOV_EXCL_START */
     548             :                                         log_fatal(EIO, "DANGER! Too many input/output errors in the %s disk. It isn't possible to continue.\n", lev_name(levcur));
     549             :                                         log_fatal(EIO, "Stopping at block %u\n", blockcur);
     550             :                                         goto bail;
     551             :                                         /* LCOV_EXCL_STOP */
     552             :                                 }
     553             : 
     554             :                                 /* otherwise continue */
     555           0 :                                 io_error_on_this_block = 1;
     556             : 
     557             :                                 /* if continuing on error, clear the missing buffer */
     558           0 :                                 buffer_recov[levcur] = 0;
     559           0 :                                 continue;
     560             :                         }
     561      155732 :                         if (task->state != TASK_STATE_DONE) {
     562             :                                 /* LCOV_EXCL_START */
     563             :                                 log_fatal(EINTERNAL, "Internal inconsistency in task state\n");
     564             :                                 os_abort();
     565             :                                 /* LCOV_EXCL_STOP */
     566             :                         }
     567             :                 }
     568             : 
     569             :                 /* if we have read all the data required and it's correct, proceed with the parity check */
     570       26747 :                 if (!error_on_this_block && !silent_error_on_this_block && !io_error_on_this_block) {
     571             : 
     572             :                         /* compute the parity */
     573       25827 :                         raid_gen(diskmax, state->level, state->block_size, buffer);
     574             : 
     575             :                         /* compare the parity */
     576      176039 :                         for (l = 0; l < state->level; ++l) {
     577      150212 :                                 if (buffer_recov[l] && memcmp(buffer[diskmax + l], buffer_recov[l], state->block_size) != 0) {
     578           0 :                                         unsigned diff = memdiff(buffer[diskmax + l], buffer_recov[l], state->block_size);
     579             : 
     580             :                                         /* it's a silent error only if we are dealing with synced blocks */
     581           0 :                                         if (block_is_unsynced) {
     582           0 :                                                 log_tag("parity_error:%u:%s: Data error, diff parity bits %u/%u\n", blockcur, lev_config_name(l), diff, state->block_size * 8);
     583           0 :                                                 ++soft_error;
     584           0 :                                                 error_on_this_block = 1;
     585             :                                         } else {
     586           0 :                                                 log_tag("parity_error_data:%u:%s: Data error, diff parity bits %u/%u\n", blockcur, lev_config_name(l), diff, state->block_size * 8);
     587           0 :                                                 log_error(EDATA, "Data error in parity '%s' at position '%u', diff parity bits %u/%u\n", lev_config_name(l), blockcur, diff, state->block_size * 8);
     588           0 :                                                 ++silent_error;
     589           0 :                                                 silent_error_on_this_block = 1;
     590             :                                         }
     591             :                                 }
     592             :                         }
     593             : 
     594             :                         /* until now is raid */
     595       25827 :                         state_usage_raid(state);
     596             :                 }
     597             : 
     598       26747 :                 if (silent_error_on_this_block || io_error_on_this_block) {
     599             :                         /* set the error status keeping other info */
     600         920 :                         info_set(&state->infoarr, blockcur, info_set_bad(info));
     601       25827 :                 } else if (error_on_this_block) {
     602             :                         /*
     603             :                          * Do nothing, as this is a generic error
     604             :                          * likely caused by a not synced array
     605             :                          */
     606             :                 } else {
     607             :                         /* if rehash is needed */
     608       25827 :                         if (rehash) {
     609             :                                 /* store all the new hash already computed */
     610       32466 :                                 for (j = 0; j < diskmax; ++j) {
     611       27828 :                                         if (rehandle[j].block)
     612       27203 :                                                 memcpy(rehandle[j].block->hash, rehandle[j].hash, BLOCK_HASH_SIZE);
     613             :                                 }
     614             :                         }
     615             : 
     616             :                         /*
     617             :                          * Update the time info of the block
     618             :                          * and clear any other flag
     619             :                          */
     620       25827 :                         info_set(&state->infoarr, blockcur, info_make(now, 0, 0, 0));
     621             :                 }
     622             : 
     623             :                 /* mark the state as needing write */
     624       26747 :                 state->need_write = 1;
     625             : 
     626             :                 /* count the number of processed block */
     627       26747 :                 ++countpos;
     628             : 
     629             :                 /* progress */
     630       26747 :                 if (state_progress(state, &io, blockcur, countpos, countmax, countsize)) {
     631             :                         /* LCOV_EXCL_START */
     632             :                         break;
     633             :                         /* LCOV_EXCL_STOP */
     634             :                 }
     635             : 
     636             :                 /* thermal control */
     637       26747 :                 if (state_thermal_alarm(state)) {
     638             :                         /* until now is misc */
     639           0 :                         state_usage_misc(state);
     640             : 
     641           0 :                         state_progress_stop(state);
     642             : 
     643           0 :                         state_thermal_cooldown(state);
     644             : 
     645           0 :                         state_progress_restart(state);
     646             : 
     647             :                         /* drop until now */
     648           0 :                         state_usage_waste(state);
     649             :                 }
     650             : 
     651             :                 /* autosave */
     652       26747 :                 if (state->autosave != 0
     653           0 :                         && autosavedone >= autosavelimit /* if we have reached the limit */
     654           0 :                         && autosavemissing >= autosavelimit /* if we have at least a full step to do */
     655             :                 ) {
     656           0 :                         autosavedone = 0; /* restart the counter */
     657             : 
     658             :                         /* until now is misc */
     659           0 :                         state_usage_misc(state);
     660             : 
     661           0 :                         state_progress_stop(state);
     662             : 
     663           0 :                         msg_progress("Autosaving...\n");
     664           0 :                         state_write(state);
     665             : 
     666           0 :                         state_progress_restart(state);
     667             : 
     668             :                         /* drop until now */
     669           0 :                         state_usage_waste(state);
     670             :                 }
     671             :         }
     672             : 
     673          12 : end:
     674          12 :         state_progress_end(state, countpos, countmax, countsize, "Nothing to scrub. Use the -p PLAN option to select a different plan, like -p full.\n");
     675             : 
     676             :         /* save the new state if required */
     677          12 :         if (state->need_write || state->opt.force_content_write)
     678          10 :                 state_write(state);
     679             : 
     680          12 :         state_usage_print(state);
     681             : 
     682          12 :         if (soft_error || silent_error || io_error) {
     683           1 :                 msg_status("\n");
     684           1 :                 msg_status("%8u soft errors\n", soft_error);
     685           1 :                 msg_status("%8u io errors\n", io_error);
     686           1 :                 msg_status("%8u data errors\n", silent_error);
     687             :         } else {
     688          11 :                 msg_status("Everything OK\n");
     689             :         }
     690             : 
     691          12 :         if (soft_error)
     692           0 :                 log_fatal(ESOFT, "WARNING! Unexpected soft errors!\n");
     693          12 :         if (io_error)
     694           0 :                 log_fatal(EIO, "DANGER! Unexpected input/output errors! The failing blocks are now marked as bad!\n");
     695          12 :         if (silent_error)
     696           1 :                 log_fatal(EDATA, "DANGER! Unexpected data errors! The failing blocks are now marked as bad!\n");
     697          12 :         if (io_error || silent_error) {
     698           1 :                 log_fatal(ESOFT, "Use 'snapraid status' to list the bad blocks.\n");
     699           1 :                 log_fatal(ESOFT, "Use 'snapraid -e fix' to recover them.\n");
     700           1 :                 log_fatal(ESOFT, "Use 'snapraid -p bad scrub' to recheck after fixing to clear the bad state.\n");
     701             :         }
     702             : 
     703          12 :         log_tag("summary:error_soft:%u\n", soft_error);
     704          12 :         log_tag("summary:error_io:%u\n", io_error);
     705          12 :         log_tag("summary:error_data:%u\n", silent_error);
     706          12 :         if (soft_error + silent_error + io_error == 0)
     707          11 :                 log_tag("summary:exit:ok\n");
     708           1 :         else if (silent_error + io_error == 0)
     709           0 :                 log_tag("summary:exit:warning\n");
     710             :         else
     711           1 :                 log_tag("summary:exit:error\n");
     712          12 :         log_flush();
     713             : 
     714          12 : bail:
     715             :         /* stop all the worker threads */
     716          12 :         io_stop(&io);
     717             : 
     718          81 :         for (j = 0; j < diskmax; ++j) {
     719          69 :                 struct snapraid_file* file = handle[j].file;
     720          69 :                 struct snapraid_disk* disk = handle[j].disk;
     721          69 :                 ret = handle_close(&handle[j]);
     722          69 :                 if (ret == -1) {
     723             :                         /* LCOV_EXCL_START */
     724             :                         log_tag("%s:%u:%s:%s: Close error. %s.\n", es(errno), blockcur, disk->name, esc_tag(file->sub, esc_buffer), strerror(errno));
     725             :                         log_fatal_errno(errno, disk->name);
     726             : 
     727             :                         if (is_hw(errno)) {
     728             :                                 ++io_error;
     729             :                         } else {
     730             :                                 ++soft_error;
     731             :                         }
     732             :                         /* continue, as we are already exiting */
     733             :                         /* LCOV_EXCL_STOP */
     734             :                 }
     735             :         }
     736             : 
     737          12 :         free(handle);
     738          12 :         free(rehandle_alloc);
     739          12 :         free(waiting_map);
     740          12 :         io_done(&io);
     741          12 :         free(block_enabled);
     742             : 
     743          12 :         if (state->opt.expect_recoverable) {
     744           1 :                 if (soft_error + silent_error + io_error == 0)
     745           0 :                         return -1;
     746             :         } else {
     747          11 :                 if (soft_error + silent_error + io_error != 0)
     748           0 :                         return -1;
     749             :         }
     750             : 
     751          12 :         if (alert < 0)
     752           0 :                 return -1;
     753             : 
     754          12 :         return 0;
     755             : }
     756             : 
     757             : /**
     758             :  * Return a * b / c approximated to the upper value.
     759             :  */
     760           2 : static uint32_t md(uint32_t a, uint32_t b, uint32_t c)
     761             : {
     762           2 :         uint64_t v = a;
     763             : 
     764           2 :         v *= b;
     765           2 :         v += c - 1;
     766           2 :         v /= c;
     767             : 
     768           2 :         return v;
     769             : }
     770             : 
     771          12 : int state_scrub(struct snapraid_state* state, int plan100, int olderthan)
     772             : {
     773             :         block_off_t blockmax;
     774             :         block_off_t countlimit;
     775             :         block_off_t count;
     776             :         time_t recentlimit;
     777             :         int ret;
     778             :         struct snapraid_parity_handle parity_handle[LEV_MAX];
     779             :         struct snapraid_plan ps;
     780             :         unsigned process_error;
     781             :         time_t now;
     782             :         unsigned l;
     783             : 
     784             :         /* get the present time */
     785          12 :         now = time(0);
     786             : 
     787          12 :         msg_progress("Initializing...\n");
     788             : 
     789          12 :         if ((plan100 == SCRUB_BAD || plan100 == SCRUB_NEW || plan100 == SCRUB_FULL)
     790           5 :                 && olderthan >= 0) {
     791             :                 /* LCOV_EXCL_START */
     792             :                 log_fatal(EUSER, "You can specify -o, --older-than only with a numeric percentage.\n");
     793             :                 exit(EXIT_FAILURE);
     794             :                 /* LCOV_EXCL_STOP */
     795             :         }
     796             : 
     797          12 :         blockmax = parity_allocated_size(state);
     798             : 
     799             :         /* preinitialize to avoid warnings */
     800          12 :         countlimit = 0;
     801          12 :         recentlimit = 0;
     802             : 
     803          12 :         ps.state = state;
     804          12 :         if (state->opt.force_scrub_even) {
     805           1 :                 ps.plan = SCRUB_EVEN;
     806          11 :         } else if (plan100 == SCRUB_FULL) {
     807           3 :                 ps.plan = SCRUB_FULL;
     808           3 :                 msg_progress("Scrub plan: full. All data blocks will be checked.\n");
     809           8 :         } else if (plan100 == SCRUB_NEW) {
     810           1 :                 ps.plan = SCRUB_NEW;
     811           1 :                 msg_progress("Scrub plan: new. Only blocks that have never been scrubbed will be checked.\n");
     812           7 :         } else if (plan100 == SCRUB_BAD) {
     813           1 :                 ps.plan = SCRUB_BAD;
     814           1 :                 msg_progress("Scrub plan: bad. Only blocks previously marked as bad will be checked.\n");
     815           6 :         } else if (state->opt.force_scrub_at) {
     816             :                 /* scrub the specified amount of blocks */
     817           4 :                 ps.plan = SCRUB_AUTO;
     818           4 :                 countlimit = state->opt.force_scrub_at;
     819           4 :                 recentlimit = now;
     820             :         } else {
     821           2 :                 ps.plan = SCRUB_AUTO;
     822           2 :                 if (plan100 >= 0) {
     823           1 :                         countlimit = md(blockmax, plan100, 10000);
     824             :                 } else {
     825             :                         /* by default scrub 8.33% of the array (100/12=8.(3)) */
     826           1 :                         countlimit = md(blockmax, 1, 12);
     827             :                 }
     828             : 
     829           2 :                 if (olderthan >= 0) {
     830           1 :                         recentlimit = now - olderthan * 24 * 3600;
     831             :                 } else {
     832             :                         /* by default use a 10 day time limit */
     833           1 :                         recentlimit = now - 10 * 24 * 3600;
     834             :                 }
     835             : 
     836           2 :                 if (plan100 >= 0) {
     837           1 :                         if (olderthan >= 0)
     838           0 :                                 msg_progress("Scrub plan: auto. %.1f%% of the array, older than %d days, will be checked.\n", plan100 / 100.0, olderthan);
     839             :                         else
     840           1 :                                 msg_progress("Scrub plan: auto. %.1f%% of the array, older than 10 days, will be checked.\n", plan100 / 100.0);
     841             :                 } else {
     842           1 :                         if (olderthan >= 0)
     843           1 :                                 msg_progress("Scrub plan: auto. 8.3%% of the array, older than %d days, will be checked.\n", olderthan);
     844             :                         else
     845           0 :                                 msg_progress("Scrub plan: auto. 8.3%% of the array, older than 10 days, will be checked.\n");
     846             :                 }
     847             :         }
     848             : 
     849          12 :         count = 0;
     850          38 :         for (tommy_node* j = tommy_list_head(&state->bucketlist); j != 0; j = j->next) {
     851          26 :                 struct snapraid_bucket* bucket = j->data;
     852          26 :                 count += bucket->count_scrubbed + bucket->count_justsynced;
     853             :         }
     854             : 
     855          12 :         if (!count) {
     856             :                 /* LCOV_EXCL_START */
     857             :                 log_fatal(EUSER, "The array is empty.\n");
     858             :                 exit(EXIT_FAILURE);
     859             :                 /* LCOV_EXCL_STOP */
     860             :         }
     861             : 
     862             :         /* compute the limits from count/recentlimit */
     863          12 :         if (ps.plan == SCRUB_AUTO) {
     864             :                 /* no more than the full count */
     865           6 :                 if (countlimit > count)
     866           2 :                         countlimit = count;
     867             : 
     868             :                 /* by default process everything */
     869           6 :                 ps.timelimit = now;
     870           6 :                 ps.lastlimit = 0;
     871             : 
     872           6 :                 tommy_node* j = tommy_list_head(&state->bucketlist);
     873           6 :                 block_off_t processed_count = 0;
     874          11 :                 while (j) {
     875           9 :                         struct snapraid_bucket* bucket = j->data;
     876           9 :                         block_off_t bucket_count = bucket->count_justsynced + bucket->count_scrubbed;
     877             : 
     878           9 :                         if (bucket->time_at > recentlimit) {
     879           1 :                                 ps.timelimit = recentlimit;
     880           1 :                                 ps.lastlimit = 0;
     881           1 :                                 break;
     882             :                         }
     883             : 
     884           8 :                         if (processed_count + bucket_count > countlimit) {
     885           3 :                                 ps.timelimit = bucket->time_at;
     886           3 :                                 ps.lastlimit = countlimit - processed_count;
     887           3 :                                 processed_count = countlimit;
     888           3 :                                 break;
     889             :                         }
     890             : 
     891           5 :                         processed_count += bucket_count;
     892           5 :                         j = j->next;
     893             :                 }
     894             : 
     895             :                 /* if nothing to scrub, disable also other limits */
     896           6 :                 if (processed_count == 0) {
     897           1 :                         ps.timelimit = 0;
     898           1 :                         ps.lastlimit = 0;
     899             :                 }
     900             : 
     901           6 :                 log_tag("count_limit:%u\n", countlimit);
     902           6 :                 log_tag("time_limit:%" PRIu64 "\n", (uint64_t)ps.timelimit);
     903           6 :                 log_tag("last_limit:%u\n", ps.lastlimit);
     904             :         } else {
     905             :                 /* avoid compiler warnings */
     906           6 :                 ps.timelimit = 0;
     907           6 :                 ps.lastlimit = 0;
     908             :         }
     909             : 
     910             :         /* open the file for reading */
     911          79 :         for (l = 0; l < state->level; ++l) {
     912          67 :                 ret = parity_open(&parity_handle[l], &state->parity[l], l, state->file_mode, state->block_size, state->opt.parity_limit_size);
     913          67 :                 if (ret == -1) {
     914             :                         /* LCOV_EXCL_START */
     915             :                         log_tag("parity_%s:%u:%s: Open error. %s.\n", es(errno), blockmax, lev_config_name(l), strerror(errno));
     916             :                         log_fatal_errno(errno, lev_config_name(l));
     917             :                         exit(EXIT_FAILURE);
     918             :                         /* LCOV_EXCL_STOP */
     919             :                 }
     920             :         }
     921             : 
     922          12 :         process_error = 0;
     923             : 
     924          12 :         ret = state_scrub_process(state, parity_handle, 0, blockmax, &ps, now);
     925          12 :         if (ret == -1) {
     926           0 :                 ++process_error;
     927             :                 /* continue, as we are already exiting */
     928             :         }
     929             : 
     930          79 :         for (l = 0; l < state->level; ++l) {
     931          67 :                 ret = parity_close(&parity_handle[l]);
     932          67 :                 if (ret == -1) {
     933             :                         /* LCOV_EXCL_START */
     934             :                         log_tag("parity_%s:%u:%s: Close error. %s.\n", es(errno), blockmax, lev_config_name(l), strerror(errno));
     935             :                         log_fatal_errno(errno, lev_config_name(l));
     936             : 
     937             :                         ++process_error;
     938             :                         /* continue, as we are already exiting */
     939             :                         /* LCOV_EXCL_STOP */
     940             :                 }
     941             :         }
     942             : 
     943          12 :         if (process_error != 0)
     944           0 :                 return -1;
     945          12 :         return 0;
     946             : }
     947             : 

Generated by: LCOV version 1.0