LCOV - lcov.info - cmdline/scrub.c

LCOV - code coverage report

Current view:	top level - cmdline - scrub.c (source / functions)		Hit	Total	Coverage
Test:	lcov.info	Lines:	281	349	80.5 %
Date:	2017-11-06 22:14:04	Functions:	6	6	100.0 %

          Line data    Source code

       1             : /*
       2             :  * Copyright (C) 2013 Andrea Mazzoleni
       3             :  *
       4             :  * This program is free software: you can redistribute it and/or modify
       5             :  * it under the terms of the GNU General Public License as published by
       6             :  * the Free Software Foundation, either version 3 of the License, or
       7             :  * (at your option) any later version.
       8             :  *
       9             :  * This program is distributed in the hope that it will be useful,
      10             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :  * GNU General Public License for more details.
      13             :  *
      14             :  * You should have received a copy of the GNU General Public License
      15             :  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
      16             :  */
      17             : 
      18             : #include "portable.h"
      19             : 
      20             : #include "support.h"
      21             : #include "elem.h"
      22             : #include "state.h"
      23             : #include "parity.h"
      24             : #include "handle.h"
      25             : #include "io.h"
      26             : #include "raid/raid.h"
      27             : 
      28             : /****************************************************************************/
      29             : /* scrub */
      30             : 
      31             : /**
      32             :  * Buffer for storing the new hashes.
      33             :  */
      34             : struct snapraid_rehash {
      35             :         unsigned char hash[HASH_MAX];
      36             :         struct snapraid_block* block;
      37             : };
      38             : 
      39             : /**
      40             :  * Scrub plan to use.
      41             :  */
      42             : struct snapraid_plan {
      43             :         struct snapraid_state* state;
      44             :         int plan; /**< One of the SCRUB_*. */
      45             :         time_t timelimit; /**< Time limit. Valid only with SCRUB_AUTO. */
      46             :         block_off_t lastlimit; /**< Number of blocks allowed with time exactly at ::timelimit. */
      47             :         block_off_t countlast; /**< Counter of blocks with time exactly at ::timelimit. */
      48             : };
      49             : 
      50             : /**
      51             :  * Check if we have to process the specified block index ::i.
      52             :  */
      53      112170 : static int block_is_enabled(void* void_plan, block_off_t i)
      54             : {
      55      112170 :         struct snapraid_plan* plan = void_plan;
      56             :         time_t blocktime;
      57             :         snapraid_info info;
      58             : 
      59             :         /* don't scrub unused blocks in all plans */
      60      112170 :         info = info_get(&plan->state->infoarr, i);
      61      112170 :         if (info == 0)
      62           0 :                 return 0;
      63             : 
      64             :         /* bad blocks are always scrubbed in all plans */
      65      112170 :         if (info_get_bad(info))
      66        3546 :                 return 1;
      67             : 
      68      108624 :         switch (plan->plan) {
      69             :         case SCRUB_FULL :
      70             :                 /* in 'full' plan everything is scrubbed */
      71       17042 :                 return 1;
      72             :         case SCRUB_EVEN :
      73             :                 /* in 'even' plan, scrub only even blocks */
      74       18430 :                 return i % 2 == 0;
      75             :         case SCRUB_NEW :
      76             :                 /* in 'sync' plan, only blocks never scrubbed */
      77        9374 :                 return info_get_justsynced(info);
      78             :         case SCRUB_BAD :
      79             :                 /* in 'bad' plan, only bad blocks (already reported) */
      80        7534 :                 return 0;
      81             :         }
      82             : 
      83             :         /* if it's too new */
      84       56244 :         blocktime = info_get_time(info);
      85       56244 :         if (blocktime > plan->timelimit) {
      86             :                 /* skip it */
      87        9574 :                 return 0;
      88             :         }
      89             : 
      90             :         /* if the time is less than the limit, always include */
      91             :         /* otherwise, check if we reached the last limit count */
      92       46670 :         if (blocktime == plan->timelimit) {
      93             :                 /* if we reached the count limit */
      94       39496 :                 if (plan->countlast >= plan->lastlimit) {
      95             :                         /* skip it */
      96       24940 :                         return 0;
      97             :                 }
      98             : 
      99       14556 :                 ++plan->countlast;
     100             :         }
     101             : 
     102       21730 :         return 1;
     103             : }
     104             : 
     105      168097 : static void scrub_data_reader(struct snapraid_worker* worker, struct snapraid_task* task)
     106             : {
     107      168097 :         struct snapraid_io* io = worker->io;
     108      168097 :         struct snapraid_state* state = io->state;
     109      168097 :         struct snapraid_handle* handle = worker->handle;
     110      168097 :         struct snapraid_disk* disk = handle->disk;
     111      168097 :         block_off_t blockcur = task->position;
     112      168097 :         unsigned char* buffer = task->buffer;
     113             :         int ret;
     114             :         char esc_buffer[ESC_MAX];
     115             : 
     116             :         /* if the disk position is not used */
     117      168097 :         if (!disk) {
     118             :                 /* use an empty block */
     119           0 :                 memset(buffer, 0, state->block_size);
     120           0 :                 task->state = TASK_STATE_DONE;
     121        1572 :                 return;
     122             :         }
     123             : 
     124             :         /* get the block */
     125      168098 :         task->block = fs_par2block_find(disk, blockcur);
     126             : 
     127             :         /* if the block is not used */
     128      167743 :         if (!block_has_file(task->block)) {
     129             :                 /* use an empty block */
     130        1574 :                 memset(buffer, 0, state->block_size);
     131        1574 :                 task->state = TASK_STATE_DONE;
     132        1574 :                 return;
     133             :         }
     134             : 
     135             :         /* get the file of this block */
     136      166321 :         task->file = fs_par2file_get(disk, blockcur, &task->file_pos);
     137             : 
     138             :         /* if the file is different than the current one, close it */
     139      166395 :         if (handle->file != 0 && handle->file != task->file) {
     140             :                 /* keep a pointer at the file we are going to close for error reporting */
     141       78046 :                 struct snapraid_file* report = handle->file;
     142       78046 :                 ret = handle_close(handle);
     143       77385 :                 if (ret == -1) {
     144             :                         /* LCOV_EXCL_START */
     145             :                         /* This one is really an unexpected error, because we are only reading */
     146             :                         /* and closing a descriptor should never fail */
     147             :                         if (errno == EIO) {
     148             :                                 log_tag("error:%u:%s:%s: Close EIO error. %s\n", blockcur, disk->name, esc_tag(report->sub, esc_buffer), strerror(errno));
     149             :                                 log_fatal("DANGER! Unexpected input/output close error in a data disk, it isn't possible to scrub.\n");
     150             :                                 log_fatal("Ensure that disk '%s' is sane and that file '%s' can be accessed.\n", disk->dir, handle->path);
     151             :                                 log_fatal("Stopping at block %u\n", blockcur);
     152             :                                 task->state = TASK_STATE_IOERROR;
     153             :                                 return;
     154             :                         }
     155             : 
     156             :                         log_tag("error:%u:%s:%s: Close error. %s\n", blockcur, disk->name, esc_tag(report->sub, esc_buffer), strerror(errno));
     157             :                         log_fatal("WARNING! Unexpected close error in a data disk, it isn't possible to scrub.\n");
     158             :                         log_fatal("Ensure that file '%s' can be accessed.\n", handle->path);
     159             :                         log_fatal("Stopping at block %u\n", blockcur);
     160             :                         task->state = TASK_STATE_ERROR;
     161             :                         return;
     162             :                         /* LCOV_EXCL_STOP */
     163             :                 }
     164             :         }
     165             : 
     166      165734 :         ret = handle_open(handle, task->file, state->file_mode, log_error, 0);
     167      163716 :         if (ret == -1) {
     168           0 :                 if (errno == EIO) {
     169             :                         /* LCOV_EXCL_START */
     170             :                         log_tag("error:%u:%s:%s: Open EIO error. %s\n", blockcur, disk->name, esc_tag(task->file->sub, esc_buffer), strerror(errno));
     171             :                         log_fatal("DANGER! Unexpected input/output open error in a data disk, it isn't possible to scrub.\n");
     172             :                         log_fatal("Ensure that disk '%s' is sane and that file '%s' can be accessed.\n", disk->dir, handle->path);
     173             :                         log_fatal("Stopping at block %u\n", blockcur);
     174             :                         task->state = TASK_STATE_IOERROR;
     175             :                         return;
     176             :                         /* LCOV_EXCL_STOP */
     177             :                 }
     178             : 
     179           0 :                 log_tag("error:%u:%s:%s: Open error. %s\n", blockcur, disk->name, esc_tag(task->file->sub, esc_buffer), strerror(errno));
     180           0 :                 task->state = TASK_STATE_ERROR_CONTINUE;
     181           0 :                 return;
     182             :         }
     183             : 
     184             :         /* check if the file is changed */
     185      163716 :         if (handle->st.st_size != task->file->size
     186      164597 :                 || handle->st.st_mtime != task->file->mtime_sec
     187      159902 :                 || STAT_NSEC(&handle->st) != task->file->mtime_nsec
     188             :                 /* don't check the inode to support filesystem without persistent inodes */
     189             :         ) {
     190             :                 /* report that the block and the file are not synced */
     191        5598 :                 task->is_timestamp_different = 1;
     192             :                 /* follow */
     193             :         }
     194             : 
     195             :         /* note that we intentionally don't abort if the file has different attributes */
     196             :         /* from the last sync, as we are expected to return errors if running */
     197             :         /* in an unsynced array. This is just like the check command. */
     198             : 
     199      163716 :         task->read_size = handle_read(handle, task->file_pos, buffer, state->block_size, log_error, 0);
     200      165693 :         if (task->read_size == -1) {
     201           0 :                 if (errno == EIO) {
     202           0 :                         log_tag("error:%u:%s:%s: Read EIO error at position %u. %s\n", blockcur, disk->name, esc_tag(task->file->sub, esc_buffer), task->file_pos, strerror(errno));
     203           0 :                         log_error("Input/Output error in file '%s' at position '%u'\n", handle->path, task->file_pos);
     204           0 :                         task->state = TASK_STATE_IOERROR_CONTINUE;
     205           0 :                         return;
     206             :                 }
     207             : 
     208           0 :                 log_tag("error:%u:%s:%s: Read error at position %u. %s\n", blockcur, disk->name, esc_tag(task->file->sub, esc_buffer), task->file_pos, strerror(errno));
     209           0 :                 task->state = TASK_STATE_ERROR_CONTINUE;
     210           0 :                 return;
     211             :         }
     212             : 
     213             :         /* store the path of the opened file */
     214      165693 :         pathcpy(task->path, sizeof(task->path), handle->path);
     215             : 
     216      163863 :         task->state = TASK_STATE_DONE;
     217             : }
     218             : 
     219      168123 : static void scrub_parity_reader(struct snapraid_worker* worker, struct snapraid_task* task)
     220             : {
     221      168123 :         struct snapraid_io* io = worker->io;
     222      168123 :         struct snapraid_state* state = io->state;
     223      168123 :         struct snapraid_parity_handle* parity_handle = worker->parity_handle;
     224      168123 :         unsigned level = parity_handle->level;
     225      168123 :         block_off_t blockcur = task->position;
     226      168123 :         unsigned char* buffer = task->buffer;
     227             :         int ret;
     228             : 
     229             :         /* read the parity */
     230      168123 :         ret = parity_read(parity_handle, blockcur, buffer, state->block_size, log_error);
     231      168038 :         if (ret == -1) {
     232           0 :                 if (errno == EIO) {
     233           0 :                         log_tag("parity_error:%u:%s: Read EIO error. %s\n", blockcur, lev_config_name(level), strerror(errno));
     234           0 :                         log_error("Input/Output error in parity '%s' at position '%u'\n", lev_config_name(level), blockcur);
     235           0 :                         task->state = TASK_STATE_IOERROR_CONTINUE;
     236           0 :                         return;
     237             :                 }
     238             : 
     239           0 :                 log_tag("parity_error:%u:%s: Read error. %s\n", blockcur, lev_config_name(level), strerror(errno));
     240           0 :                 task->state = TASK_STATE_ERROR_CONTINUE;
     241           0 :                 return;
     242             :         }
     243             : 
     244      168038 :         task->state = TASK_STATE_DONE;
     245             : }
     246             : 
     247          11 : static int state_scrub_process(struct snapraid_state* state, struct snapraid_parity_handle* parity_handle, block_off_t blockstart, block_off_t blockmax, struct snapraid_plan* plan, time_t now)
     248             : {
     249             :         struct snapraid_io io;
     250             :         struct snapraid_handle* handle;
     251             :         void* rehandle_alloc;
     252             :         struct snapraid_rehash* rehandle;
     253             :         unsigned diskmax;
     254             :         block_off_t blockcur;
     255             :         unsigned j;
     256             :         unsigned buffermax;
     257             :         data_off_t countsize;
     258             :         block_off_t countpos;
     259             :         block_off_t countmax;
     260             :         block_off_t autosavedone;
     261             :         block_off_t autosavelimit;
     262             :         block_off_t autosavemissing;
     263             :         int ret;
     264             :         unsigned error;
     265             :         unsigned silent_error;
     266             :         unsigned io_error;
     267             :         unsigned l;
     268             :         unsigned* waiting_map;
     269             :         unsigned waiting_mac;
     270             :         char esc_buffer[ESC_MAX];
     271             : 
     272             :         /* maps the disks to handles */
     273          11 :         handle = handle_mapping(state, &diskmax);
     274             : 
     275             :         /* rehash buffers */
     276          11 :         rehandle = malloc_nofail_align(diskmax * sizeof(struct snapraid_rehash), &rehandle_alloc);
     277             : 
     278             :         /* we need 1 * data + 2 * parity */
     279          11 :         buffermax = diskmax + 2 * state->level;
     280             : 
     281             :         /* initialize the io threads */
     282          11 :         io_init(&io, state, state->opt.io_cache, buffermax, scrub_data_reader, handle, diskmax, scrub_parity_reader, 0, parity_handle, state->level);
     283             : 
     284             :         /* possibly waiting disks */
     285          11 :         waiting_mac = diskmax > RAID_PARITY_MAX ? diskmax : RAID_PARITY_MAX;
     286          11 :         waiting_map = malloc_nofail(waiting_mac * sizeof(unsigned));
     287             : 
     288          11 :         error = 0;
     289          11 :         silent_error = 0;
     290          11 :         io_error = 0;
     291             : 
     292             :         /* first count the number of blocks to process */
     293          11 :         countmax = 0;
     294          11 :         plan->countlast = 0;
     295       56096 :         for (blockcur = blockstart; blockcur < blockmax; ++blockcur) {
     296       56085 :                 if (!block_is_enabled(plan, blockcur))
     297       28053 :                         continue;
     298       28032 :                 ++countmax;
     299             :         }
     300             : 
     301             :         /* compute the autosave size for all disk, even if not read */
     302             :         /* this makes sense because the speed should be almost the same */
     303             :         /* if the disks are read in parallel */
     304          11 :         autosavelimit = state->autosave / (diskmax * state->block_size);
     305          11 :         autosavemissing = countmax; /* blocks to do */
     306          11 :         autosavedone = 0; /* blocks done */
     307             : 
     308             :         /* drop until now */
     309          11 :         state_usage_waste(state);
     310             : 
     311          11 :         countsize = 0;
     312          11 :         countpos = 0;
     313          11 :         plan->countlast = 0;
     314             : 
     315             :         /* start all the worker threads */
     316          11 :         io_start(&io, blockstart, blockmax, &block_is_enabled, plan);
     317             : 
     318          11 :         state_progress_begin(state, blockstart, blockmax, countmax);
     319             :         while (1) {
     320             :                 unsigned char* buffer_recov[LEV_MAX];
     321             :                 snapraid_info info;
     322             :                 int error_on_this_block;
     323             :                 int silent_error_on_this_block;
     324             :                 int io_error_on_this_block;
     325             :                 int block_is_unsynced;
     326             :                 int rehash;
     327             :                 void** buffer;
     328             : 
     329             :                 /* go to the next block */
     330       28043 :                 blockcur = io_read_next(&io, &buffer);
     331       28043 :                 if (blockcur >= blockmax)
     332          22 :                         break;
     333             : 
     334             :                 /* until now is scheduling */
     335       28032 :                 state_usage_sched(state);
     336             : 
     337             :                 /* one more block processed for autosave */
     338       28032 :                 ++autosavedone;
     339       28032 :                 --autosavemissing;
     340             : 
     341             :                 /* by default process the block, and skip it if something goes wrong */
     342       28032 :                 error_on_this_block = 0;
     343       28032 :                 silent_error_on_this_block = 0;
     344       28032 :                 io_error_on_this_block = 0;
     345             : 
     346             :                 /* if all the blocks at this address are synced */
     347             :                 /* if not, parity is not even checked */
     348       28032 :                 block_is_unsynced = 0;
     349             : 
     350             :                 /* get block specific info */
     351       28032 :                 info = info_get(&state->infoarr, blockcur);
     352             : 
     353             :                 /* if we have to use the old hash */
     354       28032 :                 rehash = info_get_rehash(info);
     355             : 
     356             :                 /* for each disk, process the block */
     357      196224 :                 for (j = 0; j < diskmax; ++j) {
     358             :                         struct snapraid_task* task;
     359             :                         int read_size;
     360             :                         unsigned char hash[HASH_MAX];
     361             :                         struct snapraid_block* block;
     362             :                         int file_is_unsynced;
     363             :                         struct snapraid_disk* disk;
     364             :                         struct snapraid_file* file;
     365             :                         block_off_t file_pos;
     366             :                         unsigned diskcur;
     367             : 
     368             :                         /* if the file on this disk is synced */
     369             :                         /* if not, silent errors are assumed as expected error */
     370      168192 :                         file_is_unsynced = 0;
     371             : 
     372             :                         /* until now is misc */
     373      168192 :                         state_usage_misc(state);
     374             : 
     375             :                         /* get the next task */
     376      168192 :                         task = io_data_read(&io, &diskcur, waiting_map, &waiting_mac);
     377             : 
     378             :                         /* until now is disk */
     379      168192 :                         state_usage_disk(state, handle, waiting_map, waiting_mac);
     380             : 
     381             :                         /* get the task results */
     382      168192 :                         disk = task->disk;
     383      168192 :                         block = task->block;
     384      168192 :                         file = task->file;
     385      168192 :                         file_pos = task->file_pos;
     386      168192 :                         read_size = task->read_size;
     387             : 
     388             :                         /* by default no rehash in case of "continue" */
     389      168192 :                         rehandle[diskcur].block = 0;
     390             : 
     391             :                         /* if the disk position is not used */
     392      168192 :                         if (!disk)
     393        2505 :                                 continue;
     394             : 
     395             :                         /* if the block is unsynced, errors are expected */
     396      168192 :                         if (block_has_invalid_parity(block)) {
     397             :                                 /* report that the block and the file are not synced */
     398           0 :                                 block_is_unsynced = 1;
     399           0 :                                 file_is_unsynced = 1;
     400             :                                 /* follow */
     401             :                         }
     402             : 
     403             :                         /* if the block is not used */
     404      168192 :                         if (!block_has_file(block))
     405        1585 :                                 continue;
     406             : 
     407             :                         /* if the block is unsynced, errors are expected */
     408      166607 :                         if (task->is_timestamp_different) {
     409             :                                 /* report that the block and the file are not synced */
     410        5598 :                                 block_is_unsynced = 1;
     411        5598 :                                 file_is_unsynced = 1;
     412             :                                 /* follow */
     413             :                         }
     414             : 
     415             :                         /* handle error conditions */
     416      166607 :                         if (task->state == TASK_STATE_IOERROR) {
     417             :                                 /* LCOV_EXCL_START */
     418             :                                 ++io_error;
     419             :                                 goto bail;
     420             :                                 /* LCOV_EXCL_STOP */
     421             :                         }
     422      166607 :                         if (task->state == TASK_STATE_ERROR) {
     423             :                                 /* LCOV_EXCL_START */
     424             :                                 ++error;
     425             :                                 goto bail;
     426             :                                 /* LCOV_EXCL_STOP */
     427             :                         }
     428      166607 :                         if (task->state == TASK_STATE_ERROR_CONTINUE) {
     429           0 :                                 ++error;
     430           0 :                                 error_on_this_block = 1;
     431           0 :                                 continue;
     432             :                         }
     433      166607 :                         if (task->state == TASK_STATE_IOERROR_CONTINUE) {
     434           0 :                                 ++io_error;
     435           0 :                                 if (io_error >= state->opt.io_error_limit) {
     436             :                                         /* LCOV_EXCL_START */
     437             :                                         log_fatal("DANGER! Too many input/output read error in a data disk, it isn't possible to scrub.\n");
     438             :                                         log_fatal("Ensure that disk '%s' is sane and that file '%s' can be accessed.\n", disk->dir, task->path);
     439             :                                         log_fatal("Stopping at block %u\n", blockcur);
     440             :                                         goto bail;
     441             :                                         /* LCOV_EXCL_STOP */
     442             :                                 }
     443             : 
     444             :                                 /* otherwise continue */
     445           0 :                                 io_error_on_this_block = 1;
     446           0 :                                 continue;
     447             :                         }
     448      166607 :                         if (task->state != TASK_STATE_DONE) {
     449             :                                 /* LCOV_EXCL_START */
     450             :                                 log_fatal("Internal inconsistency in task state\n");
     451             :                                 os_abort();
     452             :                                 /* LCOV_EXCL_STOP */
     453             :                         }
     454             : 
     455      166607 :                         countsize += read_size;
     456             : 
     457             :                         /* now compute the hash */
     458      166607 :                         if (rehash) {
     459       27247 :                                 memhash(state->prevhash, state->prevhashseed, hash, buffer[diskcur], read_size);
     460             : 
     461             :                                 /* compute the new hash, and store it */
     462       27247 :                                 rehandle[diskcur].block = block;
     463       27247 :                                 memhash(state->hash, state->hashseed, rehandle[diskcur].hash, buffer[diskcur], read_size);
     464             :                         } else {
     465      139360 :                                 memhash(state->hash, state->hashseed, hash, buffer[diskcur], read_size);
     466             :                         }
     467             : 
     468             :                         /* until now is hash */
     469      166607 :                         state_usage_hash(state);
     470             : 
     471      166607 :                         if (block_has_updated_hash(block)) {
     472             :                                 /* compare the hash */
     473      166607 :                                 if (memcmp(hash, block->hash, BLOCK_HASH_SIZE) != 0) {
     474         920 :                                         unsigned diff = memdiff(hash, block->hash, BLOCK_HASH_SIZE);
     475             : 
     476         920 :                                         log_tag("error:%u:%s:%s: Data error at position %u, diff bits %u/%u\n", blockcur, disk->name, esc_tag(file->sub, esc_buffer), file_pos, diff, BLOCK_HASH_SIZE * 8);
     477             : 
     478             :                                         /* it's a silent error only if we are dealing with synced files */
     479         920 :                                         if (file_is_unsynced) {
     480           0 :                                                 ++error;
     481           0 :                                                 error_on_this_block = 1;
     482             :                                         } else {
     483         920 :                                                 log_error("Data error in file '%s' at position '%u', diff bits %u/%u\n", task->path, file_pos, diff, BLOCK_HASH_SIZE * 8);
     484         920 :                                                 ++silent_error;
     485         920 :                                                 silent_error_on_this_block = 1;
     486             :                                         }
     487         920 :                                         continue;
     488             :                                 }
     489             :                         }
     490             :                 }
     491             : 
     492             :                 /* buffers for parity read and not computed */
     493      196224 :                 for (l = 0; l < state->level; ++l)
     494      168192 :                         buffer_recov[l] = buffer[diskmax + state->level + l];
     495       28032 :                 for (; l < LEV_MAX; ++l)
     496           0 :                         buffer_recov[l] = 0;
     497             : 
     498             :                 /* until now is misc */
     499       28032 :                 state_usage_misc(state);
     500             : 
     501             :                 /* read the parity */
     502      196224 :                 for (l = 0; l < state->level; ++l) {
     503             :                         struct snapraid_task* task;
     504             :                         unsigned levcur;
     505             : 
     506      168192 :                         task = io_parity_read(&io, &levcur, waiting_map, &waiting_mac);
     507             : 
     508             :                         /* until now is parity */
     509      168192 :                         state_usage_parity(state, waiting_map, waiting_mac);
     510             : 
     511             :                         /* handle error conditions */
     512      168192 :                         if (task->state == TASK_STATE_IOERROR) {
     513             :                                 /* LCOV_EXCL_START */
     514             :                                 ++io_error;
     515             :                                 goto bail;
     516             :                                 /* LCOV_EXCL_STOP */
     517             :                         }
     518      168192 :                         if (task->state == TASK_STATE_ERROR) {
     519             :                                 /* LCOV_EXCL_START */
     520             :                                 ++error;
     521             :                                 goto bail;
     522             :                                 /* LCOV_EXCL_STOP */
     523             :                         }
     524      168192 :                         if (task->state == TASK_STATE_ERROR_CONTINUE) {
     525           0 :                                 ++error;
     526           0 :                                 error_on_this_block = 1;
     527             : 
     528             :                                 /* if continuing on error, clear the missing buffer */
     529           0 :                                 buffer_recov[levcur] = 0;
     530           0 :                                 continue;
     531             :                         }
     532      168192 :                         if (task->state == TASK_STATE_IOERROR_CONTINUE) {
     533           0 :                                 ++io_error;
     534           0 :                                 if (io_error >= state->opt.io_error_limit) {
     535             :                                         /* LCOV_EXCL_START */
     536             :                                         log_fatal("DANGER! Too many input/output read error in the %s disk, it isn't possible to scrub.\n", lev_name(levcur));
     537             :                                         log_fatal("Ensure that disk '%s' is sane and can be read.\n", lev_config_name(levcur));
     538             :                                         log_fatal("Stopping at block %u\n", blockcur);
     539             :                                         goto bail;
     540             :                                         /* LCOV_EXCL_STOP */
     541             :                                 }
     542             : 
     543             :                                 /* otherwise continue */
     544           0 :                                 io_error_on_this_block = 1;
     545             : 
     546             :                                 /* if continuing on error, clear the missing buffer */
     547           0 :                                 buffer_recov[levcur] = 0;
     548           0 :                                 continue;
     549             :                         }
     550      168192 :                         if (task->state != TASK_STATE_DONE) {
     551             :                                 /* LCOV_EXCL_START */
     552             :                                 log_fatal("Internal inconsistency in task state\n");
     553             :                                 os_abort();
     554             :                                 /* LCOV_EXCL_STOP */
     555             :                         }
     556             :                 }
     557             : 
     558             :                 /* if we have read all the data required and it's correct, proceed with the parity check */
     559       28032 :                 if (!error_on_this_block && !silent_error_on_this_block && !io_error_on_this_block) {
     560             : 
     561             :                         /* compute the parity */
     562       27112 :                         raid_gen(diskmax, state->level, state->block_size, buffer);
     563             : 
     564             :                         /* compare the parity */
     565      189784 :                         for (l = 0; l < state->level; ++l) {
     566      162672 :                                 if (buffer_recov[l] && memcmp(buffer[diskmax + l], buffer_recov[l], state->block_size) != 0) {
     567           0 :                                         unsigned diff = memdiff(buffer[diskmax + l], buffer_recov[l], state->block_size);
     568             : 
     569           0 :                                         log_tag("parity_error:%u:%s: Data error, diff bits %u/%u\n", blockcur, lev_config_name(l), diff, state->block_size * 8);
     570             : 
     571             :                                         /* it's a silent error only if we are dealing with synced blocks */
     572           0 :                                         if (block_is_unsynced) {
     573           0 :                                                 ++error;
     574           0 :                                                 error_on_this_block = 1;
     575             :                                         } else {
     576           0 :                                                 log_fatal("Data error in parity '%s' at position '%u', diff bits %u/%u\n", lev_config_name(l), blockcur, diff, state->block_size * 8);
     577           0 :                                                 ++silent_error;
     578           0 :                                                 silent_error_on_this_block = 1;
     579             :                                         }
     580             :                                 }
     581             :                         }
     582             : 
     583             :                         /* until now is raid */
     584       27112 :                         state_usage_raid(state);
     585             :                 }
     586             : 
     587       28032 :                 if (silent_error_on_this_block || io_error_on_this_block) {
     588             :                         /* set the error status keeping other info */
     589         920 :                         info_set(&state->infoarr, blockcur, info_set_bad(info));
     590       27112 :                 } else if (error_on_this_block) {
     591             :                         /* do nothing, as this is a generic error */
     592             :                         /* likely caused by a not synced array */
     593             :                 } else {
     594             :                         /* if rehash is needed */
     595       27112 :                         if (rehash) {
     596             :                                 /* store all the new hash already computed */
     597       32256 :                                 for (j = 0; j < diskmax; ++j) {
     598       27648 :                                         if (rehandle[j].block)
     599       27247 :                                                 memcpy(rehandle[j].block->hash, rehandle[j].hash, BLOCK_HASH_SIZE);
     600             :                                 }
     601             :                         }
     602             : 
     603             :                         /* update the time info of the block */
     604             :                         /* and clear any other flag */
     605       27112 :                         info_set(&state->infoarr, blockcur, info_make(now, 0, 0, 0));
     606             :                 }
     607             : 
     608             :                 /* mark the state as needing write */
     609       28032 :                 state->need_write = 1;
     610             : 
     611             :                 /* count the number of processed block */
     612       28032 :                 ++countpos;
     613             : 
     614             :                 /* progress */
     615       28032 :                 if (state_progress(state, &io, blockcur, countpos, countmax, countsize)) {
     616             :                         /* LCOV_EXCL_START */
     617             :                         break;
     618             :                         /* LCOV_EXCL_STOP */
     619             :                 }
     620             : 
     621             :                 /* autosave */
     622       28032 :                 if (state->autosave != 0
     623           0 :                         && autosavedone >= autosavelimit /* if we have reached the limit */
     624           0 :                         && autosavemissing >= autosavelimit /* if we have at least a full step to do */
     625             :                 ) {
     626           0 :                         autosavedone = 0; /* restart the counter */
     627             : 
     628             :                         /* until now is misc */
     629           0 :                         state_usage_misc(state);
     630             : 
     631           0 :                         state_progress_stop(state);
     632             : 
     633           0 :                         msg_progress("Autosaving...\n");
     634           0 :                         state_write(state);
     635             : 
     636           0 :                         state_progress_restart(state);
     637             : 
     638             :                         /* drop until now */
     639           0 :                         state_usage_waste(state);
     640             :                 }
     641       28032 :         }
     642             : 
     643          11 :         state_progress_end(state, countpos, countmax, countsize);
     644             : 
     645          11 :         state_usage_print(state);
     646             : 
     647          11 :         if (error || silent_error || io_error) {
     648           1 :                 msg_status("\n");
     649           1 :                 msg_status("%8u file errors\n", error);
     650           1 :                 msg_status("%8u io errors\n", io_error);
     651           1 :                 msg_status("%8u data errors\n", silent_error);
     652             :         } else {
     653             :                 /* print the result only if processed something */
     654          10 :                 if (countpos != 0)
     655           9 :                         msg_status("Everything OK\n");
     656             :         }
     657             : 
     658          11 :         if (error)
     659           0 :                 log_fatal("WARNING! Unexpected file errors!\n");
     660          11 :         if (io_error)
     661           0 :                 log_fatal("DANGER! Unexpected input/output errors! The failing blocks are now marked as bad!\n");
     662          11 :         if (silent_error)
     663           1 :                 log_fatal("DANGER! Unexpected data errors! The failing blocks are now marked as bad!\n");
     664          11 :         if (io_error || silent_error) {
     665           1 :                 log_fatal("Use 'snapraid status' to list the bad blocks.\n");
     666           1 :                 log_fatal("Use 'snapraid -e fix' to recover.\n");
     667             :         }
     668             : 
     669          11 :         log_tag("summary:error_file:%u\n", error);
     670          11 :         log_tag("summary:error_io:%u\n", io_error);
     671          11 :         log_tag("summary:error_data:%u\n", silent_error);
     672          11 :         if (error + silent_error + io_error == 0)
     673          10 :                 log_tag("summary:exit:ok\n");
     674             :         else
     675           1 :                 log_tag("summary:exit:error\n");
     676          11 :         log_flush();
     677             : 
     678             : bail:
     679             :         /* stop all the worker threads */
     680          11 :         io_stop(&io);
     681             : 
     682          77 :         for (j = 0; j < diskmax; ++j) {
     683          66 :                 struct snapraid_file* file = handle[j].file;
     684          66 :                 struct snapraid_disk* disk = handle[j].disk;
     685          66 :                 ret = handle_close(&handle[j]);
     686          66 :                 if (ret == -1) {
     687             :                         /* LCOV_EXCL_START */
     688             :                         log_tag("error:%u:%s:%s: Close error. %s\n", blockcur, disk->name, esc_tag(file->sub, esc_buffer), strerror(errno));
     689             :                         log_fatal("DANGER! Unexpected close error in a data disk.\n");
     690             :                         ++error;
     691             :                         /* continue, as we are already exiting */
     692             :                         /* LCOV_EXCL_STOP */
     693             :                 }
     694             :         }
     695             : 
     696          11 :         free(handle);
     697          11 :         free(rehandle_alloc);
     698          11 :         free(waiting_map);
     699          11 :         io_done(&io);
     700             : 
     701          11 :         if (state->opt.expect_recoverable) {
     702           1 :                 if (error + silent_error + io_error == 0)
     703           0 :                         return -1;
     704             :         } else {
     705          10 :                 if (error + silent_error + io_error != 0)
     706           0 :                         return -1;
     707             :         }
     708          11 :         return 0;
     709             : }
     710             : 
     711             : /**
     712             :  * Return a * b / c approximated to the upper value.
     713             :  */
     714           2 : static uint32_t md(uint32_t a, uint32_t b, uint32_t c)
     715             : {
     716           2 :         uint64_t v = a;
     717             : 
     718           2 :         v *= b;
     719           2 :         v += c - 1;
     720           2 :         v /= c;
     721             : 
     722           2 :         return v;
     723             : }
     724             : 
     725          11 : int state_scrub(struct snapraid_state* state, int plan, int olderthan)
     726             : {
     727             :         block_off_t blockmax;
     728             :         block_off_t countlimit;
     729             :         block_off_t i;
     730             :         block_off_t count;
     731             :         time_t recentlimit;
     732             :         int ret;
     733             :         struct snapraid_parity_handle parity_handle[LEV_MAX];
     734             :         struct snapraid_plan ps;
     735             :         time_t* timemap;
     736             :         unsigned error;
     737             :         time_t now;
     738             :         unsigned l;
     739             : 
     740             :         /* get the present time */
     741          11 :         now = time(0);
     742             : 
     743          11 :         msg_progress("Initializing...\n");
     744             : 
     745          11 :         if ((plan == SCRUB_BAD || plan == SCRUB_NEW || plan == SCRUB_FULL)
     746           4 :                 && olderthan >= 0) {
     747             :                 /* LCOV_EXCL_START */
     748             :                 log_fatal("You can specify -o, --older-than only with a numeric percentage.\n");
     749             :                 exit(EXIT_FAILURE);
     750             :                 /* LCOV_EXCL_STOP */
     751             :         }
     752             : 
     753          11 :         blockmax = parity_allocated_size(state);
     754             : 
     755             :         /* preinitialize to avoid warnings */
     756          11 :         countlimit = 0;
     757          11 :         recentlimit = 0;
     758             : 
     759          11 :         ps.state = state;
     760          11 :         if (state->opt.force_scrub_even) {
     761           1 :                 ps.plan = SCRUB_EVEN;
     762          10 :         } else if (plan == SCRUB_FULL) {
     763           2 :                 ps.plan = SCRUB_FULL;
     764           8 :         } else if (plan == SCRUB_NEW) {
     765           1 :                 ps.plan = SCRUB_NEW;
     766           7 :         } else if (plan == SCRUB_BAD) {
     767           1 :                 ps.plan = SCRUB_BAD;
     768           6 :         } else if (state->opt.force_scrub_at) {
     769             :                 /* scrub the specified amount of blocks */
     770           4 :                 ps.plan = SCRUB_AUTO;
     771           4 :                 countlimit = state->opt.force_scrub_at;
     772           4 :                 recentlimit = now;
     773             :         } else {
     774           2 :                 ps.plan = SCRUB_AUTO;
     775           2 :                 if (plan >= 0) {
     776           1 :                         countlimit = md(blockmax, plan, 100);
     777             :                 } else {
     778             :                         /* by default scrub 8.33% of the array (100/12=8.(3)) */
     779           1 :                         countlimit = md(blockmax, 1, 12);
     780             :                 }
     781             : 
     782           2 :                 if (olderthan >= 0) {
     783           1 :                         recentlimit = now - olderthan * 24 * 3600;
     784             :                 } else {
     785             :                         /* by default use a 10 day time limit */
     786           1 :                         recentlimit = now - 10 * 24 * 3600;
     787             :                 }
     788             :         }
     789             : 
     790             :         /* identify the time limit */
     791             :         /* we sort all the block times, and we identify the time limit for which we reach the quota */
     792             :         /* this allow to process first the oldest blocks */
     793          11 :         timemap = malloc_nofail(blockmax * sizeof(time_t));
     794             : 
     795             :         /* copy the info in the temp vector */
     796          11 :         count = 0;
     797          11 :         log_tag("block_count:%u\n", blockmax);
     798       56096 :         for (i = 0; i < blockmax; ++i) {
     799       56085 :                 snapraid_info info = info_get(&state->infoarr, i);
     800             : 
     801             :                 /* skip unused blocks */
     802       56085 :                 if (info == 0)
     803           0 :                         continue;
     804             : 
     805       56085 :                 timemap[count++] = info_get_time(info);
     806             :         }
     807             : 
     808          11 :         if (!count) {
     809             :                 /* LCOV_EXCL_START */
     810             :                 log_fatal("The array appears to be empty.\n");
     811             :                 exit(EXIT_FAILURE);
     812             :                 /* LCOV_EXCL_STOP */
     813             :         }
     814             : 
     815             :         /* sort it */
     816          11 :         qsort(timemap, count, sizeof(time_t), time_compare);
     817             : 
     818             :         /* output the info map */
     819          11 :         i = 0;
     820          11 :         log_tag("info_count:%u\n", count);
     821          38 :         while (i < count) {
     822          16 :                 unsigned j = i + 1;
     823       56101 :                 while (j < count && timemap[i] == timemap[j])
     824       56069 :                         ++j;
     825          16 :                 log_tag("info_time:%" PRIu64 ":%u\n", (uint64_t)timemap[i], j - i);
     826          16 :                 i = j;
     827             :         }
     828             : 
     829             :         /* compute the limits from count/recentlimit */
     830          11 :         if (ps.plan == SCRUB_AUTO) {
     831             :                 /* no more than the full count */
     832           6 :                 if (countlimit > count)
     833           2 :                         countlimit = count;
     834             : 
     835             :                 /* decrease until we reach the specific recentlimit */
     836          59 :                 while (countlimit > 0 && timemap[countlimit - 1] > recentlimit)
     837          47 :                         --countlimit;
     838             : 
     839             :                 /* if there is something to scrub */
     840           6 :                 if (countlimit > 0) {
     841             :                         /* get the most recent time we want to scrub */
     842           5 :                         ps.timelimit = timemap[countlimit - 1];
     843             : 
     844             :                         /* count how many entries for this exact time we have to scrub */
     845             :                         /* if the blocks have all the same time, we end with countlimit == lastlimit */
     846           5 :                         ps.lastlimit = 1;
     847        7283 :                         while (countlimit > ps.lastlimit && timemap[countlimit - ps.lastlimit - 1] == ps.timelimit)
     848        7273 :                                 ++ps.lastlimit;
     849             :                 } else {
     850             :                         /* if nothing to scrub, disable also other limits */
     851           1 :                         ps.timelimit = 0;
     852           1 :                         ps.lastlimit = 0;
     853             :                 }
     854             : 
     855           6 :                 log_tag("count_limit:%u\n", countlimit);
     856           6 :                 log_tag("time_limit:%" PRIu64 "\n", (uint64_t)ps.timelimit);
     857           6 :                 log_tag("last_limit:%u\n", ps.lastlimit);
     858             :         }
     859             : 
     860             :         /* free the temp vector */
     861          11 :         free(timemap);
     862             : 
     863             :         /* open the file for reading */
     864          77 :         for (l = 0; l < state->level; ++l) {
     865          66 :                 ret = parity_open(&parity_handle[l], &state->parity[l], l, state->file_mode, state->block_size, state->opt.parity_limit_size);
     866          66 :                 if (ret == -1) {
     867             :                         /* LCOV_EXCL_START */
     868             :                         log_fatal("WARNING! Without an accessible %s file, it isn't possible to scrub.\n", lev_name(l));
     869             :                         exit(EXIT_FAILURE);
     870             :                         /* LCOV_EXCL_STOP */
     871             :                 }
     872             :         }
     873             : 
     874          11 :         msg_progress("Scrubbing...\n");
     875             : 
     876          11 :         error = 0;
     877             : 
     878          11 :         ret = state_scrub_process(state, parity_handle, 0, blockmax, &ps, now);
     879          11 :         if (ret == -1) {
     880           0 :                 ++error;
     881             :                 /* continue, as we are already exiting */
     882             :         }
     883             : 
     884          77 :         for (l = 0; l < state->level; ++l) {
     885          66 :                 ret = parity_close(&parity_handle[l]);
     886          66 :                 if (ret == -1) {
     887             :                         /* LCOV_EXCL_START */
     888             :                         log_fatal("DANGER! Unexpected close error in %s disk.\n", lev_name(l));
     889             :                         ++error;
     890             :                         /* continue, as we are already exiting */
     891             :                         /* LCOV_EXCL_STOP */
     892             :                 }
     893             :         }
     894             : 
     895             :         /* abort if required */
     896          11 :         if (error != 0)
     897           0 :                 return -1;
     898          11 :         return 0;
     899             : }
     900             :

Generated by: LCOV version 1.13