Line data Source code
1 : /*
2 : * Copyright (C) 2011 Andrea Mazzoleni
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 : */
17 :
18 : #include "portable.h"
19 :
20 : #include "support.h"
21 : #include "util.h"
22 : #include "elem.h"
23 : #include "import.h"
24 : #include "search.h"
25 : #include "state.h"
26 : #include "parity.h"
27 : #include "handle.h"
28 : #include "raid/raid.h"
29 : #include "raid/combo.h"
30 :
31 : /****************************************************************************/
32 : /* check */
33 :
34 : /**
35 : * Description of a block that failed the hash check, or that was deleted.
36 : */
37 : struct failed_struct {
38 : /**
39 : * Set if we know for sure that the block is garbage or missing,
40 : * and that it needs to be recovered and rewritten to the disk.
41 : */
42 : int is_bad;
43 :
44 : /**
45 : * Set if what we have recovered may be out-of-date data,
46 : * an old version of the block, or just garbage.
47 : *
48 : * Essentially, it means that we are not sure that what we have recovered
49 : * is really correct. It's just our best guess.
50 : *
51 : * These "recovered" blocks are also written to the disk if the block is marked as ::is_bad.
52 : * But these files are also marked as FILE_IS_DAMAGED, and then renamed to .unrecoverable.
53 : *
54 : * repair() sets it for CHG blocks it cannot verify, and for DELETED, CHG and REP blocks
55 : * when recovering the state before the 'sync'. Such blocks are skipped by the hash verification. */
56 : int is_outofdate;
57 :
58 : unsigned index; /**< Index of the failed block, used to address its data in the buffer vector. */
59 : struct snapraid_block* block; /**< The failed block. */
60 : struct snapraid_disk* disk; /**< The failed disk. */
61 : struct snapraid_file* file; /**< The failed file. 0 for a DELETED block. */
62 : block_off_t file_pos; /**< Offset inside the file. */
63 : struct snapraid_handle* handle; /**< The handle containing the failed block, or 0 for a DELETED block. */
64 : };
65 :
66 : /**
67 : * Check if a block hash matches the specified buffer.
68 : * Return ==0 if equal
69 : */
70 473818 : static int blockcmp(struct snapraid_state* state, int rehash, struct snapraid_block* block, unsigned pos_size, unsigned char* buffer, unsigned char* buffer_zero)
71 : {
72 : unsigned char hash[HASH_MAX];
73 :
74 : /* now compute the hash of the valid part */
75 473818 : if (rehash) {
76 0 : memhash(state->prevhash, state->prevhashseed, hash, buffer, pos_size);
77 : } else {
78 473818 : memhash(state->hash, state->hashseed, hash, buffer, pos_size);
79 : }
80 :
81 : /* compare the hash */
82 473818 : if (memcmp(hash, block->hash, BLOCK_HASH_SIZE) != 0) {
83 19235 : return -1;
84 : }
85 :
86 : /* compare to the end of the block */
87 454583 : if (pos_size < state->block_size) {
88 187483 : if (memcmp(buffer + pos_size, buffer_zero + pos_size, state->block_size - pos_size) != 0) {
89 42 : return -1;
90 : }
91 : }
92 :
93 454541 : return 0;
94 : }
95 :
96 : /**
97 : * Check if the hash of all the failed block we are expecting to recover are now matching.
98 : */
99 215056 : static int is_hash_matching(struct snapraid_state* state, int rehash, unsigned diskmax, struct failed_struct* failed, unsigned* failed_map, unsigned failed_count, void** buffer, void* buffer_zero)
100 : {
101 : unsigned j;
102 : int hash_checked;
103 :
104 215056 : hash_checked = 0; /* keep track if we check at least one block */
105 :
106 : /* check if the recovered blocks are OK */
107 674119 : for (j = 0; j < failed_count; ++j) {
108 : /* if we are expected to recover this block */
109 478340 : if (!failed[failed_map[j]].is_outofdate
110 : /* if the block has a hash to check */
111 473818 : && block_has_updated_hash(failed[failed_map[j]].block)
112 : ) {
113 : /* if a hash doesn't match, fail the check */
114 473818 : unsigned pos_size = file_block_size(failed[failed_map[j]].file, failed[failed_map[j]].file_pos, state->block_size);
115 473818 : if (blockcmp(state, rehash, failed[failed_map[j]].block, pos_size, buffer[failed[failed_map[j]].index], buffer_zero) != 0) {
116 19277 : log_tag("hash_error: Hash mismatch on entry %u\n", failed_map[j]);
117 19277 : return 0;
118 : }
119 :
120 454541 : hash_checked = 1;
121 : }
122 : }
123 :
124 : /* if nothing checked, we reject it */
125 : /* note that we are excluding this case at upper level */
126 : /* but checking again doesn't hurt */
127 195779 : if (!hash_checked) {
128 : /* LCOV_EXCL_START */
129 : return 0;
130 : /* LCOV_EXCL_STOP */
131 : }
132 :
133 : /* if we checked something, and no block failed the check */
134 : /* recompute all the redundancy information */
135 195779 : raid_gen(diskmax, state->level, state->block_size, buffer);
136 195779 : return 1;
137 : }
138 :
139 : /**
140 : * Check if specified parity is now matching with a recomputed one.
141 : */
142 8 : static int is_parity_matching(struct snapraid_state* state, unsigned diskmax, unsigned i, void** buffer, void** buffer_recov)
143 : {
144 : /* recompute parity, note that we don't need parity over i */
145 8 : raid_gen(diskmax, i + 1, state->block_size, buffer);
146 :
147 : /* if the recovered parity block matches */
148 8 : if (memcmp(buffer[diskmax + i], buffer_recov[i], state->block_size) == 0) {
149 : /* recompute all the redundancy information */
150 8 : raid_gen(diskmax, state->level, state->block_size, buffer);
151 8 : return 1;
152 : }
153 :
154 0 : return 0;
155 : }
156 :
157 : /**
158 : * Repair errors.
159 : * Return <0 if failure for missing strategy, >0 if data is wrong and we cannot rebuild correctly, 0 on success.
160 : * If success, the parity are computed in the buffer variable.
161 : */
162 252983 : static int repair_step(struct snapraid_state* state, int rehash, unsigned pos, unsigned diskmax, struct failed_struct* failed, unsigned* failed_map, unsigned failed_count, void** buffer, void** buffer_recov, void* buffer_zero)
163 : {
164 : unsigned i, n;
165 : int error;
166 : int has_hash;
167 : int id[LEV_MAX];
168 : int ip[LEV_MAX];
169 :
170 : /* no fix required, already checked at higher level, but just to be sure */
171 252983 : if (failed_count == 0) {
172 : /* LCOV_EXCL_START */
173 : /* recompute only the parity */
174 : raid_gen(diskmax, state->level, state->block_size, buffer);
175 : return 0;
176 : /* LCOV_EXCL_STOP */
177 : }
178 :
179 252983 : n = state->level;
180 252983 : error = 0;
181 :
182 : /* setup vector of failed disk indexes */
183 892658 : for (i = 0; i < failed_count; ++i)
184 639675 : id[i] = failed[failed_map[i]].index;
185 :
186 : /* check if there is at least a failed block that can be checked for correctness using the hash */
187 : /* if there isn't, we have to sacrifice a parity block to check that the result is correct */
188 252983 : has_hash = 0;
189 892658 : for (i = 0; i < failed_count; ++i) {
190 : /* if we are expected to recover this block */
191 639675 : if (!failed[failed_map[i]].is_outofdate
192 : /* if the block has a hash to check */
193 630631 : && block_has_updated_hash(failed[failed_map[i]].block)
194 : )
195 630623 : has_hash = 1;
196 : }
197 :
198 : /* if we don't have a hash, but we have an extra parity */
199 : /* (strictly-less failures than number of parities) */
200 252983 : if (!has_hash && failed_count < n) {
201 : /* number of parity to use, one more to check the recovering */
202 8 : unsigned r = failed_count + 1;
203 :
204 : /* all combinations (r of n) parities */
205 8 : combination_first(r, n, ip);
206 : do {
207 : /* if a parity is missing, do nothing */
208 24 : for (i = 0; i < r; ++i) {
209 16 : if (buffer_recov[ip[i]] == 0)
210 0 : break;
211 : }
212 8 : if (i != r)
213 0 : continue;
214 :
215 : /* copy the parities to use, one less because the last is used for checking */
216 16 : for (i = 0; i < r - 1; ++i)
217 8 : memcpy(buffer[diskmax + ip[i]], buffer_recov[ip[i]], state->block_size);
218 :
219 : /* recover using one less parity, the ip[r-1] one */
220 8 : raid_data(r - 1, id, ip, diskmax, state->block_size, buffer);
221 :
222 : /* use the remaining ip[r-1] parity to check the result */
223 8 : if (is_parity_matching(state, diskmax, ip[r - 1], buffer, buffer_recov))
224 8 : return 0;
225 :
226 : /* log */
227 0 : log_tag("parity_error:%u:", pos);
228 0 : for (i = 0; i < r; ++i) {
229 0 : if (i != 0)
230 0 : log_tag("/");
231 0 : log_tag("%s", lev_config_name(ip[i]));
232 : }
233 0 : log_tag(":parity: Parity mismatch\n");
234 0 : ++error;
235 0 : } while (combination_next(r, n, ip));
236 : }
237 :
238 : /* if we have a hash, and enough parities */
239 : /* (less-or-equal failures than number of parities) */
240 252975 : if (has_hash && failed_count <= n) {
241 : /* number of parities to use equal at the number of failures */
242 205177 : unsigned r = failed_count;
243 :
244 : /* all combinations (r of n) parities */
245 205177 : combination_first(r, n, ip);
246 : do {
247 : /* if a parity is missing, do nothing */
248 693396 : for (i = 0; i < r; ++i) {
249 478340 : if (buffer_recov[ip[i]] == 0)
250 0 : break;
251 : }
252 215056 : if (i != r)
253 0 : continue;
254 :
255 : /* copy the parities to use */
256 693396 : for (i = 0; i < r; ++i)
257 478340 : memcpy(buffer[diskmax + ip[i]], buffer_recov[ip[i]], state->block_size);
258 :
259 : /* recover */
260 215056 : raid_data(r, id, ip, diskmax, state->block_size, buffer);
261 :
262 : /* use the hash to check the result */
263 215056 : if (is_hash_matching(state, rehash, diskmax, failed, failed_map, failed_count, buffer, buffer_zero))
264 195779 : return 0;
265 :
266 : /* log */
267 19277 : log_tag("parity_error:%u:", pos);
268 38554 : for (i = 0; i < r; ++i) {
269 19277 : if (i != 0)
270 0 : log_tag("/");
271 19277 : log_tag("%s", lev_config_name(ip[i]));
272 : }
273 19277 : log_tag(":hash: Hash mismatch\n");
274 19277 : ++error;
275 38554 : } while (combination_next(r, n, ip));
276 : }
277 :
278 : /* return the number of failed attempts, or -1 if no strategy */
279 57196 : if (error)
280 9398 : return error;
281 :
282 47798 : log_tag("strategy_error:%u: No strategy to recover from %u failures with %u parity %s hash\n",
283 : pos, failed_count, n, has_hash ? "with" : "without");
284 47798 : return -1;
285 : }
286 :
287 870959 : static int repair(struct snapraid_state* state, int rehash, unsigned pos, unsigned diskmax, struct failed_struct* failed, unsigned* failed_map, unsigned failed_count, void** buffer, void** buffer_recov, void* buffer_zero)
288 : {
289 : int ret;
290 : int error;
291 : unsigned j;
292 : int n;
293 : int something_to_recover;
294 : int something_unsynced;
295 : char esc_buffer[ESC_MAX];
296 :
297 870959 : error = 0;
298 :
299 : /* if nothing failed, just recompute the parity */
300 870959 : if (failed_count == 0) {
301 583064 : raid_gen(diskmax, state->level, state->block_size, buffer);
302 583064 : return 0;
303 : }
304 :
305 : /* logs the status */
306 1017589 : for (j = 0; j < failed_count; ++j) {
307 : const char* desc;
308 : const char* hash;
309 : const char* data;
310 729694 : struct snapraid_block* block = failed[j].block;
311 729694 : unsigned block_state = block_state_get(block);
312 :
313 729694 : switch (block_state) {
314 18454 : case BLOCK_STATE_DELETED : desc = "delete"; break;
315 13851 : case BLOCK_STATE_CHG : desc = "change"; break;
316 62282 : case BLOCK_STATE_REP : desc = "replace"; break;
317 635107 : case BLOCK_STATE_BLK : desc = "block"; break;
318 : /* LCOV_EXCL_START */
319 : default : desc = "unknown"; break;
320 : /* LCOV_EXCL_STOP */
321 : }
322 :
323 729694 : if (hash_is_invalid(block->hash)) {
324 13843 : hash = "lost";
325 715851 : } else if (hash_is_zero(block->hash)) {
326 8 : hash = "zero";
327 : } else {
328 715843 : hash = "known";
329 : }
330 :
331 729694 : if (failed[j].is_bad)
332 643969 : data = "bad";
333 : else
334 85725 : data = "good";
335 :
336 729694 : if (failed[j].file) {
337 711240 : struct snapraid_disk* disk = failed[j].disk;
338 711240 : struct snapraid_file* file = failed[j].file;
339 711240 : block_off_t file_pos = failed[j].file_pos;
340 :
341 711240 : log_tag("entry:%u:%s:%s:%s:%s:%s:%u:\n", j, desc, hash, data, disk->name, esc_tag(file->sub, esc_buffer), file_pos);
342 : } else {
343 18454 : log_tag("entry:%u:%s:%s:%s:\n", j, desc, hash, data);
344 : }
345 : }
346 :
347 : /* Here we have to try two different strategies to recover, because in case the 'sync' */
348 : /* process is aborted, we don't know if the parity data is really updated just like after 'sync', */
349 : /* or if it still represents the state before the 'sync'. */
350 :
351 : /* Note that if the 'sync' ends normally, we don't have any DELETED, REP and CHG blocks */
352 : /* and the two strategies are identical */
353 :
354 : /* As first, we assume that the parity IS updated for the current state */
355 : /* and that we are going to recover the state after the last 'sync'. */
356 : /* In this case, parity contains info from BLK, REP and CHG blocks, */
357 : /* but not for DELETED. */
358 : /* We need to put in the recovering process only the bad blocks, because all the */
359 : /* others already contains the correct data read from disk, and the parity is correctly computed for them. */
360 : /* We are interested to recover BLK, REP and CHG blocks if they are marked as bad, */
361 : /* but we are not interested in DELETED ones. */
362 :
363 287895 : n = 0;
364 287895 : something_to_recover = 0; /* keep track if there is at least one block to fix */
365 1017589 : for (j = 0; j < failed_count; ++j) {
366 729694 : if (failed[j].is_bad) {
367 643969 : unsigned block_state = block_state_get(failed[j].block);
368 :
369 643969 : assert(block_state != BLOCK_STATE_DELETED); /* we cannot have bad DELETED blocks */
370 :
371 : /* if we have the hash for it */
372 643969 : if ((block_state == BLOCK_STATE_BLK || block_state == BLOCK_STATE_REP)
373 : /* try to fetch the block using the known hash */
374 643961 : && (state_import_fetch(state, rehash, failed[j].block, buffer[failed[j].index]) == 0
375 639489 : || state_search_fetch(state, rehash, failed[j].file, failed[j].file_pos, failed[j].block, buffer[failed[j].index]) == 0)
376 : ) {
377 : /* we already have corrected it! */
378 22382 : log_tag("hash_import: Fixed entry %u\n", j);
379 : } else {
380 : /* otherwise try to recover it */
381 621587 : failed_map[n] = j;
382 621587 : ++n;
383 :
384 : /* we have something to try to recover */
385 621587 : something_to_recover = 1;
386 : }
387 : }
388 : }
389 :
390 : /* if nothing to fix */
391 287895 : if (!something_to_recover) {
392 43956 : log_tag("recover_sync:%u:%u: Skipped for already recovered\n", pos, n);
393 :
394 : /* recompute only the parity */
395 43956 : raid_gen(diskmax, state->level, state->block_size, buffer);
396 43956 : return 0;
397 : }
398 :
399 243939 : ret = repair_step(state, rehash, pos, diskmax, failed, failed_map, n, buffer, buffer_recov, buffer_zero);
400 243939 : if (ret == 0) {
401 : /* reprocess the CHG blocks, for which we don't have a hash to check */
402 : /* if they were BAD we have to use some heuristics to ensure that we have recovered */
403 : /* the state after the sync. If unsure, we assume the worst case */
404 :
405 667919 : for (j = 0; j < failed_count; ++j) {
406 : /* we take care only of BAD blocks we have to write back */
407 476654 : if (failed[j].is_bad) {
408 463182 : unsigned block_state = block_state_get(failed[j].block);
409 :
410 : /* BLK and REP blocks are always OK, because at this point */
411 : /* we have already checked their hash */
412 463182 : if (block_state != BLOCK_STATE_CHG) {
413 463174 : assert(block_state == BLOCK_STATE_BLK || block_state == BLOCK_STATE_REP);
414 463174 : continue;
415 : }
416 :
417 : /* for CHG blocks we have to 'guess' if they are correct or not */
418 :
419 : /* if the hash is invalid we cannot check the result */
420 : /* this could happen if we have lost this information */
421 : /* after an aborted sync */
422 8 : if (hash_is_invalid(failed[j].block->hash)) {
423 : /* it may contain garbage */
424 0 : failed[j].is_outofdate = 1;
425 :
426 0 : log_tag("hash_unknown: Unknown hash on entry %u\n", j);
427 8 : } else if (hash_is_zero(failed[j].block->hash)) {
428 : /* if the block is not filled with 0, we are sure to have */
429 : /* restored it to the state after the 'sync' */
430 : /* instead, if the block is filled with 0, it could be either that the */
431 : /* block after the sync is really filled by 0, or that */
432 : /* we restored the block before the 'sync'. */
433 8 : if (memcmp(buffer[failed[j].index], buffer_zero, state->block_size) == 0) {
434 : /* it may contain garbage */
435 8 : failed[j].is_outofdate = 1;
436 :
437 8 : log_tag("hash_unknown: Maybe old zero on entry %u\n", j);
438 : }
439 : } else {
440 : /* if the hash is different than the previous one, we are sure to have */
441 : /* restored it to the state after the 'sync' */
442 : /* instead, if the hash matches, it could be either that the */
443 : /* block after the sync has this hash, or that */
444 : /* we restored the block before the 'sync'. */
445 0 : unsigned pos_size = file_block_size(failed[j].file, failed[j].file_pos, state->block_size);
446 0 : if (blockcmp(state, rehash, failed[j].block, pos_size, buffer[failed[j].index], buffer_zero) == 0) {
447 : /* it may contain garbage */
448 0 : failed[j].is_outofdate = 1;
449 :
450 0 : log_tag("hash_unknown: Maybe old data on entry %u\n", j);
451 : }
452 : }
453 : }
454 : }
455 :
456 191265 : return 0;
457 : }
458 52674 : if (ret > 0)
459 9398 : error += ret;
460 :
461 52674 : if (ret < 0)
462 43276 : log_tag("recover_sync:%u:%u: Failed with no attempts\n", pos, n);
463 : else
464 9398 : log_tag("recover_sync:%u:%u: Failed with %d attempts\n", pos, n, ret);
465 :
466 : /* Now assume that the parity IS NOT updated at the current state, */
467 : /* but still represent the state before the last 'sync' process. */
468 : /* In this case, parity contains info from BLK, REP (old version), CHG (old version) and DELETED blocks, */
469 : /* but not for REP (new version) and CHG (new version). */
470 : /* We are interested to recover BLK ones marked as bad, */
471 : /* but we are not interested to recover CHG (new version) and REP (new version) blocks, */
472 : /* even if marked as bad, because we don't have parity for them and it's just impossible, */
473 : /* and we are not interested to recover DELETED ones. */
474 52674 : n = 0;
475 52674 : something_to_recover = 0; /* keep track if there is at least one block to fix */
476 52674 : something_unsynced = 0; /* keep track if we have some unsynced info to process */
477 233278 : for (j = 0; j < failed_count; ++j) {
478 180604 : unsigned block_state = block_state_get(failed[j].block);
479 :
480 180604 : if (block_state == BLOCK_STATE_DELETED
481 171560 : || block_state == BLOCK_STATE_CHG
482 171560 : || block_state == BLOCK_STATE_REP
483 : ) {
484 : /* If the block is CHG, REP or DELETED, we don't have the original content of block, */
485 : /* and we must try to recover it. */
486 : /* This apply to CHG and REP blocks even if they are not marked bad, */
487 : /* because the parity is computed with old content, and not with the new one. */
488 : /* Note that this recovering is done just to make possible to recover any other BLK one, */
489 : /* we are not really interested in DELETED, CHG (old version) and REP (old version). */
490 9044 : something_unsynced = 1;
491 :
492 9044 : if (block_state == BLOCK_STATE_CHG
493 0 : && hash_is_zero(failed[j].block->hash)
494 : ) {
495 : /* If the block was a ZERO block, restore it to the original 0 as before the 'sync' */
496 : /* We do this to just allow recovering of other BLK ones */
497 :
498 0 : memset(buffer[failed[j].index], 0, state->block_size);
499 : /* note that from now the buffer is definitively lost */
500 : /* we can do this only because it's the last retry of recovering */
501 :
502 : /* try to fetch the old block using the old hash for CHG and DELETED blocks */
503 9044 : } else if ((block_state == BLOCK_STATE_CHG || block_state == BLOCK_STATE_DELETED)
504 9044 : && hash_is_unique(failed[j].block->hash)
505 9044 : && state_import_fetch(state, rehash, failed[j].block, buffer[failed[j].index]) == 0) {
506 :
507 : /* note that from now the buffer is definitively lost */
508 : /* we can do this only because it's the last retry of recovering */
509 : } else {
510 : /* otherwise try to recover it */
511 9044 : failed_map[n] = j;
512 9044 : ++n;
513 :
514 : /* note that we don't set something_to_recover, because we are */
515 : /* not really interested to recover *only* old blocks. */
516 : }
517 :
518 : /* avoid to use the hash of this block to verify the recovering */
519 : /* this applies to REP blocks because we are going to recover the old state */
520 : /* and the REP hash represent the new one */
521 : /* it also applies to CHG and DELETE blocks because we want to have */
522 : /* a successful recovering only if a BLK one is matching */
523 9044 : failed[j].is_outofdate = 1;
524 171560 : } else if (failed[j].is_bad) {
525 : /* If the block is bad we don't know its content, and we try to recover it */
526 : /* At this point, we can have only BLK ones */
527 :
528 171560 : assert(block_state == BLOCK_STATE_BLK);
529 :
530 : /* we have something we are interested to recover */
531 171560 : something_to_recover = 1;
532 :
533 : /* we try to recover it */
534 171560 : failed_map[n] = j;
535 171560 : ++n;
536 : }
537 : }
538 :
539 : /* if nothing to fix, we just don't try */
540 : /* if nothing unsynced we also don't retry, because it's the same try as before */
541 52674 : if (something_to_recover && something_unsynced) {
542 9044 : ret = repair_step(state, rehash, pos, diskmax, failed, failed_map, n, buffer, buffer_recov, buffer_zero);
543 9044 : if (ret == 0) {
544 : /* reprocess the REP and CHG blocks, for which we have recovered and old state */
545 : /* that we don't want to save into disk */
546 : /* we have already marked them, but we redo it for logging */
547 :
548 13566 : for (j = 0; j < failed_count; ++j) {
549 : /* we take care only of BAD blocks we have to write back */
550 9044 : if (failed[j].is_bad) {
551 4522 : unsigned block_state = block_state_get(failed[j].block);
552 :
553 4522 : if (block_state == BLOCK_STATE_CHG
554 4522 : || block_state == BLOCK_STATE_REP
555 : ) {
556 : /* mark that we have restored an old state */
557 : /* and we don't want to write it to the disk */
558 0 : failed[j].is_outofdate = 1;
559 :
560 0 : log_tag("hash_unknown: Surely old data on entry %u\n", j);
561 : }
562 : }
563 : }
564 :
565 4522 : return 0;
566 : }
567 4522 : if (ret > 0)
568 0 : error += ret;
569 :
570 9044 : if (ret < 0)
571 4522 : log_tag("recover_unsync:%u:%u: Failed with no attempts\n", pos, n);
572 : else
573 0 : log_tag("recover_unsync:%u:%u: Failed with %d attempts\n", pos, n, ret);
574 : } else {
575 43630 : log_tag("recover_unsync:%u:%u: Skipped for%s%s\n", pos, n,
576 : !something_to_recover ? " nothing to recover" : "",
577 : !something_unsynced ? " nothing unsynched" : ""
578 : );
579 : }
580 :
581 : /* return the number of failed attempts, or -1 if no strategy */
582 48152 : if (error)
583 4876 : return error;
584 : else
585 43276 : return -1;
586 : }
587 :
588 : /**
589 : * Post process all the files at the specified block index ::i.
590 : * For each file, if we are at the last block, closes it,
591 : * adjust the timestamp, and print the result.
592 : *
593 : * This works with the assumption to always process the whole files to
594 : * fix. This assumption is not always correct, and in such case we have to
595 : * skip the whole postprocessing. And example, is when fixing only bad blocks.
596 : */
597 935479 : static int file_post(struct snapraid_state* state, int fix, unsigned i, struct snapraid_handle* handle, unsigned diskmax)
598 : {
599 : unsigned j;
600 : int ret;
601 : char esc_buffer[ESC_MAX];
602 : char esc_buffer_alt[ESC_MAX];
603 :
604 : /* if we are processing only bad blocks, we don't have to do any post-processing */
605 : /* as we don't have any guarantee to process the last block of the fixed files */
606 935479 : if (state->opt.badonly)
607 9374 : return 0;
608 :
609 : /* for all the files print the final status, and does the final time fix */
610 : /* we also ensure to close files after processing the last block */
611 6482735 : for (j = 0; j < diskmax; ++j) {
612 : struct snapraid_block* block;
613 : struct snapraid_disk* disk;
614 : struct snapraid_file* collide_file;
615 : struct snapraid_file* file;
616 : block_off_t file_pos;
617 : char path[PATH_MAX];
618 : uint64_t inode;
619 :
620 5556630 : disk = handle[j].disk;
621 5556630 : if (!disk) {
622 : /* if no disk, nothing to do */
623 3408432 : continue;
624 : }
625 :
626 5542569 : block = fs_par2block_find(disk, i);
627 5542569 : if (!block_has_file(block)) {
628 : /* if no file, nothing to do */
629 213402 : continue;
630 : }
631 :
632 5329167 : file = fs_par2file_get(disk, i, &file_pos);
633 5329167 : pathprint(path, sizeof(path), "%s%s", disk->dir, file->sub);
634 :
635 : /* if it isn't the last block in the file */
636 5329167 : if (!file_block_is_last(file, file_pos)) {
637 : /* nothing to do */
638 3166908 : continue;
639 : }
640 :
641 : /* if the file is excluded, we have nothing to adjust as the file is never written */
642 2162259 : if (file_flag_has(file, FILE_IS_EXCLUDED)
643 2065266 : || (state->opt.syncedonly && file_flag_has(file, FILE_IS_UNSYNCED))) {
644 : /* nothing to do, but close the file */
645 : goto close_and_continue;
646 : }
647 :
648 : /* finish the fix process if it's the last block of the files */
649 2065266 : if (fix) {
650 : /* mark that we finished with this file */
651 : /* to identify later any NOT finished ones */
652 757234 : file_flag_set(file, FILE_IS_FINISHED);
653 :
654 : /* if the file is damaged, meaning that a fix failed */
655 757234 : if (file_flag_has(file, FILE_IS_DAMAGED)) {
656 : /* rename it to .unrecoverable */
657 : char path_to[PATH_MAX];
658 :
659 63316 : pathprint(path_to, sizeof(path_to), "%s%s.unrecoverable", disk->dir, file->sub);
660 :
661 : /* ensure to close the file before renaming */
662 63316 : if (handle[j].file == file) {
663 63316 : ret = handle_close(&handle[j]);
664 63316 : if (ret != 0) {
665 : /* LCOV_EXCL_START */
666 : log_tag("error:%u:%s:%s: Close error. %s\n", i, disk->name, esc_tag(file->sub, esc_buffer), strerror(errno));
667 : log_fatal("DANGER! Unexpected close error in a data disk.\n");
668 : return -1;
669 : /* LCOV_EXCL_STOP */
670 : }
671 : }
672 :
673 63316 : ret = rename(path, path_to);
674 63316 : if (ret != 0) {
675 : /* LCOV_EXCL_START */
676 : log_fatal("Error renaming '%s' to '%s'. %s.\n", path, path_to, strerror(errno));
677 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
678 : return -1;
679 : /* LCOV_EXCL_STOP */
680 : }
681 :
682 63316 : log_tag("status:unrecoverable:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
683 63316 : msg_info("unrecoverable %s\n", fmt_term(disk, file->sub, esc_buffer));
684 :
685 : /* and do not set the time if damaged */
686 63316 : goto close_and_continue;
687 : }
688 :
689 : /* if the file is not fixed, meaning that it is untouched */
690 693918 : if (!file_flag_has(file, FILE_IS_FIXED)) {
691 : /* nothing to do, but close the file */
692 574456 : goto close_and_continue;
693 : }
694 :
695 : /* if the file is closed or different than the one expected, reopen it */
696 : /* a different open file could happen when filtering for bad blocks */
697 119462 : if (handle[j].file != file) {
698 : /* close a potential different file */
699 0 : ret = handle_close(&handle[j]);
700 0 : if (ret != 0) {
701 : /* LCOV_EXCL_START */
702 : log_tag("error:%u:%s:%s: Close error. %s\n", i, disk->name, esc_tag(handle[j].file->sub, esc_buffer), strerror(errno));
703 : log_fatal("DANGER! Unexpected close error in a data disk.\n");
704 : return -1;
705 : /* LCOV_EXCL_STOP */
706 : }
707 :
708 : /* reopen it as readonly, as to set the mtime readonly access it's enough */
709 : /* we know that the file exists because it has the FILE_IS_FIXED tag */
710 0 : ret = handle_open(&handle[j], file, state->file_mode, log_error, 0);
711 0 : if (ret != 0) {
712 : /* LCOV_EXCL_START */
713 : log_tag("error:%u:%s:%s: Open error. %s\n", i, disk->name, esc_tag(file->sub, esc_buffer), strerror(errno));
714 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
715 : return -1;
716 : /* LCOV_EXCL_STOP */
717 : }
718 : }
719 :
720 119462 : log_tag("status:recovered:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
721 119462 : msg_info("recovered %s\n", fmt_term(disk, file->sub, esc_buffer));
722 :
723 119462 : inode = handle[j].st.st_ino;
724 :
725 : /* search for the corresponding inode */
726 119462 : collide_file = tommy_hashdyn_search(&disk->inodeset, file_inode_compare_to_arg, &inode, file_inode_hash(inode));
727 :
728 : /* if the inode is already in the database and it refers at a different file name, */
729 : /* we can fix the file time ONLY if the time and size allow to differentiate */
730 : /* between the two files */
731 :
732 : /* for example, suppose we delete a bunch of files with all the same size and time, */
733 : /* when recreating them the inodes may be reused in a different order, */
734 : /* and at the next sync some files may have matching inode/size/time even if different name */
735 : /* not allowing sync to detect that the file is changed and not renamed */
736 119462 : if (!collide_file /* if not in the database, there is no collision */
737 42346 : || strcmp(collide_file->sub, file->sub) == 0 /* if the name is the same, it's the right collision */
738 39313 : || collide_file->size != file->size /* if the size is different, the collision is identified */
739 5 : || collide_file->mtime_sec != file->mtime_sec /* if the mtime is different, the collision is identified */
740 4 : || collide_file->mtime_nsec != file->mtime_nsec /* same for mtime_nsec */
741 : ) {
742 : /* set the original modification time */
743 119462 : ret = handle_utime(&handle[j]);
744 238924 : if (ret == -1) {
745 : /* LCOV_EXCL_START */
746 : /* mark the file as damaged */
747 : file_flag_set(file, FILE_IS_DAMAGED);
748 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
749 : return -1;
750 : /* LCOV_EXCL_STOP */
751 : }
752 : } else {
753 0 : log_tag("collision:%s:%s:%s: Not setting modification time to avoid inode collision\n", disk->name, esc_tag(file->sub, esc_buffer), esc_tag(collide_file->sub, esc_buffer_alt));
754 : }
755 : } else {
756 : /* we are not fixing, but only checking */
757 : /* print just the final status */
758 1308032 : if (file_flag_has(file, FILE_IS_DAMAGED)) {
759 9629 : if (state->opt.auditonly) {
760 2843 : log_tag("status:damaged:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
761 2843 : msg_info("damaged %s\n", fmt_term(disk, file->sub, esc_buffer));
762 : } else {
763 6786 : log_tag("status:unrecoverable:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
764 6786 : msg_info("unrecoverable %s\n", fmt_term(disk, file->sub, esc_buffer));
765 : }
766 1298403 : } else if (file_flag_has(file, FILE_IS_FIXED)) {
767 76202 : log_tag("status:recoverable:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
768 76202 : msg_info("recoverable %s\n", fmt_term(disk, file->sub, esc_buffer));
769 : } else {
770 : /* we don't use msg_verbose() because it also goes into the log */
771 1222201 : if (msg_level >= MSG_VERBOSE) {
772 23220 : log_tag("status:correct:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
773 23220 : msg_info("correct %s\n", fmt_term(disk, file->sub, esc_buffer));
774 : }
775 : }
776 : }
777 :
778 : close_and_continue:
779 : /* if the opened file is the correct one, close it */
780 : /* in case of excluded and fragmented files it's possible */
781 : /* that the opened file is not the current one */
782 2162259 : if (handle[j].file == file) {
783 : /* ensure to close the file just after finishing with it */
784 : /* to avoid to keep it open without any possible use */
785 1978230 : ret = handle_close(&handle[j]);
786 1978230 : if (ret != 0) {
787 : /* LCOV_EXCL_START */
788 : log_tag("error:%u:%s:%s: Close error. %s\n", i, disk->name, esc_tag(file->sub, esc_buffer), strerror(errno));
789 : log_fatal("DANGER! Unexpected close error in a data disk.\n");
790 : return -1;
791 : /* LCOV_EXCL_STOP */
792 : }
793 : }
794 : }
795 :
796 926105 : return 0;
797 : }
798 :
799 : /**
800 : * Check if we have to process the specified block index ::i.
801 : */
802 1870958 : static int block_is_enabled(struct snapraid_state* state, block_off_t i, struct snapraid_handle* handle, unsigned diskmax)
803 : {
804 : snapraid_info info;
805 : unsigned j;
806 : unsigned l;
807 :
808 : /* get block specific info */
809 1870958 : info = info_get(&state->infoarr, i);
810 :
811 : /* if we filter for only bad blocks */
812 1870958 : if (state->opt.badonly) {
813 : /* skip if this is not bad */
814 18748 : if (!info_get_bad(info))
815 15202 : return 0;
816 : }
817 :
818 : /* now apply the filters */
819 :
820 : /* if a parity is not excluded, include all blocks, even unused ones */
821 2125820 : for (l = 0; l < state->level; ++l) {
822 2042440 : if (!state->parity[l].is_excluded_by_filter) {
823 1772376 : return 1;
824 : }
825 : }
826 :
827 : /* otherwise include only used blocks */
828 386636 : for (j = 0; j < diskmax; ++j) {
829 : struct snapraid_block* block;
830 :
831 : /* if no disk, nothing to check */
832 351348 : if (!handle[j].disk)
833 0 : continue;
834 :
835 351348 : block = fs_par2block_find(handle[j].disk, i);
836 :
837 : /* try to recover all files, even the ones without hash */
838 : /* because in some cases we can recover also them */
839 351348 : if (block_has_file(block)) {
840 344686 : struct snapraid_file* file = fs_par2file_get(handle[j].disk, i, 0);
841 344686 : if (!file_flag_has(file, FILE_IS_EXCLUDED)) { /* only if the file is not filtered out */
842 48092 : return 1;
843 : }
844 : }
845 : }
846 :
847 35288 : return 0;
848 : }
849 :
850 118 : static int state_check_process(struct snapraid_state* state, int fix, struct snapraid_parity_handle** parity, block_off_t blockstart, block_off_t blockmax)
851 : {
852 : struct snapraid_handle* handle;
853 : unsigned diskmax;
854 : block_off_t i;
855 : unsigned j;
856 : void* buffer_alloc;
857 : void** buffer;
858 : unsigned buffermax;
859 : int ret;
860 : data_off_t countsize;
861 : block_off_t countpos;
862 : block_off_t countmax;
863 : unsigned error;
864 : unsigned unrecoverable_error;
865 : unsigned recovered_error;
866 : struct failed_struct* failed;
867 : unsigned* failed_map;
868 : unsigned l;
869 : char esc_buffer[ESC_MAX];
870 : char esc_buffer_alt[ESC_MAX];
871 :
872 118 : handle = handle_mapping(state, &diskmax);
873 :
874 : /* we need 1 * data + 2 * parity + 1 * zero */
875 118 : buffermax = diskmax + 2 * state->level + 1;
876 :
877 118 : buffer = malloc_nofail_vector_align(diskmax, buffermax, state->block_size, &buffer_alloc);
878 118 : if (!state->opt.skip_self)
879 0 : mtest_vector(buffermax, state->block_size, buffer);
880 :
881 : /* fill up the zero buffer */
882 118 : memset(buffer[buffermax - 1], 0, state->block_size);
883 118 : raid_zero(buffer[buffermax - 1]);
884 :
885 118 : failed = malloc_nofail(diskmax * sizeof(struct failed_struct));
886 118 : failed_map = malloc_nofail(diskmax * sizeof(unsigned));
887 :
888 118 : error = 0;
889 118 : unrecoverable_error = 0;
890 118 : recovered_error = 0;
891 :
892 : /* first count the number of blocks to process */
893 118 : countmax = 0;
894 935597 : for (i = blockstart; i < blockmax; ++i) {
895 935479 : if (!block_is_enabled(state, i, handle, diskmax))
896 25245 : continue;
897 910234 : ++countmax;
898 : }
899 :
900 : /* check all the blocks in files */
901 118 : countsize = 0;
902 118 : countpos = 0;
903 118 : state_progress_begin(state, blockstart, blockmax, countmax);
904 935597 : for (i = blockstart; i < blockmax; ++i) {
905 : unsigned failed_count;
906 : int valid_parity;
907 : int used_parity;
908 : snapraid_info info;
909 : int rehash;
910 :
911 935479 : if (!block_is_enabled(state, i, handle, diskmax)) {
912 : /* post process the files */
913 25245 : ret = file_post(state, fix, i, handle, diskmax);
914 25245 : if (ret == -1) {
915 : /* LCOV_EXCL_START */
916 : log_fatal("Stopping at block %u\n", i);
917 : ++unrecoverable_error;
918 : goto bail;
919 : /* LCOV_EXCL_STOP */
920 : }
921 :
922 : /* and now continue with the next block */
923 25245 : continue;
924 : }
925 :
926 : /* If we have valid parity, and it makes sense to check its content. */
927 : /* If we already know that the parity is invalid, we just read the file */
928 : /* but we don't report parity errors */
929 : /* Note that with auditonly, we anyway skip the full parity check, */
930 : /* because we also don't read it at all */
931 910234 : valid_parity = 1;
932 :
933 : /* If the parity is used by at least one file */
934 910234 : used_parity = 0;
935 :
936 : /* keep track of the number of failed blocks */
937 910234 : failed_count = 0;
938 :
939 : /* get block specific info */
940 910234 : info = info_get(&state->infoarr, i);
941 :
942 : /* if we have to use the old hash */
943 910234 : rehash = info_get_rehash(info);
944 :
945 : /* for each disk, process the block */
946 6371638 : for (j = 0; j < diskmax; ++j) {
947 : int read_size;
948 : unsigned char hash[HASH_MAX];
949 : struct snapraid_disk* disk;
950 : struct snapraid_block* block;
951 : struct snapraid_file* file;
952 : block_off_t file_pos;
953 : unsigned block_state;
954 :
955 : /* if the disk position is not used */
956 5461404 : disk = handle[j].disk;
957 5461404 : if (!disk) {
958 : /* use an empty block */
959 14061 : memset(buffer[j], 0, state->block_size);
960 955953 : continue;
961 : }
962 :
963 : /* if the disk block is not used */
964 5447343 : block = fs_par2block_find(disk, i);
965 5447343 : if (block == BLOCK_NULL) {
966 : /* use an empty block */
967 192063 : memset(buffer[j], 0, state->block_size);
968 192063 : continue;
969 : }
970 :
971 : /* get the state of the block */
972 5255280 : block_state = block_state_get(block);
973 :
974 : /* if the parity is not valid */
975 5255280 : if (block_has_invalid_parity(block)) {
976 : /* mark the parity as invalid, and don't try to check/fix it */
977 : /* because it will be recomputed at the next sync */
978 94587 : valid_parity = 0;
979 : /* follow */
980 : }
981 :
982 : /* if the block is DELETED */
983 5255280 : if (block_state == BLOCK_STATE_DELETED) {
984 : /* use an empty block */
985 18454 : memset(buffer[j], 0, state->block_size);
986 :
987 : /* store it in the failed set, because potentially */
988 : /* the parity may be still computed with the previous content */
989 18454 : failed[failed_count].is_bad = 0; /* note that is_bad==0 <=> file==0 */
990 18454 : failed[failed_count].is_outofdate = 0;
991 18454 : failed[failed_count].index = j;
992 18454 : failed[failed_count].block = block;
993 18454 : failed[failed_count].disk = disk;
994 18454 : failed[failed_count].file = 0;
995 18454 : failed[failed_count].file_pos = 0;
996 18454 : failed[failed_count].handle = 0;
997 18454 : ++failed_count;
998 18454 : continue;
999 : }
1000 :
1001 : /* here we are sure that the parity is used by a file */
1002 5236826 : used_parity = 1;
1003 :
1004 : /* get the file of this block */
1005 5236826 : file = fs_par2file_get(disk, i, &file_pos);
1006 :
1007 : /* if we are only hashing, we can skip excluded files and don't even read them */
1008 5236826 : if (state->opt.auditonly && file_flag_has(file, FILE_IS_EXCLUDED)) {
1009 : /* use an empty block */
1010 : /* in true, this is unnecessary, because we are not checking any parity */
1011 : /* but we keep it for completeness */
1012 0 : memset(buffer[j], 0, state->block_size);
1013 0 : continue;
1014 : }
1015 :
1016 : /* if the file is closed or different than the current one */
1017 5236826 : if (handle[j].file == 0 || handle[j].file != file) {
1018 : /* close the old one, if any */
1019 2260164 : ret = handle_close(&handle[j]);
1020 2260164 : if (ret == -1) {
1021 : /* LCOV_EXCL_START */
1022 : log_tag("error:%u:%s:%s: Close error. %s\n", i, disk->name, esc_tag(handle[j].file->sub, esc_buffer), strerror(errno));
1023 : log_fatal("DANGER! Unexpected close error in a data disk.\n");
1024 : log_fatal("Stopping at block %u\n", i);
1025 : ++unrecoverable_error;
1026 : goto bail;
1027 : /* LCOV_EXCL_STOP */
1028 : }
1029 :
1030 : /* if fixing, and the file is not excluded, we must open for writing */
1031 2260164 : if (fix && !file_flag_has(file, FILE_IS_EXCLUDED)) {
1032 : /* if fixing, create the file, open for writing and resize if required */
1033 771262 : ret = handle_create(&handle[j], file, state->file_mode);
1034 771262 : if (ret == -1) {
1035 : /* LCOV_EXCL_START */
1036 : if (errno == EACCES) {
1037 : log_fatal("WARNING! Please give write permission to the file.\n");
1038 : } else {
1039 : log_fatal("DANGER! Without a working data disk, it isn't possible to fix errors on it.\n");
1040 : }
1041 : log_fatal("Stopping at block %u\n", i);
1042 : ++unrecoverable_error;
1043 : goto bail;
1044 : /* LCOV_EXCL_STOP */
1045 : }
1046 :
1047 : /* check if the file was just created */
1048 1542524 : if (handle[j].created != 0) {
1049 : /* if fragmented, it may be reopened, so remember that the file */
1050 : /* was originally missing */
1051 113168 : file_flag_set(file, FILE_IS_CREATED);
1052 : }
1053 : } else {
1054 : /* open the file only for reading */
1055 1488902 : if (!file_flag_has(file, FILE_IS_MISSING))
1056 1372852 : ret = handle_open(&handle[j], file, state->file_mode,
1057 1372852 : log_error, state->opt.expected_missing ? log_expected : 0);
1058 : else
1059 116050 : ret = -1; /* if the file is missing, we cannot open it */
1060 1488902 : if (ret == -1) {
1061 : /* save the failed block for the check/fix */
1062 194966 : failed[failed_count].is_bad = 1;
1063 194966 : failed[failed_count].is_outofdate = 0;
1064 194966 : failed[failed_count].index = j;
1065 194966 : failed[failed_count].block = block;
1066 194966 : failed[failed_count].disk = disk;
1067 194966 : failed[failed_count].file = file;
1068 194966 : failed[failed_count].file_pos = file_pos;
1069 194966 : failed[failed_count].handle = &handle[j];
1070 194966 : ++failed_count;
1071 :
1072 194966 : log_tag("error:%u:%s:%s: Open error at position %u\n", i, disk->name, esc_tag(file->sub, esc_buffer), file_pos);
1073 194966 : ++error;
1074 :
1075 : /* mark the file as missing, to avoid to retry to open it again */
1076 : /* note that this can be done only if we are not fixing it */
1077 : /* otherwise, it could be recreated */
1078 194966 : file_flag_set(file, FILE_IS_MISSING);
1079 194966 : continue;
1080 : }
1081 : }
1082 :
1083 : /* if it's the first open, and not excluded */
1084 2065198 : if (!file_flag_has(file, FILE_IS_OPENED)
1085 2049838 : && !file_flag_has(file, FILE_IS_EXCLUDED)) {
1086 :
1087 : /* check if the file is changed */
1088 1998407 : if (handle[j].st.st_size != file->size
1089 1805874 : || handle[j].st.st_mtime != file->mtime_sec
1090 1781526 : || STAT_NSEC(&handle[j].st) != file->mtime_nsec
1091 : /* don't check the inode to support file-system without persistent inodes */
1092 : ) {
1093 : /* report that the file is not synced */
1094 216881 : file_flag_set(file, FILE_IS_UNSYNCED);
1095 : }
1096 : }
1097 :
1098 : /* if it's the first open, and not excluded and larger */
1099 2065198 : if (!file_flag_has(file, FILE_IS_OPENED)
1100 2049838 : && !file_flag_has(file, FILE_IS_EXCLUDED)
1101 1998407 : && !(state->opt.syncedonly && file_flag_has(file, FILE_IS_UNSYNCED))
1102 1998407 : && handle[j].st.st_size > file->size
1103 : ) {
1104 8622 : log_error("File '%s' is larger than expected.\n", handle[j].path);
1105 8622 : log_tag("error:%u:%s:%s: Size error\n", i, disk->name, esc_tag(file->sub, esc_buffer));
1106 8622 : ++error;
1107 :
1108 8622 : if (fix) {
1109 3186 : ret = handle_truncate(&handle[j], file);
1110 3186 : if (ret == -1) {
1111 : /* LCOV_EXCL_START */
1112 : log_fatal("DANGER! Unexpected truncate error in a data disk, it isn't possible to fix.\n");
1113 : log_fatal("Stopping at block %u\n", i);
1114 : ++unrecoverable_error;
1115 : goto bail;
1116 : /* LCOV_EXCL_STOP */
1117 : }
1118 :
1119 3186 : log_tag("fixed:%u:%s:%s: Fixed size\n", i, disk->name, esc_tag(file->sub, esc_buffer));
1120 3186 : ++recovered_error;
1121 : }
1122 : }
1123 :
1124 : /* mark the file as opened at least one time */
1125 : /* this is used to avoid to check the unsynced and size */
1126 : /* more than one time, in case the file is reopened later */
1127 2065198 : file_flag_set(file, FILE_IS_OPENED);
1128 : }
1129 :
1130 : /* read from the file */
1131 5041860 : read_size = handle_read(&handle[j], file_pos, buffer[j], state->block_size,
1132 5041860 : log_error, state->opt.expected_missing ? log_expected : 0);
1133 5041860 : if (read_size == -1) {
1134 : /* save the failed block for the check/fix */
1135 442136 : failed[failed_count].is_bad = 1; /* it's bad because we cannot read it */
1136 442136 : failed[failed_count].is_outofdate = 0;
1137 442136 : failed[failed_count].index = j;
1138 442136 : failed[failed_count].block = block;
1139 442136 : failed[failed_count].disk = disk;
1140 442136 : failed[failed_count].file = file;
1141 442136 : failed[failed_count].file_pos = file_pos;
1142 442136 : failed[failed_count].handle = &handle[j];
1143 442136 : ++failed_count;
1144 :
1145 442136 : log_tag("error:%u:%s:%s: Read error at position %u\n", i, disk->name, esc_tag(file->sub, esc_buffer), file_pos);
1146 442136 : ++error;
1147 442136 : continue;
1148 : }
1149 :
1150 4599724 : countsize += read_size;
1151 :
1152 : /* always insert CHG blocks, the repair functions needs all of them */
1153 : /* because the parity may be still referring at the old state */
1154 : /* and the repair must be aware of it */
1155 4599724 : if (block_state == BLOCK_STATE_CHG) {
1156 : /* we DO NOT mark them as bad to avoid to overwrite them with wrong data. */
1157 : /* if we don't have a hash, we always assume the first read of the block correct. */
1158 13843 : failed[failed_count].is_bad = 0; /* we assume the CHG block correct */
1159 13843 : failed[failed_count].is_outofdate = 0;
1160 13843 : failed[failed_count].index = j;
1161 13843 : failed[failed_count].block = block;
1162 13843 : failed[failed_count].disk = disk;
1163 13843 : failed[failed_count].file = file;
1164 13843 : failed[failed_count].file_pos = file_pos;
1165 13843 : failed[failed_count].handle = &handle[j];
1166 13843 : ++failed_count;
1167 13843 : continue;
1168 : }
1169 :
1170 4585881 : assert(block_state == BLOCK_STATE_BLK || block_state == BLOCK_STATE_REP);
1171 :
1172 : /* compute the hash of the block just read */
1173 4585881 : if (rehash) {
1174 27243 : memhash(state->prevhash, state->prevhashseed, hash, buffer[j], read_size);
1175 : } else {
1176 4558638 : memhash(state->hash, state->hashseed, hash, buffer[j], read_size);
1177 : }
1178 :
1179 : /* compare the hash */
1180 4585881 : if (memcmp(hash, block->hash, BLOCK_HASH_SIZE) != 0) {
1181 12941 : unsigned diff = memdiff(hash, block->hash, BLOCK_HASH_SIZE);
1182 :
1183 : /* save the failed block for the check/fix */
1184 12941 : failed[failed_count].is_bad = 1; /* it's bad because the hash doesn't match */
1185 12941 : failed[failed_count].is_outofdate = 0;
1186 12941 : failed[failed_count].index = j;
1187 12941 : failed[failed_count].block = block;
1188 12941 : failed[failed_count].disk = disk;
1189 12941 : failed[failed_count].file = file;
1190 12941 : failed[failed_count].file_pos = file_pos;
1191 12941 : failed[failed_count].handle = &handle[j];
1192 12941 : ++failed_count;
1193 :
1194 12941 : log_tag("error:%u:%s:%s: Data error at position %u, diff bits %u/%u\n", i, disk->name, esc_tag(file->sub, esc_buffer), file_pos, diff, BLOCK_HASH_SIZE * 8);
1195 12941 : ++error;
1196 12941 : continue;
1197 : }
1198 :
1199 : /* always insert REP blocks, the repair functions needs all of them */
1200 : /* because the parity may be still referring at the old state */
1201 : /* and the repair must be aware of it */
1202 4572940 : if (block_state == BLOCK_STATE_REP) {
1203 53428 : failed[failed_count].is_bad = 0; /* it's not bad */
1204 53428 : failed[failed_count].is_outofdate = 0;
1205 53428 : failed[failed_count].index = j;
1206 53428 : failed[failed_count].block = block;
1207 53428 : failed[failed_count].disk = disk;
1208 53428 : failed[failed_count].file = file;
1209 53428 : failed[failed_count].file_pos = file_pos;
1210 53428 : failed[failed_count].handle = &handle[j];
1211 53428 : ++failed_count;
1212 53428 : continue;
1213 : }
1214 : }
1215 :
1216 : /* now read and check the parity if requested */
1217 910234 : if (!state->opt.auditonly) {
1218 : void* buffer_recov[LEV_MAX];
1219 : void* buffer_zero;
1220 :
1221 : /* buffers for parity read and not computed */
1222 4170794 : for (l = 0; l < state->level; ++l)
1223 3299835 : buffer_recov[l] = buffer[diskmax + state->level + l];
1224 2796878 : for (; l < LEV_MAX; ++l)
1225 1925919 : buffer_recov[l] = 0;
1226 :
1227 : /* the zero buffer is the last one */
1228 870959 : buffer_zero = buffer[buffermax - 1];
1229 :
1230 : /* read the parity */
1231 4170794 : for (l = 0; l < state->level; ++l) {
1232 3299835 : if (parity[l]) {
1233 3285933 : ret = parity_read(parity[l], i, buffer_recov[l], state->block_size, log_error);
1234 3285933 : if (ret == -1) {
1235 11360 : buffer_recov[l] = 0; /* no parity to use */
1236 :
1237 11360 : log_tag("parity_error:%u:%s: Read error\n", i, lev_config_name(l));
1238 11360 : ++error;
1239 : }
1240 : } else {
1241 13902 : buffer_recov[l] = 0;
1242 : }
1243 : }
1244 :
1245 : /* try all the recovering strategies */
1246 870959 : ret = repair(state, rehash, i, diskmax, failed, failed_map, failed_count, buffer, buffer_recov, buffer_zero);
1247 870959 : if (ret != 0) {
1248 : /* increment the number of errors */
1249 48152 : if (ret > 0)
1250 4876 : error += ret;
1251 48152 : ++unrecoverable_error;
1252 :
1253 : /* print a list of all the errors in files */
1254 219712 : for (j = 0; j < failed_count; ++j) {
1255 171560 : if (failed[j].is_bad)
1256 167038 : log_tag("unrecoverable:%u:%s:%s: Unrecoverable error at position %u\n", i, failed[j].disk->name, esc_tag(failed[j].file->sub, esc_buffer), failed[j].file_pos);
1257 : }
1258 :
1259 : /* keep track of damaged files */
1260 219712 : for (j = 0; j < failed_count; ++j) {
1261 171560 : if (failed[j].is_bad)
1262 167038 : file_flag_set(failed[j].file, FILE_IS_DAMAGED);
1263 : }
1264 : } else {
1265 : /* now counts partial recovers */
1266 : /* note that this could happen only when we have an incomplete 'sync' */
1267 : /* and that we have recovered is the state before the 'sync' */
1268 822807 : int partial_recover_error = 0;
1269 :
1270 : /* print a list of all the errors in files */
1271 1380941 : for (j = 0; j < failed_count; ++j) {
1272 558134 : if (failed[j].is_bad && failed[j].is_outofdate) {
1273 8 : ++partial_recover_error;
1274 8 : log_tag("unrecoverable:%u:%s:%s: Unrecoverable unsynced error at position %u\n", i, failed[j].disk->name, esc_tag(failed[j].file->sub, esc_buffer), failed[j].file_pos);
1275 : }
1276 : }
1277 822807 : if (partial_recover_error != 0) {
1278 8 : error += partial_recover_error;
1279 8 : ++unrecoverable_error;
1280 : }
1281 :
1282 : /*
1283 : * Check parities, but only if all the blocks have it computed and it's used.
1284 : *
1285 : * If you check/fix after a partial sync, it's OK to have parity errors
1286 : * on the blocks with invalid parity and doesn't make sense to try to fix it.
1287 : *
1288 : * It's also OK to have data errors on unused parity, because sync doesn't
1289 : * update it.
1290 : */
1291 822807 : if (used_parity && valid_parity) {
1292 : /* check the parity */
1293 3728519 : for (l = 0; l < state->level; ++l) {
1294 2967297 : if (buffer_recov[l] != 0 && memcmp(buffer_recov[l], buffer[diskmax + l], state->block_size) != 0) {
1295 78425 : unsigned diff = memdiff(buffer_recov[l], buffer[diskmax + l], state->block_size);
1296 :
1297 : /* mark that the read parity is wrong, setting ptr to 0 */
1298 78425 : buffer_recov[l] = 0;
1299 :
1300 78425 : log_tag("parity_error:%u:%s: Data error, diff bits %u/%u\n", i, lev_config_name(l), diff, state->block_size * 8);
1301 78425 : ++error;
1302 : }
1303 : }
1304 : }
1305 :
1306 : /* now write recovered files */
1307 822807 : if (fix) {
1308 : /* update the fixed files */
1309 652872 : for (j = 0; j < failed_count; ++j) {
1310 : /* nothing to do if it doesn't need recovering */
1311 338245 : if (!failed[j].is_bad)
1312 40318 : continue;
1313 :
1314 : /* do not fix if the file is excluded */
1315 297927 : if (file_flag_has(failed[j].file, FILE_IS_EXCLUDED)
1316 288647 : || (state->opt.syncedonly && file_flag_has(failed[j].file, FILE_IS_UNSYNCED)))
1317 9280 : continue;
1318 :
1319 288647 : ret = handle_write(failed[j].handle, failed[j].file_pos, buffer[failed[j].index], state->block_size);
1320 288647 : if (ret == -1) {
1321 : /* LCOV_EXCL_START */
1322 : /* mark the file as damaged */
1323 : file_flag_set(failed[j].file, FILE_IS_DAMAGED);
1324 :
1325 : if (errno == EACCES) {
1326 : log_fatal("WARNING! Please give write permission to the file.\n");
1327 : } else {
1328 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1329 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1330 : }
1331 : log_fatal("Stopping at block %u\n", i);
1332 : ++unrecoverable_error;
1333 : goto bail;
1334 : /* LCOV_EXCL_STOP */
1335 : }
1336 :
1337 : /* if we are not sure that the recovered content is uptodate */
1338 288647 : if (failed[j].is_outofdate) {
1339 : /* mark the file as damaged */
1340 8 : file_flag_set(failed[j].file, FILE_IS_DAMAGED);
1341 8 : continue;
1342 : }
1343 :
1344 : /* mark the file as containing some fixes */
1345 : /* note that it could be also marked as damaged in other iterations */
1346 288639 : file_flag_set(failed[j].file, FILE_IS_FIXED);
1347 :
1348 288639 : log_tag("fixed:%u:%s:%s: Fixed data error at position %u\n", i, failed[j].disk->name, esc_tag(failed[j].file->sub, esc_buffer), failed[j].file_pos);
1349 288639 : ++recovered_error;
1350 : }
1351 :
1352 : /*
1353 : * Update parity only if all the blocks have it computed and it's used.
1354 : *
1355 : * If you check/fix after a partial sync, you do not want to fix parity
1356 : * for blocks that are going to have it computed in the sync completion.
1357 : *
1358 : * For unused parity there is no need to write it, because when fixing
1359 : * we already have allocated space for it on parity file creation,
1360 : * and its content doesn't matter.
1361 : */
1362 314627 : if (used_parity && valid_parity) {
1363 : /* update the parity */
1364 963984 : for (l = 0; l < state->level; ++l) {
1365 : /* if the parity on disk is wrong */
1366 698537 : if (buffer_recov[l] == 0
1367 : /* and we have access at the parity */
1368 71983 : && parity[l] != 0
1369 : /* and the parity is not excluded */
1370 67296 : && !state->parity[l].is_excluded_by_filter
1371 : ) {
1372 67296 : ret = parity_write(parity[l], i, buffer[diskmax + l], state->block_size);
1373 67296 : if (ret == -1) {
1374 : /* LCOV_EXCL_START */
1375 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1376 : log_fatal("WARNING! Without a working %s disk, it isn't possible to fix errors on it.\n", lev_name(l));
1377 : log_fatal("Stopping at block %u\n", i);
1378 : ++unrecoverable_error;
1379 : goto bail;
1380 : /* LCOV_EXCL_STOP */
1381 : }
1382 :
1383 67296 : log_tag("parity_fixed:%u:%s: Fixed data error\n", i, lev_config_name(l));
1384 67296 : ++recovered_error;
1385 : }
1386 : }
1387 : }
1388 : } else {
1389 : /* if we are not fixing, we just set the FIXED flag */
1390 : /* meaning that we could fix this file if we try */
1391 728069 : for (j = 0; j < failed_count; ++j) {
1392 219889 : if (failed[j].is_bad) {
1393 179004 : file_flag_set(failed[j].file, FILE_IS_FIXED);
1394 : }
1395 : }
1396 : }
1397 : }
1398 : } else {
1399 : /* if we are not checking, we just set the DAMAGED flag */
1400 : /* to report that the file is damaged, and we don't know if we can fix it */
1401 45349 : for (j = 0; j < failed_count; ++j) {
1402 6074 : if (failed[j].is_bad) {
1403 6074 : file_flag_set(failed[j].file, FILE_IS_DAMAGED);
1404 : }
1405 : }
1406 : }
1407 :
1408 : /* post process the files */
1409 910234 : ret = file_post(state, fix, i, handle, diskmax);
1410 910234 : if (ret == -1) {
1411 : /* LCOV_EXCL_START */
1412 : log_fatal("Stopping at block %u\n", i);
1413 : ++unrecoverable_error;
1414 : goto bail;
1415 : /* LCOV_EXCL_STOP */
1416 : }
1417 :
1418 : /* count the number of processed block */
1419 910234 : ++countpos;
1420 :
1421 : /* progress */
1422 910234 : if (state_progress(state, 0, i, countpos, countmax, countsize)) {
1423 : /* LCOV_EXCL_START */
1424 : break;
1425 : /* LCOV_EXCL_STOP */
1426 : }
1427 : }
1428 :
1429 : /* for each disk, recover empty files, symlinks and empty dirs */
1430 826 : for (i = 0; i < diskmax; ++i) {
1431 : tommy_node* node;
1432 : struct snapraid_disk* disk;
1433 :
1434 708 : if (!handle[i].disk)
1435 3 : continue;
1436 :
1437 : /* for each empty file in the disk */
1438 705 : disk = handle[i].disk;
1439 705 : node = disk->filelist;
1440 2189135 : while (node) {
1441 : char path[PATH_MAX];
1442 : struct stat st;
1443 : struct snapraid_file* file;
1444 2187725 : int unsuccesful = 0;
1445 :
1446 2187725 : file = node->data;
1447 2187725 : node = node->next; /* next node */
1448 :
1449 : /* if not empty, it's already checked and continue to the next one */
1450 2187725 : if (file->size != 0) {
1451 4369715 : continue;
1452 : }
1453 :
1454 : /* if excluded continue to the next one */
1455 2952 : if (file_flag_has(file, FILE_IS_EXCLUDED)) {
1456 169 : continue;
1457 : }
1458 :
1459 : /* stat the file */
1460 2783 : pathprint(path, sizeof(path), "%s%s", disk->dir, file->sub);
1461 2783 : ret = stat(path, &st);
1462 2783 : if (ret == -1) {
1463 163 : unsuccesful = 1;
1464 :
1465 163 : log_error("Error stating empty file '%s'. %s.\n", path, strerror(errno));
1466 163 : log_tag("error:%s:%s: Empty file stat error\n", disk->name, esc_tag(file->sub, esc_buffer));
1467 163 : ++error;
1468 2620 : } else if (!S_ISREG(st.st_mode)) {
1469 0 : unsuccesful = 1;
1470 :
1471 0 : log_tag("error:%s:%s: Empty file error for not regular file\n", disk->name, esc_tag(file->sub, esc_buffer));
1472 0 : ++error;
1473 2620 : } else if (st.st_size != 0) {
1474 11 : unsuccesful = 1;
1475 :
1476 11 : log_tag("error:%s:%s: Empty file error for size '%" PRIu64 "'\n", disk->name, esc_tag(file->sub, esc_buffer), (uint64_t)st.st_size);
1477 11 : ++error;
1478 : }
1479 :
1480 2783 : if (fix && unsuccesful) {
1481 : int f;
1482 :
1483 : /* create the ancestor directories */
1484 155 : ret = mkancestor(path);
1485 155 : if (ret != 0) {
1486 : /* LCOV_EXCL_START */
1487 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1488 : log_fatal("Stopping\n");
1489 : ++unrecoverable_error;
1490 : goto bail;
1491 : /* LCOV_EXCL_STOP */
1492 : }
1493 :
1494 : /* create it */
1495 : /* O_NOFOLLOW: do not follow links to ensure to open the real file */
1496 155 : f = open(path, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_NOFOLLOW, 0600);
1497 155 : if (f == -1) {
1498 : /* LCOV_EXCL_START */
1499 : log_fatal("Error creating empty file '%s'. %s.\n", path, strerror(errno));
1500 : if (errno == EACCES) {
1501 : log_fatal("WARNING! Please give write permission to the file.\n");
1502 : } else {
1503 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1504 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1505 : }
1506 : log_fatal("Stopping\n");
1507 : ++unrecoverable_error;
1508 : goto bail;
1509 : /* LCOV_EXCL_STOP */
1510 : }
1511 :
1512 : /* set the original modification time */
1513 155 : ret = fmtime(f, file->mtime_sec, file->mtime_nsec);
1514 155 : if (ret != 0) {
1515 : /* LCOV_EXCL_START */
1516 : close(f);
1517 :
1518 : log_fatal("Error timing file '%s'. %s.\n", file->sub, strerror(errno));
1519 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1520 : log_fatal("Stopping\n");
1521 : ++unrecoverable_error;
1522 : goto bail;
1523 : /* LCOV_EXCL_STOP */
1524 : }
1525 :
1526 : /* close it */
1527 155 : ret = close(f);
1528 155 : if (ret != 0) {
1529 : /* LCOV_EXCL_START */
1530 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1531 : log_fatal("Stopping\n");
1532 : ++unrecoverable_error;
1533 : goto bail;
1534 : /* LCOV_EXCL_STOP */
1535 : }
1536 :
1537 155 : log_tag("fixed:%s:%s: Fixed empty file\n", disk->name, esc_tag(file->sub, esc_buffer));
1538 155 : ++recovered_error;
1539 :
1540 155 : log_tag("status:recovered:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
1541 155 : msg_info("recovered %s\n", fmt_term(disk, file->sub, esc_buffer));
1542 : }
1543 : }
1544 :
1545 : /* for each link in the disk */
1546 705 : disk = handle[i].disk;
1547 705 : node = disk->linklist;
1548 71537 : while (node) {
1549 : char path[PATH_MAX];
1550 : char pathto[PATH_MAX];
1551 : char linkto[PATH_MAX];
1552 : struct stat st;
1553 : struct stat stto;
1554 : struct snapraid_link* slink;
1555 70127 : int unsuccesful = 0;
1556 70127 : int unrecoverable = 0;
1557 :
1558 70127 : slink = node->data;
1559 70127 : node = node->next; /* next node */
1560 :
1561 : /* if excluded continue to the next one */
1562 70127 : if (link_flag_has(slink, FILE_IS_EXCLUDED)) {
1563 3277 : continue;
1564 : }
1565 :
1566 66850 : if (link_flag_has(slink, FILE_IS_HARDLINK)) {
1567 : /* stat the link */
1568 318 : pathprint(path, sizeof(path), "%s%s", disk->dir, slink->sub);
1569 318 : ret = stat(path, &st);
1570 318 : if (ret == -1) {
1571 36 : unsuccesful = 1;
1572 :
1573 36 : log_error("Error stating hardlink '%s'. %s.\n", path, strerror(errno));
1574 36 : log_tag("hardlink_error:%s:%s:%s: Hardlink stat error\n", disk->name, esc_tag(slink->sub, esc_buffer), esc_tag(slink->linkto, esc_buffer_alt));
1575 36 : ++error;
1576 282 : } else if (!S_ISREG(st.st_mode)) {
1577 0 : unsuccesful = 1;
1578 :
1579 0 : log_tag("hardlink_error:%s:%s:%s: Hardlink error for not regular file\n", disk->name, esc_tag(slink->sub, esc_buffer), esc_tag(slink->linkto, esc_buffer_alt));
1580 0 : ++error;
1581 : }
1582 :
1583 : /* stat the "to" file */
1584 318 : pathprint(pathto, sizeof(pathto), "%s%s", disk->dir, slink->linkto);
1585 318 : ret = stat(pathto, &stto);
1586 318 : if (ret == -1) {
1587 24 : unsuccesful = 1;
1588 :
1589 24 : if (errno == ENOENT) {
1590 24 : unrecoverable = 1;
1591 24 : if (fix) {
1592 : /* if the target doesn't exist, it's unrecoverable */
1593 : /* because we cannot create an hardlink of a file that */
1594 : /* doesn't exists */
1595 12 : ++unrecoverable_error;
1596 : } else {
1597 : /* but in check, we can assume that fixing will recover */
1598 : /* such missing file, so we assume a less drastic error */
1599 12 : ++error;
1600 : }
1601 : }
1602 :
1603 24 : log_error("Error stating hardlink-to '%s'. %s.\n", pathto, strerror(errno));
1604 24 : log_tag("hardlink_error:%s:%s:%s: Hardlink to stat error\n", disk->name, esc_tag(slink->sub, esc_buffer), esc_tag(slink->linkto, esc_buffer_alt));
1605 24 : ++error;
1606 294 : } else if (!S_ISREG(stto.st_mode)) {
1607 0 : unsuccesful = 1;
1608 :
1609 0 : log_tag("hardlink_error:%s:%s:%s: Hardlink-to error for not regular file\n", disk->name, esc_tag(slink->sub, esc_buffer), esc_tag(slink->linkto, esc_buffer_alt));
1610 0 : ++error;
1611 294 : } else if (!unsuccesful && st.st_ino != stto.st_ino) {
1612 0 : unsuccesful = 1;
1613 :
1614 0 : log_error("Mismatch hardlink '%s' and '%s'. Different inode.\n", path, pathto);
1615 0 : log_tag("hardlink_error:%s:%s:%s: Hardlink mismatch for different inode\n", disk->name, esc_tag(slink->sub, esc_buffer), esc_tag(slink->linkto, esc_buffer_alt));
1616 0 : ++error;
1617 : }
1618 : } else {
1619 : /* read the symlink */
1620 66532 : pathprint(path, sizeof(path), "%s%s", disk->dir, slink->sub);
1621 66532 : ret = readlink(path, linkto, sizeof(linkto));
1622 66532 : if (ret < 0) {
1623 4317 : unsuccesful = 1;
1624 :
1625 4317 : log_error("Error reading symlink '%s'. %s.\n", path, strerror(errno));
1626 4317 : log_tag("symlink_error:%s:%s: Symlink read error\n", disk->name, esc_tag(slink->sub, esc_buffer));
1627 4317 : ++error;
1628 62215 : } else if (ret >= PATH_MAX) {
1629 0 : unsuccesful = 1;
1630 :
1631 0 : log_error("Error reading symlink '%s'. Symlink too long.\n", path);
1632 0 : log_tag("symlink_error:%s:%s: Symlink read error\n", disk->name, esc_tag(slink->sub, esc_buffer));
1633 0 : ++error;
1634 : } else {
1635 62215 : linkto[ret] = 0;
1636 :
1637 62215 : if (strcmp(linkto, slink->linkto) != 0) {
1638 517 : unsuccesful = 1;
1639 :
1640 517 : log_tag("symlink_error:%s:%s: Symlink data error '%s' instead of '%s'\n", disk->name, esc_tag(slink->sub, esc_buffer), linkto, slink->linkto);
1641 517 : ++error;
1642 : }
1643 : }
1644 : }
1645 :
1646 66850 : if (fix && unsuccesful && !unrecoverable) {
1647 : /* create the ancestor directories */
1648 3941 : ret = mkancestor(path);
1649 3941 : if (ret != 0) {
1650 : /* LCOV_EXCL_START */
1651 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1652 : log_fatal("Stopping\n");
1653 : ++unrecoverable_error;
1654 : goto bail;
1655 : /* LCOV_EXCL_STOP */
1656 : }
1657 :
1658 : /* if it exists, it must be deleted before recreating */
1659 3941 : ret = remove(path);
1660 3941 : if (ret != 0 && errno != ENOENT) {
1661 : /* LCOV_EXCL_START */
1662 : log_fatal("Error removing '%s'. %s.\n", path, strerror(errno));
1663 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1664 : log_fatal("Stopping\n");
1665 : ++unrecoverable_error;
1666 : goto bail;
1667 : /* LCOV_EXCL_STOP */
1668 : }
1669 :
1670 : /* create it */
1671 3941 : if (link_flag_has(slink, FILE_IS_HARDLINK)) {
1672 12 : ret = hardlink(pathto, path);
1673 12 : if (ret != 0) {
1674 : /* LCOV_EXCL_START */
1675 : log_fatal("Error writing hardlink '%s' to '%s'. %s.\n", path, pathto, strerror(errno));
1676 : if (errno == EACCES) {
1677 : log_fatal("WARNING! Please give write permission to the hardlink.\n");
1678 : } else {
1679 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1680 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1681 : }
1682 : log_fatal("Stopping\n");
1683 : ++unrecoverable_error;
1684 : goto bail;
1685 : /* LCOV_EXCL_STOP */
1686 : }
1687 :
1688 12 : log_tag("hardlink_fixed:%s:%s: Fixed hardlink error\n", disk->name, esc_tag(slink->sub, esc_buffer));
1689 12 : ++recovered_error;
1690 : } else {
1691 3929 : ret = symlink(slink->linkto, path);
1692 3929 : if (ret != 0) {
1693 : /* LCOV_EXCL_START */
1694 : log_fatal("Error writing symlink '%s' to '%s'. %s.\n", path, slink->linkto, strerror(errno));
1695 : if (errno == EACCES) {
1696 : log_fatal("WARNING! Please give write permission to the symlink.\n");
1697 : } else {
1698 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1699 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1700 : }
1701 : log_fatal("Stopping\n");
1702 : ++unrecoverable_error;
1703 : goto bail;
1704 : /* LCOV_EXCL_STOP */
1705 : }
1706 :
1707 3929 : log_tag("symlink_fixed:%s:%s: Fixed symlink error\n", disk->name, esc_tag(slink->sub, esc_buffer));
1708 3929 : ++recovered_error;
1709 : }
1710 :
1711 3941 : log_tag("status:recovered:%s:%s\n", disk->name, esc_tag(slink->sub, esc_buffer));
1712 3941 : msg_info("recovered %s\n", fmt_term(disk, slink->sub, esc_buffer));
1713 : }
1714 : }
1715 :
1716 : /* for each dir in the disk */
1717 705 : disk = handle[i].disk;
1718 705 : node = disk->dirlist;
1719 1850 : while (node) {
1720 : char path[PATH_MAX];
1721 : struct stat st;
1722 : struct snapraid_dir* dir;
1723 440 : int unsuccesful = 0;
1724 :
1725 440 : dir = node->data;
1726 440 : node = node->next; /* next node */
1727 :
1728 : /* if excluded continue to the next one */
1729 440 : if (dir_flag_has(dir, FILE_IS_EXCLUDED)) {
1730 19 : continue;
1731 : }
1732 :
1733 : /* stat the dir */
1734 421 : pathprint(path, sizeof(path), "%s%s", disk->dir, dir->sub);
1735 421 : ret = stat(path, &st);
1736 421 : if (ret == -1) {
1737 23 : unsuccesful = 1;
1738 :
1739 23 : log_error("Error stating dir '%s'. %s.\n", path, strerror(errno));
1740 23 : log_tag("dir_error:%s:%s: Dir stat error\n", disk->name, esc_tag(dir->sub, esc_buffer));
1741 23 : ++error;
1742 398 : } else if (!S_ISDIR(st.st_mode)) {
1743 0 : unsuccesful = 1;
1744 :
1745 0 : log_tag("dir_error:%s:%s: Dir error for not directory\n", disk->name, esc_tag(dir->sub, esc_buffer));
1746 0 : ++error;
1747 : }
1748 :
1749 421 : if (fix && unsuccesful) {
1750 : /* create the ancestor directories */
1751 21 : ret = mkancestor(path);
1752 21 : if (ret != 0) {
1753 : /* LCOV_EXCL_START */
1754 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1755 : log_fatal("Stopping\n");
1756 : ++unrecoverable_error;
1757 : goto bail;
1758 : /* LCOV_EXCL_STOP */
1759 : }
1760 :
1761 : /* create it */
1762 21 : ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1763 21 : if (ret != 0) {
1764 : /* LCOV_EXCL_START */
1765 : log_fatal("Error creating dir '%s'. %s.\n", path, strerror(errno));
1766 : if (errno == EACCES) {
1767 : log_fatal("WARNING! Please give write permission to the dir.\n");
1768 : } else {
1769 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1770 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1771 : }
1772 : log_fatal("Stopping\n");
1773 : ++unrecoverable_error;
1774 : goto bail;
1775 : /* LCOV_EXCL_STOP */
1776 : }
1777 :
1778 21 : log_tag("dir_fixed:%s:%s: Fixed dir error\n", disk->name, esc_tag(dir->sub, esc_buffer));
1779 21 : ++recovered_error;
1780 :
1781 21 : log_tag("status:recovered:%s:%s\n", disk->name, esc_tag(dir->sub, esc_buffer));
1782 21 : msg_info("recovered %s\n", fmt_term(disk, dir->sub, esc_buffer));
1783 : }
1784 : }
1785 : }
1786 :
1787 118 : state_progress_end(state, countpos, countmax, countsize);
1788 :
1789 : bail:
1790 : /* close all the files left open */
1791 826 : for (j = 0; j < diskmax; ++j) {
1792 708 : struct snapraid_file* file = handle[j].file;
1793 708 : struct snapraid_disk* disk = handle[j].disk;
1794 708 : ret = handle_close(&handle[j]);
1795 708 : if (ret == -1) {
1796 : /* LCOV_EXCL_START */
1797 : log_tag("error:%u:%s:%s: Close error. %s\n", blockmax, disk->name, esc_tag(file->sub, esc_buffer), strerror(errno));
1798 : log_fatal("DANGER! Unexpected close error in a data disk.\n");
1799 : ++unrecoverable_error;
1800 : /* continue, as we are already exiting */
1801 : /* LCOV_EXCL_STOP */
1802 : }
1803 : }
1804 :
1805 : /* remove all the files created from scratch that have not finished the processing */
1806 : /* it happens only when aborting pressing Ctrl+C or other reason. */
1807 118 : if (fix) {
1808 : /* for each disk */
1809 315 : for (i = 0; i < diskmax; ++i) {
1810 : tommy_node* node;
1811 : struct snapraid_disk* disk;
1812 :
1813 270 : if (!handle[i].disk)
1814 1 : continue;
1815 :
1816 : /* for each file in the disk */
1817 269 : disk = handle[i].disk;
1818 269 : node = disk->filelist;
1819 870849 : while (node) {
1820 : char path[PATH_MAX];
1821 : struct snapraid_file* file;
1822 :
1823 870311 : file = node->data;
1824 870311 : node = node->next; /* next node */
1825 :
1826 : /* if the file was not created, meaning that it was already existing */
1827 870311 : if (!file_flag_has(file, FILE_IS_CREATED)) {
1828 : /* nothing to do */
1829 1627454 : continue;
1830 : }
1831 :
1832 : /* if processing was finished */
1833 113168 : if (file_flag_has(file, FILE_IS_FINISHED)) {
1834 : /* nothing to do */
1835 113168 : continue;
1836 : }
1837 :
1838 : /* if the file was originally missing, and processing not yet finished */
1839 : /* we have to throw it away to ensure that at the next run we will retry */
1840 : /* to fix it, in case we select to undelete missing files */
1841 0 : pathprint(path, sizeof(path), "%s%s", disk->dir, file->sub);
1842 :
1843 0 : ret = remove(path);
1844 0 : if (ret != 0) {
1845 : /* LCOV_EXCL_START */
1846 : log_fatal("Error removing '%s'. %s.\n", path, strerror(errno));
1847 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1848 : ++unrecoverable_error;
1849 : /* continue, as we are already exiting */
1850 : /* LCOV_EXCL_STOP */
1851 : }
1852 : }
1853 : }
1854 : }
1855 :
1856 118 : if (error || recovered_error || unrecoverable_error) {
1857 67 : msg_status("\n");
1858 67 : msg_status("%8u errors\n", error);
1859 67 : if (fix) {
1860 43 : msg_status("%8u recovered errors\n", recovered_error);
1861 : }
1862 134 : if (unrecoverable_error) {
1863 13 : msg_status("%8u UNRECOVERABLE errors\n", unrecoverable_error);
1864 : } else {
1865 : /* without checking, we don't know if they are really recoverable or not */
1866 54 : if (!state->opt.auditonly)
1867 52 : msg_status("%8u unrecoverable errors\n", unrecoverable_error);
1868 54 : if (fix)
1869 34 : msg_status("Everything OK\n");
1870 : }
1871 : } else {
1872 51 : msg_status("Everything OK\n");
1873 : }
1874 :
1875 118 : if (error && !fix)
1876 24 : log_fatal("WARNING! There are errors!\n");
1877 118 : if (unrecoverable_error)
1878 13 : log_fatal("DANGER! There are unrecoverable errors!\n");
1879 :
1880 118 : log_tag("summary:error:%u\n", error);
1881 118 : if (fix)
1882 45 : log_tag("summary:error_recovered:%u\n", recovered_error);
1883 118 : if (!state->opt.auditonly)
1884 113 : log_tag("summary:error_unrecoverable:%u\n", unrecoverable_error);
1885 118 : if (fix) {
1886 45 : if (error + recovered_error + unrecoverable_error == 0)
1887 2 : log_tag("summary:exit:ok\n");
1888 43 : else if (unrecoverable_error == 0)
1889 34 : log_tag("summary:exit:recovered\n");
1890 : else
1891 9 : log_tag("summary:exit:unrecoverable\n");
1892 73 : } else if (!state->opt.auditonly) {
1893 68 : if (error + unrecoverable_error == 0)
1894 46 : log_tag("summary:exit:ok\n");
1895 22 : else if (unrecoverable_error == 0)
1896 18 : log_tag("summary:exit:recoverable\n");
1897 : else
1898 4 : log_tag("summary:exit:unrecoverable\n");
1899 : } else { /* audit only */
1900 5 : if (error == 0)
1901 3 : log_tag("summary:exit:ok\n");
1902 : else
1903 2 : log_tag("summary:exit:error\n");
1904 : }
1905 118 : log_flush();
1906 :
1907 118 : free(failed);
1908 118 : free(failed_map);
1909 118 : free(handle);
1910 118 : free(buffer_alloc);
1911 118 : free(buffer);
1912 :
1913 : /* fail if some error are present after the run */
1914 118 : if (fix) {
1915 45 : if (state->opt.expect_unrecoverable) {
1916 9 : if (unrecoverable_error == 0)
1917 0 : return -1;
1918 : } else {
1919 36 : if (unrecoverable_error != 0)
1920 0 : return -1;
1921 : }
1922 : } else {
1923 73 : if (state->opt.expect_unrecoverable) {
1924 4 : if (unrecoverable_error == 0)
1925 0 : return -1;
1926 69 : } else if (state->opt.expect_recoverable) {
1927 20 : if (unrecoverable_error != 0 || error == 0)
1928 0 : return -1;
1929 : } else {
1930 49 : if (error != 0 || unrecoverable_error != 0)
1931 0 : return -1;
1932 : }
1933 : }
1934 :
1935 118 : return 0;
1936 : }
1937 :
1938 120 : int state_check(struct snapraid_state* state, int fix, block_off_t blockstart, block_off_t blockcount)
1939 : {
1940 : block_off_t blockmax;
1941 : data_off_t size;
1942 : int ret;
1943 : struct snapraid_parity_handle parity[LEV_MAX];
1944 : struct snapraid_parity_handle* parity_ptr[LEV_MAX];
1945 : unsigned error;
1946 : unsigned l;
1947 :
1948 120 : msg_progress("Initializing...\n");
1949 :
1950 120 : blockmax = parity_allocated_size(state);
1951 120 : size = blockmax * (data_off_t)state->block_size;
1952 :
1953 120 : if (blockstart > blockmax) {
1954 : /* LCOV_EXCL_START */
1955 : log_fatal("Error in the specified starting block %u. It's bigger than the parity size %u.\n", blockstart, blockmax);
1956 : exit(EXIT_FAILURE);
1957 : /* LCOV_EXCL_STOP */
1958 : }
1959 :
1960 : /* adjust the number of block to process */
1961 120 : if (blockcount != 0 && blockstart + blockcount < blockmax) {
1962 0 : blockmax = blockstart + blockcount;
1963 : }
1964 :
1965 120 : if (fix) {
1966 : /* if fixing, create the file and open for writing */
1967 : /* if it fails, we cannot continue */
1968 180 : for (l = 0; l < state->level; ++l) {
1969 : /* skip parity disks that are not accessible */
1970 135 : if (state->parity[l].skip_access) {
1971 1 : parity_ptr[l] = 0;
1972 1 : continue;
1973 : }
1974 :
1975 134 : parity_ptr[l] = &parity[l];
1976 134 : ret = parity_create(parity_ptr[l], &state->parity[l], l, state->file_mode, state->block_size, state->opt.parity_limit_size);
1977 134 : if (ret == -1) {
1978 : /* LCOV_EXCL_START */
1979 : log_fatal("WARNING! Without an accessible %s file, it isn't possible to fix any error.\n", lev_name(l));
1980 : exit(EXIT_FAILURE);
1981 : /* LCOV_EXCL_STOP */
1982 : }
1983 :
1984 134 : ret = parity_chsize(parity_ptr[l], &state->parity[l], 0, size, state->block_size, state->opt.skip_fallocate, state->opt.skip_space_holder);
1985 134 : if (ret == -1) {
1986 : /* LCOV_EXCL_START */
1987 : log_fatal("WARNING! Without an accessible %s file, it isn't possible to sync.\n", lev_name(l));
1988 : exit(EXIT_FAILURE);
1989 : /* LCOV_EXCL_STOP */
1990 : }
1991 : }
1992 75 : } else if (!state->opt.auditonly) {
1993 : /* if checking, open the file for reading */
1994 : /* it may fail if the file doesn't exist, in this case we continue to check the files */
1995 393 : for (l = 0; l < state->level; ++l) {
1996 324 : parity_ptr[l] = &parity[l];
1997 324 : ret = parity_open(parity_ptr[l], &state->parity[l], l, state->file_mode, state->block_size, state->opt.parity_limit_size);
1998 324 : if (ret == -1) {
1999 1 : msg_status("No accessible %s file, only files will be checked.\n", lev_name(l));
2000 : /* continue anyway */
2001 1 : parity_ptr[l] = 0;
2002 : }
2003 : }
2004 : } else {
2005 : /* otherwise don't use any parity */
2006 28 : for (l = 0; l < state->level; ++l)
2007 22 : parity_ptr[l] = 0;
2008 : }
2009 :
2010 120 : if (fix)
2011 45 : msg_progress("Fixing...\n");
2012 75 : else if (!state->opt.auditonly)
2013 69 : msg_progress("Checking...\n");
2014 : else
2015 6 : msg_progress("Hashing...\n");
2016 :
2017 120 : error = 0;
2018 :
2019 : /* skip degenerated cases of empty parity, or skipping all */
2020 120 : if (blockstart < blockmax) {
2021 118 : ret = state_check_process(state, fix, parity_ptr, blockstart, blockmax);
2022 118 : if (ret == -1) {
2023 : /* LCOV_EXCL_START */
2024 : ++error;
2025 : /* continue, as we are already exiting */
2026 : /* LCOV_EXCL_STOP */
2027 : }
2028 : }
2029 :
2030 : /* try to close only if opened */
2031 601 : for (l = 0; l < state->level; ++l) {
2032 481 : if (parity_ptr[l]) {
2033 457 : ret = parity_close(parity_ptr[l]);
2034 : /* LCOV_EXCL_START */
2035 : if (ret == -1) {
2036 : log_fatal("DANGER! Unexpected close error in %s disk.\n", lev_name(l));
2037 : ++error;
2038 : /* continue, as we are already exiting */
2039 : }
2040 : /* LCOV_EXCL_STOP */
2041 : }
2042 : }
2043 :
2044 : /* abort if error are present */
2045 120 : if (error != 0)
2046 0 : return -1;
2047 120 : return 0;
2048 : }
2049 :
|