Line data Source code
1 : /*
2 : * Copyright (C) 2011 Andrea Mazzoleni
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 : */
17 :
18 : #include "portable.h"
19 :
20 : #include "support.h"
21 : #include "util.h"
22 : #include "elem.h"
23 : #include "import.h"
24 : #include "search.h"
25 : #include "state.h"
26 : #include "parity.h"
27 : #include "handle.h"
28 : #include "raid/raid.h"
29 : #include "raid/combo.h"
30 :
31 : /****************************************************************************/
32 : /* check */
33 :
/**
 * A block that failed the hash check, or that was deleted.
 */
struct failed_struct {
	/**
	 * Set if we know for sure that the block is garbage or missing,
	 * and that it needs to be recovered and rewritten to the disk.
	 */
	int is_bad;

	/**
	 * Set if what we have recovered may be not updated data,
	 * an old version, or just garbage.
	 *
	 * Essentially, it means that we are not sure that what we have recovered
	 * is really correct. It's just our best guess.
	 *
	 * These "recovered" blocks are also written to the disk if the block is marked as ::is_bad.
	 * But these files are also marked as FILE_IS_DAMAGED, and then renamed to .unrecoverable.
	 *
	 * Note that this could happen only for CHG blocks.
	 *
	 * Blocks with this flag set are never used to verify a recovery by hash.
	 */
	int is_outofdate;

	unsigned index; /**< Index of the failed block. It selects the entry of the buffer vector holding the block data. */
	struct snapraid_block* block; /**< The failed block. */
	struct snapraid_disk* disk; /**< The failed disk. */
	struct snapraid_file* file; /**< The failed file. 0 for a DELETED block. */
	block_off_t file_pos; /**< Offset inside the file. */
	struct snapraid_handle* handle; /**< The handle containing the failed block, or 0 for a DELETED block. */
};
65 :
66 : /**
67 : * Check if a block hash matches the specified buffer.
68 : * Return ==0 if equal
69 : */
70 468578 : static int blockcmp(struct snapraid_state* state, int rehash, struct snapraid_block* block, unsigned pos_size, unsigned char* buffer, unsigned char* buffer_zero)
71 : {
72 : unsigned char hash[HASH_MAX];
73 :
74 : /* now compute the hash of the valid part */
75 468578 : if (rehash) {
76 0 : memhash(state->prevhash, state->prevhashseed, hash, buffer, pos_size);
77 : } else {
78 468578 : memhash(state->hash, state->hashseed, hash, buffer, pos_size);
79 : }
80 :
81 : /* compare the hash */
82 468578 : if (memcmp(hash, block->hash, BLOCK_HASH_SIZE) != 0) {
83 14448 : return -1;
84 : }
85 :
86 : /* compare to the end of the block */
87 454130 : if (pos_size < state->block_size) {
88 187534 : if (memcmp(buffer + pos_size, buffer_zero + pos_size, state->block_size - pos_size) != 0) {
89 45 : return -1;
90 : }
91 : }
92 :
93 454085 : return 0;
94 : }
95 :
96 : /**
97 : * Check if the hash of all the failed block we are expecting to recover are now matching.
98 : */
99 210001 : static int is_hash_matching(struct snapraid_state* state, int rehash, unsigned diskmax, struct failed_struct* failed, unsigned* failed_map, unsigned failed_count, void** buffer, void* buffer_zero)
100 : {
101 : unsigned j;
102 : int hash_checked;
103 :
104 210001 : hash_checked = 0; /* keep track if we check at least one block */
105 :
106 : /* check if the recovered blocks are OK */
107 668608 : for (j = 0; j < failed_count; ++j) {
108 : /* if we are expected to recover this block */
109 473100 : if (!failed[failed_map[j]].is_outofdate
110 : /* if the block has a hash to check */
111 468578 : && block_has_updated_hash(failed[failed_map[j]].block)
112 : ) {
113 : /* if a hash doesn't match, fail the check */
114 468578 : unsigned pos_size = file_block_size(failed[failed_map[j]].file, failed[failed_map[j]].file_pos, state->block_size);
115 468578 : if (blockcmp(state, rehash, failed[failed_map[j]].block, pos_size, buffer[failed[failed_map[j]].index], buffer_zero) != 0) {
116 14493 : log_tag("hash_error: Hash mismatch on entry %u\n", failed_map[j]);
117 14493 : return 0;
118 : }
119 :
120 454085 : hash_checked = 1;
121 : }
122 : }
123 :
124 : /* if nothing checked, we reject it */
125 : /* note that we are excluding this case at upper level */
126 : /* but checking again doesn't hurt */
127 195508 : if (!hash_checked) {
128 : /* LCOV_EXCL_START */
129 : return 0;
130 : /* LCOV_EXCL_STOP */
131 : }
132 :
133 : /* if we checked something, and no block failed the check */
134 : /* recompute all the redundancy information */
135 195508 : raid_gen(diskmax, state->level, state->block_size, buffer);
136 195508 : return 1;
137 : }
138 :
139 : /**
140 : * Check if specified parity is now matching with a recomputed one.
141 : */
142 8 : static int is_parity_matching(struct snapraid_state* state, unsigned diskmax, unsigned i, void** buffer, void** buffer_recov)
143 : {
144 : /* recompute parity, note that we don't need parity over i */
145 8 : raid_gen(diskmax, i + 1, state->block_size, buffer);
146 :
147 : /* if the recovered parity block matches */
148 8 : if (memcmp(buffer[diskmax + i], buffer_recov[i], state->block_size) == 0) {
149 : /* recompute all the redundancy information */
150 8 : raid_gen(diskmax, state->level, state->block_size, buffer);
151 8 : return 1;
152 : }
153 :
154 0 : return 0;
155 : }
156 :
157 : /**
158 : * Repair errors.
159 : * Return <0 if failure for missing strategy, >0 if data is wrong and we cannot rebuild correctly, 0 on success.
160 : * If success, the parity are computed in the buffer variable.
161 : */
162 252579 : static int repair_step(struct snapraid_state* state, int rehash, unsigned pos, unsigned diskmax, struct failed_struct* failed, unsigned* failed_map, unsigned failed_count, void** buffer, void** buffer_recov, void* buffer_zero)
163 : {
164 : unsigned i, n;
165 : int error;
166 : int has_hash;
167 : int id[LEV_MAX];
168 : int ip[LEV_MAX];
169 :
170 : /* no fix required, already checked at higher level, but just to be sure */
171 252579 : if (failed_count == 0) {
172 : /* LCOV_EXCL_START */
173 : /* recompute only the parity */
174 : raid_gen(diskmax, state->level, state->block_size, buffer);
175 : return 0;
176 : /* LCOV_EXCL_STOP */
177 : }
178 :
179 252579 : n = state->level;
180 252579 : error = 0;
181 :
182 : /* setup vector of failed disk indexes */
183 891577 : for (i = 0; i < failed_count; ++i)
184 638998 : id[i] = failed[failed_map[i]].index;
185 :
186 : /* check if there is at least a failed block that can be checked for correctness using the hash */
187 : /* if there isn't, we have to sacrifice a parity block to check that the result is correct */
188 252579 : has_hash = 0;
189 891577 : for (i = 0; i < failed_count; ++i) {
190 : /* if we are expected to recover this block */
191 638998 : if (!failed[failed_map[i]].is_outofdate
192 : /* if the block has a hash to check */
193 629954 : && block_has_updated_hash(failed[failed_map[i]].block)
194 : )
195 629946 : has_hash = 1;
196 : }
197 :
198 : /* if we don't have a hash, but we have an extra parity */
199 : /* (strictly-less failures than number of parities) */
200 252579 : if (!has_hash && failed_count < n) {
201 : /* number of parity to use, one more to check the recovering */
202 8 : unsigned r = failed_count + 1;
203 :
204 : /* all combinations (r of n) parities */
205 8 : combination_first(r, n, ip);
206 : do {
207 : /* if a parity is missing, do nothing */
208 24 : for (i = 0; i < r; ++i) {
209 16 : if (buffer_recov[ip[i]] == 0)
210 0 : break;
211 : }
212 8 : if (i != r)
213 0 : continue;
214 :
215 : /* copy the parities to use, one less because the last is used for checking */
216 16 : for (i = 0; i < r - 1; ++i)
217 8 : memcpy(buffer[diskmax + ip[i]], buffer_recov[ip[i]], state->block_size);
218 :
219 : /* recover using one less parity, the ip[r-1] one */
220 8 : raid_data(r - 1, id, ip, diskmax, state->block_size, buffer);
221 :
222 : /* use the remaining ip[r-1] parity to check the result */
223 8 : if (is_parity_matching(state, diskmax, ip[r - 1], buffer, buffer_recov))
224 8 : return 0;
225 :
226 : /* log */
227 0 : log_tag("parity_error:%u:", pos);
228 0 : for (i = 0; i < r; ++i) {
229 0 : if (i != 0)
230 0 : log_tag("/");
231 0 : log_tag("%s", lev_config_name(ip[i]));
232 : }
233 0 : log_tag(":parity: Parity mismatch\n");
234 0 : ++error;
235 0 : } while (combination_next(r, n, ip));
236 : }
237 :
238 : /* if we have a hash, and enough parities */
239 : /* (less-or-equal failures than number of parities) */
240 252571 : if (has_hash && failed_count <= n) {
241 : /* number of parities to use equal at the number of failures */
242 204861 : unsigned r = failed_count;
243 :
244 : /* all combinations (r of n) parities */
245 204861 : combination_first(r, n, ip);
246 : do {
247 : /* if a parity is missing, do nothing */
248 687750 : for (i = 0; i < r; ++i) {
249 477749 : if (buffer_recov[ip[i]] == 0)
250 4649 : break;
251 : }
252 214650 : if (i != r)
253 4649 : continue;
254 :
255 : /* copy the parities to use */
256 683101 : for (i = 0; i < r; ++i)
257 473100 : memcpy(buffer[diskmax + ip[i]], buffer_recov[ip[i]], state->block_size);
258 :
259 : /* recover */
260 210001 : raid_data(r, id, ip, diskmax, state->block_size, buffer);
261 :
262 : /* use the hash to check the result */
263 210001 : if (is_hash_matching(state, rehash, diskmax, failed, failed_map, failed_count, buffer, buffer_zero))
264 195508 : return 0;
265 :
266 : /* log */
267 14493 : log_tag("parity_error:%u:", pos);
268 28986 : for (i = 0; i < r; ++i) {
269 14493 : if (i != 0)
270 0 : log_tag("/");
271 14493 : log_tag("%s", lev_config_name(ip[i]));
272 : }
273 14493 : log_tag(":hash: Hash mismatch\n");
274 14493 : ++error;
275 38284 : } while (combination_next(r, n, ip));
276 : }
277 :
278 : /* return the number of failed attempts, or -1 if no strategy */
279 57063 : if (error)
280 9353 : return error;
281 :
282 47710 : log_tag("strategy_error:%u: No strategy to recover from %u failures with %u parity %s hash\n",
283 : pos, failed_count, n, has_hash ? "with" : "without");
284 47710 : return -1;
285 : }
286 :
287 873611 : static int repair(struct snapraid_state* state, int rehash, unsigned pos, unsigned diskmax, struct failed_struct* failed, unsigned* failed_map, unsigned failed_count, void** buffer, void** buffer_recov, void* buffer_zero)
288 : {
289 : int ret;
290 : int error;
291 : unsigned j;
292 : int n;
293 : int something_to_recover;
294 : int something_unsynced;
295 : char esc_buffer[ESC_MAX];
296 :
297 873611 : error = 0;
298 :
299 : /* if nothing failed, just recompute the parity */
300 873611 : if (failed_count == 0) {
301 586120 : raid_gen(diskmax, state->level, state->block_size, buffer);
302 586120 : return 0;
303 : }
304 :
305 : /* logs the status */
306 1016508 : for (j = 0; j < failed_count; ++j) {
307 : const char* desc;
308 : const char* hash;
309 : const char* data;
310 729017 : struct snapraid_block* block = failed[j].block;
311 729017 : unsigned block_state = block_state_get(block);
312 :
313 729017 : switch (block_state) {
314 18454 : case BLOCK_STATE_DELETED : desc = "delete"; break;
315 13851 : case BLOCK_STATE_CHG : desc = "change"; break;
316 62282 : case BLOCK_STATE_REP : desc = "replace"; break;
317 634430 : case BLOCK_STATE_BLK : desc = "block"; break;
318 : /* LCOV_EXCL_START */
319 : default : desc = "unknown"; break;
320 : /* LCOV_EXCL_STOP */
321 : }
322 :
323 729017 : if (hash_is_invalid(block->hash)) {
324 13843 : hash = "lost";
325 715174 : } else if (hash_is_zero(block->hash)) {
326 8 : hash = "zero";
327 : } else {
328 715166 : hash = "known";
329 : }
330 :
331 729017 : if (failed[j].is_bad)
332 643292 : data = "bad";
333 : else
334 85725 : data = "good";
335 :
336 729017 : if (failed[j].file) {
337 710563 : struct snapraid_disk* disk = failed[j].disk;
338 710563 : struct snapraid_file* file = failed[j].file;
339 710563 : block_off_t file_pos = failed[j].file_pos;
340 :
341 710563 : log_tag("entry:%u:%s:%s:%s:%s:%s:%u:\n", j, desc, hash, data, disk->name, esc_tag(file->sub, esc_buffer), file_pos);
342 : } else {
343 18454 : log_tag("entry:%u:%s:%s:%s:\n", j, desc, hash, data);
344 : }
345 : }
346 :
347 : /* Here we have to try two different strategies to recover, because in case the 'sync' */
348 : /* process is aborted, we don't know if the parity data is really updated just like after 'sync', */
349 : /* or if it still represents the state before the 'sync'. */
350 :
351 : /* Note that if the 'sync' ends normally, we don't have any DELETED, REP and CHG blocks */
352 : /* and the two strategies are identical */
353 :
354 : /* As first, we assume that the parity IS updated for the current state */
355 : /* and that we are going to recover the state after the last 'sync'. */
356 : /* In this case, parity contains info from BLK, REP and CHG blocks, */
357 : /* but not for DELETED. */
358 : /* We need to put in the recovering process only the bad blocks, because all the */
359 : /* others already contains the correct data read from disk, and the parity is correctly computed for them. */
360 : /* We are interested to recover BLK, REP and CHG blocks if they are marked as bad, */
361 : /* but we are not interested in DELETED ones. */
362 :
363 287491 : n = 0;
364 287491 : something_to_recover = 0; /* keep track if there is at least one block to fix */
365 1016508 : for (j = 0; j < failed_count; ++j) {
366 729017 : if (failed[j].is_bad) {
367 643292 : unsigned block_state = block_state_get(failed[j].block);
368 :
369 643292 : assert(block_state != BLOCK_STATE_DELETED); /* we cannot have bad DELETED blocks */
370 :
371 : /* if we have the hash for it */
372 643292 : if ((block_state == BLOCK_STATE_BLK || block_state == BLOCK_STATE_REP)
373 : /* try to fetch the block using the known hash */
374 643284 : && (state_import_fetch(state, rehash, failed[j].block, buffer[failed[j].index]) == 0
375 638812 : || state_search_fetch(state, rehash, failed[j].file, failed[j].file_pos, failed[j].block, buffer[failed[j].index]) == 0)
376 : ) {
377 : /* we already have corrected it! */
378 22382 : log_tag("hash_import: Fixed entry %u\n", j);
379 : } else {
380 : /* otherwise try to recover it */
381 620910 : failed_map[n] = j;
382 620910 : ++n;
383 :
384 : /* we have something to try to recover */
385 620910 : something_to_recover = 1;
386 : }
387 : }
388 : }
389 :
390 : /* if nothing to fix */
391 287491 : if (!something_to_recover) {
392 43956 : log_tag("recover_sync:%u:%u: Skipped for already recovered\n", pos, n);
393 :
394 : /* recompute only the parity */
395 43956 : raid_gen(diskmax, state->level, state->block_size, buffer);
396 43956 : return 0;
397 : }
398 :
399 243535 : ret = repair_step(state, rehash, pos, diskmax, failed, failed_map, n, buffer, buffer_recov, buffer_zero);
400 243535 : if (ret == 0) {
401 : /* reprocess the CHG blocks, for which we don't have a hash to check */
402 : /* if they were BAD we have to use some heuristics to ensure that we have recovered */
403 : /* the state after the sync. If unsure, we assume the worst case */
404 :
405 667192 : for (j = 0; j < failed_count; ++j) {
406 : /* we take care only of BAD blocks we have to write back */
407 476198 : if (failed[j].is_bad) {
408 462726 : unsigned block_state = block_state_get(failed[j].block);
409 :
410 : /* BLK and REP blocks are always OK, because at this point */
411 : /* we have already checked their hash */
412 462726 : if (block_state != BLOCK_STATE_CHG) {
413 462718 : assert(block_state == BLOCK_STATE_BLK || block_state == BLOCK_STATE_REP);
414 462718 : continue;
415 : }
416 :
417 : /* for CHG blocks we have to 'guess' if they are correct or not */
418 :
419 : /* if the hash is invalid we cannot check the result */
420 : /* this could happen if we have lost this information */
421 : /* after an aborted sync */
422 8 : if (hash_is_invalid(failed[j].block->hash)) {
423 : /* it may contain garbage */
424 0 : failed[j].is_outofdate = 1;
425 :
426 0 : log_tag("hash_unknown: Unknown hash on entry %u\n", j);
427 8 : } else if (hash_is_zero(failed[j].block->hash)) {
428 : /* if the block is not filled with 0, we are sure to have */
429 : /* restored it to the state after the 'sync' */
430 : /* instead, if the block is filled with 0, it could be either that the */
431 : /* block after the sync is really filled by 0, or that */
432 : /* we restored the block before the 'sync'. */
433 8 : if (memcmp(buffer[failed[j].index], buffer_zero, state->block_size) == 0) {
434 : /* it may contain garbage */
435 8 : failed[j].is_outofdate = 1;
436 :
437 8 : log_tag("hash_unknown: Maybe old zero on entry %u\n", j);
438 : }
439 : } else {
440 : /* if the hash is different than the previous one, we are sure to have */
441 : /* restored it to the state after the 'sync' */
442 : /* instead, if the hash matches, it could be either that the */
443 : /* block after the sync has this hash, or that */
444 : /* we restored the block before the 'sync'. */
445 0 : unsigned pos_size = file_block_size(failed[j].file, failed[j].file_pos, state->block_size);
446 0 : if (blockcmp(state, rehash, failed[j].block, pos_size, buffer[failed[j].index], buffer_zero) == 0) {
447 : /* it may contain garbage */
448 0 : failed[j].is_outofdate = 1;
449 :
450 0 : log_tag("hash_unknown: Maybe old data on entry %u\n", j);
451 : }
452 : }
453 : }
454 : }
455 :
456 190994 : return 0;
457 : }
458 52541 : if (ret > 0)
459 9353 : error += ret;
460 :
461 52541 : if (ret < 0)
462 43188 : log_tag("recover_sync:%u:%u: Failed with no attempts\n", pos, n);
463 : else
464 9353 : log_tag("recover_sync:%u:%u: Failed with %d attempts\n", pos, n, ret);
465 :
466 : /* Now assume that the parity IS NOT updated at the current state, */
467 : /* but still represent the state before the last 'sync' process. */
468 : /* In this case, parity contains info from BLK, REP (old version), CHG (old version) and DELETED blocks, */
469 : /* but not for REP (new version) and CHG (new version). */
470 : /* We are interested to recover BLK ones marked as bad, */
471 : /* but we are not interested to recover CHG (new version) and REP (new version) blocks, */
472 : /* even if marked as bad, because we don't have parity for them and it's just impossible, */
473 : /* and we are not interested to recover DELETED ones. */
474 52541 : n = 0;
475 52541 : something_to_recover = 0; /* keep track if there is at least one block to fix */
476 52541 : something_unsynced = 0; /* keep track if we have some unsynced info to process */
477 232924 : for (j = 0; j < failed_count; ++j) {
478 180383 : unsigned block_state = block_state_get(failed[j].block);
479 :
480 180383 : if (block_state == BLOCK_STATE_DELETED
481 171339 : || block_state == BLOCK_STATE_CHG
482 171339 : || block_state == BLOCK_STATE_REP
483 : ) {
484 : /* If the block is CHG, REP or DELETED, we don't have the original content of block, */
485 : /* and we must try to recover it. */
486 : /* This apply to CHG and REP blocks even if they are not marked bad, */
487 : /* because the parity is computed with old content, and not with the new one. */
488 : /* Note that this recovering is done just to make possible to recover any other BLK one, */
489 : /* we are not really interested in DELETED, CHG (old version) and REP (old version). */
490 9044 : something_unsynced = 1;
491 :
492 9044 : if (block_state == BLOCK_STATE_CHG
493 0 : && hash_is_zero(failed[j].block->hash)
494 : ) {
495 : /* If the block was a ZERO block, restore it to the original 0 as before the 'sync' */
496 : /* We do this to just allow recovering of other BLK ones */
497 :
498 0 : memset(buffer[failed[j].index], 0, state->block_size);
499 : /* note that from now the buffer is definitively lost */
500 : /* we can do this only because it's the last retry of recovering */
501 :
502 : /* try to fetch the old block using the old hash for CHG and DELETED blocks */
503 9044 : } else if ((block_state == BLOCK_STATE_CHG || block_state == BLOCK_STATE_DELETED)
504 9044 : && hash_is_unique(failed[j].block->hash)
505 9044 : && state_import_fetch(state, rehash, failed[j].block, buffer[failed[j].index]) == 0) {
506 :
507 : /* note that from now the buffer is definitively lost */
508 : /* we can do this only because it's the last retry of recovering */
509 : } else {
510 : /* otherwise try to recover it */
511 9044 : failed_map[n] = j;
512 9044 : ++n;
513 :
514 : /* note that we don't set something_to_recover, because we are */
515 : /* not really interested to recover *only* old blocks. */
516 : }
517 :
518 : /* avoid to use the hash of this block to verify the recovering */
519 : /* this applies to REP blocks because we are going to recover the old state */
520 : /* and the REP hash represent the new one */
521 : /* it also applies to CHG and DELETE blocks because we want to have */
522 : /* a successful recovering only if a BLK one is matching */
523 9044 : failed[j].is_outofdate = 1;
524 171339 : } else if (failed[j].is_bad) {
525 : /* If the block is bad we don't know its content, and we try to recover it */
526 : /* At this point, we can have only BLK ones */
527 :
528 171339 : assert(block_state == BLOCK_STATE_BLK);
529 :
530 : /* we have something we are interested to recover */
531 171339 : something_to_recover = 1;
532 :
533 : /* we try to recover it */
534 171339 : failed_map[n] = j;
535 171339 : ++n;
536 : }
537 : }
538 :
539 : /* if nothing to fix, we just don't try */
540 : /* if nothing unsynced we also don't retry, because it's the same try as before */
541 52541 : if (something_to_recover && something_unsynced) {
542 9044 : ret = repair_step(state, rehash, pos, diskmax, failed, failed_map, n, buffer, buffer_recov, buffer_zero);
543 9044 : if (ret == 0) {
544 : /* reprocess the REP and CHG blocks, for which we have recovered and old state */
545 : /* that we don't want to save into disk */
546 : /* we have already marked them, but we redo it for logging */
547 :
548 13566 : for (j = 0; j < failed_count; ++j) {
549 : /* we take care only of BAD blocks we have to write back */
550 9044 : if (failed[j].is_bad) {
551 4522 : unsigned block_state = block_state_get(failed[j].block);
552 :
553 4522 : if (block_state == BLOCK_STATE_CHG
554 4522 : || block_state == BLOCK_STATE_REP
555 : ) {
556 : /* mark that we have restored an old state */
557 : /* and we don't want to write it to the disk */
558 0 : failed[j].is_outofdate = 1;
559 :
560 0 : log_tag("hash_unknown: Surely old data on entry %u\n", j);
561 : }
562 : }
563 : }
564 :
565 4522 : return 0;
566 : }
567 4522 : if (ret > 0)
568 0 : error += ret;
569 :
570 4522 : if (ret < 0)
571 4522 : log_tag("recover_unsync:%u:%u: Failed with no attempts\n", pos, n);
572 : else
573 0 : log_tag("recover_unsync:%u:%u: Failed with %d attempts\n", pos, n, ret);
574 : } else {
575 43497 : log_tag("recover_unsync:%u:%u: Skipped for%s%s\n", pos, n,
576 : !something_to_recover ? " nothing to recover" : "",
577 : !something_unsynced ? " nothing unsynced" : ""
578 : );
579 : }
580 :
581 : /* return the number of failed attempts, or -1 if no strategy */
582 48019 : if (error)
583 4831 : return error;
584 : else
585 43188 : return -1;
586 : }
587 :
588 : /**
589 : * Post process all the files at the specified block index ::i.
590 : * For each file, if we are at the last block, closes it,
591 : * adjust the timestamp, and print the result.
592 : *
593 : * This works only if the whole file is processed, including its last block.
594 : * This doesn't always happen, like with an explicit end block.
595 : *
596 : * In such case, the check/fix command won't report any information of the
597 : * files partially checked.
598 : */
599 912886 : static int file_post(struct snapraid_state* state, int fix, unsigned i, struct snapraid_handle* handle, unsigned diskmax)
600 : {
601 : unsigned j;
602 : int ret;
603 : char esc_buffer[ESC_MAX];
604 : char esc_buffer_alt[ESC_MAX];
605 :
606 : /* for all the files print the final status, and does the final time fix */
607 : /* we also ensure to close files after processing the last block */
608 6390202 : for (j = 0; j < diskmax; ++j) {
609 : struct snapraid_block* block;
610 : struct snapraid_disk* disk;
611 : struct snapraid_file* collide_file;
612 : struct snapraid_file* file;
613 : block_off_t file_pos;
614 : uint64_t inode;
615 :
616 5477316 : disk = handle[j].disk;
617 5477316 : if (!disk) {
618 : /* if no disk, nothing to do */
619 3346226 : continue;
620 : }
621 :
622 5463255 : block = fs_par2block_find(disk, i);
623 5463255 : if (!block_has_file(block)) {
624 : /* if no file, nothing to do */
625 210517 : continue;
626 : }
627 :
628 5252738 : file = fs_par2file_get(disk, i, &file_pos);
629 :
630 : /* if it isn't the last block in the file */
631 5252738 : if (!file_block_is_last(file, file_pos)) {
632 : /* nothing to do */
633 3121648 : continue;
634 : }
635 :
636 : /* if the file is excluded, we have nothing to adjust as the file is never written */
637 2131090 : if (file_flag_has(file, FILE_IS_EXCLUDED)
638 2073564 : || (state->opt.syncedonly && file_flag_has(file, FILE_IS_UNSYNCED))) {
639 : /* nothing to do, but close the file */
640 57526 : goto close_and_continue;
641 : }
642 :
643 : /* finish the fix process if it's the last block of the files */
644 2073564 : if (fix) {
645 : /* mark that we finished with this file */
646 : /* to identify later any NOT finished ones */
647 765532 : file_flag_set(file, FILE_IS_FINISHED);
648 :
649 : /* if the file is damaged, meaning that a fix failed */
650 765532 : if (file_flag_has(file, FILE_IS_DAMAGED)) {
651 : /* rename it to .unrecoverable */
652 : char path[PATH_MAX];
653 : char path_to[PATH_MAX];
654 :
655 63293 : pathprint(path, sizeof(path), "%s%s", disk->dir, file->sub);
656 63293 : pathprint(path_to, sizeof(path_to), "%s%s.unrecoverable", disk->dir, file->sub);
657 :
658 : /* ensure to close the file before renaming */
659 63293 : if (handle[j].file == file) {
660 63293 : ret = handle_close(&handle[j]);
661 63293 : if (ret != 0) {
662 : /* LCOV_EXCL_START */
663 : log_tag("error:%u:%s:%s: Close error. %s\n", i, disk->name, esc_tag(file->sub, esc_buffer), strerror(errno));
664 : log_fatal("DANGER! Unexpected close error in a data disk.\n");
665 : return -1;
666 : /* LCOV_EXCL_STOP */
667 : }
668 : }
669 :
670 63293 : ret = rename(path, path_to);
671 63293 : if (ret != 0) {
672 : /* LCOV_EXCL_START */
673 : log_fatal("Error renaming '%s' to '%s'. %s.\n", path, path_to, strerror(errno));
674 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
675 : return -1;
676 : /* LCOV_EXCL_STOP */
677 : }
678 :
679 63293 : log_tag("status:unrecoverable:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
680 63293 : msg_info("unrecoverable %s\n", fmt_term(disk, file->sub, esc_buffer));
681 :
682 : /* and do not set the time if damaged */
683 63293 : goto close_and_continue;
684 : }
685 :
686 : /* if the file is not fixed, meaning that it is untouched */
687 702239 : if (!file_flag_has(file, FILE_IS_FIXED)) {
688 : /* nothing to do, but close the file */
689 581009 : goto close_and_continue;
690 : }
691 :
692 : /* if the file is closed or different than the one expected, reopen it */
693 : /* a different open file could happen when filtering for bad blocks */
694 121230 : if (handle[j].file != file) {
695 : /* close a potential different file */
696 0 : ret = handle_close(&handle[j]);
697 0 : if (ret != 0) {
698 : /* LCOV_EXCL_START */
699 : log_tag("error:%u:%s:%s: Close error. %s\n", i, disk->name, esc_tag(handle[j].file->sub, esc_buffer), strerror(errno));
700 : log_fatal("DANGER! Unexpected close error in a data disk.\n");
701 : return -1;
702 : /* LCOV_EXCL_STOP */
703 : }
704 :
705 : /* reopen it as readonly, as to set the mtime readonly access it's enough */
706 : /* we know that the file exists because it has the FILE_IS_FIXED tag */
707 0 : ret = handle_open(&handle[j], file, state->file_mode, log_error, 0);
708 0 : if (ret != 0) {
709 : /* LCOV_EXCL_START */
710 : log_tag("error:%u:%s:%s: Open error. %s\n", i, disk->name, esc_tag(file->sub, esc_buffer), strerror(errno));
711 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
712 : return -1;
713 : /* LCOV_EXCL_STOP */
714 : }
715 : }
716 :
717 121230 : log_tag("status:recovered:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
718 121230 : msg_info("recovered %s\n", fmt_term(disk, file->sub, esc_buffer));
719 :
720 121230 : inode = handle[j].st.st_ino;
721 :
722 : /* search for the corresponding inode */
723 121230 : collide_file = tommy_hashdyn_search(&disk->inodeset, file_inode_compare_to_arg, &inode, file_inode_hash(inode));
724 :
725 : /* if the inode is already in the database and it refers at a different file name, */
726 : /* we can fix the file time ONLY if the time and size allow to differentiate */
727 : /* between the two files */
728 :
729 : /* for example, suppose we delete a bunch of files with all the same size and time, */
730 : /* when recreating them the inodes may be reused in a different order, */
731 : /* and at the next sync some files may have matching inode/size/time even if different name */
732 : /* not allowing sync to detect that the file is changed and not renamed */
733 121230 : if (!collide_file /* if not in the database, there is no collision */
734 41827 : || strcmp(collide_file->sub, file->sub) == 0 /* if the name is the same, it's the right collision */
735 32995 : || collide_file->size != file->size /* if the size is different, the collision is identified */
736 5 : || collide_file->mtime_sec != file->mtime_sec /* if the mtime is different, the collision is identified */
737 3 : || collide_file->mtime_nsec != file->mtime_nsec /* same for mtime_nsec */
738 : ) {
739 : /* set the original modification time */
740 121230 : ret = handle_utime(&handle[j]);
741 121230 : if (ret == -1) {
742 : /* LCOV_EXCL_START */
743 : /* mark the file as damaged */
744 : file_flag_set(file, FILE_IS_DAMAGED);
745 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
746 : return -1;
747 : /* LCOV_EXCL_STOP */
748 : }
749 : } else {
750 0 : log_tag("collision:%s:%s:%s: Not setting modification time to avoid inode collision\n", disk->name, esc_tag(file->sub, esc_buffer), esc_tag(collide_file->sub, esc_buffer_alt));
751 : }
752 : } else {
753 : /* we are not fixing, but only checking */
754 : /* print just the final status */
755 1308032 : if (file_flag_has(file, FILE_IS_DAMAGED)) {
756 9499 : if (state->opt.auditonly) {
757 2824 : log_tag("status:damaged:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
758 2824 : msg_info("damaged %s\n", fmt_term(disk, file->sub, esc_buffer));
759 : } else {
760 6675 : log_tag("status:unrecoverable:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
761 6675 : msg_info("unrecoverable %s\n", fmt_term(disk, file->sub, esc_buffer));
762 : }
763 1298533 : } else if (file_flag_has(file, FILE_IS_FIXED)) {
764 76264 : log_tag("status:recoverable:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
765 76264 : msg_info("recoverable %s\n", fmt_term(disk, file->sub, esc_buffer));
766 : } else {
767 : /* we don't use msg_verbose() because it also goes into the log */
768 1222269 : if (msg_level >= MSG_VERBOSE) {
769 23207 : log_tag("status:correct:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
770 23207 : msg_info("correct %s\n", fmt_term(disk, file->sub, esc_buffer));
771 : }
772 : }
773 : }
774 :
775 1199062 : close_and_continue:
776 : /* if the opened file is the correct one, close it */
777 : /* in case of excluded and fragmented files it's possible */
778 : /* that the opened file is not the current one */
779 2131090 : if (handle[j].file == file) {
780 : /* ensure to close the file just after finishing with it */
781 : /* to avoid to keep it open without any possible use */
782 1989160 : ret = handle_close(&handle[j]);
783 1989160 : if (ret != 0) {
784 : /* LCOV_EXCL_START */
785 : log_tag("error:%u:%s:%s: Close error. %s\n", i, disk->name, esc_tag(file->sub, esc_buffer), strerror(errno));
786 : log_fatal("DANGER! Unexpected close error in a data disk.\n");
787 : return -1;
788 : /* LCOV_EXCL_STOP */
789 : }
790 : }
791 : }
792 :
793 912886 : return 0;
794 : }
795 :
796 : /**
797 : * Check if we have to process the specified block index ::i; it returns non-zero to process the block, and 0 to skip it.
798 : */
799 935479 : static int block_is_enabled(struct snapraid_state* state, block_off_t i, struct snapraid_handle* handle, unsigned diskmax)
800 : {
801 : unsigned j;
802 : unsigned l;
803 :
804 : /* with the badblockonly option, the bad flag of the block is the only criterion */
805 935479 : if (state->opt.badblockonly) {
806 : snapraid_info info;
807 :
808 : /* get the info stored for the block ::i */
809 0 : info = info_get(&state->infoarr, i);
810 :
811 : /*
812 : * Return directly the bad flag, the parity and file filters below are not evaluated
813 : */
814 0 : return info_get_bad(info);
815 : }
816 :
817 : /* with the badfileonly option check the bad flag of the block, otherwise filter for the parity */
818 935479 : if (state->opt.badfileonly) {
819 : snapraid_info info;
820 :
821 : /* get the info stored for the block ::i */
822 9374 : info = info_get(&state->infoarr, i);
823 :
824 : /*
825 : * If the block is bad, it has to be processed
826 : *
827 : * This is not necessary in normal cases because if a block is bad,
828 : * it necessarily needs to have a file related to it, and files with
829 : * bad blocks are fully included.
830 : *
831 : * But some files may be excluded by additional filter options,
832 : * so it's not always true, and this ensures that all the bad blocks
833 : * are always checked.
834 : */
835 9374 : if (info_get_bad(info))
836 1772 : return 1;
837 : } else {
838 : /* if at least one parity level is not excluded by the filter, include all blocks, even unused ones */
839 1061137 : for (l = 0; l < state->level; ++l) {
840 1019447 : if (!state->parity[l].is_excluded_by_filter) {
841 884415 : return 1;
842 : }
843 : }
844 : }
845 :
846 : /* filter for the files: include the block if at least one disk has a not excluded file in it */
847 231094 : for (j = 0; j < diskmax; ++j) {
848 : struct snapraid_block* block;
849 :
850 : /* if there is no disk in this position, nothing to check */
851 208501 : if (!handle[j].disk)
852 0 : continue;
853 :
854 208501 : block = fs_par2block_find(handle[j].disk, i);
855 :
856 : /* try to recover all files, even the ones without hash */
857 : /* because in some cases we can also recover them */
858 208501 : if (block_has_file(block)) {
859 204578 : struct snapraid_file* file = fs_par2file_get(handle[j].disk, i, 0);
860 204578 : if (!file_flag_has(file, FILE_IS_EXCLUDED)) { /* only if the file is not filtered out */
861 26699 : return 1;
862 : }
863 : }
864 : }
865 : /* no filter selected this block, it has to be skipped */
866 22593 : return 0;
867 : }
868 :
869 118 : static int state_check_process(struct snapraid_state* state, int fix, struct snapraid_parity_handle** parity, block_off_t blockstart, block_off_t blockmax)
870 : {
871 : struct snapraid_handle* handle;
872 : unsigned diskmax;
873 : block_off_t i;
874 : unsigned j;
875 : void* buffer_alloc;
876 : void** buffer;
877 : unsigned buffermax;
878 : int ret;
879 : data_off_t countsize;
880 : block_off_t countpos;
881 : block_off_t countmax;
882 : unsigned error;
883 : unsigned unrecoverable_error;
884 : unsigned recovered_error;
885 : struct failed_struct* failed;
886 : unsigned* failed_map;
887 : unsigned l;
888 : char esc_buffer[ESC_MAX];
889 : char esc_buffer_alt[ESC_MAX];
890 : bit_vect_t* block_enabled;
891 : struct snapraid_bw bw;
892 :
893 118 : handle = handle_mapping(state, &diskmax);
894 :
895 : /* initialize the bandwith context */
896 118 : bw_init(&bw, state->opt.bwlimit);
897 :
898 : /* share the bandwidth context with all handles */
899 826 : for (j = 0; j < diskmax; ++j)
900 708 : handle[j].bw = &bw;
901 587 : for (j = 0; j < state->level; ++j)
902 469 : if (parity[j])
903 451 : parity[j]->bw = &bw;
904 :
905 : /* we need 1 * data + 2 * parity + 1 * zero */
906 118 : buffermax = diskmax + 2 * state->level + 1;
907 :
908 118 : buffer = malloc_nofail_vector_align(diskmax, buffermax, state->block_size, &buffer_alloc);
909 118 : if (!state->opt.skip_self)
910 0 : mtest_vector(buffermax, state->block_size, buffer);
911 :
912 : /* fill up the zero buffer */
913 118 : memset(buffer[buffermax - 1], 0, state->block_size);
914 118 : raid_zero(buffer[buffermax - 1]);
915 :
916 118 : failed = malloc_nofail(diskmax * sizeof(struct failed_struct));
917 118 : failed_map = malloc_nofail(diskmax * sizeof(unsigned));
918 :
919 118 : error = 0;
920 118 : unrecoverable_error = 0;
921 118 : recovered_error = 0;
922 :
923 118 : msg_progress("Selecting...\n");
924 :
925 : /* first count the number of blocks to process */
926 118 : countmax = 0;
927 118 : block_enabled = calloc_nofail(1, bit_vect_size(blockmax)); /* preinitialize to 0 */
928 935597 : for (i = blockstart; i < blockmax; ++i) {
929 935479 : if (!block_is_enabled(state, i, handle, diskmax))
930 22593 : continue;
931 912886 : bit_vect_set(block_enabled, i);
932 912886 : ++countmax;
933 : }
934 :
935 118 : if (fix)
936 45 : msg_progress("Fixing...\n");
937 73 : else if (!state->opt.auditonly)
938 68 : msg_progress("Checking...\n");
939 : else
940 5 : msg_progress("Hashing...\n");
941 :
942 : /* check all the blocks in files */
943 118 : countsize = 0;
944 118 : countpos = 0;
945 118 : if (!state_progress_begin(state, blockstart, blockmax, countmax))
946 0 : goto end;
947 :
948 935597 : for (i = blockstart; i < blockmax; ++i) {
949 : unsigned failed_count;
950 : int valid_parity;
951 : int used_parity;
952 : snapraid_info info;
953 : int rehash;
954 :
955 935479 : if (!bit_vect_test(block_enabled, i)) {
956 : /* continue with the next block */
957 22593 : continue;
958 : }
959 :
960 : /* If we have valid parity, and it makes sense to check its content. */
961 : /* If we already know that the parity is invalid, we just read the file */
962 : /* but we don't report parity errors */
963 : /* Note that with auditonly, we anyway skip the full parity check, */
964 : /* because we also don't read it at all */
965 912886 : valid_parity = 1;
966 :
967 : /* If the parity is used by at least one file */
968 912886 : used_parity = 0;
969 :
970 : /* keep track of the number of failed blocks */
971 912886 : failed_count = 0;
972 :
973 : /* get block specific info */
974 912886 : info = info_get(&state->infoarr, i);
975 :
976 : /* if we have to use the old hash */
977 912886 : rehash = info_get_rehash(info);
978 :
979 : /* for each disk, process the block */
980 6390202 : for (j = 0; j < diskmax; ++j) {
981 : int read_size;
982 : unsigned char hash[HASH_MAX];
983 : struct snapraid_disk* disk;
984 : struct snapraid_block* block;
985 : struct snapraid_file* file;
986 : block_off_t file_pos;
987 : unsigned block_state;
988 :
989 : /* if the disk position is not used */
990 5477316 : disk = handle[j].disk;
991 5477316 : if (!disk) {
992 : /* use an empty block */
993 14061 : memset(buffer[j], 0, state->block_size);
994 941099 : continue;
995 : }
996 :
997 : /* if the disk block is not used */
998 5463255 : block = fs_par2block_find(disk, i);
999 5463255 : if (block == BLOCK_NULL) {
1000 : /* use an empty block */
1001 192063 : memset(buffer[j], 0, state->block_size);
1002 192063 : continue;
1003 : }
1004 :
1005 : /* get the state of the block */
1006 5271192 : block_state = block_state_get(block);
1007 :
1008 : /* if the parity is not valid */
1009 5271192 : if (block_has_invalid_parity(block)) {
1010 : /* mark the parity as invalid, and don't try to check/fix it */
1011 : /* because it will be recomputed at the next sync */
1012 94587 : valid_parity = 0;
1013 : /* follow */
1014 : }
1015 :
1016 : /* if the block is DELETED */
1017 5271192 : if (block_state == BLOCK_STATE_DELETED) {
1018 : /* use an empty block */
1019 18454 : memset(buffer[j], 0, state->block_size);
1020 :
1021 : /* store it in the failed set, because potentially */
1022 : /* the parity may be still computed with the previous content */
1023 18454 : failed[failed_count].is_bad = 0; /* note that is_bad==0 <=> file==0 */
1024 18454 : failed[failed_count].is_outofdate = 0;
1025 18454 : failed[failed_count].index = j;
1026 18454 : failed[failed_count].block = block;
1027 18454 : failed[failed_count].disk = disk;
1028 18454 : failed[failed_count].file = 0;
1029 18454 : failed[failed_count].file_pos = 0;
1030 18454 : failed[failed_count].handle = 0;
1031 18454 : ++failed_count;
1032 18454 : continue;
1033 : }
1034 :
1035 : /* here we are sure that the parity is used by a file */
1036 5252738 : used_parity = 1;
1037 :
1038 : /* get the file of this block */
1039 5252738 : file = fs_par2file_get(disk, i, &file_pos);
1040 :
1041 : /* if we are only hashing, we can skip excluded files and don't even read them */
1042 5252738 : if (state->opt.auditonly && file_flag_has(file, FILE_IS_EXCLUDED)) {
1043 : /* use an empty block */
1044 : /* in true, this is unnecessary, because we are not checking any parity */
1045 : /* but we keep it for completeness */
1046 0 : memset(buffer[j], 0, state->block_size);
1047 0 : continue;
1048 : }
1049 :
1050 : /* if the file is closed or different than the current one */
1051 5252738 : if (handle[j].file == 0 || handle[j].file != file) {
1052 : /* close the old one, if any */
1053 2262167 : ret = handle_close(&handle[j]);
1054 2262167 : if (ret == -1) {
1055 : /* LCOV_EXCL_START */
1056 : log_tag("error:%u:%s:%s: Close error. %s\n", i, disk->name, esc_tag(handle[j].file->sub, esc_buffer), strerror(errno));
1057 : log_fatal("DANGER! Unexpected close error in a data disk.\n");
1058 : log_fatal("Stopping at block %u\n", i);
1059 : ++unrecoverable_error;
1060 : goto bail;
1061 : /* LCOV_EXCL_STOP */
1062 : }
1063 :
1064 : /* if fixing, and the file is not excluded, we must open for writing */
1065 2262167 : if (fix && !file_flag_has(file, FILE_IS_EXCLUDED)) {
1066 : /* if fixing, create the file, open for writing and resize if required */
1067 771326 : ret = handle_create(&handle[j], file, state->file_mode);
1068 771326 : if (ret == -1) {
1069 : /* LCOV_EXCL_START */
1070 : if (errno == EACCES) {
1071 : log_fatal("WARNING! Please give write permission to the file.\n");
1072 : } else {
1073 : log_fatal("DANGER! Without a working data disk, it isn't possible to fix errors on it.\n");
1074 : }
1075 : log_fatal("Stopping at block %u\n", i);
1076 : ++unrecoverable_error;
1077 : goto bail;
1078 : /* LCOV_EXCL_STOP */
1079 : }
1080 :
1081 : /* check if the file was just created */
1082 771326 : if (handle[j].created != 0) {
1083 : /* if fragmented, it may be reopened, so remember that the file */
1084 : /* was originally missing */
1085 113055 : file_flag_set(file, FILE_IS_CREATED);
1086 : }
1087 : } else {
1088 : /* open the file only for reading */
1089 1490841 : if (!file_flag_has(file, FILE_IS_MISSING))
1090 1375256 : ret = handle_open(&handle[j], file, state->file_mode,
1091 1375256 : log_error, state->opt.expected_missing ? log_expected : 0);
1092 : else
1093 115585 : ret = -1; /* if the file is missing, we cannot open it */
1094 1490841 : if (ret == -1) {
1095 : /* save the failed block for the check/fix */
1096 194223 : failed[failed_count].is_bad = 1;
1097 194223 : failed[failed_count].is_outofdate = 0;
1098 194223 : failed[failed_count].index = j;
1099 194223 : failed[failed_count].block = block;
1100 194223 : failed[failed_count].disk = disk;
1101 194223 : failed[failed_count].file = file;
1102 194223 : failed[failed_count].file_pos = file_pos;
1103 194223 : failed[failed_count].handle = &handle[j];
1104 194223 : ++failed_count;
1105 :
1106 194223 : log_tag("error:%u:%s:%s: Open error at position %u\n", i, disk->name, esc_tag(file->sub, esc_buffer), file_pos);
1107 194223 : ++error;
1108 :
1109 : /* mark the file as missing, to avoid to retry to open it again */
1110 : /* note that this can be done only if we are not fixing it */
1111 : /* otherwise, it could be recreated */
1112 194223 : file_flag_set(file, FILE_IS_MISSING);
1113 194223 : continue;
1114 : }
1115 : }
1116 :
1117 : /* if it's the first open, and not excluded */
1118 2067944 : if (!file_flag_has(file, FILE_IS_OPENED)
1119 2052544 : && !file_flag_has(file, FILE_IS_EXCLUDED)) {
1120 :
1121 : /* check if the file is changed */
1122 1998700 : if (handle[j].st.st_size != file->size
1123 1805957 : || handle[j].st.st_mtime != file->mtime_sec
1124 1783214 : || STAT_NSEC(&handle[j].st) != file->mtime_nsec
1125 : /* don't check the inode to support file-system without persistent inodes */
1126 : ) {
1127 : /* report that the file is not synced */
1128 215486 : file_flag_set(file, FILE_IS_UNSYNCED);
1129 : }
1130 : }
1131 :
1132 : /* if it's the first open, and not excluded and larger */
1133 2067944 : if (!file_flag_has(file, FILE_IS_OPENED)
1134 2052544 : && !file_flag_has(file, FILE_IS_EXCLUDED)
1135 1998700 : && !(state->opt.syncedonly && file_flag_has(file, FILE_IS_UNSYNCED))
1136 1998700 : && handle[j].st.st_size > file->size
1137 : ) {
1138 8668 : log_error("File '%s' is larger than expected.\n", handle[j].path);
1139 8668 : log_tag("error:%u:%s:%s: Size error\n", i, disk->name, esc_tag(file->sub, esc_buffer));
1140 8668 : ++error;
1141 :
1142 8668 : if (fix) {
1143 3197 : ret = handle_truncate(&handle[j], file);
1144 3197 : if (ret == -1) {
1145 : /* LCOV_EXCL_START */
1146 : log_fatal("DANGER! Unexpected truncate error in a data disk, it isn't possible to fix.\n");
1147 : log_fatal("Stopping at block %u\n", i);
1148 : ++unrecoverable_error;
1149 : goto bail;
1150 : /* LCOV_EXCL_STOP */
1151 : }
1152 :
1153 3197 : log_tag("fixed:%u:%s:%s: Fixed size\n", i, disk->name, esc_tag(file->sub, esc_buffer));
1154 3197 : ++recovered_error;
1155 : }
1156 : }
1157 :
1158 : /* mark the file as opened at least one time */
1159 : /* this is used to avoid to check the unsynced and size */
1160 : /* more than one time, in case the file is reopened later */
1161 2067944 : file_flag_set(file, FILE_IS_OPENED);
1162 : }
1163 :
1164 : /* read from the file */
1165 5058515 : read_size = handle_read(&handle[j], file_pos, buffer[j], state->block_size,
1166 5058515 : log_error, state->opt.expected_missing ? log_expected : 0);
1167 5058515 : if (read_size == -1) {
1168 : /* save the failed block for the check/fix */
1169 442175 : failed[failed_count].is_bad = 1; /* it's bad because we cannot read it */
1170 442175 : failed[failed_count].is_outofdate = 0;
1171 442175 : failed[failed_count].index = j;
1172 442175 : failed[failed_count].block = block;
1173 442175 : failed[failed_count].disk = disk;
1174 442175 : failed[failed_count].file = file;
1175 442175 : failed[failed_count].file_pos = file_pos;
1176 442175 : failed[failed_count].handle = &handle[j];
1177 442175 : ++failed_count;
1178 :
1179 442175 : log_tag("error:%u:%s:%s: Read error at position %u\n", i, disk->name, esc_tag(file->sub, esc_buffer), file_pos);
1180 442175 : ++error;
1181 442175 : continue;
1182 : }
1183 :
1184 4616340 : countsize += read_size;
1185 :
1186 : /* always insert CHG blocks, the repair functions needs all of them */
1187 : /* because the parity may be still referring at the old state */
1188 : /* and the repair must be aware of it */
1189 4616340 : if (block_state == BLOCK_STATE_CHG) {
1190 : /* we DO NOT mark them as bad to avoid to overwrite them with wrong data. */
1191 : /* if we don't have a hash, we always assume the first read of the block correct. */
1192 13843 : failed[failed_count].is_bad = 0; /* we assume the CHG block correct */
1193 13843 : failed[failed_count].is_outofdate = 0;
1194 13843 : failed[failed_count].index = j;
1195 13843 : failed[failed_count].block = block;
1196 13843 : failed[failed_count].disk = disk;
1197 13843 : failed[failed_count].file = file;
1198 13843 : failed[failed_count].file_pos = file_pos;
1199 13843 : failed[failed_count].handle = &handle[j];
1200 13843 : ++failed_count;
1201 13843 : continue;
1202 : }
1203 :
1204 4602497 : assert(block_state == BLOCK_STATE_BLK || block_state == BLOCK_STATE_REP);
1205 :
1206 : /* compute the hash of the block just read */
1207 4602497 : if (rehash) {
1208 27243 : memhash(state->prevhash, state->prevhashseed, hash, buffer[j], read_size);
1209 : } else {
1210 4575254 : memhash(state->hash, state->hashseed, hash, buffer[j], read_size);
1211 : }
1212 :
1213 : /* compare the hash */
1214 4602497 : if (memcmp(hash, block->hash, BLOCK_HASH_SIZE) != 0) {
1215 12852 : unsigned diff = memdiff(hash, block->hash, BLOCK_HASH_SIZE);
1216 :
1217 : /* save the failed block for the check/fix */
1218 12852 : failed[failed_count].is_bad = 1; /* it's bad because the hash doesn't match */
1219 12852 : failed[failed_count].is_outofdate = 0;
1220 12852 : failed[failed_count].index = j;
1221 12852 : failed[failed_count].block = block;
1222 12852 : failed[failed_count].disk = disk;
1223 12852 : failed[failed_count].file = file;
1224 12852 : failed[failed_count].file_pos = file_pos;
1225 12852 : failed[failed_count].handle = &handle[j];
1226 12852 : ++failed_count;
1227 :
1228 12852 : log_tag("error:%u:%s:%s: Data error at position %u, diff bits %u/%u\n", i, disk->name, esc_tag(file->sub, esc_buffer), file_pos, diff, BLOCK_HASH_SIZE * 8);
1229 12852 : ++error;
1230 12852 : continue;
1231 : }
1232 :
1233 : /* always insert REP blocks, the repair functions needs all of them */
1234 : /* because the parity may be still referring at the old state */
1235 : /* and the repair must be aware of it */
1236 4589645 : if (block_state == BLOCK_STATE_REP) {
1237 53428 : failed[failed_count].is_bad = 0; /* it's not bad */
1238 53428 : failed[failed_count].is_outofdate = 0;
1239 53428 : failed[failed_count].index = j;
1240 53428 : failed[failed_count].block = block;
1241 53428 : failed[failed_count].disk = disk;
1242 53428 : failed[failed_count].file = file;
1243 53428 : failed[failed_count].file_pos = file_pos;
1244 53428 : failed[failed_count].handle = &handle[j];
1245 53428 : ++failed_count;
1246 53428 : continue;
1247 : }
1248 : }
1249 :
1250 : /* now read and check the parity if requested */
1251 912886 : if (!state->opt.auditonly) {
1252 : void* buffer_recov[LEV_MAX];
1253 : void* buffer_zero;
1254 :
1255 : /* buffers for parity read and not computed */
1256 4189358 : for (l = 0; l < state->level; ++l)
1257 3315747 : buffer_recov[l] = buffer[diskmax + state->level + l];
1258 2799530 : for (; l < LEV_MAX; ++l)
1259 1925919 : buffer_recov[l] = 0;
1260 :
1261 : /* the zero buffer is the last one */
1262 873611 : buffer_zero = buffer[buffermax - 1];
1263 :
1264 : /* read the parity */
1265 4189358 : for (l = 0; l < state->level; ++l) {
1266 3315747 : if (parity[l]) {
1267 3301845 : ret = parity_read(parity[l], i, buffer_recov[l], state->block_size, log_error);
1268 3301845 : if (ret == -1) {
1269 90335 : buffer_recov[l] = 0; /* no parity to use */
1270 :
1271 90335 : log_tag("parity_error:%u:%s: Read error\n", i, lev_config_name(l));
1272 90335 : ++error;
1273 : }
1274 : } else {
1275 13902 : buffer_recov[l] = 0;
1276 : }
1277 : }
1278 :
1279 : /* try all the recovering strategies */
1280 873611 : ret = repair(state, rehash, i, diskmax, failed, failed_map, failed_count, buffer, buffer_recov, buffer_zero);
1281 873611 : if (ret != 0) {
1282 : /* increment the number of errors */
1283 48019 : if (ret > 0)
1284 4831 : error += ret;
1285 48019 : ++unrecoverable_error;
1286 :
1287 : /* print a list of all the errors in files */
1288 219358 : for (j = 0; j < failed_count; ++j) {
1289 171339 : if (failed[j].is_bad)
1290 166817 : log_tag("unrecoverable:%u:%s:%s: Unrecoverable error at position %u\n", i, failed[j].disk->name, esc_tag(failed[j].file->sub, esc_buffer), failed[j].file_pos);
1291 : }
1292 :
1293 : /* keep track of damaged files */
1294 219358 : for (j = 0; j < failed_count; ++j) {
1295 171339 : if (failed[j].is_bad)
1296 166817 : file_flag_set(failed[j].file, FILE_IS_DAMAGED);
1297 : }
1298 : } else {
1299 : /* now counts partial recovers */
1300 : /* note that this could happen only when we have an incomplete 'sync' */
1301 : /* and that we have recovered is the state before the 'sync' */
1302 825592 : int partial_recover_error = 0;
1303 :
1304 : /* print a list of all the errors in files */
1305 1383270 : for (j = 0; j < failed_count; ++j) {
1306 557678 : if (failed[j].is_bad && failed[j].is_outofdate) {
1307 8 : ++partial_recover_error;
1308 8 : log_tag("unrecoverable:%u:%s:%s: Unrecoverable unsynced error at position %u\n", i, failed[j].disk->name, esc_tag(failed[j].file->sub, esc_buffer), failed[j].file_pos);
1309 : }
1310 : }
1311 825592 : if (partial_recover_error != 0) {
1312 8 : error += partial_recover_error;
1313 8 : ++unrecoverable_error;
1314 : }
1315 :
1316 : /*
1317 : * Check parities, but only if all the blocks have it computed and it's used.
1318 : *
1319 : * If you check/fix after a partial sync, it's OK to have parity errors
1320 : * on the blocks with invalid parity and doesn't make sense to try to fix it.
1321 : *
1322 : * It's also OK to have data errors on unused parity, because sync doesn't
1323 : * update it.
1324 : */
1325 825592 : if (used_parity && valid_parity) {
1326 : /* check the parity */
1327 3747349 : for (l = 0; l < state->level; ++l) {
1328 2983342 : if (buffer_recov[l] != 0 && memcmp(buffer_recov[l], buffer[diskmax + l], state->block_size) != 0) {
1329 21225 : unsigned diff = memdiff(buffer_recov[l], buffer[diskmax + l], state->block_size);
1330 :
1331 : /* mark that the read parity is wrong, setting ptr to 0 */
1332 21225 : buffer_recov[l] = 0;
1333 :
1334 21225 : log_tag("parity_error:%u:%s: Data error, diff bits %u/%u\n", i, lev_config_name(l), diff, state->block_size * 8);
1335 21225 : ++error;
1336 : }
1337 : }
1338 : }
1339 :
1340 : /* now write recovered files */
1341 825592 : if (fix) {
1342 : /* update the fixed files */
1343 655332 : for (j = 0; j < failed_count; ++j) {
1344 : /* nothing to do if it doesn't need recovering */
1345 338035 : if (!failed[j].is_bad)
1346 40318 : continue;
1347 :
1348 : /* do not fix if the file is excluded */
1349 297717 : if (file_flag_has(failed[j].file, FILE_IS_EXCLUDED)
1350 288437 : || (state->opt.syncedonly && file_flag_has(failed[j].file, FILE_IS_UNSYNCED)))
1351 9280 : continue;
1352 :
1353 288437 : ret = handle_write(failed[j].handle, failed[j].file_pos, buffer[failed[j].index], state->block_size);
1354 288437 : if (ret == -1) {
1355 : /* LCOV_EXCL_START */
1356 : /* mark the file as damaged */
1357 : file_flag_set(failed[j].file, FILE_IS_DAMAGED);
1358 :
1359 : if (errno == EACCES) {
1360 : log_fatal("WARNING! Please give write permission to the file.\n");
1361 : } else {
1362 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1363 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1364 : }
1365 : log_fatal("Stopping at block %u\n", i);
1366 : ++unrecoverable_error;
1367 : goto bail;
1368 : /* LCOV_EXCL_STOP */
1369 : }
1370 :
1371 : /* if we are not sure that the recovered content is uptodate */
1372 288437 : if (failed[j].is_outofdate) {
1373 : /* mark the file as damaged */
1374 8 : file_flag_set(failed[j].file, FILE_IS_DAMAGED);
1375 8 : continue;
1376 : }
1377 :
1378 : /* mark the file as containing some fixes */
1379 : /* note that it could be also marked as damaged in other iterations */
1380 288429 : file_flag_set(failed[j].file, FILE_IS_FIXED);
1381 :
1382 288429 : log_tag("fixed:%u:%s:%s: Fixed data error at position %u\n", i, failed[j].disk->name, esc_tag(failed[j].file->sub, esc_buffer), failed[j].file_pos);
1383 288429 : ++recovered_error;
1384 : }
1385 :
1386 : /*
1387 : * Update parity only if all the blocks have it computed and it's used.
1388 : *
1389 : * If you check/fix after a partial sync, you do not want to fix parity
1390 : * for blocks that are going to have it computed in the sync completion.
1391 : *
1392 : * For unused parity there is no need to write it, because when fixing
1393 : * we already have allocated space for it on parity file creation,
1394 : * and its content doesn't matter.
1395 : */
1396 317297 : if (used_parity && valid_parity) {
1397 : /* update the parity */
1398 982584 : for (l = 0; l < state->level; ++l) {
1399 : /* if the parity on disk is wrong */
1400 714467 : if (buffer_recov[l] == 0
1401 : /* and we have access at the parity */
1402 72150 : && parity[l] != 0
1403 : /* and the parity is not excluded */
1404 67463 : && !state->parity[l].is_excluded_by_filter
1405 : ) {
1406 67463 : ret = parity_write(parity[l], i, buffer[diskmax + l], state->block_size);
1407 67463 : if (ret == -1) {
1408 : /* LCOV_EXCL_START */
1409 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1410 : log_fatal("WARNING! Without a working %s disk, it isn't possible to fix errors on it.\n", lev_name(l));
1411 : log_fatal("Stopping at block %u\n", i);
1412 : ++unrecoverable_error;
1413 : goto bail;
1414 : /* LCOV_EXCL_STOP */
1415 : }
1416 :
1417 67463 : log_tag("parity_fixed:%u:%s: Fixed data error\n", i, lev_config_name(l));
1418 67463 : ++recovered_error;
1419 : }
1420 : }
1421 : }
1422 : } else {
1423 : /* if we are not fixing, we just set the FIXED flag */
1424 : /* meaning that we could fix this file if we try */
1425 727938 : for (j = 0; j < failed_count; ++j) {
1426 219643 : if (failed[j].is_bad) {
1427 178758 : file_flag_set(failed[j].file, FILE_IS_FIXED);
1428 : }
1429 : }
1430 : }
1431 : }
1432 : } else {
1433 : /* if we are not checking, we just set the DAMAGED flag */
1434 : /* to report that the file is damaged, and we don't know if we can fix it */
1435 45233 : for (j = 0; j < failed_count; ++j) {
1436 5958 : if (failed[j].is_bad) {
1437 5958 : file_flag_set(failed[j].file, FILE_IS_DAMAGED);
1438 : }
1439 : }
1440 : }
1441 :
1442 : /* post process the files */
1443 912886 : ret = file_post(state, fix, i, handle, diskmax);
1444 912886 : if (ret == -1) {
1445 : /* LCOV_EXCL_START */
1446 : log_fatal("Stopping at block %u\n", i);
1447 : ++unrecoverable_error;
1448 : goto bail;
1449 : /* LCOV_EXCL_STOP */
1450 : }
1451 :
1452 : /* count the number of processed block */
1453 912886 : ++countpos;
1454 :
1455 : /* progress */
1456 912886 : if (state_progress(state, 0, i, countpos, countmax, countsize)) {
1457 : /* LCOV_EXCL_START */
1458 : break;
1459 : /* LCOV_EXCL_STOP */
1460 : }
1461 :
1462 : /* thermal control */
1463 912886 : if (state_thermal_alarm(state)) {
1464 : /* until now is misc */
1465 0 : state_usage_misc(state);
1466 :
1467 0 : state_progress_stop(state);
1468 :
1469 0 : state_thermal_cooldown(state);
1470 :
1471 0 : state_progress_restart(state);
1472 :
1473 : /* drop until now */
1474 0 : state_usage_waste(state);
1475 : }
1476 : }
1477 :
1478 : /* for each disk, recover empty files, symlinks and empty dirs */
1479 826 : for (i = 0; i < diskmax; ++i) {
1480 : tommy_node* node;
1481 : struct snapraid_disk* disk;
1482 :
1483 708 : if (!handle[i].disk)
1484 3 : continue;
1485 :
1486 : /* for each empty file in the disk */
1487 705 : disk = handle[i].disk;
1488 705 : node = disk->filelist;
1489 2188430 : while (node) {
1490 : char path[PATH_MAX];
1491 : struct stat st;
1492 : struct snapraid_file* file;
1493 2187725 : int unsuccessful = 0;
1494 :
1495 2187725 : file = node->data;
1496 2187725 : node = node->next; /* next node */
1497 :
1498 : /* if not empty, it's already checked and continue to the next one */
1499 2187725 : if (file->size != 0) {
1500 2184942 : continue;
1501 : }
1502 :
1503 : /* if excluded continue to the next one */
1504 2952 : if (file_flag_has(file, FILE_IS_EXCLUDED)) {
1505 169 : continue;
1506 : }
1507 :
1508 : /* stat the file */
1509 2783 : pathprint(path, sizeof(path), "%s%s", disk->dir, file->sub);
1510 2783 : ret = stat(path, &st);
1511 2783 : if (ret == -1) {
1512 163 : unsuccessful = 1;
1513 :
1514 163 : log_error("Error stating empty file '%s'. %s.\n", path, strerror(errno));
1515 163 : log_tag("error:%s:%s: Empty file stat error\n", disk->name, esc_tag(file->sub, esc_buffer));
1516 163 : ++error;
1517 2620 : } else if (!S_ISREG(st.st_mode)) {
1518 0 : unsuccessful = 1;
1519 :
1520 0 : log_tag("error:%s:%s: Empty file error for not regular file\n", disk->name, esc_tag(file->sub, esc_buffer));
1521 0 : ++error;
1522 2620 : } else if (st.st_size != 0) {
1523 6 : unsuccessful = 1;
1524 :
1525 6 : log_tag("error:%s:%s: Empty file error for size '%" PRIu64 "'\n", disk->name, esc_tag(file->sub, esc_buffer), (uint64_t)st.st_size);
1526 6 : ++error;
1527 : }
1528 :
1529 2783 : if (fix && unsuccessful) {
1530 : int f;
1531 :
1532 : /* create the ancestor directories */
1533 153 : ret = mkancestor(path);
1534 153 : if (ret != 0) {
1535 : /* LCOV_EXCL_START */
1536 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1537 : log_fatal("Stopping\n");
1538 : ++unrecoverable_error;
1539 : goto bail;
1540 : /* LCOV_EXCL_STOP */
1541 : }
1542 :
1543 : /* create it */
1544 : /* O_NOFOLLOW: do not follow links to ensure to open the real file */
1545 153 : f = open(path, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_NOFOLLOW, 0600);
1546 153 : if (f == -1) {
1547 : /* LCOV_EXCL_START */
1548 : log_fatal("Error creating empty file '%s'. %s.\n", path, strerror(errno));
1549 : if (errno == EACCES) {
1550 : log_fatal("WARNING! Please give write permission to the file.\n");
1551 : } else {
1552 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1553 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1554 : }
1555 : log_fatal("Stopping\n");
1556 : ++unrecoverable_error;
1557 : goto bail;
1558 : /* LCOV_EXCL_STOP */
1559 : }
1560 :
1561 : /* set the original modification time */
1562 153 : ret = fmtime(f, file->mtime_sec, file->mtime_nsec);
1563 153 : if (ret != 0) {
1564 : /* LCOV_EXCL_START */
1565 : close(f);
1566 :
1567 : log_fatal("Error timing file '%s'. %s.\n", file->sub, strerror(errno));
1568 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1569 : log_fatal("Stopping\n");
1570 : ++unrecoverable_error;
1571 : goto bail;
1572 : /* LCOV_EXCL_STOP */
1573 : }
1574 :
1575 : /* close it */
1576 153 : ret = close(f);
1577 153 : if (ret != 0) {
1578 : /* LCOV_EXCL_START */
1579 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1580 : log_fatal("Stopping\n");
1581 : ++unrecoverable_error;
1582 : goto bail;
1583 : /* LCOV_EXCL_STOP */
1584 : }
1585 :
1586 153 : log_tag("fixed:%s:%s: Fixed empty file\n", disk->name, esc_tag(file->sub, esc_buffer));
1587 153 : ++recovered_error;
1588 :
1589 153 : log_tag("status:recovered:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
1590 153 : msg_info("recovered %s\n", fmt_term(disk, file->sub, esc_buffer));
1591 : }
1592 : }
1593 :
1594 : /* for each link in the disk */
1595 705 : disk = handle[i].disk;
1596 705 : node = disk->linklist;
1597 70832 : while (node) {
1598 : char path[PATH_MAX];
1599 : char pathto[PATH_MAX];
1600 : char linkto[PATH_MAX];
1601 : struct stat st;
1602 : struct stat stto;
1603 : struct snapraid_link* slink;
1604 70127 : int unsuccessful = 0;
1605 70127 : int unrecoverable = 0;
1606 :
1607 70127 : slink = node->data;
1608 70127 : node = node->next; /* next node */
1609 :
1610 : /* if excluded continue to the next one */
1611 70127 : if (link_flag_has(slink, FILE_IS_EXCLUDED)) {
1612 3277 : continue;
1613 : }
1614 :
1615 66850 : if (link_flag_has(slink, FILE_IS_HARDLINK)) {
1616 : /* stat the link */
1617 318 : pathprint(path, sizeof(path), "%s%s", disk->dir, slink->sub);
1618 318 : ret = stat(path, &st);
1619 318 : if (ret == -1) {
1620 36 : unsuccessful = 1;
1621 :
1622 36 : log_error("Error stating hardlink '%s'. %s.\n", path, strerror(errno));
1623 36 : log_tag("hardlink_error:%s:%s:%s: Hardlink stat error\n", disk->name, esc_tag(slink->sub, esc_buffer), esc_tag(slink->linkto, esc_buffer_alt));
1624 36 : ++error;
1625 282 : } else if (!S_ISREG(st.st_mode)) {
1626 0 : unsuccessful = 1;
1627 :
1628 0 : log_tag("hardlink_error:%s:%s:%s: Hardlink error for not regular file\n", disk->name, esc_tag(slink->sub, esc_buffer), esc_tag(slink->linkto, esc_buffer_alt));
1629 0 : ++error;
1630 : }
1631 :
1632 : /* stat the "to" file */
1633 318 : pathprint(pathto, sizeof(pathto), "%s%s", disk->dir, slink->linkto);
1634 318 : ret = stat(pathto, &stto);
1635 318 : if (ret == -1) {
1636 24 : unsuccessful = 1;
1637 :
1638 24 : if (errno == ENOENT) {
1639 24 : unrecoverable = 1;
1640 24 : if (fix) {
1641 : /* if the target doesn't exist, it's unrecoverable */
1642 : /* because we cannot create an hardlink of a file that */
1643 : /* doesn't exists */
1644 12 : ++unrecoverable_error;
1645 : } else {
1646 : /* but in check, we can assume that fixing will recover */
1647 : /* such missing file, so we assume a less drastic error */
1648 12 : ++error;
1649 : }
1650 : }
1651 :
1652 24 : log_error("Error stating hardlink-to '%s'. %s.\n", pathto, strerror(errno));
1653 24 : log_tag("hardlink_error:%s:%s:%s: Hardlink to stat error\n", disk->name, esc_tag(slink->sub, esc_buffer), esc_tag(slink->linkto, esc_buffer_alt));
1654 24 : ++error;
1655 294 : } else if (!S_ISREG(stto.st_mode)) {
1656 0 : unsuccessful = 1;
1657 :
1658 0 : log_tag("hardlink_error:%s:%s:%s: Hardlink-to error for not regular file\n", disk->name, esc_tag(slink->sub, esc_buffer), esc_tag(slink->linkto, esc_buffer_alt));
1659 0 : ++error;
1660 294 : } else if (!unsuccessful && st.st_ino != stto.st_ino) {
1661 0 : unsuccessful = 1;
1662 :
1663 0 : log_error("Mismatch hardlink '%s' and '%s'. Different inode.\n", path, pathto);
1664 0 : log_tag("hardlink_error:%s:%s:%s: Hardlink mismatch for different inode\n", disk->name, esc_tag(slink->sub, esc_buffer), esc_tag(slink->linkto, esc_buffer_alt));
1665 0 : ++error;
1666 : }
1667 : } else {
1668 : /* read the symlink */
1669 66532 : pathprint(path, sizeof(path), "%s%s", disk->dir, slink->sub);
1670 66532 : ret = readlink(path, linkto, sizeof(linkto));
1671 66532 : if (ret < 0) {
1672 4309 : unsuccessful = 1;
1673 :
1674 4309 : log_error("Error reading symlink '%s'. %s.\n", path, strerror(errno));
1675 4309 : log_tag("symlink_error:%s:%s: Symlink read error\n", disk->name, esc_tag(slink->sub, esc_buffer));
1676 4309 : ++error;
1677 62223 : } else if (ret >= PATH_MAX) {
1678 0 : unsuccessful = 1;
1679 :
1680 0 : log_error("Error reading symlink '%s'. Symlink too long.\n", path);
1681 0 : log_tag("symlink_error:%s:%s: Symlink read error\n", disk->name, esc_tag(slink->sub, esc_buffer));
1682 0 : ++error;
1683 : } else {
1684 62223 : linkto[ret] = 0;
1685 :
1686 62223 : if (strcmp(linkto, slink->linkto) != 0) {
1687 527 : unsuccessful = 1;
1688 :
1689 527 : log_tag("symlink_error:%s:%s: Symlink data error '%s' instead of '%s'\n", disk->name, esc_tag(slink->sub, esc_buffer), linkto, slink->linkto);
1690 527 : ++error;
1691 : }
1692 : }
1693 : }
1694 :
1695 66850 : if (fix && unsuccessful && !unrecoverable) {
1696 : /* create the ancestor directories */
1697 3942 : ret = mkancestor(path);
1698 3942 : if (ret != 0) {
1699 : /* LCOV_EXCL_START */
1700 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1701 : log_fatal("Stopping\n");
1702 : ++unrecoverable_error;
1703 : goto bail;
1704 : /* LCOV_EXCL_STOP */
1705 : }
1706 :
1707 : /* if it exists, it must be deleted before recreating */
1708 3942 : ret = remove(path);
1709 3942 : if (ret != 0 && errno != ENOENT) {
1710 : /* LCOV_EXCL_START */
1711 : log_fatal("Error removing '%s'. %s.\n", path, strerror(errno));
1712 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1713 : log_fatal("Stopping\n");
1714 : ++unrecoverable_error;
1715 : goto bail;
1716 : /* LCOV_EXCL_STOP */
1717 : }
1718 :
1719 : /* create it */
1720 3942 : if (link_flag_has(slink, FILE_IS_HARDLINK)) {
1721 12 : ret = hardlink(pathto, path);
1722 12 : if (ret != 0) {
1723 : /* LCOV_EXCL_START */
1724 : log_fatal("Error writing hardlink '%s' to '%s'. %s.\n", path, pathto, strerror(errno));
1725 : if (errno == EACCES) {
1726 : log_fatal("WARNING! Please give write permission to the hardlink.\n");
1727 : } else {
1728 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1729 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1730 : }
1731 : log_fatal("Stopping\n");
1732 : ++unrecoverable_error;
1733 : goto bail;
1734 : /* LCOV_EXCL_STOP */
1735 : }
1736 :
1737 12 : log_tag("hardlink_fixed:%s:%s: Fixed hardlink error\n", disk->name, esc_tag(slink->sub, esc_buffer));
1738 12 : ++recovered_error;
1739 : } else {
1740 3930 : ret = symlink(slink->linkto, path);
1741 3930 : if (ret != 0) {
1742 : /* LCOV_EXCL_START */
1743 : log_fatal("Error writing symlink '%s' to '%s'. %s.\n", path, slink->linkto, strerror(errno));
1744 : if (errno == EACCES) {
1745 : log_fatal("WARNING! Please give write permission to the symlink.\n");
1746 : } else {
1747 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1748 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1749 : }
1750 : log_fatal("Stopping\n");
1751 : ++unrecoverable_error;
1752 : goto bail;
1753 : /* LCOV_EXCL_STOP */
1754 : }
1755 :
1756 3930 : log_tag("symlink_fixed:%s:%s: Fixed symlink error\n", disk->name, esc_tag(slink->sub, esc_buffer));
1757 3930 : ++recovered_error;
1758 : }
1759 :
1760 3942 : log_tag("status:recovered:%s:%s\n", disk->name, esc_tag(slink->sub, esc_buffer));
1761 3942 : msg_info("recovered %s\n", fmt_term(disk, slink->sub, esc_buffer));
1762 : }
1763 : }
1764 :
1765 : /* for each dir in the disk */
1766 705 : disk = handle[i].disk;
1767 705 : node = disk->dirlist;
1768 1145 : while (node) {
1769 : char path[PATH_MAX];
1770 : struct stat st;
1771 : struct snapraid_dir* dir;
1772 440 : int unsuccessful = 0;
1773 :
1774 440 : dir = node->data;
1775 440 : node = node->next; /* next node */
1776 :
1777 : /* if excluded continue to the next one */
1778 440 : if (dir_flag_has(dir, FILE_IS_EXCLUDED)) {
1779 19 : continue;
1780 : }
1781 :
1782 : /* stat the dir */
1783 421 : pathprint(path, sizeof(path), "%s%s", disk->dir, dir->sub);
1784 421 : ret = stat(path, &st);
1785 421 : if (ret == -1) {
1786 23 : unsuccessful = 1;
1787 :
1788 23 : log_error("Error stating dir '%s'. %s.\n", path, strerror(errno));
1789 23 : log_tag("dir_error:%s:%s: Dir stat error\n", disk->name, esc_tag(dir->sub, esc_buffer));
1790 23 : ++error;
1791 398 : } else if (!S_ISDIR(st.st_mode)) {
1792 0 : unsuccessful = 1;
1793 :
1794 0 : log_tag("dir_error:%s:%s: Dir error for not directory\n", disk->name, esc_tag(dir->sub, esc_buffer));
1795 0 : ++error;
1796 : }
1797 :
1798 421 : if (fix && unsuccessful) {
1799 : /* create the ancestor directories */
1800 21 : ret = mkancestor(path);
1801 21 : if (ret != 0) {
1802 : /* LCOV_EXCL_START */
1803 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1804 : log_fatal("Stopping\n");
1805 : ++unrecoverable_error;
1806 : goto bail;
1807 : /* LCOV_EXCL_STOP */
1808 : }
1809 :
1810 : /* create it */
1811 21 : ret = mkdir(path, S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH);
1812 21 : if (ret != 0) {
1813 : /* LCOV_EXCL_START */
1814 : log_fatal("Error creating dir '%s'. %s.\n", path, strerror(errno));
1815 : if (errno == EACCES) {
1816 : log_fatal("WARNING! Please give write permission to the dir.\n");
1817 : } else {
1818 : /* we do not use DANGER because it could be ENOSPC which is not always correctly reported */
1819 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1820 : }
1821 : log_fatal("Stopping\n");
1822 : ++unrecoverable_error;
1823 : goto bail;
1824 : /* LCOV_EXCL_STOP */
1825 : }
1826 :
1827 21 : log_tag("dir_fixed:%s:%s: Fixed dir error\n", disk->name, esc_tag(dir->sub, esc_buffer));
1828 21 : ++recovered_error;
1829 :
1830 21 : log_tag("status:recovered:%s:%s\n", disk->name, esc_tag(dir->sub, esc_buffer));
1831 21 : msg_info("recovered %s\n", fmt_term(disk, dir->sub, esc_buffer));
1832 : }
1833 : }
1834 : }
1835 :
1836 118 : end:
1837 118 : state_progress_end(state, countpos, countmax, countsize, "Nothing to check.\n");
1838 :
1839 118 : bail:
1840 : /* close all the files left open */
1841 826 : for (j = 0; j < diskmax; ++j) {
1842 708 : struct snapraid_file* file = handle[j].file;
1843 708 : struct snapraid_disk* disk = handle[j].disk;
1844 708 : ret = handle_close(&handle[j]);
1845 708 : if (ret == -1) {
1846 : /* LCOV_EXCL_START */
1847 : log_tag("error:%u:%s:%s: Close error. %s\n", blockmax, disk->name, esc_tag(file->sub, esc_buffer), strerror(errno));
1848 : log_fatal("DANGER! Unexpected close error in a data disk.\n");
1849 : ++unrecoverable_error;
1850 : /* continue, as we are already exiting */
1851 : /* LCOV_EXCL_STOP */
1852 : }
1853 : }
1854 :
1855 : /* remove all the files created from scratch that have not finished the processing */
1856 : /* it happens only when aborting pressing Ctrl+C or other reason. */
1857 118 : if (fix) {
1858 : /* for each disk */
1859 315 : for (i = 0; i < diskmax; ++i) {
1860 : tommy_node* node;
1861 : struct snapraid_disk* disk;
1862 :
1863 270 : if (!handle[i].disk)
1864 1 : continue;
1865 :
1866 : /* for each file in the disk */
1867 269 : disk = handle[i].disk;
1868 269 : node = disk->filelist;
1869 870580 : while (node) {
1870 : char path[PATH_MAX];
1871 : struct snapraid_file* file;
1872 :
1873 870311 : file = node->data;
1874 870311 : node = node->next; /* next node */
1875 :
1876 : /* if the file was not created, meaning that it was already existing */
1877 870311 : if (!file_flag_has(file, FILE_IS_CREATED)) {
1878 : /* nothing to do */
1879 870311 : continue;
1880 : }
1881 :
1882 : /* if processing was finished */
1883 113055 : if (file_flag_has(file, FILE_IS_FINISHED)) {
1884 : /* nothing to do */
1885 113055 : continue;
1886 : }
1887 :
1888 : /* if the file was originally missing, and processing not yet finished */
1889 : /* we have to throw it away to ensure that at the next run we will retry */
1890 : /* to fix it, in case we select to undelete missing files */
1891 0 : pathprint(path, sizeof(path), "%s%s", disk->dir, file->sub);
1892 :
1893 0 : ret = remove(path);
1894 0 : if (ret != 0) {
1895 : /* LCOV_EXCL_START */
1896 : log_fatal("Error removing '%s'. %s.\n", path, strerror(errno));
1897 : log_fatal("WARNING! Without a working data disk, it isn't possible to fix errors on it.\n");
1898 : ++unrecoverable_error;
1899 : /* continue, as we are already exiting */
1900 : /* LCOV_EXCL_STOP */
1901 : }
1902 : }
1903 : }
1904 : }
1905 :
1906 118 : if (error || recovered_error || unrecoverable_error) {
1907 68 : msg_status("\n");
1908 68 : msg_status("%8u errors\n", error);
1909 68 : if (fix) {
1910 44 : msg_status("%8u recovered errors\n", recovered_error);
1911 : }
1912 68 : if (unrecoverable_error) {
1913 13 : msg_status("%8u UNRECOVERABLE errors\n", unrecoverable_error);
1914 : } else {
1915 : /* without checking, we don't know if they are really recoverable or not */
1916 55 : if (!state->opt.auditonly)
1917 53 : msg_status("%8u unrecoverable errors\n", unrecoverable_error);
1918 55 : if (fix)
1919 35 : msg_status("Everything OK\n");
1920 : }
1921 : } else {
1922 50 : msg_status("Everything OK\n");
1923 : }
1924 :
1925 118 : if (error && !fix)
1926 24 : log_fatal("WARNING! There are errors!\n");
1927 118 : if (unrecoverable_error)
1928 13 : log_fatal("DANGER! Unrecoverable errors detected!\n");
1929 :
1930 118 : log_tag("summary:error:%u\n", error);
1931 118 : if (fix)
1932 45 : log_tag("summary:error_recovered:%u\n", recovered_error);
1933 118 : if (!state->opt.auditonly)
1934 113 : log_tag("summary:error_unrecoverable:%u\n", unrecoverable_error);
1935 118 : if (fix) {
1936 45 : if (error + recovered_error + unrecoverable_error == 0)
1937 1 : log_tag("summary:exit:ok\n");
1938 44 : else if (unrecoverable_error == 0)
1939 35 : log_tag("summary:exit:recovered\n");
1940 : else
1941 9 : log_tag("summary:exit:unrecoverable\n");
1942 73 : } else if (!state->opt.auditonly) {
1943 68 : if (error + unrecoverable_error == 0)
1944 46 : log_tag("summary:exit:ok\n");
1945 22 : else if (unrecoverable_error == 0)
1946 18 : log_tag("summary:exit:recoverable\n");
1947 : else
1948 4 : log_tag("summary:exit:unrecoverable\n");
1949 : } else { /* audit only */
1950 5 : if (error == 0)
1951 3 : log_tag("summary:exit:ok\n");
1952 : else
1953 2 : log_tag("summary:exit:error\n");
1954 : }
1955 118 : log_flush();
1956 :
1957 118 : free(failed);
1958 118 : free(failed_map);
1959 118 : free(block_enabled);
1960 118 : free(handle);
1961 118 : free(buffer_alloc);
1962 118 : free(buffer);
1963 :
1964 : /* fail if some error are present after the run */
1965 118 : if (fix) {
1966 45 : if (state->opt.expect_unrecoverable) {
1967 9 : if (unrecoverable_error == 0)
1968 0 : return -1;
1969 : } else {
1970 36 : if (unrecoverable_error != 0)
1971 0 : return -1;
1972 : }
1973 : } else {
1974 73 : if (state->opt.expect_unrecoverable) {
1975 4 : if (unrecoverable_error == 0)
1976 0 : return -1;
1977 69 : } else if (state->opt.expect_recoverable) {
1978 20 : if (unrecoverable_error != 0 || error == 0)
1979 0 : return -1;
1980 : } else {
1981 49 : if (error != 0 || unrecoverable_error != 0)
1982 0 : return -1;
1983 : }
1984 : }
1985 :
1986 118 : return 0;
1987 : }
1988 :
1989 120 : int state_check(struct snapraid_state* state, int fix, block_off_t blockstart, block_off_t blockcount)
1990 : {
1991 : block_off_t blockmax;
1992 : data_off_t size;
1993 : int ret;
1994 : struct snapraid_parity_handle parity[LEV_MAX];
1995 : struct snapraid_parity_handle* parity_ptr[LEV_MAX];
1996 : unsigned error;
1997 : unsigned l;
1998 :
1999 120 : msg_progress("Initializing...\n");
2000 :
2001 120 : blockmax = parity_allocated_size(state);
2002 120 : size = blockmax * (data_off_t)state->block_size;
2003 :
2004 120 : if (blockstart > blockmax) {
2005 : /* LCOV_EXCL_START */
2006 : log_fatal("Error in the specified starting block %u. It's larger than the parity size %u.\n", blockstart, blockmax);
2007 : exit(EXIT_FAILURE);
2008 : /* LCOV_EXCL_STOP */
2009 : }
2010 :
2011 : /* adjust the number of block to process */
2012 120 : if (blockcount != 0 && blockstart + blockcount < blockmax) {
2013 0 : blockmax = blockstart + blockcount;
2014 : }
2015 :
2016 120 : if (fix) {
2017 : /* if fixing, create the file and open for writing */
2018 : /* if it fails, we cannot continue */
2019 180 : for (l = 0; l < state->level; ++l) {
2020 : /* skip parity disks that are not accessible */
2021 135 : if (state->parity[l].skip_access) {
2022 1 : parity_ptr[l] = 0;
2023 1 : continue;
2024 : }
2025 :
2026 134 : parity_ptr[l] = &parity[l];
2027 :
2028 : /* if the parity is excluded */
2029 134 : if (state->parity[l].is_excluded_by_filter) {
2030 : /* open for reading, and ignore error */
2031 12 : ret = parity_open(parity_ptr[l], &state->parity[l], l, state->file_mode, state->block_size, state->opt.parity_limit_size);
2032 12 : if (ret == -1) {
2033 : /* continue anyway */
2034 0 : parity_ptr[l] = 0;
2035 : }
2036 : } else {
2037 : /* open for writing */
2038 122 : ret = parity_create(parity_ptr[l], &state->parity[l], l, state->file_mode, state->block_size, state->opt.parity_limit_size);
2039 122 : if (ret == -1) {
2040 : /* LCOV_EXCL_START */
2041 : log_fatal("WARNING! Without an accessible %s file, it isn't possible to fix any error.\n", lev_name(l));
2042 : exit(EXIT_FAILURE);
2043 : /* LCOV_EXCL_STOP */
2044 : }
2045 :
2046 122 : ret = parity_chsize(parity_ptr[l], &state->parity[l], 0, size, state->block_size, state->opt.skip_fallocate, state->opt.skip_space_holder);
2047 122 : if (ret == -1) {
2048 : /* LCOV_EXCL_START */
2049 : log_fatal("WARNING! Without an accessible %s file, it isn't possible to sync.\n", lev_name(l));
2050 : exit(EXIT_FAILURE);
2051 : /* LCOV_EXCL_STOP */
2052 : }
2053 : }
2054 : }
2055 75 : } else if (!state->opt.auditonly) {
2056 : /* if checking, open the file for reading */
2057 : /* it may fail if the file doesn't exist, in this case we continue to check the files */
2058 393 : for (l = 0; l < state->level; ++l) {
2059 324 : parity_ptr[l] = &parity[l];
2060 324 : ret = parity_open(parity_ptr[l], &state->parity[l], l, state->file_mode, state->block_size, state->opt.parity_limit_size);
2061 324 : if (ret == -1) {
2062 1 : msg_status("No accessible %s file, only files will be checked.\n", lev_name(l));
2063 : /* continue anyway */
2064 1 : parity_ptr[l] = 0;
2065 : }
2066 : }
2067 : } else {
2068 : /* otherwise don't use any parity */
2069 28 : for (l = 0; l < state->level; ++l)
2070 22 : parity_ptr[l] = 0;
2071 : }
2072 :
2073 120 : error = 0;
2074 :
2075 : /* skip degenerated cases of empty parity, or skipping all */
2076 120 : if (blockstart < blockmax) {
2077 118 : ret = state_check_process(state, fix, parity_ptr, blockstart, blockmax);
2078 118 : if (ret == -1) {
2079 : /* LCOV_EXCL_START */
2080 : ++error;
2081 : /* continue, as we are already exiting */
2082 : /* LCOV_EXCL_STOP */
2083 : }
2084 : }
2085 :
2086 : /* try to close only if opened */
2087 601 : for (l = 0; l < state->level; ++l) {
2088 481 : if (parity_ptr[l]) {
2089 : /* if fixing and not excluded, truncate parity not valid */
2090 457 : if (fix && !state->parity[l].is_excluded_by_filter) {
2091 122 : ret = parity_truncate(parity_ptr[l]);
2092 122 : if (ret == -1) {
2093 : /* LCOV_EXCL_START */
2094 : log_fatal("DANGER! Unexpected truncate error in %s disk.\n", lev_name(l));
2095 : ++error;
2096 : /* continue, as we are already exiting */
2097 : /* LCOV_EXCL_STOP */
2098 : }
2099 : }
2100 :
2101 457 : ret = parity_close(parity_ptr[l]);
2102 457 : if (ret == -1) {
2103 : /* LCOV_EXCL_START */
2104 : log_fatal("DANGER! Unexpected close error in %s disk.\n", lev_name(l));
2105 : ++error;
2106 : /* continue, as we are already exiting */
2107 : /* LCOV_EXCL_STOP */
2108 : }
2109 : }
2110 : }
2111 :
2112 : /* abort if error are present */
2113 120 : if (error != 0)
2114 0 : return -1;
2115 120 : return 0;
2116 : }
2117 :
|