Line data Source code
1 : /*
2 : * Copyright (C) 2011 Andrea Mazzoleni
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 : */
17 :
18 : #include "portable.h"
19 :
20 : #include "support.h"
21 : #include "elem.h"
22 : #include "state.h"
23 : #include "parity.h"
24 :
25 : struct snapraid_scan {
26 : struct snapraid_state* state; /**< State used. */
27 : struct snapraid_disk* disk; /**< Disk used. */
28 : thread_id_t thread; /**< Thread used for scanning the disk */
29 :
30 : int is_diff; /**< If it's a diff command or a scanning */
31 : int need_write; /**< If a state write is required */
32 :
33 : /**
34 : * Counters of changes.
35 : */
36 : unsigned count_equal; /**< Files equal. */
37 : unsigned count_move; /**< Files with a different name, but equal inode, size and timestamp in the same disk. */
38 : unsigned count_restore; /**< Files with equal name, size and timestamp, but different inode. */
39 : unsigned count_change; /**< Files with same name, but different size and/or timestamp. */
40 : unsigned count_copy; /**< Files new, with same name size and timestamp of a file in a different disk. */
41 : unsigned count_relocate; /**< Like copy, but with the original disappeared. */
42 : unsigned count_insert; /**< Files new. */
43 : unsigned count_remove; /**< Files removed. */
44 :
45 : tommy_list file_insert_list; /**< Files to insert. */
46 : tommy_list link_insert_list; /**< Links to insert. */
47 : tommy_list dir_insert_list; /**< Dirs to insert. */
48 : tommy_list local_filter_list; /**< Filter list specific for the disk. */
49 :
50 : /* nodes for data structures */
51 : tommy_node node;
52 : };
53 :
54 629 : static struct snapraid_scan* scan_alloc(struct snapraid_state* state, struct snapraid_disk* disk, int is_diff)
55 : {
56 : struct snapraid_scan* scan;
57 :
58 629 : scan = malloc_nofail(sizeof(struct snapraid_scan));
59 629 : scan->state = state;
60 629 : scan->disk = disk;
61 629 : scan->count_equal = 0;
62 629 : scan->count_move = 0;
63 629 : scan->count_copy = 0;
64 629 : scan->count_relocate = 0;
65 629 : scan->count_restore = 0;
66 629 : scan->count_change = 0;
67 629 : scan->count_remove = 0;
68 629 : scan->count_insert = 0;
69 629 : tommy_list_init(&scan->file_insert_list);
70 629 : tommy_list_init(&scan->link_insert_list);
71 629 : tommy_list_init(&scan->dir_insert_list);
72 629 : tommy_list_init(&scan->local_filter_list);
73 629 : scan->is_diff = is_diff;
74 629 : scan->need_write = 0;
75 :
76 : #if HAVE_THREAD
77 629 : thread_mutex_init(&disk->stamp_mutex);
78 : #endif
79 :
80 629 : return scan;
81 : }
82 :
83 623 : static void scan_free(struct snapraid_scan* scan)
84 : {
85 : #if HAVE_THREAD
86 623 : thread_mutex_destroy(&scan->disk->stamp_mutex);
87 : #endif
88 623 : tommy_list_foreach(&scan->local_filter_list, filter_free);
89 623 : free(scan);
90 623 : }
91 :
/**
 * Acquire the stamp mutex of the disk.
 *
 * Without threads support it does nothing.
 */
static void stamp_lock(struct snapraid_disk* disk)
{
#if !HAVE_THREAD
	/* nothing to lock, only silence the unused argument */
	(void)disk;
#else
	thread_mutex_lock(&disk->stamp_mutex);
#endif
}
100 :
/**
 * Release the stamp mutex of the disk.
 *
 * Without threads support it does nothing.
 */
static void stamp_unlock(struct snapraid_disk* disk)
{
#if !HAVE_THREAD
	/* nothing to unlock, only silence the unused argument */
	(void)disk;
#else
	thread_mutex_unlock(&disk->stamp_mutex);
#endif
}
109 :
110 : /**
111 : * Remove the specified link from the data set.
112 : */
113 955 : static void scan_link_remove(struct snapraid_scan* scan, struct snapraid_link* slink)
114 : {
115 955 : struct snapraid_disk* disk = scan->disk;
116 :
117 : /* state changed */
118 955 : scan->need_write = 1;
119 :
120 : /* remove the file from the link containers */
121 955 : tommy_hashdyn_remove_existing(&disk->linkset, &slink->nodeset);
122 955 : tommy_list_remove_existing(&disk->linklist, &slink->nodelist);
123 :
124 : /* deallocate */
125 955 : link_free(slink);
126 955 : }
127 :
128 : /**
129 : * Insert the specified link in the data set.
130 : */
131 2449 : static void scan_link_insert(struct snapraid_scan* scan, struct snapraid_link* slink)
132 : {
133 2449 : struct snapraid_disk* disk = scan->disk;
134 :
135 : /* state changed */
136 2449 : scan->need_write = 1;
137 :
138 : /* insert the link in the link containers */
139 2449 : tommy_hashdyn_insert(&disk->linkset, &slink->nodeset, slink, link_name_hash(slink->sub));
140 2449 : tommy_list_insert_tail(&disk->linklist, &slink->nodelist, slink);
141 2449 : }
142 :
143 : /**
144 : * Process a symbolic link.
145 : */
146 39756 : static void scan_link(struct snapraid_scan* scan, int is_diff, const char* sub, const char* linkto, unsigned link_flag)
147 : {
148 39756 : struct snapraid_state* state = scan->state;
149 39756 : struct snapraid_disk* disk = scan->disk;
150 : struct snapraid_link* slink;
151 : char esc_buffer[ESC_MAX];
152 :
153 : /* check if the link already exists */
154 39756 : slink = tommy_hashdyn_search(&disk->linkset, link_name_compare_to_arg, sub, link_name_hash(sub));
155 39756 : if (slink) {
156 : /* check if multiple files have the same name */
157 37307 : if (link_flag_has(slink, FILE_IS_PRESENT)) {
158 : /* LCOV_EXCL_START */
159 : log_fatal(EINTERNAL, "Internal inconsistency for link '%s%s'\n", disk->dir, sub);
160 : os_abort();
161 : /* LCOV_EXCL_STOP */
162 : }
163 :
164 : /* mark as present */
165 37307 : link_flag_set(slink, FILE_IS_PRESENT);
166 :
167 : /* check if the link is not changed and it's of the same kind */
168 37307 : if (strcmp(slink->linkto, linkto) == 0 && link_flag == link_flag_get(slink, FILE_IS_LINK_MASK)) {
169 : /* it's equal */
170 37300 : ++scan->count_equal;
171 :
172 37300 : if (state->opt.gui_verbose) {
173 0 : log_tag("scan:equal:%s:%s\n", disk->name, esc_tag(slink->sub, esc_buffer));
174 : }
175 : } else {
176 : /* it's an update */
177 :
178 : /* we have to save the linkto/type */
179 7 : scan->need_write = 1;
180 :
181 7 : ++scan->count_change;
182 :
183 7 : log_tag("scan:update:%s:%s\n", disk->name, esc_tag(slink->sub, esc_buffer));
184 7 : if (is_diff) {
185 3 : msg_info("update %s\n", fmt_term(disk, slink->sub, esc_buffer));
186 : }
187 :
188 : /* update it */
189 7 : free(slink->linkto);
190 7 : slink->linkto = strdup_nofail(linkto);
191 7 : link_flag_let(slink, link_flag, FILE_IS_LINK_MASK);
192 : }
193 :
194 : /* nothing more to do */
195 37307 : return;
196 : } else {
197 : /* create the new link */
198 2449 : ++scan->count_insert;
199 :
200 2449 : log_tag("scan:add:%s:%s\n", disk->name, esc_tag(sub, esc_buffer));
201 2449 : if (is_diff) {
202 599 : msg_info("add %s\n", fmt_term(disk, sub, esc_buffer));
203 : }
204 :
205 : /* and continue to insert it */
206 : }
207 :
208 : /* insert it */
209 2449 : slink = link_alloc(sub, linkto, link_flag);
210 :
211 : /* mark it as present */
212 2449 : link_flag_set(slink, FILE_IS_PRESENT);
213 :
214 : /* insert it in the delayed insert list */
215 2449 : tommy_list_insert_tail(&scan->link_insert_list, &slink->nodelist, slink);
216 : }
217 :
218 : /**
219 : * Insert the specified file in the parity.
220 : */
221 99390 : static void scan_file_allocate(struct snapraid_scan* scan, struct snapraid_file* file)
222 : {
223 99390 : struct snapraid_state* state = scan->state;
224 99390 : struct snapraid_disk* disk = scan->disk;
225 : block_off_t i;
226 : block_off_t parity_pos;
227 :
228 : /* state changed */
229 99390 : scan->need_write = 1;
230 :
231 : /* allocate the blocks of the file */
232 99390 : parity_pos = disk->first_free_block;
233 347534 : for (i = 0; i < file->blockmax; ++i) {
234 : struct snapraid_block* block;
235 : struct snapraid_block* over_block;
236 : snapraid_info info;
237 :
238 : /* increment the position until the first really free block */
239 779635 : while (block_has_file(fs_par2block_find(disk, parity_pos)))
240 531491 : ++parity_pos;
241 :
242 : /* get block we are going to overwrite, if any */
243 248144 : over_block = fs_par2block_find(disk, parity_pos);
244 :
245 : /* deallocate it */
246 248144 : if (over_block != BLOCK_NULL)
247 90104 : fs_deallocate(disk, parity_pos);
248 :
249 : /* get block specific info */
250 248144 : info = info_get(&state->infoarr, parity_pos);
251 :
252 : /* get the new block we are going to write */
253 248144 : block = fs_file2block_get(file, i);
254 :
255 : /* if the file block already has an updated hash without rehash */
256 248144 : if (block_has_updated_hash(block) && !info_get_rehash(info)) {
257 : /* the only possible case is for REP blocks */
258 85548 : assert(block_state_get(block) == BLOCK_STATE_REP);
259 :
260 : /* convert to a REP block */
261 85548 : block_state_set(block, BLOCK_STATE_REP);
262 :
263 : /* and keep the hash as it's */
264 : } else {
265 : unsigned over_state;
266 :
267 : /* convert to a CHG block */
268 162596 : block_state_set(block, BLOCK_STATE_CHG);
269 :
270 : /* state of the block we are going to overwrite */
271 162596 : over_state = block_state_get(over_block);
272 :
273 : /* if the block is an empty one */
274 162596 : if (over_state == BLOCK_STATE_EMPTY) {
275 : /* the block was empty and filled with zeros */
276 : /* set the hash to the special ZERO value */
277 117596 : hash_zero_set(block->hash);
278 : } else {
279 : /* otherwise it's a DELETED one */
280 45000 : assert(over_state == BLOCK_STATE_DELETED);
281 :
282 : /* copy the past hash of the block */
283 45000 : memcpy(block->hash, over_block->hash, BLOCK_HASH_SIZE);
284 :
285 : /* if we have not already cleared the past hash */
286 45000 : if (!state->clear_past_hash) {
287 : /* in this case we don't know if the old state is still the one */
288 : /* stored inside the parity, because after an aborted sync, the parity */
289 : /* may be or may be not have been updated with the new data */
290 : /* Then we reset the hash to a bogus value */
291 : /* For example: */
292 : /* - One file is deleted */
293 : /* - Sync aborted after, updating the parity to the new state, */
294 : /* but without saving the content file representing this new state. */
295 : /* - Another file is added again (exactly here) */
296 : /* with the hash of DELETED block not representing the real parity state */
297 421 : hash_invalid_set(block->hash);
298 : }
299 : }
300 : }
301 :
302 : /* store in the disk map, after invalidating all the other blocks */
303 248144 : fs_allocate(disk, parity_pos, file, i);
304 :
305 : /* set the new free position */
306 248144 : disk->first_free_block = parity_pos + 1;
307 : }
308 :
309 : /* insert in the list of contained files */
310 99390 : tommy_list_insert_tail(&disk->filelist, &file->nodelist, file);
311 99390 : }
312 :
313 : /**
314 : * Delete the specified file from the parity.
315 : *
316 : * Note that the parity remains allocated, but the blocks and the file are marked as DELETED.
317 : * The file is then inserted in the deleted set, and it should not be deallocated,
318 : * as the parity still references it.
319 : */
320 54572 : static void scan_file_deallocate(struct snapraid_scan* scan, struct snapraid_file* file)
321 : {
322 54572 : struct snapraid_state* state = scan->state;
323 54572 : struct snapraid_disk* disk = scan->disk;
324 : block_off_t i;
325 :
326 : /* remove from the list of contained files */
327 54572 : tommy_list_remove_existing(&disk->filelist, &file->nodelist);
328 :
329 : /* state changed */
330 54572 : scan->need_write = 1;
331 :
332 : /* here we are supposed to adjust the ::first_free_block position */
333 : /* with the parity position we are deleting */
334 : /* but we also know that we do only delayed insert, after all the deletion, */
335 : /* so at this point ::first_free_block is always at 0, and we don't need to update it */
336 54572 : if (disk->first_free_block != 0) {
337 : /* LCOV_EXCL_START */
338 : log_fatal(EINTERNAL, "Internal inconsistency for first free position at '%u' deallocating file '%s'\n", disk->first_free_block, file->sub);
339 : os_abort();
340 : /* LCOV_EXCL_STOP */
341 : }
342 :
343 : /* free all the blocks of the file */
344 192188 : for (i = 0; i < file->blockmax; ++i) {
345 137616 : struct snapraid_block* block = fs_file2block_get(file, i);
346 : unsigned block_state;
347 :
348 : /* in case we scan after an aborted sync, */
349 : /* we could get also intermediate states */
350 137616 : block_state = block_state_get(block);
351 137616 : switch (block_state) {
352 32989 : case BLOCK_STATE_BLK :
353 : /* we keep the hash making it an "old" hash, because the parity is still containing data for it */
354 32989 : break;
355 27968 : case BLOCK_STATE_CHG :
356 : /* if we have not already cleared the past hash */
357 27968 : if (!state->clear_past_hash) {
358 : /* in these cases we don't know if the old state is still the one */
359 : /* stored inside the parity, because after an aborted sync, the parity */
360 : /* may be or may be not have been updated with the data that it's now */
361 : /* deleted. Then we reset the hash to a bogus value. */
362 : /* For example: */
363 : /* - One file is added */
364 : /* - Sync aborted after updating the parity to the new state, */
365 : /* but without saving the content file representing this new state. */
366 : /* - File is now deleted after the aborted sync */
367 : /* - Sync again, deleting the blocks (exactly here) */
368 : /* with the hash of CHG block not representing the real parity state */
369 40 : hash_invalid_set(block->hash);
370 : }
371 27968 : break;
372 76659 : case BLOCK_STATE_REP :
373 : /* we just don't know the old hash, and then we set it to invalid */
374 76659 : hash_invalid_set(block->hash);
375 76659 : break;
376 0 : default :
377 : /* LCOV_EXCL_START */
378 : log_fatal(EINTERNAL, "Internal inconsistency in file '%s' deallocating block '%u:%u' state %u\n", file->sub, i, file->blockmax, block_state);
379 : os_abort();
380 : /* LCOV_EXCL_STOP */
381 : }
382 :
383 : /* set the block as deleted */
384 137616 : block_state_set(block, BLOCK_STATE_DELETED);
385 : }
386 :
387 : /* mark the file as deleted */
388 54572 : file_flag_set(file, FILE_IS_DELETED);
389 :
390 : /* insert it in the list of deleted blocks */
391 54572 : tommy_list_insert_tail(&disk->deletedlist, &file->nodelist, file);
392 54572 : }
393 :
394 99390 : static void scan_file_delayed_allocate(struct snapraid_scan* scan, struct snapraid_file* file)
395 : {
396 99390 : struct snapraid_state* state = scan->state;
397 99390 : struct snapraid_disk* disk = scan->disk;
398 :
399 : /* if we sort for physical offsets we have to read them for new files */
400 99390 : if (state->opt.force_order == SORT_PHYSICAL
401 11432 : && file->physical == FILEPHY_UNREAD_OFFSET
402 : ) {
403 : char path_next[PATH_MAX];
404 :
405 11432 : pathprint(path_next, sizeof(path_next), "%s%s", disk->dir, file->sub);
406 :
407 11432 : if (filephy(path_next, file->size, &file->physical) != 0) {
408 : /* LCOV_EXCL_START */
409 : log_fatal(errno, "Error in getting the physical offset of file '%s'. %s.\n", path_next, strerror(errno));
410 : exit(EXIT_FAILURE);
411 : /* LCOV_EXCL_STOP */
412 : }
413 : }
414 :
415 : /* insert in the delayed list */
416 99390 : tommy_list_insert_tail(&scan->file_insert_list, &file->nodelist, file);
417 99390 : }
418 :
419 : /**
420 : * Check if a file is completely formed of blocks with invalid parity,
421 : * and no rehash is tagged, and if it has at least one block.
422 : */
423 1138644 : static int file_is_full_invalid_parity_and_stable(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file)
424 : {
425 : block_off_t i;
426 :
427 : /* with no block, it never has an invalid parity */
428 1138644 : if (file->blockmax == 0)
429 1605 : return 0;
430 :
431 : /* check all blocks */
432 1193958 : for (i = 0; i < file->blockmax; ++i) {
433 : snapraid_info info;
434 1171321 : struct snapraid_block* block = fs_file2block_get(file, i);
435 : block_off_t parity_pos;
436 :
437 : /* exclude blocks with parity */
438 1171321 : if (!block_has_invalid_parity(block))
439 1114402 : return 0;
440 :
441 : /*
442 : * Get the parity position.
443 : *
444 : * Note that here we expect to always have mapped
445 : * parity, because kept files always have it.
446 : *
447 : * Anyway, checking for POS_NULL doesn't hurt.
448 : */
449 56919 : parity_pos = fs_file2par_find(disk, file, i);
450 :
451 : /* if it's not mapped, it cannot have rehash */
452 56919 : if (parity_pos != POS_NULL) {
453 : /* get block specific info */
454 56919 : info = info_get(&state->infoarr, parity_pos);
455 :
456 : /* if rehash fails */
457 56919 : if (info_get_rehash(info))
458 0 : return 0;
459 : }
460 : }
461 :
462 22637 : return 1;
463 : }
464 :
465 : /**
466 : * Check if a file is completely formed of blocks with an updated hash,
467 : * and no rehash is tagged, and if it has at least one block.
468 : */
469 23020 : static int file_is_full_hashed_and_stable(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file)
470 : {
471 : block_off_t i;
472 :
473 : /* with no block, it never has a hash */
474 23020 : if (file->blockmax == 0)
475 25 : return 0;
476 :
477 : /* check all blocks */
478 79548 : for (i = 0; i < file->blockmax; ++i) {
479 : snapraid_info info;
480 56559 : struct snapraid_block* block = fs_file2block_get(file, i);
481 : block_off_t parity_pos;
482 :
483 : /* exclude blocks without hash */
484 56559 : if (!block_has_updated_hash(block))
485 6 : return 0;
486 :
487 : /*
488 : * Get the parity position.
489 : *
490 : * Note that it's possible to have files
491 : * not mapped into the parity, even if they
492 : * have a valid hash.
493 : *
494 : * This happens for example, for 'copied' files
495 : * that have REP blocks, but not yet mapped.
496 : *
497 : * If there are multiple copies, it's also possible
498 : * that such files are used as 'source' to copy
499 : * hashes, and then to get them inside this function.
500 : */
501 56553 : parity_pos = fs_file2par_find(disk, file, i);
502 :
503 : /* if it's not mapped, it cannot have rehash */
504 56553 : if (parity_pos != POS_NULL) {
505 : /* get block specific info */
506 56545 : info = info_get(&state->infoarr, parity_pos);
507 :
508 : /* exclude blocks needing a rehash */
509 56545 : if (info_get_rehash(info))
510 0 : return 0;
511 : }
512 : }
513 :
514 22989 : return 1;
515 : }
516 :
/**
 * Refresh the info of a file.
 *
 * It's needed in Windows, where listing a directory may report outdated info,
 * and only GetFileInformationByHandle(), called file-by-file, is ensured
 * to return synced info.
 *
 * When HAVE_LSTAT_SYNC is set and the stat info is not marked as synced,
 * the file is read again with lstat_sync(), and time, size and nlink in
 * @p st are overwritten with the synced values. A different inode is instead
 * a fatal condition that terminates the program.
 *
 * In the same call also the physical offset is read, but only if sorting
 * for physical offsets and if not already read, to avoid reading it later.
 *
 * Without HAVE_LSTAT_SYNC it does nothing.
 */
static void scan_file_refresh(struct snapraid_scan* scan, const char* sub, struct stat* st, uint64_t* physical)
{
#if HAVE_LSTAT_SYNC
	struct snapraid_state* state = scan->state;
	struct snapraid_disk* disk = scan->disk;
	char path_next[PATH_MAX];
	struct stat synced_st;

	/* if the info is already synced, there is nothing to refresh */
	if (st->st_sync != 0) {
		return;
	}

	/* full path of the file */
	pathprint(path_next, sizeof(path_next), "%s%s", disk->dir, sub);

	/* the physical offset is read only if sorting for it, and if still unread, */
	/* otherwise the pointer is cleared to read nothing */
	if (state->opt.force_order != SORT_PHYSICAL
		|| *physical != FILEPHY_UNREAD_OFFSET
	) {
		physical = 0;
	}

	if (lstat_sync(path_next, &synced_st, physical) != 0) {
		/* LCOV_EXCL_START */
		log_fatal(errno, "Error in stat file '%s'. %s.\n", path_next, strerror(errno));
		exit(EXIT_FAILURE);
		/* LCOV_EXCL_STOP */
	}

	/* time */
	if (!(st->st_mtime == synced_st.st_mtime
		&& st->st_mtimensec == synced_st.st_mtimensec)
	) {
#ifndef _WIN32
		/*
		 * In Windows different metadata is expected with open files,
		 * because the metadata in the directory is updated only when
		 * the file is closed.
		 *
		 * The same happens for hardlinks that duplicate metadata.
		 * The link metadata is updated only when the link is opened.
		 * This extends also to st_size and st_nlink.
		 *
		 * See also:
		 * Why is the file size reported incorrectly for files that are still being written to?
		 * http://blogs.msdn.com/b/oldnewthing/archive/2011/12/26/10251026.aspx
		 */
		log_fatal(ESOFT, "WARNING! Detected uncached time change from %" PRIu64 ".%09u to %" PRIu64 ".%09u for file '%s'\n",
			(uint64_t)st->st_mtime, (uint32_t)st->st_mtimensec, (uint64_t)synced_st.st_mtime, (uint32_t)synced_st.st_mtimensec, sub);
		log_fatal(ESOFT, "It's better if you run SnapRAID without other processes running.\n");
#endif
		st->st_mtime = synced_st.st_mtime;
		st->st_mtimensec = synced_st.st_mtimensec;
	}

	/* size */
	if (st->st_size != synced_st.st_size) {
#ifndef _WIN32
		log_fatal(ESOFT, "WARNING! Detected uncached size change from %" PRIu64 " to %" PRIu64 " for file '%s'\n",
			(uint64_t)st->st_size, (uint64_t)synced_st.st_size, sub);
		log_fatal(ESOFT, "It's better if you run SnapRAID without other processes running.\n");
#endif
		st->st_size = synced_st.st_size;
	}

	/* number of links */
	if (st->st_nlink != synced_st.st_nlink) {
#ifndef _WIN32
		log_fatal(ESOFT, "WARNING! Detected uncached nlink change from %u to %u for file '%s'\n",
			(uint32_t)st->st_nlink, (uint32_t)synced_st.st_nlink, sub);
		log_fatal(ESOFT, "It's better if you run SnapRAID without other processes running.\n");
#endif
		st->st_nlink = synced_st.st_nlink;
	}

	/* inode */
	if (st->st_ino != synced_st.st_ino) {
		log_fatal(ESOFT, "DANGER! Detected uncached inode change from %" PRIu64 " to %" PRIu64 " for file '%s'\n",
			(uint64_t)st->st_ino, (uint64_t)synced_st.st_ino, sub);
		log_fatal(ESOFT, "It's better if you run SnapRAID without other processes running.\n");

		/* at this point it's too late to change the inode, */
		/* and inconsistent inodes may result in internal failures, */
		/* so it's better to abort */
		exit(EXIT_FAILURE);
	}
#else
	(void)scan;
	(void)sub;
	(void)st;
	(void)physical;
#endif
}
615 :
616 : /**
617 : * Insert the file in the data set.
618 : */
619 99390 : static void scan_file_insert(struct snapraid_scan* scan, struct snapraid_file* file)
620 : {
621 99390 : struct snapraid_disk* disk = scan->disk;
622 :
623 : /* insert the file in the containers */
624 99390 : if (!file_flag_has(file, FILE_IS_WITHOUT_INODE))
625 99390 : tommy_hashdyn_insert(&disk->inodeset, &file->nodeset, file, file_inode_hash(file->inode));
626 :
627 99390 : stamp_lock(disk);
628 99390 : tommy_hashdyn_insert(&disk->pathset, &file->pathset, file, file_path_hash(file->sub));
629 99390 : tommy_hashdyn_insert(&disk->stampset, &file->stampset, file, file_stamp_hash(file->size, file->mtime_sec, file->mtime_nsec));
630 99390 : stamp_unlock(disk);
631 :
632 : /* delayed allocation of the parity */
633 99390 : scan_file_delayed_allocate(scan, file);
634 99390 : }
635 :
636 : /**
637 : * Remove the file from the data set.
638 : *
639 : * File is then deleted.
640 : */
641 54572 : static void scan_file_remove(struct snapraid_scan* scan, struct snapraid_file* file)
642 : {
643 54572 : struct snapraid_disk* disk = scan->disk;
644 :
645 : /* remove the file from the containers */
646 54572 : if (!file_flag_has(file, FILE_IS_WITHOUT_INODE))
647 54525 : tommy_hashdyn_remove_existing(&disk->inodeset, &file->nodeset);
648 54572 : tommy_hashdyn_remove_existing(&disk->pathset, &file->pathset);
649 :
650 54572 : stamp_lock(disk);
651 54572 : tommy_hashdyn_remove_existing(&disk->stampset, &file->stampset);
652 54572 : stamp_unlock(disk);
653 :
654 : /* deallocate the file from the parity */
655 54572 : scan_file_deallocate(scan, file);
656 54572 : }
657 :
658 : /**
659 : * Keep the file as it's (or with only a name/inode modification).
660 : *
661 : * If the file is kept, nothing has to be done.
662 : *
663 : * But if a file contains only blocks with invalid parity, it's reallocated to ensure
664 : * to always minimize the space used in the parity.
665 : *
666 : * This could happen after a failed sync, when some other files are deleted,
667 : * and then new ones can be moved backward to fill the hole created.
668 : */
669 1138644 : static void scan_file_keep(struct snapraid_scan* scan, struct snapraid_file* file)
670 : {
671 1138644 : struct snapraid_disk* disk = scan->disk;
672 :
673 : /* if the file is full invalid, schedule a reinsert at later stage */
674 1138644 : if (file_is_full_invalid_parity_and_stable(scan->state, disk, file)) {
675 22637 : struct snapraid_file* copy = file_dup(file);
676 :
677 : /* remove the file */
678 22637 : scan_file_remove(scan, file);
679 :
680 : /* reinsert the copy in the delayed list */
681 22637 : scan_file_insert(scan, copy);
682 : }
683 1138644 : }
684 :
685 : /**
686 : * Process a file.
687 : */
688 1215619 : static void scan_file(struct snapraid_scan* scan, int is_diff, const char* sub, struct stat* st, uint64_t physical)
689 : {
690 1215619 : struct snapraid_state* state = scan->state;
691 1215619 : struct snapraid_disk* disk = scan->disk;
692 : struct snapraid_file* file;
693 : tommy_node* i;
694 : int is_original_file_size_different_than_zero;
695 : int is_file_already_present;
696 : data_off_t file_already_present_size;
697 : int64_t file_already_present_mtime_sec;
698 : int file_already_present_mtime_nsec;
699 : int is_file_reported;
700 : char esc_buffer[ESC_MAX];
701 : char esc_buffer_alt[ESC_MAX];
702 :
703 : /*
704 : * If the disk has persistent inodes and UUID, try a search on the past inodes,
705 : * to detect moved files.
706 : *
707 : * For persistent inodes we mean inodes that keep their values when the file-system
708 : * is unmounted and remounted. This don't always happen.
709 : *
710 : * Cases found are:
711 : * - Linux FUSE with exFAT driver from https://code.google.com/p/exfat/.
712 : * Inodes are reassigned at every mount restarting from 1 and incrementing.
713 : * As worse, the exFAT support in FUSE doesn't use sub-second precision in timestamps
714 : * making inode collision more easy (exFAT by design supports 10ms precision).
715 : * - Linux VFAT kernel (3.2) driver. Inodes are fully reassigned at every mount.
716 : *
717 : * In such cases, to avoid possible random collisions, it's better to disable the moved
718 : * file recognition.
719 : *
720 : * For persistent UUID we mean that it has the same UUID as before.
721 : * Otherwise, if the UUID is changed, likely it's a new recreated file-system,
722 : * and then the inode have no meaning.
723 : *
724 : * Note that to disable the search by past inode, we do this implicitly
725 : * removing all the past inode before searching for files.
726 : * This ensures that no file is found with a past inode, but at the same time,
727 : * it allows to find new files with the same inode, to identify them as hardlinks.
728 : */
729 1215619 : int has_past_inodes = !disk->has_volatile_inodes && !disk->has_different_uuid && !disk->has_unsupported_uuid;
730 :
731 : /* always search with the new inode, in the all new inodes found until now, */
732 : /* with the eventual presence of also the past inodes */
733 1215619 : uint64_t inode = st->st_ino;
734 :
735 1215619 : file = tommy_hashdyn_search(&disk->inodeset, file_inode_compare_to_arg, &inode, file_inode_hash(inode));
736 :
737 : /* identify moved files with past inodes and hardlinks with the new inodes */
738 1215619 : if (file) {
739 : /* check if the file is not changed */
740 1050884 : if (file->size == st->st_size
741 1033297 : && file->mtime_sec == st->st_mtime
742 1032025 : && (file->mtime_nsec == STAT_NSEC(st)
743 : /* always accept the stored value if it's STAT_NSEC_INVALID */
744 : /* it happens when upgrading from an old version of SnapRAID */
745 : /* not yet supporting the nanosecond field */
746 6 : || file->mtime_nsec == STAT_NSEC_INVALID
747 : )
748 : ) {
749 : /* check if multiple files have the same inode */
750 1032019 : if (file_flag_has(file, FILE_IS_PRESENT)) {
751 : /* if has_volatile_hardlinks is true, the nlink value is not reliable */
752 222 : if (!disk->has_volatile_hardlinks && st->st_nlink == 1) {
753 : /* LCOV_EXCL_START */
754 : log_fatal(EINTERNAL, "Internal inode '%" PRIu64 "' inconsistency for file '%s%s' already present\n", (uint64_t)st->st_ino, disk->dir, sub);
755 : os_abort();
756 : /* LCOV_EXCL_STOP */
757 : }
758 :
759 : /* it's a hardlink */
760 222 : scan_link(scan, is_diff, sub, file->sub, FILE_IS_HARDLINK);
761 1138866 : return;
762 : }
763 :
764 : /* mark as present */
765 1031797 : file_flag_set(file, FILE_IS_PRESENT);
766 :
767 : /* update the nanoseconds mtime only if different */
768 : /* to avoid unneeded updates */
769 1031797 : if (file->mtime_nsec == STAT_NSEC_INVALID
770 0 : && STAT_NSEC(st) != file->mtime_nsec
771 : ) {
772 0 : file->mtime_nsec = STAT_NSEC(st);
773 :
774 : /* we have to save the new mtime */
775 0 : scan->need_write = 1;
776 : }
777 :
778 1031797 : if (strcmp(file->sub, sub) != 0) {
779 : /* if the path is different, it means a moved file with the same inode */
780 155 : ++scan->count_move;
781 :
782 155 : log_tag("scan:move:%s:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer), esc_tag(sub, esc_buffer_alt));
783 155 : if (is_diff) {
784 0 : msg_info("move %s -> %s\n", fmt_term(disk, file->sub, esc_buffer), fmt_term(disk, sub, esc_buffer_alt));
785 : }
786 :
787 : /* remove from the name set */
788 155 : tommy_hashdyn_remove_existing(&disk->pathset, &file->pathset);
789 :
790 : /* save the new name */
791 155 : file_rename(file, sub);
792 :
793 : /* reinsert in the name set */
794 155 : tommy_hashdyn_insert(&disk->pathset, &file->pathset, file, file_path_hash(file->sub));
795 :
796 : /* we have to save the new name */
797 155 : scan->need_write = 1;
798 : } else {
799 : /* otherwise it's equal */
800 1031642 : ++scan->count_equal;
801 :
802 1031642 : if (state->opt.gui_verbose) {
803 0 : log_tag("scan:equal:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
804 : }
805 : }
806 :
807 : /* mark the file as kept */
808 1031797 : scan_file_keep(scan, file);
809 :
810 : /* nothing more to do */
811 1031797 : return;
812 : }
813 :
814 : /*
815 : * Here the file matches the inode, but not the other info
816 : *
817 : * It could be a modified file with the same name,
818 : * or a restored/copied file that get assigned a previously used inode,
819 : * or a file-system with not persistent inodes.
820 : *
821 : * In NTFS it could be also a hardlink, because in NTFS
822 : * hardlink don't share the same directory information,
823 : * like attribute and time.
824 : *
825 : * For example:
826 : * C:> echo A > A
827 : * C:> mklink /H B A
828 : * ...wait one minute
829 : * C:> echo AAAAAAAAAAAAAA > A
830 : * C:> dir
831 : * ...both time and size of A and B don't match!
832 : */
833 18865 : if (file_flag_has(file, FILE_IS_PRESENT)) {
834 : /* if has_volatile_hardlinks is true, the nlink value is not reliable */
835 0 : if (!disk->has_volatile_hardlinks && st->st_nlink == 1) {
836 : /* LCOV_EXCL_START */
837 : log_fatal(EINTERNAL, "Internal inode '%" PRIu64 "' inconsistency for files '%s%s' and '%s%s' with same inode but different attributes: size %" PRIu64 "?%" PRIu64 ", sec %" PRIu64 "?%" PRIu64 ", nsec %d?%d\n",
838 : file->inode, disk->dir, sub, disk->dir, file->sub,
839 : file->size, (uint64_t)st->st_size,
840 : file->mtime_sec, (uint64_t)st->st_mtime,
841 : file->mtime_nsec, STAT_NSEC(st));
842 : os_abort();
843 : /* LCOV_EXCL_STOP */
844 : }
845 :
846 : /* LCOV_EXCL_START */
847 : /* suppose it's hardlink with not synced metadata */
848 : scan_link(scan, is_diff, sub, file->sub, FILE_IS_HARDLINK);
849 : return;
850 : /* LCOV_EXCL_STOP */
851 : }
852 :
853 : /* assume a previously used inode, it's the worst case */
854 : /* and we handle it removing the duplicate stored inode. */
855 : /* If the file is found by name later, it will have the inode restored, */
856 : /* otherwise, it will get removed */
857 :
858 : /* remove from the inode set */
859 18865 : tommy_hashdyn_remove_existing(&disk->inodeset, &file->nodeset);
860 :
861 : /* clear the inode */
862 : /* this is not really needed for correct functionality */
863 : /* because we are going to set FILE_IS_WITHOUT_INODE */
864 : /* but it's easier for debugging to have invalid inodes set to 0 */
865 18865 : file->inode = 0;
866 :
867 : /* mark as missing inode */
868 18865 : file_flag_set(file, FILE_IS_WITHOUT_INODE);
869 :
870 : /* go further to find it by name */
871 : }
872 :
873 : /* initialize for later overwrite */
874 183600 : is_file_reported = 0;
875 183600 : is_original_file_size_different_than_zero = 0;
876 :
877 : /* then try finding it by name */
878 183600 : file = tommy_hashdyn_search(&disk->pathset, file_path_compare_to_arg, sub, file_path_hash(sub));
879 :
880 : /* keep track if the file already exists */
881 183600 : is_file_already_present = file != 0;
882 :
883 183600 : if (is_file_already_present) {
884 : /* if the file is without an inode */
885 109693 : if (file_flag_has(file, FILE_IS_WITHOUT_INODE)) {
886 : /* set it now */
887 18832 : file->inode = st->st_ino;
888 :
889 : /* insert in the set */
890 18832 : tommy_hashdyn_insert(&disk->inodeset, &file->nodeset, file, file_inode_hash(file->inode));
891 :
892 : /* unmark as missing inode */
893 18832 : file_flag_clear(file, FILE_IS_WITHOUT_INODE);
894 : } else {
895 : /* here the inode has to be different, otherwise we would have found it before */
896 90861 : if (file->inode == st->st_ino) {
897 : /* LCOV_EXCL_START */
898 : log_fatal(EINTERNAL, "Internal inconsistency in inode '%" PRIu64 "' for files '%s%s' as unexpected matching\n", file->inode, disk->dir, sub);
899 : os_abort();
900 : /* LCOV_EXCL_STOP */
901 : }
902 : }
903 :
904 : /* for sure it cannot be already present */
905 109693 : if (file_flag_has(file, FILE_IS_PRESENT)) {
906 : /* LCOV_EXCL_START */
907 : log_fatal(EINTERNAL, "Internal inconsistency in path for file '%s%s' matching and already present\n", disk->dir, sub);
908 : os_abort();
909 : /* LCOV_EXCL_STOP */
910 : }
911 :
912 : /* check if the file is not changed */
913 109693 : if (file->size == st->st_size
914 109101 : && file->mtime_sec == st->st_mtime
915 106847 : && (file->mtime_nsec == STAT_NSEC(st)
916 : /* always accept the stored value if it's STAT_NSEC_INVALID */
917 : /* it happens when upgrading from an old version of SnapRAID */
918 : /* not yet supporting the nanosecond field */
919 0 : || file->mtime_nsec == STAT_NSEC_INVALID
920 : )
921 : ) {
922 : /* mark as present */
923 106847 : file_flag_set(file, FILE_IS_PRESENT);
924 :
925 : /* update the nano seconds mtime only if different */
926 : /* to avoid unneeded updates */
927 106847 : if (file->mtime_nsec == STAT_NSEC_INVALID
928 0 : && STAT_NSEC(st) != STAT_NSEC_INVALID
929 : ) {
930 0 : file->mtime_nsec = STAT_NSEC(st);
931 :
932 : /* we have to save the new mtime */
933 0 : scan->need_write = 1;
934 : }
935 :
936 : /* if when processing the disk we used the past inodes values */
937 106847 : if (has_past_inodes) {
938 : /* if persistent inodes are supported, we are sure that the inode number */
939 : /* is now different, because otherwise the file would have been found */
940 : /* when searching by inode. */
941 : /* if the inode is different, it means a rewritten file with the same path */
942 : /* like when restoring a backup that restores also the timestamp */
943 106833 : ++scan->count_restore;
944 :
945 106833 : log_tag("scan:restore:%s:%s\n", disk->name, esc_tag(sub, esc_buffer));
946 106833 : if (is_diff) {
947 0 : msg_info("restore %s\n", fmt_term(disk, sub, esc_buffer));
948 : }
949 :
950 : /* remove from the inode set */
951 106833 : tommy_hashdyn_remove_existing(&disk->inodeset, &file->nodeset);
952 :
953 : /* save the new inode */
954 106833 : file->inode = st->st_ino;
955 :
956 : /* reinsert in the inode set */
957 106833 : tommy_hashdyn_insert(&disk->inodeset, &file->nodeset, file, file_inode_hash(file->inode));
958 :
959 : /* we have to save the new inode */
960 106833 : scan->need_write = 1;
961 : } else {
962 : /* otherwise it's the case of not persistent inode, where doesn't */
963 : /* matter if the inode is different or equal, because they have no */
964 : /* meaning, and then we don't even save them */
965 14 : ++scan->count_equal;
966 :
967 14 : if (state->opt.gui_verbose) {
968 0 : log_tag("scan:equal:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
969 : }
970 : }
971 :
972 : /* mark the file as kept */
973 106847 : scan_file_keep(scan, file);
974 :
975 : /* nothing more to do */
976 106847 : return;
977 : }
978 :
979 : /* here if the file is changed but with the correct name */
980 :
981 : /* save the info for later printout */
982 2846 : file_already_present_size = file->size;
983 2846 : file_already_present_mtime_sec = file->mtime_sec;
984 2846 : file_already_present_mtime_nsec = file->mtime_nsec;
985 :
986 : /* keep track if the original file was not of zero size */
987 2846 : is_original_file_size_different_than_zero = file->size != 0;
988 :
989 : /* remove it, and continue to insert it again */
990 2846 : scan_file_remove(scan, file);
991 :
992 : /* and continue to insert it again */
993 : } else {
994 73907 : file_already_present_size = 0;
995 73907 : file_already_present_mtime_sec = 0;
996 73907 : file_already_present_mtime_nsec = 0;
997 : }
998 :
999 : /* refresh the info, to ensure that they are synced, */
1000 : /* note that we refresh only the info of the new or modified files */
1001 : /* because this is slow operation */
1002 76753 : scan_file_refresh(scan, sub, st, &physical);
1003 :
1004 : #ifndef _WIN32
1005 : /* do a safety check to ensure that the common ext4 case of zeroing */
1006 : /* the size of a file after a crash doesn't propagate to the backup */
1007 : /* this check is specific for Linux, so we disable it on Windows */
1008 76753 : if (is_original_file_size_different_than_zero && st->st_size == 0) {
1009 0 : if (!state->opt.force_zero) {
1010 : /* LCOV_EXCL_START */
1011 : log_fatal(ESOFT, "The file '%s%s' has unexpected zero size!\n", disk->dir, sub);
1012 : log_fatal(ESOFT, "It's possible that after a kernel crash this file was lost,\n");
1013 : log_fatal(ESOFT, "and you can use 'snapraid fix -f /%s' to recover it.\n", fmt_poll(disk, sub, esc_buffer));
1014 : if (!is_diff) {
1015 : log_fatal(ESOFT, "If this an expected condition you can '%s' anyway using 'snapraid --force-zero %s'\n", state->command, state->command);
1016 : exit(EXIT_FAILURE);
1017 : }
1018 : /* LCOV_EXCL_STOP */
1019 : }
1020 : }
1021 : #else
1022 : /* avoid the unused warning in Windows */
1023 : (void)is_original_file_size_different_than_zero;
1024 : #endif
1025 :
1026 : /* insert it */
1027 76753 : file = file_alloc(state->block_size, sub, st->st_size, st->st_mtime, STAT_NSEC(st), st->st_ino, physical);
1028 :
1029 : /* mark it as present */
1030 76753 : file_flag_set(file, FILE_IS_PRESENT);
1031 :
1032 : /* if copy detection is enabled */
1033 : /* note that the copy detection is tried also for updated files */
1034 : /* this makes sense because it may happen to have two different copies */
1035 : /* of the same file, and we move the right one over the wrong one */
1036 : /* in such case we have a "copy" over an "update" */
1037 76753 : if (!state->opt.force_nocopy) {
1038 76753 : tommy_uint32_t hash = file_stamp_hash(file->size, file->mtime_sec, file->mtime_nsec);
1039 :
1040 : /* search for a file with the same name and stamp in all the disks */
1041 410169 : for (i = state->disklist; i != 0; i = i->next) {
1042 356405 : struct snapraid_disk* other_disk = i->data;
1043 : struct snapraid_file* other_file;
1044 :
1045 356405 : stamp_lock(other_disk);
1046 : /* if the nanosecond part of the time stamp is valid, search */
1047 : /* for name and stamp, otherwise for path and stamp */
1048 356405 : if (file->mtime_nsec != 0 && file->mtime_nsec != STAT_NSEC_INVALID)
1049 356297 : other_file = tommy_hashdyn_search(&other_disk->stampset, file_namestamp_compare, file, hash);
1050 : else
1051 108 : other_file = tommy_hashdyn_search(&other_disk->stampset, file_pathstamp_compare, file, hash);
1052 356405 : stamp_unlock(other_disk);
1053 :
1054 : /* if found, and it's a fully hashed file */
1055 356405 : if (other_file && file_is_full_hashed_and_stable(scan->state, other_disk, other_file)) {
1056 : char path_other[PATH_MAX];
1057 : struct stat other_st;
1058 :
1059 : /*
1060 : * Protect the write as multiple threads may write the same FILE_IS_RELOCATED bit.
1061 : *
1062 : * The bit is always written as 1 and never read, so protection is likely unnecessary
1063 : * but still valuable to avoid data race reports from checker tools
1064 : */
1065 22989 : stamp_lock(other_disk);
1066 22989 : file_flag_set(other_file, FILE_IS_RELOCATED);
1067 22989 : stamp_unlock(other_disk);
1068 :
1069 : /* assume that the file is a copy, and reuse the hash */
1070 22989 : file_copy(other_file, file);
1071 :
1072 : /* check if other file still exists */
1073 22989 : pathprint(path_other, sizeof(path_other), "%s%s", other_disk->dir, other_file->sub);
1074 22989 : if (lstat(path_other, &other_st) == 0) {
1075 15975 : ++scan->count_copy;
1076 :
1077 15975 : log_tag("scan:copy:%s:%s:%s:%s\n", other_disk->name, esc_tag(other_file->sub, esc_buffer), disk->name, esc_tag(file->sub, esc_buffer_alt));
1078 15975 : if (is_diff) {
1079 0 : msg_info("copy %s -> %s\n", fmt_term(other_disk, other_file->sub, esc_buffer), fmt_term(disk, file->sub, esc_buffer_alt));
1080 : }
1081 : } else {
1082 7014 : ++scan->count_relocate;
1083 :
1084 7014 : log_tag("scan:relocate:%s:%s:%s:%s\n", other_disk->name, esc_tag(other_file->sub, esc_buffer), disk->name, esc_tag(file->sub, esc_buffer_alt));
1085 7014 : if (is_diff) {
1086 31 : msg_info("relocate %s -> %s\n", fmt_term(other_disk, other_file->sub, esc_buffer), fmt_term(disk, file->sub, esc_buffer_alt));
1087 : }
1088 : }
1089 :
1090 : /* mark it as reported */
1091 22989 : is_file_reported = 1;
1092 :
1093 : /* no need to continue the search */
1094 22989 : break;
1095 : }
1096 : }
1097 : }
1098 :
1099 : /* if not yet reported, do it now */
1100 : /* we postpone this to avoid to print two times the copied files */
1101 76753 : if (!is_file_reported) {
1102 53764 : if (is_file_already_present) {
1103 2846 : ++scan->count_change;
1104 :
1105 2846 : log_tag("scan:update:%s:%s: %" PRIu64 " %" PRIu64 ".%d -> %" PRIu64 " %" PRIu64 ".%d\n", disk->name, esc_tag(sub, esc_buffer),
1106 : file_already_present_size, file_already_present_mtime_sec, file_already_present_mtime_nsec,
1107 : file->size, file->mtime_sec, file->mtime_nsec
1108 : );
1109 :
1110 2846 : if (is_diff) {
1111 118 : msg_info("update %s\n", fmt_term(disk, sub, esc_buffer));
1112 : }
1113 : } else {
1114 50918 : ++scan->count_insert;
1115 :
1116 50918 : log_tag("scan:add:%s:%s\n", disk->name, esc_tag(sub, esc_buffer));
1117 50918 : if (is_diff) {
1118 17054 : msg_info("add %s\n", fmt_term(disk, sub, esc_buffer));
1119 : }
1120 : }
1121 : }
1122 :
1123 : /* insert the file in the delayed list */
1124 76753 : scan_file_insert(scan, file);
1125 : }
1126 :
1127 : /**
1128 : * Remove the specified dir from the data set.
1129 : */
1130 2 : static void scan_emptydir_remove(struct snapraid_scan* scan, struct snapraid_dir* dir)
1131 : {
1132 2 : struct snapraid_disk* disk = scan->disk;
1133 :
1134 : /* state changed */
1135 2 : scan->need_write = 1;
1136 :
1137 : /* remove the file from the dir containers */
1138 2 : tommy_hashdyn_remove_existing(&disk->dirset, &dir->nodeset);
1139 2 : tommy_list_remove_existing(&disk->dirlist, &dir->nodelist);
1140 :
1141 : /* deallocate */
1142 2 : dir_free(dir);
1143 2 : }
1144 :
1145 : /**
1146 : * Insert the specified dir in the data set.
1147 : */
1148 10 : static void scan_emptydir_insert(struct snapraid_scan* scan, struct snapraid_dir* dir)
1149 : {
1150 10 : struct snapraid_disk* disk = scan->disk;
1151 :
1152 : /* state changed */
1153 10 : scan->need_write = 1;
1154 :
1155 : /* insert the dir in the dir containers */
1156 10 : tommy_hashdyn_insert(&disk->dirset, &dir->nodeset, dir, dir_name_hash(dir->sub));
1157 10 : tommy_list_insert_tail(&disk->dirlist, &dir->nodelist, dir);
1158 10 : }
1159 :
1160 : /**
1161 : * Process a dir.
1162 : */
1163 295 : static void scan_emptydir(struct snapraid_scan* scan, const char* sub)
1164 : {
1165 295 : struct snapraid_disk* disk = scan->disk;
1166 : struct snapraid_dir* dir;
1167 :
1168 : /* check if the dir already exists */
1169 295 : dir = tommy_hashdyn_search(&disk->dirset, dir_name_compare, sub, dir_name_hash(sub));
1170 295 : if (dir) {
1171 : /* check if multiple files have the same name */
1172 285 : if (dir_flag_has(dir, FILE_IS_PRESENT)) {
1173 : /* LCOV_EXCL_START */
1174 : log_fatal(EINTERNAL, "Internal inconsistency for dir '%s%s'\n", disk->dir, sub);
1175 : os_abort();
1176 : /* LCOV_EXCL_STOP */
1177 : }
1178 :
1179 : /* mark as present */
1180 285 : dir_flag_set(dir, FILE_IS_PRESENT);
1181 :
1182 : /* nothing more to do */
1183 285 : return;
1184 : } else {
1185 : /* and continue to insert it */
1186 : }
1187 :
1188 : /* insert it */
1189 10 : dir = dir_alloc(sub);
1190 :
1191 : /* mark it as present */
1192 10 : dir_flag_set(dir, FILE_IS_PRESENT);
1193 :
1194 : /* insert it in the delayed insert list */
1195 10 : tommy_list_insert_tail(&scan->dir_insert_list, &dir->nodelist, dir);
1196 : }
1197 :
/**
 * Private copy of a directory entry.
 *
 * The entries read from a directory are copied into this structure and
 * chained in a list, which may be sorted before the entries are processed.
 * The fields other than the name exist only if the platform provides them.
 */
struct dirent_sorted {
	/* node for data structures */
	tommy_node node;

#if HAVE_STRUCT_DIRENT_D_INO
	uint64_t d_ino; /**< Inode number. */
#endif
#if HAVE_STRUCT_DIRENT_D_TYPE
	uint32_t d_type; /**< File type. */
#endif
#if HAVE_STRUCT_DIRENT_D_STAT
	struct stat d_stat; /**< Stat result. */
#endif
	char d_name[]; /**< Variable length name. It must be the last field. */
};
1213 :
#if HAVE_STRUCT_DIRENT_D_INO
/**
 * Comparison callback ordering two dir entries by ascending inode number.
 * Return -1, 0 or 1 if the first inode is lower, equal or greater than the second.
 */
static int dd_ino_compare(const void* void_a, const void* void_b)
{
	const struct dirent_sorted* lhs = void_a;
	const struct dirent_sorted* rhs = void_b;

	/* each comparison yields 0 or 1, so the difference is the three-way result */
	return (lhs->d_ino > rhs->d_ino) - (lhs->d_ino < rhs->d_ino);
}
#endif
1228 :
1229 12667783 : static int dd_name_compare(const void* void_a, const void* void_b)
1230 : {
1231 12667783 : const struct dirent_sorted* a = void_a;
1232 12667783 : const struct dirent_sorted* b = void_b;
1233 :
1234 12667783 : return strcmp(a->d_name, b->d_name);
1235 : }
1236 :
/**
 * Return the stat info of a dir entry.
 *
 * Always use the DSTAT() macro, which selects the proper arguments:
 * - if the stat info is stored in the dir entry, it's returned directly,
 *   and the 'file' and 'buf' arguments are not even evaluated.
 * - otherwise lstat() is called on 'file' storing the result in 'buf',
 *   and the program is terminated if lstat() fails.
 *
 * The helper is used only by this file through DSTAT(), so it has
 * internal linkage.
 */
#if HAVE_STRUCT_DIRENT_D_STAT
#define DSTAT(file, dd, buf) dstat(dd)
static struct stat* dstat(struct dirent_sorted* dd)
{
	return &dd->d_stat;
}
#else
#define DSTAT(file, dd, buf) dstat(file, buf)
static struct stat* dstat(const char* file, struct stat* st)
{
	if (lstat(file, st) != 0) {
		/* LCOV_EXCL_START */
		log_fatal(errno, "Error in stat file/directory '%s'. %s.\n", file, strerror(errno));
		exit(EXIT_FAILURE);
		/* LCOV_EXCL_STOP */
	}
	return st;
}
#endif
1259 :
1260 : /**
1261 : * Process a directory.
1262 : * Return != 0 if at least one file or link is processed.
1263 : */
1264 2053 : static int scan_sub(struct snapraid_scan* scan, int level, int is_diff, char* path_next, char* sub_next, char* tmp)
1265 : {
1266 2053 : struct snapraid_state* state = scan->state;
1267 2053 : struct snapraid_disk* disk = scan->disk;
1268 2053 : int processed = 0;
1269 : DIR* d;
1270 : tommy_list list;
1271 : tommy_node* node;
1272 : size_t path_len;
1273 : size_t sub_len;
1274 :
1275 2053 : path_len = strlen(path_next);
1276 2053 : sub_len = strlen(sub_next);
1277 :
1278 2053 : tommy_list_init(&list);
1279 :
1280 2053 : d = opendir(path_next);
1281 2053 : if (!d) {
1282 : /* LCOV_EXCL_START */
1283 : log_fatal(errno, "Error opening directory '%s'. %s.\n", path_next, strerror(errno));
1284 : if (level == 0)
1285 : log_fatal(errno, "If this is the disk mount point, remember to create it manually\n");
1286 : else
1287 : log_fatal(errno, "If it's a permission problem, you can exclude it in the config file with:\n\texclude /%s\n", sub_next);
1288 : exit(EXIT_FAILURE);
1289 : /* LCOV_EXCL_STOP */
1290 : }
1291 :
1292 : /* read the full directory */
1293 1260817 : while (1) {
1294 : struct dirent_sorted* entry;
1295 : const char* name;
1296 : struct dirent* dd;
1297 : size_t name_len;
1298 :
1299 : /*
1300 : * Clear errno to differentiate the end of the stream and an error condition
1301 : *
1302 : * From the Linux readdir() manpage:
1303 : * "If the end of the directory stream is reached, NULL is returned and errno is not changed.
1304 : * If an error occurs, NULL is returned and errno is set appropriately."
1305 : */
1306 1262870 : errno = 0;
1307 1262870 : dd = readdir(d);
1308 1262870 : if (dd == 0 && errno != 0) {
1309 : /* LCOV_EXCL_START */
1310 : /* restore removing additions */
1311 : path_next[path_len] = 0;
1312 : sub_next[sub_len] = 0;
1313 : log_fatal(errno, "Error reading directory '%s'. %s.\n", path_next, strerror(errno));
1314 : log_fatal(errno, "You can exclude it in the config file with:\n\texclude /%s\n", sub_next);
1315 : exit(EXIT_FAILURE);
1316 : /* LCOV_EXCL_STOP */
1317 : }
1318 1262870 : if (dd == 0) {
1319 2053 : break; /* finished */
1320 : }
1321 :
1322 : /* skip "." and ".." files */
1323 1260817 : name = dd->d_name;
1324 1260817 : if (name[0] == '.' && (name[1] == 0 || (name[1] == '.' && name[2] == 0)))
1325 4106 : continue;
1326 :
1327 1256711 : pathcatl(path_next, path_len, PATH_MAX, name);
1328 :
1329 : /* check for not supported file names */
1330 1256711 : if (name[0] == 0) {
1331 : /* LCOV_EXCL_START */
1332 : log_fatal(ESOFT, "Unsupported name '%s' in file '%s'.\n", name, path_next);
1333 : exit(EXIT_FAILURE);
1334 : /* LCOV_EXCL_STOP */
1335 : }
1336 :
1337 : /* exclude hidden files even before calling lstat() */
1338 1256711 : if (filter_hidden(state->filter_hidden, dd) != 0) {
1339 0 : msg_verbose("Excluding hidden '%s'\n", path_next);
1340 0 : continue;
1341 : }
1342 :
1343 : /* exclude content files even before calling lstat() */
1344 1256711 : if (filter_content(&state->contentlist, path_next) != 0) {
1345 0 : msg_verbose("Excluding content '%s'\n", path_next);
1346 0 : continue;
1347 : }
1348 :
1349 1256711 : name_len = strlen(dd->d_name);
1350 1256711 : entry = malloc_nofail(sizeof(struct dirent_sorted) + name_len + 1);
1351 :
1352 : /* copy the dir entry */
1353 : #if HAVE_STRUCT_DIRENT_D_INO
1354 1256711 : entry->d_ino = dd->d_ino;
1355 : #endif
1356 : #if HAVE_STRUCT_DIRENT_D_TYPE
1357 1256711 : entry->d_type = dd->d_type;
1358 : #endif
1359 : #if HAVE_STRUCT_DIRENT_D_STAT
1360 : /* convert dirent to lstat result */
1361 : dirent_lstat(dd, &entry->d_stat);
1362 :
1363 : /* note that at this point the st_mode may be 0 */
1364 : #endif
1365 1256711 : memcpy(entry->d_name, dd->d_name, name_len + 1);
1366 :
1367 : /* insert in the list */
1368 1256711 : tommy_list_insert_tail(&list, &entry->node, entry);
1369 :
1370 : /* process ignore files */
1371 1256711 : if (strcmp(".snapraidignore", dd->d_name) == 0)
1372 66 : state_load_ignore_file(&scan->local_filter_list, path_next, sub_next);
1373 : }
1374 :
1375 2053 : if (closedir(d) != 0) {
1376 : /* LCOV_EXCL_START */
1377 : /* restore removing additions */
1378 : path_next[path_len] = 0;
1379 : log_fatal(errno, "Error closing directory '%s'. %s.\n", path_next, strerror(errno));
1380 : exit(EXIT_FAILURE);
1381 : /* LCOV_EXCL_STOP */
1382 : }
1383 :
1384 2053 : if (state->opt.force_order == SORT_ALPHA) {
1385 : /* if requested sort alphabetically */
1386 : /* this is mainly done for testing to ensure to always */
1387 : /* process in the same way in different platforms */
1388 2017 : tommy_list_sort(&list, dd_name_compare);
1389 : }
1390 : #if HAVE_STRUCT_DIRENT_D_INO
1391 36 : else if (!disk->has_volatile_inodes) {
1392 : /* if inodes are persistent */
1393 : /* sort the list of dir entries by inodes */
1394 36 : tommy_list_sort(&list, dd_ino_compare);
1395 : }
1396 : /* otherwise just keep the insertion order */
1397 : #endif
1398 :
1399 : /* process the sorted dir entries */
1400 2053 : node = list;
1401 1258764 : while (node != 0) {
1402 1256711 : struct snapraid_filter* reason = 0;
1403 1256711 : struct dirent_sorted* dd = node->data;
1404 1256711 : const char* name = dd->d_name;
1405 : struct stat* st;
1406 : int type;
1407 : #if !HAVE_STRUCT_DIRENT_D_STAT
1408 : struct stat st_buf;
1409 : #endif
1410 :
1411 1256711 : pathcatl(path_next, path_len, PATH_MAX, name);
1412 1256711 : pathcatl(sub_next, sub_len, PATH_MAX, name);
1413 :
1414 : /* start with an unknown type */
1415 1256711 : type = -1;
1416 1256711 : st = 0;
1417 :
1418 : /* if dirent has the type, use it */
1419 : #if HAVE_STRUCT_DIRENT_D_TYPE
1420 1256711 : switch (dd->d_type) {
1421 0 : case DT_UNKNOWN : break;
1422 1215751 : case DT_REG : type = 0; break;
1423 39536 : case DT_LNK : type = 1; break;
1424 1424 : case DT_DIR : type = 2; break;
1425 0 : default : type = 3; break;
1426 : }
1427 : #endif
1428 :
1429 : /* if type is still unknown */
1430 1256711 : if (type < 0) {
1431 : /* get the type from stat */
1432 0 : st = DSTAT(path_next, dd, &st_buf);
1433 :
1434 : #if HAVE_STRUCT_DIRENT_D_STAT
1435 : /* if the st_mode field is missing, takes care to fill it using normal lstat() */
1436 : /* at now this can happen only in Windows (with HAVE_STRUCT_DIRENT_D_STAT defined), */
1437 : /* because we use a directory reading method that doesn't read info about ReparsePoint. */
1438 : /* Note that here we cannot call here lstat_sync(), because we don't know what kind */
1439 : /* of file is it, and lstat_sync() doesn't always work */
1440 : if (st->st_mode == 0) {
1441 : if (lstat(path_next, st) != 0) {
1442 : /* LCOV_EXCL_START */
1443 : log_fatal(errno, "Error in stat file/directory '%s'. %s.\n", path_next, strerror(errno));
1444 : exit(EXIT_FAILURE);
1445 : /* LCOV_EXCL_STOP */
1446 : }
1447 : }
1448 : #endif
1449 :
1450 0 : if (S_ISREG(st->st_mode))
1451 0 : type = 0;
1452 0 : else if (S_ISLNK(st->st_mode))
1453 0 : type = 1;
1454 0 : else if (S_ISDIR(st->st_mode))
1455 0 : type = 2;
1456 : else
1457 0 : type = 3;
1458 : }
1459 :
1460 1256711 : if (type == 0) { /* REG */
1461 1215751 : if (filter_path(&state->filterlist, &reason, disk->name, sub_next) == 0
1462 1215751 : && filter_path(&scan->local_filter_list, &reason, disk->name, sub_next) == 0) {
1463 :
1464 : /* late stat, if not yet called */
1465 1215619 : if (!st)
1466 1215619 : st = DSTAT(path_next, dd, &st_buf);
1467 :
1468 : #if HAVE_LSTAT_SYNC
1469 : /* if the st_ino field is missing, takes care to fill it using the extended lstat() */
1470 : /* this can happen only in Windows */
1471 : if (st->st_ino == 0 || st->st_nlink == 0) {
1472 : if (lstat_sync(path_next, st, 0) != 0) {
1473 : /* LCOV_EXCL_START */
1474 : log_fatal(errno, "Error in stat file '%s'. %s.\n", path_next, strerror(errno));
1475 : exit(EXIT_FAILURE);
1476 : /* LCOV_EXCL_STOP */
1477 : }
1478 : }
1479 : #endif
1480 :
1481 1215619 : scan_file(scan, is_diff, sub_next, st, FILEPHY_UNREAD_OFFSET);
1482 1215619 : processed = 1;
1483 : } else {
1484 132 : msg_verbose("Excluding file '%s' for rule '%s'\n", path_next, filter_type(reason, tmp, PATH_MAX));
1485 : }
1486 40960 : } else if (type == 1) { /* LNK */
1487 39536 : if (filter_path(&state->filterlist, &reason, disk->name, sub_next) == 0
1488 39536 : && filter_path(&scan->local_filter_list, &reason, disk->name, sub_next) == 0) {
1489 : int ret;
1490 :
1491 39534 : ret = readlink(path_next, tmp, PATH_MAX);
1492 39534 : if (ret >= PATH_MAX) {
1493 : /* LCOV_EXCL_START */
1494 : log_fatal(EINTERNAL, "Error in readlink file '%s'. Symlink too long.\n", path_next);
1495 : exit(EXIT_FAILURE);
1496 : /* LCOV_EXCL_STOP */
1497 : }
1498 39534 : if (ret < 0) {
1499 : /* LCOV_EXCL_START */
1500 : log_fatal(errno, "Error in readlink file '%s'. %s.\n", path_next, strerror(errno));
1501 : exit(EXIT_FAILURE);
1502 : /* LCOV_EXCL_STOP */
1503 : }
1504 39534 : if (ret == 0)
1505 0 : log_fatal(ESOFT, "WARNING! Empty symbolic link '%s'.\n", path_next);
1506 :
1507 : /* readlink doesn't put the final 0 */
1508 39534 : tmp[ret] = 0;
1509 :
1510 : /* process as a symbolic link */
1511 39534 : scan_link(scan, is_diff, sub_next, tmp, FILE_IS_SYMLINK);
1512 39534 : processed = 1;
1513 : } else {
1514 2 : msg_verbose("Excluding link '%s' for rule '%s'\n", path_next, filter_type(reason, tmp, PATH_MAX));
1515 : }
1516 1424 : } else if (type == 2) { /* DIR */
1517 1424 : if (filter_subdir(&state->filterlist, &reason, disk->name, sub_next) == 0
1518 1424 : && filter_subdir(&scan->local_filter_list, &reason, disk->name, sub_next) == 0) {
1519 : #ifndef _WIN32
1520 : /* late stat, if not yet called */
1521 1424 : if (!st)
1522 1424 : st = DSTAT(path_next, dd, &st_buf);
1523 :
1524 : /* in Unix don't follow mount points in different devices */
1525 : /* in Windows we are already skipping them reporting them as special files */
1526 1424 : if ((uint64_t)st->st_dev != disk->device) {
1527 0 : log_fatal(ESOFT, "WARNING! Ignoring mount point '%s' because it appears to be in a different device\n", path_next);
1528 : } else
1529 : #endif
1530 : {
1531 : /* recurse */
1532 1424 : pathslash(path_next, PATH_MAX);
1533 1424 : pathslash(sub_next, PATH_MAX);
1534 1424 : if (scan_sub(scan, level + 1, is_diff, path_next, sub_next, tmp) == 0) {
1535 : /* restore removing additions */
1536 295 : pathcatl(sub_next, sub_len, PATH_MAX, name);
1537 : /* scan the directory as empty dir */
1538 295 : scan_emptydir(scan, sub_next);
1539 : }
1540 : /* or we processed something internally, or we have added the empty dir */
1541 1424 : processed = 1;
1542 : }
1543 : } else {
1544 0 : msg_verbose("Excluding directory '%s' for rule '%s'\n", path_next, filter_type(reason, tmp, PATH_MAX));
1545 : }
1546 : } else {
1547 0 : if (filter_path(&state->filterlist, &reason, disk->name, sub_next) == 0
1548 0 : && filter_path(&scan->local_filter_list, &reason, disk->name, sub_next) == 0) {
1549 : /* late stat, if not yet called */
1550 0 : if (!st)
1551 0 : st = DSTAT(path_next, dd, &st_buf);
1552 :
1553 0 : log_fatal(ESOFT, "WARNING! Ignoring special '%s' file '%s'\n", stat_desc(st), path_next);
1554 : } else {
1555 0 : msg_verbose("Excluding special file '%s' for rule '%s'\n", path_next, filter_type(reason, tmp, PATH_MAX));
1556 : }
1557 : }
1558 :
1559 : /* next entry */
1560 1256711 : node = node->next;
1561 :
1562 : /* free the present one */
1563 1256711 : free(dd);
1564 : }
1565 :
1566 2053 : return processed;
1567 : }
1568 :
1569 : /**
1570 : * Process a directory.
1571 : * Return != 0 if at least one file or link is processed.
1572 : */
1573 629 : static int scan_dir(struct snapraid_scan* scan, int level, int is_diff, const char* dir, const char* sub)
1574 : {
1575 : /* working buffers used by scan_sub() */
1576 : char path_next[PATH_MAX];
1577 : char sub_next[PATH_MAX];
1578 : char tmp[PATH_MAX];
1579 :
1580 629 : pathcpy(path_next, sizeof(path_next), dir);
1581 629 : pathcpy(sub_next, sizeof(sub_next), sub);
1582 :
1583 629 : return scan_sub(scan, level, is_diff, path_next, sub_next, tmp);
1584 : }
1585 :
1586 629 : static void* scan_disk(void* arg)
1587 : {
1588 629 : struct snapraid_scan* scan = arg;
1589 629 : struct snapraid_disk* disk = scan->disk;
1590 : int ret;
1591 : int has_persistent_inodes;
1592 : int has_syncronized_hardlinks;
1593 : uint64_t start;
1594 :
1595 : /* check if the disk supports persistent inodes */
1596 629 : ret = fsinfo(disk->dir, &has_persistent_inodes, &has_syncronized_hardlinks, 0, 0, 0, 0, 0, 0);
1597 629 : if (ret < 0) {
1598 : /* LCOV_EXCL_START */
1599 : log_fatal(errno, "Error accessing disk '%s' to get file-system info. %s.\n", disk->dir, strerror(errno));
1600 : exit(EXIT_FAILURE);
1601 : /* LCOV_EXCL_STOP */
1602 : }
1603 629 : if (!has_persistent_inodes) {
1604 0 : disk->has_volatile_inodes = 1;
1605 : }
1606 629 : if (!has_syncronized_hardlinks) {
1607 0 : disk->has_volatile_hardlinks = 1;
1608 : }
1609 :
1610 : /* if inodes or UUID are not persistent/changed/unsupported */
1611 629 : if (disk->has_volatile_inodes || disk->has_different_uuid || disk->has_unsupported_uuid) {
1612 : /* remove all the inodes from the inode collection */
1613 : /* if they are not persistent, all of them could be changed now */
1614 : /* and we don't want to find false matching ones */
1615 : /* see scan_file() for more details */
1616 94 : tommy_node* node = disk->filelist;
1617 108 : while (node) {
1618 14 : struct snapraid_file* file = node->data;
1619 :
1620 14 : node = node->next;
1621 :
1622 : /* remove from the inode set */
1623 14 : tommy_hashdyn_remove_existing(&disk->inodeset, &file->nodeset);
1624 :
1625 : /* clear the inode */
1626 14 : file->inode = 0;
1627 :
1628 : /* mark as missing inode */
1629 14 : file_flag_set(file, FILE_IS_WITHOUT_INODE);
1630 : }
1631 : }
1632 :
1633 629 : start = os_tick_ms();
1634 :
1635 629 : scan_dir(scan, 0, scan->is_diff, disk->dir, "");
1636 :
1637 629 : if (!scan->is_diff)
1638 581 : msg_progress("Scanned %s in %" PRIu64 " seconds\n", disk->name, (os_tick_ms() - start) / 1000);
1639 :
1640 629 : return 0;
1641 : }
1642 :
1643 105 : static int state_diffscan(struct snapraid_state* state, int is_diff)
1644 : {
1645 : tommy_node* i;
1646 : tommy_node* j;
1647 : tommy_list scanlist;
1648 : int done;
1649 : msg_ptr* msg;
1650 : struct snapraid_scan total;
1651 : int no_difference;
1652 : char esc_buffer[ESC_MAX];
1653 :
1654 105 : tommy_list_init(&scanlist);
1655 :
1656 105 : if (is_diff)
1657 8 : msg_progress("Comparing...\n");
1658 : else
1659 97 : msg_progress("Scanning...\n");
1660 :
1661 105 : log_tag("list:scan_begin\n");
1662 :
1663 : /* allocate all the scan data */
1664 734 : for (i = state->disklist; i != 0; i = i->next) {
1665 629 : struct snapraid_disk* disk = i->data;
1666 : struct snapraid_scan* scan;
1667 :
1668 629 : scan = scan_alloc(state, disk, is_diff);
1669 :
1670 629 : tommy_list_insert_tail(&scanlist, &scan->node, scan);
1671 : }
1672 :
1673 : /* first scan all the directory and find new and deleted files */
1674 734 : for (i = scanlist; i != 0; i = i->next) {
1675 629 : struct snapraid_scan* scan = i->data;
1676 : #if HAVE_THREAD
1677 629 : if (state->opt.skip_multi_scan)
1678 0 : scan_disk(scan);
1679 : else
1680 629 : thread_create(&scan->thread, scan_disk, scan);
1681 : #else
1682 : scan_disk(scan);
1683 : #endif
1684 : }
1685 :
1686 : #if HAVE_THREAD
1687 : /* wait for all threads to terminate */
1688 734 : for (i = scanlist; i != 0; i = i->next) {
1689 629 : struct snapraid_scan* scan = i->data;
1690 : void* retval;
1691 :
1692 : /* wait for thread termination */
1693 629 : if (!state->opt.skip_multi_scan)
1694 629 : thread_join(scan->thread, &retval);
1695 : }
1696 : #endif
1697 :
1698 : /* we split the search in two phases because to detect files */
1699 : /* moved from one disk to another we have to start deletion */
1700 : /* only when all disks have all the new files found */
1701 :
1702 : /* now process all the new and deleted files */
1703 734 : for (i = scanlist; i != 0; i = i->next) {
1704 629 : struct snapraid_scan* scan = i->data;
1705 629 : struct snapraid_disk* disk = scan->disk;
1706 : tommy_node* node;
1707 : unsigned phy_dup;
1708 : uint64_t phy_last;
1709 : struct snapraid_file* phy_file_last;
1710 :
1711 : /* check for removed files */
1712 629 : node = disk->filelist;
1713 1145725 : while (node) {
1714 1145096 : struct snapraid_file* file = node->data;
1715 :
1716 : /* next node */
1717 1145096 : node = node->next;
1718 :
1719 : /* remove if not present */
1720 1145096 : if (!file_flag_has(file, FILE_IS_PRESENT)) {
1721 29089 : if (!file_flag_has(file, FILE_IS_RELOCATED)) {
1722 22075 : ++scan->count_remove;
1723 :
1724 22075 : log_tag("scan:remove:%s:%s\n", disk->name, esc_tag(file->sub, esc_buffer));
1725 22075 : if (is_diff) {
1726 66 : msg_info("remove %s\n", fmt_term(disk, file->sub, esc_buffer));
1727 : }
1728 : }
1729 :
1730 29089 : scan_file_remove(scan, file);
1731 : }
1732 : }
1733 :
1734 : /* check for removed links */
1735 629 : node = disk->linklist;
1736 38891 : while (node) {
1737 38262 : struct snapraid_link* slink = node->data;
1738 :
1739 : /* next node */
1740 38262 : node = node->next;
1741 :
1742 : /* remove if not present */
1743 38262 : if (!link_flag_has(slink, FILE_IS_PRESENT)) {
1744 955 : ++scan->count_remove;
1745 :
1746 955 : log_tag("scan:remove:%s:%s\n", disk->name, esc_tag(slink->sub, esc_buffer));
1747 955 : if (is_diff) {
1748 10 : msg_info("remove %s\n", fmt_term(disk, slink->sub, esc_buffer));
1749 : }
1750 :
1751 955 : scan_link_remove(scan, slink);
1752 : }
1753 : }
1754 :
1755 : /* check for removed dirs */
1756 629 : node = disk->dirlist;
1757 916 : while (node) {
1758 287 : struct snapraid_dir* dir = node->data;
1759 :
1760 : /* next node */
1761 287 : node = node->next;
1762 :
1763 : /* remove if not present */
1764 287 : if (!dir_flag_has(dir, FILE_IS_PRESENT)) {
1765 2 : scan_emptydir_remove(scan, dir);
1766 : }
1767 : }
1768 :
1769 : /* sort the files before inserting them */
1770 : /* we use a stable sort to ensure that if the reported physical offset/inode */
1771 : /* are always 0, we keep at least the directory order */
1772 629 : switch (state->opt.force_order) {
1773 12 : case SORT_PHYSICAL :
1774 12 : tommy_list_sort(&scan->file_insert_list, file_physical_compare);
1775 12 : break;
1776 0 : case SORT_INODE :
1777 0 : tommy_list_sort(&scan->file_insert_list, file_inode_compare);
1778 0 : break;
1779 617 : case SORT_ALPHA :
1780 617 : tommy_list_sort(&scan->file_insert_list, file_path_compare);
1781 617 : break;
1782 0 : case SORT_DIR :
1783 : /* already in order */
1784 0 : break;
1785 : }
1786 :
1787 : /* insert all the new files, we insert them only after the deletion */
1788 : /* to reuse the just freed space */
1789 : /* also check if the physical offset reported are fakes or not */
1790 629 : node = scan->file_insert_list;
1791 629 : phy_dup = 0;
1792 629 : phy_last = FILEPHY_UNREAD_OFFSET;
1793 629 : phy_file_last = 0;
1794 100019 : while (node) {
1795 99390 : struct snapraid_file* file = node->data;
1796 :
1797 : /* if the file is not empty, count duplicate physical offsets */
1798 99390 : if (state->opt.force_order == SORT_PHYSICAL && file->size != 0) {
1799 11418 : if (phy_file_last != 0 && file->physical == phy_last
1800 : /* files without offset are expected to have duplicates */
1801 0 : && phy_last != FILEPHY_WITHOUT_OFFSET
1802 : ) {
1803 : /* if verbose, print the list of duplicates real offsets */
1804 : /* other cases are for offsets not supported, so we don't need to report them file by file */
1805 0 : if (phy_last >= FILEPHY_REAL_OFFSET) {
1806 0 : log_fatal(ESOFT, "WARNING! Files '%s%s' and '%s%s' share the same physical offset %" PRId64 ".\n", disk->dir, phy_file_last->sub, disk->dir, file->sub, phy_last);
1807 : }
1808 0 : ++phy_dup;
1809 : }
1810 11418 : phy_file_last = file;
1811 11418 : phy_last = file->physical;
1812 : }
1813 :
1814 : /* next node */
1815 99390 : node = node->next;
1816 :
1817 : /* insert in the parity */
1818 99390 : scan_file_allocate(scan, file);
1819 : }
1820 :
1821 : /* mark the disk without reliable physical offset if it has duplicates */
1822 : /* here it should never happen because we already sorted out hardlinks */
1823 629 : if (state->opt.force_order == SORT_PHYSICAL && phy_dup > 0) {
1824 0 : disk->has_unreliable_physical = 1;
1825 : }
1826 :
1827 : /* insert all the new links */
1828 629 : node = scan->link_insert_list;
1829 3078 : while (node) {
1830 2449 : struct snapraid_link* slink = node->data;
1831 :
1832 : /* next node */
1833 2449 : node = node->next;
1834 :
1835 : /* insert it */
1836 2449 : scan_link_insert(scan, slink);
1837 : }
1838 :
1839 : /* insert all the new dirs */
1840 629 : node = scan->dir_insert_list;
1841 639 : while (node) {
1842 10 : struct snapraid_dir* dir = node->data;
1843 :
1844 : /* next node */
1845 10 : node = node->next;
1846 :
1847 : /* insert it */
1848 10 : scan_emptydir_insert(scan, dir);
1849 : }
1850 : }
1851 :
1852 : /* propagate the state change (after all the scan operations are called) */
1853 734 : for (i = scanlist; i != 0; i = i->next) {
1854 629 : struct snapraid_scan* scan = i->data;
1855 629 : if (scan->need_write) {
1856 218 : state->need_write = 1;
1857 : }
1858 : }
1859 :
1860 : /* check for disks where all the previously existing files where removed */
1861 105 : if (!state->opt.force_empty) {
1862 99 : int all_missing = 0;
1863 99 : int all_rewritten = 0;
1864 99 : done = 0;
1865 692 : for (i = state->disklist, j = scanlist; i != 0; i = i->next, j = j->next) {
1866 593 : struct snapraid_disk* disk = i->data;
1867 593 : struct snapraid_scan* scan = j->data;
1868 :
1869 593 : if (scan->count_equal == 0
1870 74 : && scan->count_move == 0
1871 74 : && scan->count_restore == 0
1872 74 : && (scan->count_remove != 0 || scan->count_change != 0)
1873 : ) {
1874 1 : if (!done) {
1875 1 : done = 1;
1876 1 : log_fatal(ESOFT, "WARNING! All the files previously present in disk '%s' at dir '%s'", disk->name, disk->dir);
1877 : } else {
1878 0 : log_fatal(ESOFT, ", disk '%s' at dir '%s'", disk->name, disk->dir);
1879 : }
1880 :
1881 : /* detect the special condition of all files missing */
1882 1 : if (scan->count_change == 0)
1883 1 : all_missing = 1;
1884 :
1885 : /* detect the special condition of all files rewritten */
1886 1 : if (scan->count_remove == 0)
1887 0 : all_rewritten = 1;
1888 : }
1889 : }
1890 99 : if (done) {
1891 1 : log_fatal(ESOFT, "\nare now missing or have been rewritten!\n");
1892 1 : if (all_rewritten) {
1893 0 : log_fatal(ESOFT, "This could occur when restoring a disk from a backup\n");
1894 0 : log_fatal(ESOFT, "program that is not setting correctly the timestamps.\n");
1895 : }
1896 1 : if (all_missing) {
1897 1 : log_fatal(ESOFT, "This could occur when some disks are not mounted\n");
1898 1 : log_fatal(ESOFT, "in the expected directory.\n");
1899 : }
1900 1 : if (!is_diff) {
1901 1 : log_fatal(ESOFT, "If you want to '%s' anyway, use 'snapraid --force-empty %s'.\n", state->command, state->command);
1902 1 : exit(EXIT_FAILURE);
1903 : }
1904 : }
1905 : }
1906 :
1907 : /* check for disks without the physical offset support */
1908 104 : if (state->opt.force_order == SORT_PHYSICAL) {
1909 2 : done = 0;
1910 14 : for (i = state->disklist; i != 0; i = i->next) {
1911 12 : struct snapraid_disk* disk = i->data;
1912 :
1913 12 : if (disk->has_unreliable_physical) {
1914 0 : if (!done) {
1915 0 : done = 1;
1916 0 : log_fatal(ESOFT, "WARNING! Physical offsets not supported for disk '%s'", disk->name);
1917 : } else {
1918 0 : log_fatal(ESOFT, ", '%s'", disk->name);
1919 : }
1920 : }
1921 : }
1922 2 : if (done) {
1923 0 : log_fatal(ESOFT, ". The order of files won't be optimal.\n");
1924 : }
1925 : }
1926 :
1927 : /* check for disks without persistent inodes */
1928 104 : done = 0;
1929 727 : for (i = state->disklist; i != 0; i = i->next) {
1930 623 : struct snapraid_disk* disk = i->data;
1931 :
1932 623 : if (disk->has_volatile_inodes) {
1933 0 : if (!done) {
1934 0 : done = 1;
1935 0 : log_fatal(ESOFT, "WARNING! Inodes are not persistent for disks: '%s'", disk->name);
1936 : } else {
1937 0 : log_fatal(ESOFT, ", '%s'", disk->name);
1938 : }
1939 : }
1940 : }
1941 104 : if (done) {
1942 0 : log_fatal(ESOFT, ". Inodes are not used to detect move operations.\n");
1943 : }
1944 :
1945 : /* check for disks with changed UUID */
1946 104 : done = 0;
1947 727 : for (i = state->disklist; i != 0; i = i->next) {
1948 623 : struct snapraid_disk* disk = i->data;
1949 :
1950 : /* don't print the message if the UUID changed because before */
1951 : /* it was no set. */
1952 : /* this is the normal condition for an empty disk because it */
1953 : /* isn't stored */
1954 623 : if (disk->has_different_uuid && !disk->had_empty_uuid) {
1955 4 : if (!done) {
1956 2 : done = 1;
1957 2 : log_fatal(ESOFT, "WARNING! UUID is changed for disks: '%s'", disk->name);
1958 : } else {
1959 2 : log_fatal(ESOFT, ", '%s'", disk->name);
1960 : }
1961 : }
1962 : }
1963 104 : if (done) {
1964 2 : log_fatal(ESOFT, ". Inodes are not used to detect move operations.\n");
1965 : }
1966 :
1967 : /* check for disks with unsupported UUID */
1968 104 : done = 0;
1969 727 : for (i = state->disklist; i != 0; i = i->next) {
1970 623 : struct snapraid_disk* disk = i->data;
1971 :
1972 623 : if (disk->has_unsupported_uuid) {
1973 0 : if (!done) {
1974 0 : done = 1;
1975 0 : log_fatal(ESOFT, "WARNING! UUID is unsupported for disks: '%s'", disk->name);
1976 : } else {
1977 0 : log_fatal(ESOFT, ", '%s'", disk->name);
1978 : }
1979 : }
1980 : }
1981 104 : if (done) {
1982 0 : log_fatal(ESOFT, ". Not using inodes to detect move operations.\n");
1983 : #if defined(_linux) && !HAVE_BLKID
1984 : log_fatal(ESOFT, "The 'blkid' library is not linked in SnapRAID!\n");
1985 : log_fatal(ESOFT, "Try rebuilding it after installing the libblkid-dev or libblkid-devel package.\n");
1986 : #endif
1987 : }
1988 :
1989 104 : total.count_equal = 0;
1990 104 : total.count_move = 0;
1991 104 : total.count_copy = 0;
1992 104 : total.count_relocate = 0;
1993 104 : total.count_restore = 0;
1994 104 : total.count_change = 0;
1995 104 : total.count_remove = 0;
1996 104 : total.count_insert = 0;
1997 :
1998 727 : for (i = scanlist; i != 0; i = i->next) {
1999 623 : struct snapraid_scan* scan = i->data;
2000 623 : total.count_equal += scan->count_equal;
2001 623 : total.count_move += scan->count_move;
2002 623 : total.count_copy += scan->count_copy;
2003 623 : total.count_relocate += scan->count_relocate;
2004 623 : total.count_restore += scan->count_restore;
2005 623 : total.count_change += scan->count_change;
2006 623 : total.count_remove += scan->count_remove;
2007 623 : total.count_insert += scan->count_insert;
2008 : }
2009 :
2010 104 : if (is_diff) {
2011 8 : msg_status("\n");
2012 8 : msg = msg_status;
2013 : } else {
2014 96 : msg = msg_verbose;
2015 : }
2016 :
2017 104 : msg("%8u equal\n", total.count_equal);
2018 104 : msg("%8u added\n", total.count_insert);
2019 104 : msg("%8u removed\n", total.count_remove);
2020 104 : msg("%8u updated\n", total.count_change);
2021 104 : msg("%8u moved\n", total.count_move);
2022 104 : msg("%8u copied\n", total.count_copy);
2023 104 : msg("%8u relocated\n", total.count_relocate);
2024 104 : msg("%8u restored\n", total.count_restore);
2025 :
2026 104 : log_tag("summary:equal:%u\n", total.count_equal);
2027 104 : log_tag("summary:added:%u\n", total.count_insert);
2028 104 : log_tag("summary:removed:%u\n", total.count_remove);
2029 104 : log_tag("summary:updated:%u\n", total.count_change);
2030 104 : log_tag("summary:moved:%u\n", total.count_move);
2031 104 : log_tag("summary:copied:%u\n", total.count_copy);
2032 104 : log_tag("summary:relocated:%u\n", total.count_relocate);
2033 104 : log_tag("summary:restored:%u\n", total.count_restore);
2034 104 : log_tag("list:scan_end\n");
2035 :
2036 : /* save in the state */
2037 104 : state->removed_files = total.count_remove;
2038 104 : state->updated_files = total.count_change;
2039 :
2040 103 : no_difference = !total.count_move && !total.count_copy && !total.count_relocate && !total.count_restore
2041 207 : && !total.count_change && !total.count_remove && !total.count_insert;
2042 :
2043 104 : if (is_diff) {
2044 8 : if (!no_difference) {
2045 6 : msg_status("There are differences!\n");
2046 : } else {
2047 2 : msg_status("No differences\n");
2048 : }
2049 8 : if (state->unsynced_blocks != 0)
2050 1 : log_error(EUSER, "The last sync was interrupted. Run it again!\n");
2051 :
2052 8 : if (state->unsynced_blocks != 0) {
2053 1 : log_tag("summary:exit:unsynced\n");
2054 7 : } else if (!no_difference) {
2055 6 : log_tag("summary:exit:diff\n");
2056 : } else {
2057 1 : log_tag("summary:exit:equal\n");
2058 : }
2059 : }
2060 :
2061 104 : log_flush();
2062 :
2063 104 : tommy_list_foreach(&scanlist, (tommy_foreach_func*)scan_free);
2064 :
2065 : /* check the file-system on all disks */
2066 104 : state_fscheck(state, "after scan");
2067 :
2068 104 : if (is_diff) {
2069 : /* check for file difference */
2070 8 : if (!no_difference)
2071 6 : return 1;
2072 :
2073 : /* check also for incomplete "sync" */
2074 2 : if (state->unsynced_blocks != 0)
2075 1 : return 1;
2076 : }
2077 :
2078 97 : return 0;
2079 : }
2080 :
/**
 * Runs the scan in "diff" mode.
 *
 * \param state State to compare with the disks.
 * \return The value returned by state_diffscan() called with is_diff set to 1.
 */
int state_diff(struct snapraid_state* state)
{
	int ret;

	/* 1 selects the "diff" behavior of the shared implementation */
	ret = state_diffscan(state, 1);

	return ret;
}
2085 :
/**
 * Runs the scan in normal (not "diff") mode.
 *
 * \param state State to update with the result of the scan.
 */
void state_scan(struct snapraid_state* state)
{
	/* 0 selects the plain scan, the returned value is intentionally discarded */
	(void)state_diffscan(state, 0);
}
2090 :
|