Line data Source code
1 : // SPDX-License-Identifier: GPL-3.0-or-later
2 : // Copyright (C) 2011 Andrea Mazzoleni
3 :
4 : #include "portable.h"
5 :
6 : #include "support.h"
7 : #include "util.h"
8 : #include "elem.h"
9 : #include "state.h"
10 : #include "parity.h"
11 : #include "handle.h"
12 :
13 : /****************************************************************************/
14 : /* dup */
15 :
16 : struct snapraid_hash {
17 : struct snapraid_disk* disk; /**< Disk. */
18 : struct snapraid_file* file; /**< File. */
19 : unsigned char hash[HASH_MAX]; /**< Hash of the whole file. */
20 :
21 : /* nodes for data structures */
22 : tommy_hashdyn_node node;
23 : };
24 :
25 16976 : struct snapraid_hash* hash_alloc(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file)
26 : {
27 : struct snapraid_hash* hash;
28 : block_off_t i;
29 : unsigned char* buf;
30 16976 : size_t hash_size = BLOCK_HASH_SIZE;
31 :
32 16976 : hash = malloc_nofail(sizeof(struct snapraid_hash));
33 16976 : hash->disk = disk;
34 16976 : hash->file = file;
35 :
36 16976 : buf = malloc_nofail(file->blockmax * hash_size);
37 :
38 : /* set the back pointer */
39 58996 : for (i = 0; i < file->blockmax; ++i) {
40 42020 : struct snapraid_block* block = fs_file2block_get(file, i);
41 :
42 42020 : memcpy(buf + i * hash_size, block->hash, hash_size);
43 :
44 42020 : if (!block_has_updated_hash(block)) {
45 0 : free(buf);
46 0 : free(hash);
47 0 : return 0;
48 : }
49 : }
50 :
51 16976 : memhash(state->besthash, state->hashseed, hash->hash, buf, file->blockmax * hash_size);
52 :
53 16976 : free(buf);
54 :
55 16976 : return hash;
56 : }
57 :
58 16976 : static inline tommy_uint32_t hash_hash(struct snapraid_hash* hash)
59 : {
60 16976 : return tommy_hash_u32(0, hash->hash, HASH_MAX);
61 : }
62 :
/**
 * Release an entry returned by hash_alloc().
 *
 * Only the entry itself is freed; the disk and file it points to are
 * left untouched.
 */
void hash_free(struct snapraid_hash* hash)
{
	free(hash);
}
67 :
68 18 : int hash_compare(const void* void_arg, const void* void_data)
69 : {
70 18 : const char* arg = void_arg;
71 18 : const struct snapraid_hash* hash = void_data;
72 :
73 18 : return memcmp(arg, hash->hash, HASH_MAX);
74 : }
75 :
76 3 : void state_dup(struct snapraid_state* state)
77 : {
78 : tommy_hashdyn hashset;
79 : tommy_node* i;
80 : unsigned count;
81 : data_off_t size;
82 : char esc_buffer[ESC_MAX];
83 : char esc_buffer_alt[ESC_MAX];
84 :
85 3 : tommy_hashdyn_init(&hashset);
86 :
87 3 : count = 0;
88 3 : size = 0;
89 :
90 3 : msg_progress("Comparing...\n");
91 :
92 : /* for each disk */
93 21 : for (i = state->disklist; i != 0; i = i->next) {
94 : tommy_node* j;
95 18 : struct snapraid_disk* disk = i->data;
96 :
97 : /* for each file */
98 17018 : for (j = disk->filelist; j != 0; j = j->next) {
99 17000 : struct snapraid_file* file = j->data;
100 : struct snapraid_hash* hash;
101 : tommy_hash_t hash32;
102 :
103 : /* if empty, skip it */
104 17000 : if (file->size == 0)
105 24 : continue;
106 :
107 16976 : hash = hash_alloc(state, disk, file);
108 :
109 : /* if no hash, skip it */
110 16976 : if (!hash)
111 0 : continue;
112 :
113 16976 : hash32 = hash_hash(hash);
114 :
115 16976 : struct snapraid_hash* found = tommy_hashdyn_search(&hashset, hash_compare, hash->hash, hash32);
116 16976 : if (found) {
117 18 : ++count;
118 18 : size += found->file->size;
119 18 : log_tag("dup:%s:%s:%s:%s:%" PRIu64 ": dup\n", disk->name, esc_tag(file->sub, esc_buffer), found->disk->name, esc_tag(found->file->sub, esc_buffer_alt), found->file->size);
120 18 : printf("%12" PRIu64 " %s = %s\n", file->size, fmt_term(disk, file->sub, esc_buffer), fmt_term(found->disk, found->file->sub, esc_buffer_alt));
121 18 : hash_free(hash);
122 : } else {
123 16958 : tommy_hashdyn_insert(&hashset, &hash->node, hash, hash32);
124 : }
125 : }
126 : }
127 :
128 3 : tommy_hashdyn_foreach(&hashset, (tommy_foreach_func*)hash_free);
129 3 : tommy_hashdyn_done(&hashset);
130 :
131 3 : msg_status("\n");
132 3 : msg_status("%8u duplicates, for %" PRIu64 " GB\n", count, size / GIGA);
133 3 : if (count)
134 2 : msg_status("There are duplicates!\n");
135 : else
136 1 : msg_status("No duplicates\n");
137 :
138 3 : log_tag("summary:dup_count:%u\n", count);
139 3 : log_tag("summary:dup_size:%" PRIu64 "\n", size);
140 3 : if (count == 0) {
141 1 : log_tag("summary:exit:unique\n");
142 : } else {
143 2 : log_tag("summary:exit:dup\n");
144 : }
145 3 : log_flush();
146 3 : }
147 :
|