Line data Source code
1 : /*
2 : * Copyright (C) 2011 Andrea Mazzoleni
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 : */
17 :
18 : #include "portable.h"
19 :
20 : #include "support.h"
21 : #include "util.h"
22 : #include "elem.h"
23 : #include "state.h"
24 : #include "parity.h"
25 : #include "handle.h"
26 :
27 : /****************************************************************************/
28 : /* dup */
29 :
30 : struct snapraid_hash {
31 : struct snapraid_disk* disk; /**< Disk. */
32 : struct snapraid_file* file; /**< File. */
33 : unsigned char hash[HASH_MAX]; /**< Hash of the whole file. */
34 :
35 : /* nodes for data structures */
36 : tommy_hashdyn_node node;
37 : };
38 :
39 16976 : struct snapraid_hash* hash_alloc(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file)
40 : {
41 : struct snapraid_hash* hash;
42 : block_off_t i;
43 : unsigned char* buf;
44 16976 : size_t hash_size = BLOCK_HASH_SIZE;
45 :
46 16976 : hash = malloc_nofail(sizeof(struct snapraid_hash));
47 16976 : hash->disk = disk;
48 16976 : hash->file = file;
49 :
50 16976 : buf = malloc_nofail(file->blockmax * hash_size);
51 :
52 : /* set the back pointer */
53 58996 : for (i = 0; i < file->blockmax; ++i) {
54 42020 : struct snapraid_block* block = fs_file2block_get(file, i);
55 :
56 42020 : memcpy(buf + i * hash_size, block->hash, hash_size);
57 :
58 42020 : if (!block_has_updated_hash(block)) {
59 0 : free(buf);
60 0 : free(hash);
61 0 : return 0;
62 : }
63 : }
64 :
65 16976 : memhash(state->besthash, state->hashseed, hash->hash, buf, file->blockmax * hash_size);
66 :
67 16976 : free(buf);
68 :
69 16976 : return hash;
70 : }
71 :
72 16976 : static inline tommy_uint32_t hash_hash(struct snapraid_hash* hash)
73 : {
74 16976 : return tommy_hash_u32(0, hash->hash, HASH_MAX);
75 : }
76 :
/**
 * Release an entry allocated by hash_alloc().
 *
 * Only the entry itself is deallocated, the referenced
 * disk and file are not touched.
 */
void hash_free(struct snapraid_hash* hash)
{
	free(hash);
}
81 :
82 18 : int hash_compare(const void* void_arg, const void* void_data)
83 : {
84 18 : const char* arg = void_arg;
85 18 : const struct snapraid_hash* hash = void_data;
86 :
87 18 : return memcmp(arg, hash->hash, HASH_MAX);
88 : }
89 :
90 3 : void state_dup(struct snapraid_state* state)
91 : {
92 : tommy_hashdyn hashset;
93 : tommy_node* i;
94 : unsigned count;
95 : data_off_t size;
96 : char esc_buffer[ESC_MAX];
97 : char esc_buffer_alt[ESC_MAX];
98 :
99 3 : tommy_hashdyn_init(&hashset);
100 :
101 3 : count = 0;
102 3 : size = 0;
103 :
104 3 : msg_progress("Comparing...\n");
105 :
106 : /* for each disk */
107 21 : for (i = state->disklist; i != 0; i = i->next) {
108 : tommy_node* j;
109 18 : struct snapraid_disk* disk = i->data;
110 :
111 : /* for each file */
112 17018 : for (j = disk->filelist; j != 0; j = j->next) {
113 17000 : struct snapraid_file* file = j->data;
114 : struct snapraid_hash* hash;
115 : tommy_hash_t hash32;
116 :
117 : /* if empty, skip it */
118 17000 : if (file->size == 0)
119 24 : continue;
120 :
121 16976 : hash = hash_alloc(state, disk, file);
122 :
123 : /* if no hash, skip it */
124 16976 : if (!hash)
125 0 : continue;
126 :
127 16976 : hash32 = hash_hash(hash);
128 :
129 16976 : struct snapraid_hash* found = tommy_hashdyn_search(&hashset, hash_compare, hash->hash, hash32);
130 16976 : if (found) {
131 18 : ++count;
132 18 : size += found->file->size;
133 18 : log_tag("dup:%s:%s:%s:%s:%" PRIu64 ": dup\n", disk->name, esc_tag(file->sub, esc_buffer), found->disk->name, esc_tag(found->file->sub, esc_buffer_alt), found->file->size);
134 18 : printf("%12" PRIu64 " %s = %s\n", file->size, fmt_term(disk, file->sub, esc_buffer), fmt_term(found->disk, found->file->sub, esc_buffer_alt));
135 18 : hash_free(hash);
136 : } else {
137 16958 : tommy_hashdyn_insert(&hashset, &hash->node, hash, hash32);
138 : }
139 : }
140 : }
141 :
142 3 : tommy_hashdyn_foreach(&hashset, (tommy_foreach_func*)hash_free);
143 3 : tommy_hashdyn_done(&hashset);
144 :
145 3 : msg_status("\n");
146 3 : msg_status("%8u duplicates, for %" PRIu64 " GB\n", count, size / GIGA);
147 3 : if (count)
148 2 : msg_status("There are duplicates!\n");
149 : else
150 1 : msg_status("No duplicates\n");
151 :
152 3 : log_tag("summary:dup_count:%u\n", count);
153 3 : log_tag("summary:dup_size:%" PRIu64 "\n", size);
154 3 : if (count == 0) {
155 1 : log_tag("summary:exit:unique\n");
156 : } else {
157 2 : log_tag("summary:exit:dup\n");
158 : }
159 3 : log_flush();
160 3 : }
161 :
|