Line data Source code
1 : /*
2 : * Copyright (C) 2011 Andrea Mazzoleni
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 : */
17 :
18 : #include "portable.h"
19 :
20 : #include "support.h"
21 : #include "util.h"
22 : #include "elem.h"
23 : #include "state.h"
24 : #include "parity.h"
25 : #include "handle.h"
26 :
27 : /****************************************************************************/
28 : /* dup */
29 :
30 : struct snapraid_hash {
31 : struct snapraid_disk* disk; /**< Disk. */
32 : struct snapraid_file* file; /**< File. */
33 : unsigned char hash[HASH_MAX]; /**< Hash of the whole file. */
34 :
35 : /* nodes for data structures */
36 : tommy_hashdyn_node node;
37 : };
38 :
39 16976 : struct snapraid_hash* hash_alloc(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file)
40 : {
41 : struct snapraid_hash* hash;
42 : block_off_t i;
43 : unsigned char* buf;
44 :
45 16976 : hash = malloc_nofail(sizeof(struct snapraid_hash));
46 16976 : hash->disk = disk;
47 16976 : hash->file = file;
48 :
49 16976 : buf = malloc_nofail(file->blockmax * BLOCK_HASH_SIZE);
50 :
51 : /* set the back pointer */
52 58996 : for (i = 0; i < file->blockmax; ++i) {
53 42020 : struct snapraid_block* block = fs_file2block_get(file, i);
54 :
55 42020 : memcpy(buf + i * BLOCK_HASH_SIZE, block->hash, BLOCK_HASH_SIZE);
56 :
57 42020 : if (!block_has_updated_hash(block)) {
58 0 : free(buf);
59 0 : free(hash);
60 0 : return 0;
61 : }
62 : }
63 :
64 16976 : memhash(state->besthash, state->hashseed, hash->hash, buf, file->blockmax * BLOCK_HASH_SIZE);
65 :
66 16976 : free(buf);
67 :
68 16976 : return hash;
69 : }
70 :
71 16976 : static inline tommy_uint32_t hash_hash(struct snapraid_hash* hash)
72 : {
73 16976 : return tommy_hash_u32(0, hash->hash, HASH_MAX);
74 : }
75 :
/**
 * Release an entry obtained from hash_alloc().
 * Only the entry itself is freed, the referenced disk and file are not touched.
 */
void hash_free(struct snapraid_hash* hash)
{
	free(hash);
}
80 :
81 18 : int hash_compare(const void* void_arg, const void* void_data)
82 : {
83 18 : const char* arg = void_arg;
84 18 : const struct snapraid_hash* hash = void_data;
85 :
86 18 : return memcmp(arg, hash->hash, HASH_MAX);
87 : }
88 :
89 3 : void state_dup(struct snapraid_state* state)
90 : {
91 : tommy_hashdyn hashset;
92 : tommy_node* i;
93 : unsigned count;
94 : data_off_t size;
95 : char esc_buffer[ESC_MAX];
96 : char esc_buffer_alt[ESC_MAX];
97 :
98 3 : tommy_hashdyn_init(&hashset);
99 :
100 3 : count = 0;
101 3 : size = 0;
102 :
103 3 : msg_progress("Comparing...\n");
104 :
105 : /* for each disk */
106 21 : for (i = state->disklist; i != 0; i = i->next) {
107 : tommy_node* j;
108 18 : struct snapraid_disk* disk = i->data;
109 :
110 : /* for each file */
111 17018 : for (j = disk->filelist; j != 0; j = j->next) {
112 17000 : struct snapraid_file* file = j->data;
113 : struct snapraid_hash* hash;
114 : tommy_hash_t hash32;
115 :
116 : /* if empty, skip it */
117 17000 : if (file->size == 0)
118 24 : continue;
119 :
120 16976 : hash = hash_alloc(state, disk, file);
121 :
122 : /* if no hash, skip it */
123 16976 : if (!hash)
124 0 : continue;
125 :
126 16976 : hash32 = hash_hash(hash);
127 :
128 16976 : struct snapraid_hash* found = tommy_hashdyn_search(&hashset, hash_compare, hash->hash, hash32);
129 16976 : if (found) {
130 18 : ++count;
131 18 : size += found->file->size;
132 18 : log_tag("dup:%s:%s:%s:%s:%" PRIu64 ": dup\n", disk->name, esc_tag(file->sub, esc_buffer), found->disk->name, esc_tag(found->file->sub, esc_buffer_alt), found->file->size);
133 18 : printf("%12" PRIu64 " %s = %s\n", file->size, fmt_term(disk, file->sub, esc_buffer), fmt_term(found->disk, found->file->sub, esc_buffer_alt));
134 18 : hash_free(hash);
135 : } else {
136 16958 : tommy_hashdyn_insert(&hashset, &hash->node, hash, hash32);
137 : }
138 : }
139 : }
140 :
141 3 : tommy_hashdyn_foreach(&hashset, (tommy_foreach_func*)hash_free);
142 3 : tommy_hashdyn_done(&hashset);
143 :
144 3 : msg_status("\n");
145 3 : msg_status("%8u duplicates, for %" PRIu64 " GB\n", count, size / GIGA);
146 3 : if (count)
147 2 : msg_status("There are duplicates!\n");
148 : else
149 1 : msg_status("No duplicates\n");
150 :
151 3 : log_tag("summary:dup_count:%u\n", count);
152 3 : log_tag("summary:dup_size:%" PRIu64 "\n", size);
153 3 : if (count == 0) {
154 1 : log_tag("summary:exit:unique\n");
155 : } else {
156 2 : log_tag("summary:exit:dup\n");
157 : }
158 3 : log_flush();
159 3 : }
160 :
|