LCOV - code coverage report
Current view: top level - cmdline - dup.c (source / functions) Hit Total Coverage
Test: lcov.info Lines: 58 62 93.5 %
Date: 2025-10-28 11:59:11 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2011 Andrea Mazzoleni
       3             :  *
       4             :  * This program is free software: you can redistribute it and/or modify
       5             :  * it under the terms of the GNU General Public License as published by
       6             :  * the Free Software Foundation, either version 3 of the License, or
       7             :  * (at your option) any later version.
       8             :  *
       9             :  * This program is distributed in the hope that it will be useful,
      10             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :  * GNU General Public License for more details.
      13             :  *
      14             :  * You should have received a copy of the GNU General Public License
      15             :  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
      16             :  */
      17             : 
      18             : #include "portable.h"
      19             : 
      20             : #include "support.h"
      21             : #include "util.h"
      22             : #include "elem.h"
      23             : #include "state.h"
      24             : #include "parity.h"
      25             : #include "handle.h"
      26             : 
      27             : /****************************************************************************/
      28             : /* dup */
      29             : 
      30             : struct snapraid_hash {
      31             :         struct snapraid_disk* disk; /**< Disk. */
      32             :         struct snapraid_file* file; /**< File. */
      33             :         unsigned char hash[HASH_MAX]; /**< Hash of the whole file. */
      34             : 
      35             :         /* nodes for data structures */
      36             :         tommy_hashdyn_node node;
      37             : };
      38             : 
      39       16976 : struct snapraid_hash* hash_alloc(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file)
      40             : {
      41             :         struct snapraid_hash* hash;
      42             :         block_off_t i;
      43             :         unsigned char* buf;
      44       16976 :         size_t hash_size = BLOCK_HASH_SIZE;
      45             : 
      46       16976 :         hash = malloc_nofail(sizeof(struct snapraid_hash));
      47       16976 :         hash->disk = disk;
      48       16976 :         hash->file = file;
      49             : 
      50       16976 :         buf = malloc_nofail(file->blockmax * hash_size);
      51             : 
      52             :         /* set the back pointer */
      53       58996 :         for (i = 0; i < file->blockmax; ++i) {
      54       42020 :                 struct snapraid_block* block = fs_file2block_get(file, i);
      55             : 
      56       42020 :                 memcpy(buf + i * hash_size, block->hash, hash_size);
      57             : 
      58       42020 :                 if (!block_has_updated_hash(block)) {
      59           0 :                         free(buf);
      60           0 :                         free(hash);
      61           0 :                         return 0;
      62             :                 }
      63             :         }
      64             : 
      65       16976 :         memhash(state->besthash, state->hashseed, hash->hash, buf, file->blockmax * hash_size);
      66             : 
      67       16976 :         free(buf);
      68             : 
      69       16976 :         return hash;
      70             : }
      71             : 
      72       16976 : static inline tommy_uint32_t hash_hash(struct snapraid_hash* hash)
      73             : {
      74       16976 :         return tommy_hash_u32(0, hash->hash, HASH_MAX);
      75             : }
      76             : 
      77       16976 : void hash_free(struct snapraid_hash* hash)
      78             : {
      79       16976 :         free(hash);
      80       16976 : }
      81             : 
      82          18 : int hash_compare(const void* void_arg, const void* void_data)
      83             : {
      84          18 :         const char* arg = void_arg;
      85          18 :         const struct snapraid_hash* hash = void_data;
      86             : 
      87          18 :         return memcmp(arg, hash->hash, HASH_MAX);
      88             : }
      89             : 
      90           3 : void state_dup(struct snapraid_state* state)
      91             : {
      92             :         tommy_hashdyn hashset;
      93             :         tommy_node* i;
      94             :         unsigned count;
      95             :         data_off_t size;
      96             :         char esc_buffer[ESC_MAX];
      97             :         char esc_buffer_alt[ESC_MAX];
      98             : 
      99           3 :         tommy_hashdyn_init(&hashset);
     100             : 
     101           3 :         count = 0;
     102           3 :         size = 0;
     103             : 
     104           3 :         msg_progress("Comparing...\n");
     105             : 
     106             :         /* for each disk */
     107          21 :         for (i = state->disklist; i != 0; i = i->next) {
     108             :                 tommy_node* j;
     109          18 :                 struct snapraid_disk* disk = i->data;
     110             : 
     111             :                 /* for each file */
     112       17018 :                 for (j = disk->filelist; j != 0; j = j->next) {
     113       17000 :                         struct snapraid_file* file = j->data;
     114             :                         struct snapraid_hash* hash;
     115             :                         tommy_hash_t hash32;
     116             : 
     117             :                         /* if empty, skip it */
     118       17000 :                         if (file->size == 0)
     119          24 :                                 continue;
     120             : 
     121       16976 :                         hash = hash_alloc(state, disk, file);
     122             : 
     123             :                         /* if no hash, skip it */
     124       16976 :                         if (!hash)
     125           0 :                                 continue;
     126             : 
     127       16976 :                         hash32 = hash_hash(hash);
     128             : 
     129       16976 :                         struct snapraid_hash* found = tommy_hashdyn_search(&hashset, hash_compare, hash->hash, hash32);
     130       16976 :                         if (found) {
     131          18 :                                 ++count;
     132          18 :                                 size += found->file->size;
     133          18 :                                 log_tag("dup:%s:%s:%s:%s:%" PRIu64 ": dup\n", disk->name, esc_tag(file->sub, esc_buffer), found->disk->name, esc_tag(found->file->sub, esc_buffer_alt), found->file->size);
     134          18 :                                 printf("%12" PRIu64 " %s = %s\n", file->size, fmt_term(disk, file->sub, esc_buffer), fmt_term(found->disk, found->file->sub, esc_buffer_alt));
     135          18 :                                 hash_free(hash);
     136             :                         } else {
     137       16958 :                                 tommy_hashdyn_insert(&hashset, &hash->node, hash, hash32);
     138             :                         }
     139             :                 }
     140             :         }
     141             : 
     142           3 :         tommy_hashdyn_foreach(&hashset, (tommy_foreach_func*)hash_free);
     143           3 :         tommy_hashdyn_done(&hashset);
     144             : 
     145           3 :         msg_status("\n");
     146           3 :         msg_status("%8u duplicates, for %" PRIu64 " GB\n", count, size / GIGA);
     147           3 :         if (count)
     148           2 :                 msg_status("There are duplicates!\n");
     149             :         else
     150           1 :                 msg_status("No duplicates\n");
     151             : 
     152           3 :         log_tag("summary:dup_count:%u\n", count);
     153           3 :         log_tag("summary:dup_size:%" PRIu64 "\n", size);
     154           3 :         if (count == 0) {
     155           1 :                 log_tag("summary:exit:unique\n");
     156             :         } else {
     157           2 :                 log_tag("summary:exit:dup\n");
     158             :         }
     159           3 :         log_flush();
     160           3 : }
     161             : 

Generated by: LCOV version 1.0