LCOV - code coverage report
Current view: top level - cmdline - dup.c (source / functions) Hit Total Coverage
Test: lcov.info Lines: 58 62 93.5 %
Date: 2026-04-29 15:04:44 Functions: 5 5 100.0 %

          Line data    Source code
       1             : // SPDX-License-Identifier: GPL-3.0-or-later
       2             : // Copyright (C) 2011 Andrea Mazzoleni
       3             : 
       4             : #include "portable.h"
       5             : 
       6             : #include "support.h"
       7             : #include "util.h"
       8             : #include "elem.h"
       9             : #include "state.h"
      10             : #include "parity.h"
      11             : #include "handle.h"
      12             : 
      13             : /****************************************************************************/
      14             : /* dup */
      15             : 
       16             : struct snapraid_hash {
                                /*
                                 * Entry of the hash set that state_dup() builds to find duplicate files.
                                 * It ties a whole-file digest to the disk and file it was computed from.
                                 * Entries are created by hash_alloc() and released by hash_free().
                                 */
       17             :         struct snapraid_disk* disk; /**< Disk passed to hash_alloc() together with the file. */
       18             :         struct snapraid_file* file; /**< File the digest was computed for. */
       19             :         unsigned char hash[HASH_MAX]; /**< Digest written by memhash() over the concatenated block hashes of the file. */
       20             : 
       21             :         /* node used to link this entry into a tommy_hashdyn set (see state_dup()) */
       22             :         tommy_hashdyn_node node;
       23             : };
      24             : 
       25       16976 : struct snapraid_hash* hash_alloc(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file)
       26             : {
                                /*
                                 * Allocate the duplicate-detection entry for a file.
                                 *
                                 * The hash stored in each of the file->blockmax blocks is copied into a
                                 * temporary buffer, then memhash() is run over that buffer using
                                 * state->besthash and state->hashseed; the result is written to hash->hash.
                                 * The file content itself is not read by this function, only the
                                 * block->hash values are used.
                                 *
                                 * Returns the new entry, to be released with hash_free(), or 0 when any
                                 * block fails block_has_updated_hash(); in that case both allocations
                                 * are released before returning.
                                 *
                                 * hash->node is not initialized here.
                                 *
                                 * NOTE(review): hash_hash() and hash_compare() read HASH_MAX bytes of
                                 * hash->hash, so this relies on memhash() filling all of them - confirm.
                                 */
       27             :         struct snapraid_hash* hash;
       28             :         block_off_t i;
       29             :         unsigned char* buf;
       30       16976 :         size_t hash_size = BLOCK_HASH_SIZE;
       31             : 
       32       16976 :         hash = malloc_nofail(sizeof(struct snapraid_hash));
       33       16976 :         hash->disk = disk;
       34       16976 :         hash->file = file;
       35             : 
                                /* room for one block hash per block of the file */
       36       16976 :         buf = malloc_nofail(file->blockmax * hash_size);
       37             : 
       38             :         /* gather the hash of every block, giving up if one fails block_has_updated_hash() */
       39       58996 :         for (i = 0; i < file->blockmax; ++i) {
       40       42020 :                 struct snapraid_block* block = fs_file2block_get(file, i);
       41             : 
                                        /* the copy is done before the check; harmless because buf is discarded on failure */
       42       42020 :                 memcpy(buf + i * hash_size, block->hash, hash_size);
       43             : 
                                        /* NOTE(review): this bail-out path shows 0 hits in this coverage report */
       44       42020 :                 if (!block_has_updated_hash(block)) {
       45           0 :                         free(buf);
       46           0 :                         free(hash);
       47           0 :                         return 0;
       48             :                 }
       49             :         }
       50             : 
                                /* digest of the concatenated block hashes, used as identity of the whole file */
       51       16976 :         memhash(state->besthash, state->hashseed, hash->hash, buf, file->blockmax * hash_size);
       52             : 
       53       16976 :         free(buf);
       54             : 
       55       16976 :         return hash;
       56             : }
      57             : 
       58       16976 : static inline tommy_uint32_t hash_hash(struct snapraid_hash* hash)
       59             : {
                                /*
                                 * Reduce the HASH_MAX bytes of the entry digest to a 32 bit value
                                 * with tommy_hash_u32(), passing 0 as its first argument.
                                 * state_dup() hands the result to tommy_hashdyn_search() and
                                 * tommy_hashdyn_insert() together with the entry.
                                 */
       60       16976 :         return tommy_hash_u32(0, hash->hash, HASH_MAX);
       61             : }
      62             : 
       63       16976 : void hash_free(struct snapraid_hash* hash)
       64             : {
                                /*
                                 * Release an entry obtained from hash_alloc().
                                 * Only the entry itself is freed; the disk and file it points to are
                                 * left untouched. state_dup() also passes this function, through a
                                 * cast, to tommy_hashdyn_foreach() to release the whole set.
                                 */
       65       16976 :         free(hash);
       66       16976 : }
      67             : 
       68          18 : int hash_compare(const void* void_arg, const void* void_data)
       69             : {
                                /*
                                 * Comparison callback given to tommy_hashdyn_search() by state_dup().
                                 *
                                 * void_arg is the raw digest being searched (state_dup() passes
                                 * hash->hash), void_data is an entry stored in the set.
                                 * Returns the memcmp() result over HASH_MAX bytes, so 0 when the two
                                 * digests are equal.
                                 *
                                 * NOTE(review): presumably tommy treats a 0 return as a match - confirm
                                 * against the tommyds documentation.
                                 */
       70          18 :         const char* arg = void_arg;
       71          18 :         const struct snapraid_hash* hash = void_data;
       72             : 
       73          18 :         return memcmp(arg, hash->hash, HASH_MAX);
       74             : }
      75             : 
       76           3 : void state_dup(struct snapraid_state* state)
       77             : {
                                /*
                                 * Find and report duplicate files among all the disks of the state.
                                 *
                                 * Every non-empty file of every disk in state->disklist gets an entry
                                 * from hash_alloc() and is looked up in a local tommy_hashdyn set using
                                 * its whole-file digest. The first file seen with a given digest is
                                 * inserted in the set; any later file with the same digest is counted,
                                 * logged with a "dup" tag and printed next to the file already in the set.
                                 * Files for which hash_alloc() returns 0 are skipped.
                                 *
                                 * At the end a summary goes to msg_status() and to the log as
                                 * "summary:" tags, and the log is flushed. Nothing is returned.
                                 */
       78             :         tommy_hashdyn hashset;
       79             :         tommy_node* i;
       80             :         unsigned count;
       81             :         data_off_t size;
                                /* two scratch buffers, so that both escaped names can be used in a single call */
       82             :         char esc_buffer[ESC_MAX];
       83             :         char esc_buffer_alt[ESC_MAX];
       84             : 
       85           3 :         tommy_hashdyn_init(&hashset);
       86             : 
       87           3 :         count = 0;
       88           3 :         size = 0;
       89             : 
       90           3 :         msg_progress("Comparing...\n");
       91             : 
       92             :         /* for each disk */
       93          21 :         for (i = state->disklist; i != 0; i = i->next) {
       94             :                 tommy_node* j;
       95          18 :                 struct snapraid_disk* disk = i->data;
       96             : 
       97             :                 /* for each file */
       98       17018 :                 for (j = disk->filelist; j != 0; j = j->next) {
       99       17000 :                         struct snapraid_file* file = j->data;
      100             :                         struct snapraid_hash* hash;
      101             :                         tommy_hash_t hash32;
      102             : 
      103             :                         /* if empty, skip it */
      104       17000 :                         if (file->size == 0)
      105          24 :                                 continue;
      106             : 
      107       16976 :                         hash = hash_alloc(state, disk, file);
      108             : 
      109             :                         /* if no hash, skip it */
      110       16976 :                         if (!hash)
      111           0 :                                 continue;
      112             : 
      113       16976 :                         hash32 = hash_hash(hash);
      114             : 
                                                /* look for a file with the same digest that was already seen */
      115       16976 :                         struct snapraid_hash* found = tommy_hashdyn_search(&hashset, hash_compare, hash->hash, hash32);
      116       16976 :                         if (found) {
      117          18 :                                 ++count;
                                                        /*
                                                         * The accumulated size and the one in the log tag come from the
                                                         * file already in the set, the printed one from the current file.
                                                         * NOTE(review): expected to be equal for real duplicates - confirm.
                                                         */
      118          18 :                                 size += found->file->size;
      119          18 :                                 log_tag("dup:%s:%s:%s:%s:%" PRIu64 ": dup\n", disk->name, esc_tag(file->sub, esc_buffer), found->disk->name, esc_tag(found->file->sub, esc_buffer_alt), found->file->size);
      120          18 :                                 printf("%12" PRIu64 " %s = %s\n", file->size, fmt_term(disk, file->sub, esc_buffer), fmt_term(found->disk, found->file->sub, esc_buffer_alt));
                                                        /* this entry was never inserted in the set, release it right away */
      121          18 :                                 hash_free(hash);
      122             :                         } else {
                                                        /* first file with this digest: from now on the set owns the entry */
      123       16958 :                                 tommy_hashdyn_insert(&hashset, &hash->node, hash, hash32);
      124             :                         }
      125             :                 }
      126             :         }
      127             : 
                                /* release every entry still in the set, then the set itself */
      128           3 :         tommy_hashdyn_foreach(&hashset, (tommy_foreach_func*)hash_free);
      129           3 :         tommy_hashdyn_done(&hashset);
      130             : 
                                /* the total shown here is size divided by GIGA; the log tag below carries the undivided value */
      131           3 :         msg_status("\n");
      132           3 :         msg_status("%8u duplicates, for %" PRIu64 " GB\n", count, size / GIGA);
      133           3 :         if (count)
      134           2 :                 msg_status("There are duplicates!\n");
      135             :         else
      136           1 :                 msg_status("No duplicates\n");
      137             : 
      138           3 :         log_tag("summary:dup_count:%u\n", count);
      139           3 :         log_tag("summary:dup_size:%" PRIu64 "\n", size);
      140           3 :         if (count == 0) {
      141           1 :                 log_tag("summary:exit:unique\n");
      142             :         } else {
      143           2 :                 log_tag("summary:exit:dup\n");
      144             :         }
      145           3 :         log_flush();
      146           3 : }
     147             : 

Generated by: LCOV version 1.0