LCOV - code coverage report
Current view: top level - cmdline - dup.c (source / functions) Hit Total Coverage
Test: lcov.info Lines: 57 61 93.4 %
Date: 2017-11-06 22:14:04 Functions: 5 5 100.0 %

          Line data    Source code
       1             : /*
       2             :  * Copyright (C) 2011 Andrea Mazzoleni
       3             :  *
       4             :  * This program is free software: you can redistribute it and/or modify
       5             :  * it under the terms of the GNU General Public License as published by
       6             :  * the Free Software Foundation, either version 3 of the License, or
       7             :  * (at your option) any later version.
       8             :  *
       9             :  * This program is distributed in the hope that it will be useful,
      10             :  * but WITHOUT ANY WARRANTY; without even the implied warranty of
      11             :  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      12             :  * GNU General Public License for more details.
      13             :  *
      14             :  * You should have received a copy of the GNU General Public License
      15             :  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
      16             :  */
      17             : 
      18             : #include "portable.h"
      19             : 
      20             : #include "support.h"
      21             : #include "util.h"
      22             : #include "elem.h"
      23             : #include "state.h"
      24             : #include "parity.h"
      25             : #include "handle.h"
      26             : 
      27             : /****************************************************************************/
      28             : /* dup */
      29             : 
      30             : struct snapraid_hash {
      31             :         struct snapraid_disk* disk; /**< Disk. */
      32             :         struct snapraid_file* file; /**< File. */
      33             :         unsigned char hash[HASH_MAX]; /**< Hash of the whole file. */
      34             : 
      35             :         /* nodes for data structures */
      36             :         tommy_hashdyn_node node;
      37             : };
      38             : 
      39       16976 : struct snapraid_hash* hash_alloc(struct snapraid_state* state, struct snapraid_disk* disk, struct snapraid_file* file)
      40             : {
      41             :         struct snapraid_hash* hash;
      42             :         block_off_t i;
      43             :         unsigned char* buf;
      44             : 
      45       16976 :         hash = malloc_nofail(sizeof(struct snapraid_hash));
      46       16976 :         hash->disk = disk;
      47       16976 :         hash->file = file;
      48             : 
      49       16976 :         buf = malloc_nofail(file->blockmax * BLOCK_HASH_SIZE);
      50             : 
      51             :         /* set the back pointer */
      52       58996 :         for (i = 0; i < file->blockmax; ++i) {
      53       42020 :                 struct snapraid_block* block = fs_file2block_get(file, i);
      54             : 
      55       42020 :                 memcpy(buf + i * BLOCK_HASH_SIZE, block->hash, BLOCK_HASH_SIZE);
      56             : 
      57       42020 :                 if (!block_has_updated_hash(block)) {
      58           0 :                         free(buf);
      59           0 :                         free(hash);
      60           0 :                         return 0;
      61             :                 }
      62             :         }
      63             : 
      64       16976 :         memhash(state->besthash, state->hashseed, hash->hash, buf, file->blockmax * BLOCK_HASH_SIZE);
      65             : 
      66       16976 :         free(buf);
      67             : 
      68       16976 :         return hash;
      69             : }
      70             : 
      71       16976 : static inline tommy_uint32_t hash_hash(struct snapraid_hash* hash)
      72             : {
      73       16976 :         return tommy_hash_u32(0, hash->hash, HASH_MAX);
      74             : }
      75             : 
      /**
       * Release a record obtained from hash_alloc().
       *
       * Only the record itself is freed; the disk and file it points to are
       * left untouched.
       */
      void hash_free(struct snapraid_hash* hash)
      {
              free(hash);
      }
      80             : 
      81          18 : int hash_compare(const void* void_arg, const void* void_data)
      82             : {
      83          18 :         const char* arg = void_arg;
      84          18 :         const struct snapraid_hash* hash = void_data;
      85             : 
      86          18 :         return memcmp(arg, hash->hash, HASH_MAX);
      87             : }
      88             : 
      89           3 : void state_dup(struct snapraid_state* state)
      90             : {
      91             :         tommy_hashdyn hashset;
      92             :         tommy_node* i;
      93             :         unsigned count;
      94             :         data_off_t size;
      95             :         char esc_buffer[ESC_MAX];
      96             :         char esc_buffer_alt[ESC_MAX];
      97             : 
      98           3 :         tommy_hashdyn_init(&hashset);
      99             : 
     100           3 :         count = 0;
     101           3 :         size = 0;
     102             : 
     103           3 :         msg_progress("Comparing...\n");
     104             : 
     105             :         /* for each disk */
     106          21 :         for (i = state->disklist; i != 0; i = i->next) {
     107             :                 tommy_node* j;
     108          18 :                 struct snapraid_disk* disk = i->data;
     109             : 
     110             :                 /* for each file */
     111       17018 :                 for (j = disk->filelist; j != 0; j = j->next) {
     112       17000 :                         struct snapraid_file* file = j->data;
     113             :                         struct snapraid_hash* hash;
     114             :                         tommy_hash_t hash32;
     115             : 
     116             :                         /* if empty, skip it */
     117       17000 :                         if (file->size == 0)
     118          24 :                                 continue;
     119             : 
     120       16976 :                         hash = hash_alloc(state, disk, file);
     121             : 
     122             :                         /* if no hash, skip it */
     123       16976 :                         if (!hash)
     124           0 :                                 continue;
     125             : 
     126       16976 :                         hash32 = hash_hash(hash);
     127             : 
     128       16976 :                         struct snapraid_hash* found = tommy_hashdyn_search(&hashset, hash_compare, hash->hash, hash32);
     129       16976 :                         if (found) {
     130          18 :                                 ++count;
     131          18 :                                 size += found->file->size;
     132          18 :                                 log_tag("dup:%s:%s:%s:%s:%" PRIu64 ": dup\n", disk->name, esc_tag(file->sub, esc_buffer), found->disk->name, esc_tag(found->file->sub, esc_buffer_alt), found->file->size);
     133          18 :                                 printf("%12" PRIu64 " %s = %s\n", file->size, fmt_term(disk, file->sub, esc_buffer), fmt_term(found->disk, found->file->sub, esc_buffer_alt));
     134          18 :                                 hash_free(hash);
     135             :                         } else {
     136       16958 :                                 tommy_hashdyn_insert(&hashset, &hash->node, hash, hash32);
     137             :                         }
     138             :                 }
     139             :         }
     140             : 
     141           3 :         tommy_hashdyn_foreach(&hashset, (tommy_foreach_func*)hash_free);
     142           3 :         tommy_hashdyn_done(&hashset);
     143             : 
     144           3 :         msg_status("\n");
     145           3 :         msg_status("%8u duplicates, for %" PRIu64 " GB\n", count, size / GIGA);
     146           3 :         if (count)
     147           2 :                 msg_status("There are duplicates!\n");
     148             :         else
     149           1 :                 msg_status("No duplicates\n");
     150             : 
     151           3 :         log_tag("summary:dup_count:%u\n", count);
     152           3 :         log_tag("summary:dup_size:%" PRIu64 "\n", size);
     153           3 :         if (count == 0) {
     154           1 :                 log_tag("summary:exit:unique\n");
     155             :         } else {
     156           2 :                 log_tag("summary:exit:dup\n");
     157             :         }
     158           3 :         log_flush();
     159           3 : }
     160             : 

Generated by: LCOV version 1.13