Line data Source code
1 : /*
2 : * Copyright (C) 2011 Andrea Mazzoleni
3 : *
4 : * This program is free software: you can redistribute it and/or modify
5 : * it under the terms of the GNU General Public License as published by
6 : * the Free Software Foundation, either version 3 of the License, or
7 : * (at your option) any later version.
8 : *
9 : * This program is distributed in the hope that it will be useful,
10 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 : * GNU General Public License for more details.
13 : *
14 : * You should have received a copy of the GNU General Public License
15 : * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 : */
17 :
18 : /*
19 : * Derivative work from MurmorHash3.cpp revision r136
20 : *
21 : * SMHasher & MurmurHash
22 : * http://code.google.com/p/smhasher/
23 : *
24 : * Exact source used as reference:
25 : * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp?spec=svn136&r=136
26 : */
27 :
28 : // MurmurHash3 was written by Austin Appleby, and is placed in the public
29 : // domain. The author hereby disclaims copyright to this source code.
30 :
31 : /* Finalization mix - force all bits of a hash block to avalanche */
32 8181188 : static inline uint32_t fmix32(uint32_t h)
33 : {
34 8181188 : h ^= h >> 16;
35 8181188 : h *= 0x85ebca6b;
36 8181188 : h ^= h >> 13;
37 8181188 : h *= 0xc2b2ae35;
38 8181188 : h ^= h >> 16;
39 8181188 : return h;
40 : }
41 :
42 : /*
43 : * Warning!
44 : * Don't declare these variables static, otherwise the gcc optimizer
45 : * may generate very slow code for multiplication with these constants,
46 : * like:
47 :
48 : -> .cpp
49 : k1 *= c1;
50 : -> .asm
51 : 152: 8d 14 80 lea (%eax,%eax,4),%edx
52 : 155: 8d 14 90 lea (%eax,%edx,4),%edx
53 : 158: c1 e2 03 shl $0x3,%edx
54 : 15b: 29 c2 sub %eax,%edx
55 : 15d: 8d 14 d2 lea (%edx,%edx,8),%edx
56 : 160: 8d 14 90 lea (%eax,%edx,4),%edx
57 : 163: 8d 14 d0 lea (%eax,%edx,8),%edx
58 : 166: 8d 14 90 lea (%eax,%edx,4),%edx
59 : 169: 8d 14 50 lea (%eax,%edx,2),%edx
60 : 16c: 8d 14 90 lea (%eax,%edx,4),%edx
61 : 16f: 8d 14 92 lea (%edx,%edx,4),%edx
62 : 172: 8d 14 50 lea (%eax,%edx,2),%edx
63 : 175: 8d 04 d0 lea (%eax,%edx,8),%eax
64 : 178: 8d 14 c5 00 00 00 00 lea 0x0(,%eax,8),%edx
65 : 17f: 29 d0 sub %edx,%eax
66 :
67 : * resulting in speeds of 500 MB/s instead of 3000 MB/s.
68 : *
69 : * Verified with gcc 4.4.4 compiling with :
70 : *
71 : * g++ -g -c -O2 MurmurHash3.cpp -o MurmurHash3.o
72 : */
73 : uint32_t c1 = 0x239b961b;
74 : uint32_t c2 = 0xab0e9789;
75 : uint32_t c3 = 0x38b34ae5;
76 : uint32_t c4 = 0xa1e38b93;
77 :
78 2045297 : void MurmurHash3_x86_128(const void* data, size_t size, const uint8_t* seed, void* digest)
79 : {
80 : size_t nblocks;
81 : const uint32_t* blocks;
82 : const uint32_t* end;
83 : size_t size_remainder;
84 : uint32_t h1, h2, h3, h4;
85 :
86 2045297 : h1 = util_read32(seed + 0);
87 2045297 : h2 = util_read32(seed + 4);
88 2045297 : h3 = util_read32(seed + 8);
89 2045297 : h4 = util_read32(seed + 12);
90 :
91 2045297 : nblocks = size / 16;
92 2045297 : blocks = data;
93 2045297 : end = blocks + nblocks * 4;
94 :
95 : /* body */
96 109317278 : while (blocks < end) {
97 105226684 : uint32_t k1 = blocks[0];
98 105226684 : uint32_t k2 = blocks[1];
99 105226684 : uint32_t k3 = blocks[2];
100 105226684 : uint32_t k4 = blocks[3];
101 :
102 : #if WORDS_BIGENDIAN
103 : k1 = util_swap32(k1);
104 : k2 = util_swap32(k2);
105 : k3 = util_swap32(k3);
106 : k4 = util_swap32(k4);
107 : #endif
108 :
109 105226684 : k1 *= c1; k1 = util_rotl32(k1, 15); k1 *= c2; h1 ^= k1;
110 :
111 105226684 : h1 = util_rotl32(h1, 19); h1 += h2; h1 = h1 * 5 + 0x561ccd1b;
112 :
113 105226684 : k2 *= c2; k2 = util_rotl32(k2, 16); k2 *= c3; h2 ^= k2;
114 :
115 105226684 : h2 = util_rotl32(h2, 17); h2 += h3; h2 = h2 * 5 + 0x0bcaa747;
116 :
117 105226684 : k3 *= c3; k3 = util_rotl32(k3, 17); k3 *= c4; h3 ^= k3;
118 :
119 105226684 : h3 = util_rotl32(h3, 15); h3 += h4; h3 = h3 * 5 + 0x96cd1c35;
120 :
121 105226684 : k4 *= c4; k4 = util_rotl32(k4, 18); k4 *= c1; h4 ^= k4;
122 :
123 105226684 : h4 = util_rotl32(h4, 13); h4 += h1; h4 = h4 * 5 + 0x32ac3b17;
124 :
125 105226684 : blocks += 4;
126 : }
127 :
128 : /* tail */
129 2045297 : size_remainder = size & 15;
130 2045297 : if (size_remainder != 0) {
131 773834 : const uint8_t* tail = (const uint8_t*)blocks;
132 :
133 773834 : uint32_t k1 = 0;
134 773834 : uint32_t k2 = 0;
135 773834 : uint32_t k3 = 0;
136 773834 : uint32_t k4 = 0;
137 :
138 773834 : switch (size_remainder) {
139 50889 : case 15 : k4 ^= (uint32_t)tail[14] << 16;
140 103246 : case 14 : k4 ^= (uint32_t)tail[13] << 8;
141 155057 : case 13 : k4 ^= (uint32_t)tail[12] << 0;
142 155057 : k4 *= c4; k4 = util_rotl32(k4, 18); k4 *= c1; h4 ^= k4;
143 207471 : case 12 : k3 ^= (uint32_t)tail[11] << 24;
144 259606 : case 11 : k3 ^= (uint32_t)tail[10] << 16;
145 311419 : case 10 : k3 ^= (uint32_t)tail[ 9] << 8;
146 364325 : case 9 : k3 ^= (uint32_t)tail[ 8] << 0;
147 364325 : k3 *= c3; k3 = util_rotl32(k3, 17); k3 *= c4; h3 ^= k3;
148 417914 : case 8 : k2 ^= (uint32_t)tail[ 7] << 24;
149 467089 : case 7 : k2 ^= (uint32_t)tail[ 6] << 16;
150 519422 : case 6 : k2 ^= (uint32_t)tail[ 5] << 8;
151 569576 : case 5 : k2 ^= (uint32_t)tail[ 4] << 0;
152 569576 : k2 *= c2; k2 = util_rotl32(k2, 16); k2 *= c3; h2 ^= k2;
153 621172 : case 4 : k1 ^= (uint32_t)tail[ 3] << 24;
154 671182 : case 3 : k1 ^= (uint32_t)tail[ 2] << 16;
155 721916 : case 2 : k1 ^= (uint32_t)tail[ 1] << 8;
156 773834 : case 1 : k1 ^= (uint32_t)tail[ 0] << 0;
157 773834 : k1 *= c1; k1 = util_rotl32(k1, 15); k1 *= c2; h1 ^= k1;
158 : }
159 : }
160 :
161 : /* finalization */
162 2045297 : h1 ^= size; h2 ^= size; h3 ^= size; h4 ^= size;
163 :
164 2045297 : h1 += h2; h1 += h3; h1 += h4;
165 2045297 : h2 += h1; h3 += h1; h4 += h1;
166 :
167 2045297 : h1 = fmix32(h1);
168 2045297 : h2 = fmix32(h2);
169 2045297 : h3 = fmix32(h3);
170 2045297 : h4 = fmix32(h4);
171 :
172 2045297 : h1 += h2; h1 += h3; h1 += h4;
173 2045297 : h2 += h1; h3 += h1; h4 += h1;
174 :
175 2045297 : util_write32(digest + 0, h1);
176 2045297 : util_write32(digest + 4, h2);
177 2045297 : util_write32(digest + 8, h3);
178 2045297 : util_write32(digest + 12, h4);
179 2045297 : }
180 :
|