Kigs Framework  Doc version 0.8
Open source multi purpose Rapid Application Development framework
meow_hash.h
1 /* ========================================================================
2 
3  Meow - A Fast Non-cryptographic Hash
4  (C) Copyright 2018 by Molly Rocket, Inc. (https://mollyrocket.com)
5 
6  See https://mollyrocket.com/meowhash for details.
7 
8  ========================================================================
9 
10  zlib License
11 
12  (C) Copyright 2018 Molly Rocket, Inc.
13 
14  This software is provided 'as-is', without any express or implied
15  warranty. In no event will the authors be held liable for any damages
16  arising from the use of this software.
17 
18  Permission is granted to anyone to use this software for any purpose,
19  including commercial applications, and to alter it and redistribute it
20  freely, subject to the following restrictions:
21 
22  1. The origin of this software must not be misrepresented; you must not
23  claim that you wrote the original software. If you use this software
24  in a product, an acknowledgment in the product documentation would be
25  appreciated but is not required.
26  2. Altered source versions must be plainly marked as such, and must not be
27  misrepresented as being the original software.
28  3. This notice may not be removed or altered from any source distribution.
29 
30  ========================================================================
31 
32  FAQ
33 
34  Q: What is it?
35 
36  A: Meow is a 128-bit non-cryptographic hash that operates at high speeds
37  on x64 and ARM processors that provide AES instructions. It is
38  designed to be truncatable to 64 and 32-bit hash values and still
39  retain good collision resistance.
40 
41  Q: What is it GOOD for?
42 
43  A: Quickly hashing any amount of data for comparison purposes such as
44  block deduplication or change detection. It is extremely fast on
45  all buffer sizes, from one byte to one gigabyte and up.
46 
47  Q: What is it BAD for?
48 
49  A: Anything security-related. It should be assumed that it provides
50  no protection from adversaries whatsoever. It is also not particularly
51  fast on processors that don't support AES instructions (eg., non-x64/ARM
52  processors).
53 
54  Q: Why is it called the "Meow hash"?
55 
56  A: It is named after a character in Meow the Infinite
57  (https://meowtheinfinite.com)
58 
59  Q: Who wrote it?
60 
61  A: CASEY MURATORI (https://caseymuratori.com) wrote the original
62  implementation for use in processing large-footprint assets for
63  the game 1935 (https://molly1935.com).
64 
65  After the initial version, the hash was refined via collaboration
66  with several great programmers who contributed suggestions and
67  modifications:
68 
69  JEFF ROBERTS (https://radgametools.com) provided a super slick
70  way to handle the residual end-of-buffer bytes that dramatically
71  improved Meow's small hash performance.
72 
73  MARTINS MOZEIKO (https://matrins.ninja) ported Meow to ARM and
74  ANSI-C, and added the proper preprocessor dressing for clean
75  compilation on a variety of compiler configurations.
76 
77  FABIAN GIESEN (https://fgiesen.wordpress.com) provided support
78  for getting the benchmarking working properly across a number
79  of platforms.
80 
81  ARAS PRANCKEVICIUS (https://aras-p.info) provided the allocation
82  shim for compilation on Mac OS X.
83 
84  ========================================================================
85 
86  USAGE
87 
88  For a complete working example, see meow_example.cpp. Briefly:
89 
90  // Include meow_intrinsics if you want it to detect platforms
91  // and define types and intrinsics for you. Omit it if you
92  // want to define them yourself.
93  #include "meow_intrinsics.h"
94 
95  // Include meow_hash for the Meow hash function
96  #include "meow_hash.h"
97 
98  // Hash a block of data using CPU-specific acceleration
99  meow_u128 MeowHash_Accelerated(u64 Seed, u64 Len, void *Source);
100 
101  // Check if two Meow hashes are the same
102  // (returns zero if they aren't, non-zero if they are)
103  int MeowHashesAreEqual(meow_u128 A, meow_u128 B)
104 
105  // Truncate a Meow hash to 64 bits
106  meow_u64 MeowU64From(meow_u128 Hash);
107 
108  // Truncate a Meow hash to 32 bits
109  meow_u32 MeowU32From(meow_u128 Hash);
110 
111  **** VERY IMPORTANT X64 COMPILATION NOTES ****
112 
113  On x64, Meow uses the AESDEC instruction, which comes in two flavors:
114  SSE (aesdec) and AVX (vaesdec). If you are compiling _with_ AVX support,
115  your compiler will probably emit the AVX variant, which means your code
116  WILL NOT RUN on computers that do not have AVX. If you need to deploy
117  this hash on computers that do not have AVX, you must take care to
118  TURN OFF support for AVX in your compiler for the file that includes
119  the Meow hash!
120 
121  ======================================================================== */
122 
123 //
124 // NOTE(casey): This version is EXPERIMENTAL. The Meow hash is still
125 // undergoing testing and finalization.
126 //
127 // **** EXPECT HASHES/APIs TO CHANGE UNTIL THE VERSION NUMBER HITS 1.0. ****
128 //
129 // You have been warned.
130 //
131 
132 static const unsigned char MeowShiftAdjust[31] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128};
133 static const unsigned char MeowMaskLen[32] = {255,255,255,255, 255,255,255,255, 255,255,255,255, 255,255,255,255, 0,0,0,0, 0,0,0,0, 0,0,0,0, 0,0,0,0};
134 
135 // TODO(casey): These constants are loaded to initialize the lanes. Jacob should
136 // give us some feedback on what they should _actually_ be set to.
137 #define MEOW_S0_INIT { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11, 12,13,14,15}
138 #define MEOW_S1_INIT {16,17,18,19, 20,21,22,23, 24,25,26,27, 28,29,30,31}
139 #define MEOW_S2_INIT {32,33,34,35, 36,37,38,39, 40,41,42,43, 44,45,46,47}
140 #define MEOW_S3_INIT {48,49,50,51, 52,53,54,55, 56,57,58,59, 60,61,62,63}
141 static const unsigned char MeowS0Init[] = MEOW_S0_INIT;
142 static const unsigned char MeowS1Init[] = MEOW_S1_INIT;
143 static const unsigned char MeowS2Init[] = MEOW_S2_INIT;
144 static const unsigned char MeowS3Init[] = MEOW_S3_INIT;
145 
146 //
147 // NOTE(casey): 128-wide AES-NI Meow (maximum of 16 bytes/clock single threaded)
148 //
149 
150 static meow_hash
151 MeowHash_Accelerated(meow_u64 Seed, meow_u64 TotalLengthInBytes, void *SourceInit)
152 {
153  //
154  // NOTE(casey): Initialize the four AES streams and the mixer
155  //
156 
157  meow_aes_128 S0 = Meow128_GetAESConstant(MeowS0Init);
158  meow_aes_128 S1 = Meow128_GetAESConstant(MeowS1Init);
159  meow_aes_128 S2 = Meow128_GetAESConstant(MeowS2Init);
160  meow_aes_128 S3 = Meow128_GetAESConstant(MeowS3Init);
161 
162  meow_u128 Mixer = Meow128_Set64x2(Seed - TotalLengthInBytes,
163  Seed + TotalLengthInBytes + 1);
164 
165  //
166  // NOTE(casey): Handle as many full 256-byte blocks as possible
167  //
168 
169  meow_u8 *Source = (meow_u8 *)SourceInit;
170  meow_u64 Len = TotalLengthInBytes;
171  int unsigned Len8 = Len & 15;
172  int unsigned Len128 = Len & 48;
173 
174  while(Len >= 64)
175  {
176  S0 = Meow128_AESDEC_Mem(S0, Source);
177  S1 = Meow128_AESDEC_Mem(S1, Source + 16);
178  S2 = Meow128_AESDEC_Mem(S2, Source + 32);
179  S3 = Meow128_AESDEC_Mem(S3, Source + 48);
180 
181  Len -= 64;
182  Source += 64;
183  }
184 
185  //
186  // NOTE(casey): Overhanging individual bytes
187  //
188 
189  if(Len8)
190  {
191  meow_u8 *Overhang = Source + Len128;
192  int Align = ((int)(meow_umm)Overhang) & 15;
193  if(Align)
194  {
195  int End = ((int)(meow_umm)Overhang) & (MEOW_PAGESIZE - 1);
196 
197  // NOTE(jeffr): If we are nowhere near the page end, use full unaligned load (cmov to set)
198  if (End <= (MEOW_PAGESIZE - 16))
199  {
200  Align = 0;
201  }
202 
203  // NOTE(jeffr): If we will read over the page end, use a full unaligned load (cmov to set)
204  if ((End + Len8) > MEOW_PAGESIZE)
205  {
206  Align = 0;
207  }
208 
209  meow_u128 Partial = Meow128_Shuffle_Mem(Overhang - Align, &MeowShiftAdjust[Align]);
210 
211  Partial = Meow128_And_Mem( Partial, &MeowMaskLen[16 - Len8] );
212  S3 = Meow128_AESDEC(S3, Partial);
213  }
214  else
215  {
216  // NOTE(casey): We don't have to do Jeff's heroics when we know the
217  // buffer is aligned, since we cannot span a memory page (by definition).
218  meow_u128 Partial = Meow128_And_Mem(*(meow_u128 *)Overhang, &MeowMaskLen[16 - Len8]);
219  S3 = Meow128_AESDEC(S3, Partial);
220  }
221  }
222 
223  //
224  // NOTE(casey): Overhanging full 128-bit lanes
225  //
226 
227  switch(Len128)
228  {
229  case 48: S2 = Meow128_AESDEC_Mem(S2, Source + 32);
230  case 32: S1 = Meow128_AESDEC_Mem(S1, Source + 16);
231  case 16: S0 = Meow128_AESDEC_Mem(S0, Source);
232  }
233 
234  //
235  // NOTE(casey): Mix the four lanes down to one 128-bit hash
236  //
237 
238  S3 = Meow128_AESDEC(S3, Mixer);
239  S2 = Meow128_AESDEC(S2, Mixer);
240  S1 = Meow128_AESDEC(S1, Mixer);
241  S0 = Meow128_AESDEC(S0, Mixer);
242 
243  S2 = Meow128_AESDEC(S2, Meow128_AESDEC_Finalize(S3));
244  S0 = Meow128_AESDEC(S0, Meow128_AESDEC_Finalize(S1));
245 
246  S2 = Meow128_AESDEC(S2, Mixer);
247 
248  S0 = Meow128_AESDEC(S0, Meow128_AESDEC_Finalize(S2));
249  S0 = Meow128_AESDEC(S0, Mixer);
250 
251  meow_hash Result;
252  Meow128_CopyToHash(Meow128_AESDEC_Finalize(S0), Result);
253 
254  return(Result);
255 }