You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

435 lines
13 KiB

  1. /* Set of hash utility functions to help maintaining the invariant that
  2. if a==b then hash(a)==hash(b)
  3. All the utility functions (_Py_Hash*()) return "-1" to signify an error.
  4. */
  5. #include "Python.h"
  6. #ifdef __APPLE__
  7. # include <libkern/OSByteOrder.h>
  8. #elif defined(HAVE_LE64TOH) && defined(HAVE_ENDIAN_H)
  9. # include <endian.h>
  10. #elif defined(HAVE_LE64TOH) && defined(HAVE_SYS_ENDIAN_H)
  11. # include <sys/endian.h>
  12. #endif
  13. #ifdef __cplusplus
  14. extern "C" {
  15. #endif
  16. _Py_HashSecret_t _Py_HashSecret = {{0}};
  17. #if Py_HASH_ALGORITHM == Py_HASH_EXTERNAL
  18. extern PyHash_FuncDef PyHash_Func;
  19. #else
  20. static PyHash_FuncDef PyHash_Func;
  21. #endif
  22. /* Count _Py_HashBytes() calls */
  23. #ifdef Py_HASH_STATS
  24. #define Py_HASH_STATS_MAX 32
  25. static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
  26. #endif
  27. /* For numeric types, the hash of a number x is based on the reduction
  28. of x modulo the prime P = 2**_PyHASH_BITS - 1. It's designed so that
  29. hash(x) == hash(y) whenever x and y are numerically equal, even if
  30. x and y have different types.
  31. A quick summary of the hashing strategy:
  32. (1) First define the 'reduction of x modulo P' for any rational
  33. number x; this is a standard extension of the usual notion of
  34. reduction modulo P for integers. If x == p/q (written in lowest
  35. terms), the reduction is interpreted as the reduction of p times
  36. the inverse of the reduction of q, all modulo P; if q is exactly
  37. divisible by P then define the reduction to be infinity. So we've
  38. got a well-defined map
  39. reduce : { rational numbers } -> { 0, 1, 2, ..., P-1, infinity }.
  40. (2) Now for a rational number x, define hash(x) by:
  41. reduce(x) if x >= 0
  42. -reduce(-x) if x < 0
  43. If the result of the reduction is infinity (this is impossible for
  44. integers, floats and Decimals) then use the predefined hash value
  45. _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
  46. _PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
  47. hashes of float and Decimal infinities and nans.
  48. A selling point for the above strategy is that it makes it possible
  49. to compute hashes of decimal and binary floating-point numbers
  50. efficiently, even if the exponent of the binary or decimal number
  51. is large. The key point is that
  52. reduce(x * y) == reduce(x) * reduce(y) (modulo _PyHASH_MODULUS)
  53. provided that {reduce(x), reduce(y)} != {0, infinity}. The reduction of a
  54. binary or decimal float is never infinity, since the denominator is a power
  55. of 2 (for binary) or a divisor of a power of 10 (for decimal). So we have,
  56. for nonnegative x,
  57. reduce(x * 2**e) == reduce(x) * reduce(2**e) % _PyHASH_MODULUS
  58. reduce(x * 10**e) == reduce(x) * reduce(10**e) % _PyHASH_MODULUS
  59. and reduce(10**e) can be computed efficiently by the usual modular
  60. exponentiation algorithm. For reduce(2**e) it's even better: since
  61. P is of the form 2**n-1, reduce(2**e) is 2**(e mod n), and multiplication
  62. by 2**(e mod n) modulo 2**n-1 just amounts to a rotation of bits.
  63. */
  64. Py_hash_t
  65. _Py_HashDouble(double v)
  66. {
  67. int e, sign;
  68. double m;
  69. Py_uhash_t x, y;
  70. if (!Py_IS_FINITE(v)) {
  71. if (Py_IS_INFINITY(v))
  72. return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
  73. else
  74. return _PyHASH_NAN;
  75. }
  76. m = frexp(v, &e);
  77. sign = 1;
  78. if (m < 0) {
  79. sign = -1;
  80. m = -m;
  81. }
  82. /* process 28 bits at a time; this should work well both for binary
  83. and hexadecimal floating point. */
  84. x = 0;
  85. while (m) {
  86. x = ((x << 28) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - 28);
  87. m *= 268435456.0; /* 2**28 */
  88. e -= 28;
  89. y = (Py_uhash_t)m; /* pull out integer part */
  90. m -= y;
  91. x += y;
  92. if (x >= _PyHASH_MODULUS)
  93. x -= _PyHASH_MODULUS;
  94. }
  95. /* adjust for the exponent; first reduce it modulo _PyHASH_BITS */
  96. e = e >= 0 ? e % _PyHASH_BITS : _PyHASH_BITS-1-((-1-e) % _PyHASH_BITS);
  97. x = ((x << e) & _PyHASH_MODULUS) | x >> (_PyHASH_BITS - e);
  98. x = x * sign;
  99. if (x == (Py_uhash_t)-1)
  100. x = (Py_uhash_t)-2;
  101. return (Py_hash_t)x;
  102. }
  103. Py_hash_t
  104. _Py_HashPointer(const void *p)
  105. {
  106. Py_hash_t x;
  107. size_t y = (size_t)p;
  108. /* bottom 3 or 4 bits are likely to be 0; rotate y by 4 to avoid
  109. excessive hash collisions for dicts and sets */
  110. y = (y >> 4) | (y << (8 * SIZEOF_VOID_P - 4));
  111. x = (Py_hash_t)y;
  112. if (x == -1)
  113. x = -2;
  114. return x;
  115. }
  116. Py_hash_t
  117. _Py_HashBytes(const void *src, Py_ssize_t len)
  118. {
  119. Py_hash_t x;
  120. /*
  121. We make the hash of the empty string be 0, rather than using
  122. (prefix ^ suffix), since this slightly obfuscates the hash secret
  123. */
  124. if (len == 0) {
  125. return 0;
  126. }
  127. #ifdef Py_HASH_STATS
  128. hashstats[(len <= Py_HASH_STATS_MAX) ? len : 0]++;
  129. #endif
  130. #if Py_HASH_CUTOFF > 0
  131. if (len < Py_HASH_CUTOFF) {
  132. /* Optimize hashing of very small strings with inline DJBX33A. */
  133. Py_uhash_t hash;
  134. const unsigned char *p = src;
  135. hash = 5381; /* DJBX33A starts with 5381 */
  136. switch(len) {
  137. /* ((hash << 5) + hash) + *p == hash * 33 + *p */
  138. case 7: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
  139. case 6: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
  140. case 5: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
  141. case 4: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
  142. case 3: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
  143. case 2: hash = ((hash << 5) + hash) + *p++; /* fallthrough */
  144. case 1: hash = ((hash << 5) + hash) + *p++; break;
  145. default:
  146. Py_UNREACHABLE();
  147. }
  148. hash ^= len;
  149. hash ^= (Py_uhash_t) _Py_HashSecret.djbx33a.suffix;
  150. x = (Py_hash_t)hash;
  151. }
  152. else
  153. #endif /* Py_HASH_CUTOFF */
  154. x = PyHash_Func.hash(src, len);
  155. if (x == -1)
  156. return -2;
  157. return x;
  158. }
  /* When Py_HASH_STATS is defined, write the per-length call counters to
     stderr together with a running total; otherwise do nothing. */
  void
  _PyHash_Fini(void)
  {
  #ifdef Py_HASH_STATS
      Py_ssize_t running = 0;
      int length = 1;

      fprintf(stderr, "len calls total\n");
      while (length <= Py_HASH_STATS_MAX) {
          running += hashstats[length];
          fprintf(stderr,
                  "%2i %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n",
                  length, hashstats[length], running);
          length++;
      }

      /* slot 0 holds the calls with a length above Py_HASH_STATS_MAX */
      running += hashstats[0];
      fprintf(stderr, "> %8" PY_FORMAT_SIZE_T "d %8" PY_FORMAT_SIZE_T "d\n",
              hashstats[0], running);
  #endif
  }
  176. PyHash_FuncDef *
  177. PyHash_GetFuncDef(void)
  178. {
  179. return &PyHash_Func;
  180. }
  /* PY_UHASH_CPY(dst, src): copy SIZEOF_PY_UHASH_T bytes from src to dst.

     With MSVC the copy is spelled out byte by byte (optimized memcpy() for
     Windows); everywhere else it is a plain memcpy().  The macro arguments
     are parenthesized so that expression arguments such as "buf + off" index
     correctly in the byte-by-byte variants. */
  #ifdef _MSC_VER
  # if SIZEOF_PY_UHASH_T == 4
  #   define PY_UHASH_CPY(dst, src) do { \
          (dst)[0] = (src)[0]; (dst)[1] = (src)[1]; \
          (dst)[2] = (src)[2]; (dst)[3] = (src)[3]; \
      } while(0)
  # elif SIZEOF_PY_UHASH_T == 8
  #   define PY_UHASH_CPY(dst, src) do { \
          (dst)[0] = (src)[0]; (dst)[1] = (src)[1]; \
          (dst)[2] = (src)[2]; (dst)[3] = (src)[3]; \
          (dst)[4] = (src)[4]; (dst)[5] = (src)[5]; \
          (dst)[6] = (src)[6]; (dst)[7] = (src)[7]; \
      } while(0)
  # else
  #   error SIZEOF_PY_UHASH_T must be 4 or 8
  # endif /* SIZEOF_PY_UHASH_T */
  #else /* not Windows */
  # define PY_UHASH_CPY(dst, src) memcpy((dst), (src), SIZEOF_PY_UHASH_T)
  #endif /* _MSC_VER */
  198. #if Py_HASH_ALGORITHM == Py_HASH_FNV
  199. /* **************************************************************************
  200. * Modified Fowler-Noll-Vo (FNV) hash function
  201. */
  202. static Py_hash_t
  203. fnv(const void *src, Py_ssize_t len)
  204. {
  205. const unsigned char *p = src;
  206. Py_uhash_t x;
  207. Py_ssize_t remainder, blocks;
  208. union {
  209. Py_uhash_t value;
  210. unsigned char bytes[SIZEOF_PY_UHASH_T];
  211. } block;
  212. #ifdef Py_DEBUG
  213. assert(_Py_HashSecret_Initialized);
  214. #endif
  215. remainder = len % SIZEOF_PY_UHASH_T;
  216. if (remainder == 0) {
  217. /* Process at least one block byte by byte to reduce hash collisions
  218. * for strings with common prefixes. */
  219. remainder = SIZEOF_PY_UHASH_T;
  220. }
  221. blocks = (len - remainder) / SIZEOF_PY_UHASH_T;
  222. x = (Py_uhash_t) _Py_HashSecret.fnv.prefix;
  223. x ^= (Py_uhash_t) *p << 7;
  224. while (blocks--) {
  225. PY_UHASH_CPY(block.bytes, p);
  226. x = (_PyHASH_MULTIPLIER * x) ^ block.value;
  227. p += SIZEOF_PY_UHASH_T;
  228. }
  229. /* add remainder */
  230. for (; remainder > 0; remainder--)
  231. x = (_PyHASH_MULTIPLIER * x) ^ (Py_uhash_t) *p++;
  232. x ^= (Py_uhash_t) len;
  233. x ^= (Py_uhash_t) _Py_HashSecret.fnv.suffix;
  234. if (x == (Py_uhash_t) -1) {
  235. x = (Py_uhash_t) -2;
  236. }
  237. return x;
  238. }
  239. static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
  240. 16 * SIZEOF_PY_HASH_T};
  241. #endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */
  242. /* **************************************************************************
  243. <MIT License>
  244. Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
  245. Permission is hereby granted, free of charge, to any person obtaining a copy
  246. of this software and associated documentation files (the "Software"), to deal
  247. in the Software without restriction, including without limitation the rights
  248. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  249. copies of the Software, and to permit persons to whom the Software is
  250. furnished to do so, subject to the following conditions:
  251. The above copyright notice and this permission notice shall be included in
  252. all copies or substantial portions of the Software.
  253. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  254. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  255. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  256. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  257. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  258. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  259. THE SOFTWARE.
  260. </MIT License>
  261. Original location:
  262. https://github.com/majek/csiphash/
  263. Solution inspired by code from:
  264. Samuel Neves (supercop/crypto_auth/siphash24/little)
  265. djb (supercop/crypto_auth/siphash24/little2)
  266. Jean-Philippe Aumasson (https://131002.net/siphash/siphash24.c)
  267. Modified for Python by Christian Heimes:
  268. - C89 / MSVC compatibility
  269. - _rotl64() on Windows
  270. - letoh64() fallback
  271. */
  /* byte swap little endian to host endian
   * Endian conversion not only ensures that the hash function returns the same
   * value on all platforms. It is also required for a good dispersion of
   * the hash values' least significant bits.
   *
   * The le64toh() branch is selected by the same HAVE_LE64TOH / header checks
   * that guard the <endian.h> / <sys/endian.h> includes at the top of this
   * file; the historical HAVE_LETOH64 spelling is still accepted.  The last
   * branch is a portable fallback that reverses the eight bytes by hand and
   * evaluates its argument more than once.
   */
  #if PY_LITTLE_ENDIAN
  # define _le64toh(x) ((uint64_t)(x))
  #elif defined(__APPLE__)
  # define _le64toh(x) OSSwapLittleToHostInt64(x)
  #elif (defined(HAVE_LE64TOH) && \
         (defined(HAVE_ENDIAN_H) || defined(HAVE_SYS_ENDIAN_H))) || \
        defined(HAVE_LETOH64)
  # define _le64toh(x) le64toh(x)
  #else
  # define _le64toh(x) (((uint64_t)(x) << 56) | \
                        (((uint64_t)(x) << 40) & 0xff000000000000ULL) | \
                        (((uint64_t)(x) << 24) & 0xff0000000000ULL) | \
                        (((uint64_t)(x) << 8) & 0xff00000000ULL) | \
                        (((uint64_t)(x) >> 8) & 0xff000000ULL) | \
                        (((uint64_t)(x) >> 24) & 0xff0000ULL) | \
                        (((uint64_t)(x) >> 40) & 0xff00ULL) | \
                        ((uint64_t)(x) >> 56))
  #endif
  /* ROTATE(x, b): rotate the 64-bit value x left by b bits.  In the generic
     form b must be in 1..63, since a shift by 64 is undefined in C. */
  #ifdef _MSC_VER
  # define ROTATE(x, b) _rotl64(x, b)
  #else
  # define ROTATE(x, b) ((uint64_t)( ((x) << (b)) | ( (x) >> (64 - (b))) ))
  #endif

  /* HALF_ROUND: one half of a SipHash round over the lvalues a, b, c, d with
     rotation amounts s and t.  Wrapped in do { } while (0) so that it acts as
     a single statement, e.g. under an unbraced if. */
  #define HALF_ROUND(a,b,c,d,s,t) \
      do { \
          (a) += (b); (c) += (d); \
          (b) = ROTATE((b), (s)) ^ (a); \
          (d) = ROTATE((d), (t)) ^ (c); \
          (a) = ROTATE((a), 32); \
      } while (0)

  /* DOUBLE_ROUND: four half rounds applied to the state v0..v3 in place. */
  #define DOUBLE_ROUND(v0,v1,v2,v3) \
      do { \
          HALF_ROUND(v0,v1,v2,v3,13,16); \
          HALF_ROUND(v2,v1,v0,v3,17,21); \
          HALF_ROUND(v0,v1,v2,v3,13,16); \
          HALF_ROUND(v2,v1,v0,v3,17,21); \
      } while (0)
  308. static uint64_t
  309. siphash24(uint64_t k0, uint64_t k1, const void *src, Py_ssize_t src_sz) {
  310. uint64_t b = (uint64_t)src_sz << 56;
  311. const uint8_t *in = (const uint8_t*)src;
  312. uint64_t v0 = k0 ^ 0x736f6d6570736575ULL;
  313. uint64_t v1 = k1 ^ 0x646f72616e646f6dULL;
  314. uint64_t v2 = k0 ^ 0x6c7967656e657261ULL;
  315. uint64_t v3 = k1 ^ 0x7465646279746573ULL;
  316. uint64_t t;
  317. uint8_t *pt;
  318. while (src_sz >= 8) {
  319. uint64_t mi;
  320. memcpy(&mi, in, sizeof(mi));
  321. mi = _le64toh(mi);
  322. in += sizeof(mi);
  323. src_sz -= sizeof(mi);
  324. v3 ^= mi;
  325. DOUBLE_ROUND(v0,v1,v2,v3);
  326. v0 ^= mi;
  327. }
  328. t = 0;
  329. pt = (uint8_t *)&t;
  330. switch (src_sz) {
  331. case 7: pt[6] = in[6]; /* fall through */
  332. case 6: pt[5] = in[5]; /* fall through */
  333. case 5: pt[4] = in[4]; /* fall through */
  334. case 4: memcpy(pt, in, sizeof(uint32_t)); break;
  335. case 3: pt[2] = in[2]; /* fall through */
  336. case 2: pt[1] = in[1]; /* fall through */
  337. case 1: pt[0] = in[0]; /* fall through */
  338. }
  339. b |= _le64toh(t);
  340. v3 ^= b;
  341. DOUBLE_ROUND(v0,v1,v2,v3);
  342. v0 ^= b;
  343. v2 ^= 0xff;
  344. DOUBLE_ROUND(v0,v1,v2,v3);
  345. DOUBLE_ROUND(v0,v1,v2,v3);
  346. /* modified */
  347. t = (v0 ^ v1) ^ (v2 ^ v3);
  348. return t;
  349. }
  350. uint64_t
  351. _Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
  352. {
  353. return siphash24(key, 0, src, src_sz);
  354. }
  355. #if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
  356. static Py_hash_t
  357. pysiphash(const void *src, Py_ssize_t src_sz) {
  358. return (Py_hash_t)siphash24(
  359. _le64toh(_Py_HashSecret.siphash.k0), _le64toh(_Py_HashSecret.siphash.k1),
  360. src, src_sz);
  361. }
  362. static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash24", 64, 128};
  363. #endif
  364. #ifdef __cplusplus
  365. }
  366. #endif