hbxxh.h 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317
  1. /**
  2. * @file hbxxh.h
  3. * @brief High performances vectorized hashing based on xxHash.
  4. *
  5. * $LicenseInfo:firstyear=2023&license=viewergpl$
  6. *
  7. * Copyright (c) 2023, Henri Beauchamp.
  8. *
  9. * Second Life Viewer Source Code
  10. * The source code in this file ("Source Code") is provided by Linden Lab
  11. * to you under the terms of the GNU General Public License, version 2.0
  12. * ("GPL"), unless you have obtained a separate licensing agreement
  13. * ("Other License"), formally executed by you and Linden Lab. Terms of
  14. * the GPL can be found in doc/GPL-license.txt in this distribution, or
  15. * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
  16. *
  17. * There are special exceptions to the terms and conditions of the GPL as
  18. * it is applied to this Source Code. View the full text of the exception
  19. * in the file doc/FLOSS-exception.txt in this software distribution, or
  20. * online at
  21. * http://secondlifegrid.net/programs/open_source/licensing/flossexception
  22. *
  23. * By copying, modifying or distributing this software, you acknowledge
  24. * that you have read and understood your obligations described above,
  25. * and agree to abide by those obligations.
  26. *
  27. * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
  28. * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
  29. * COMPLETENESS OR PERFORMANCE.
  30. * $/LicenseInfo$
  31. */
  32. #ifndef LL_HBXXH_H
  33. #define LL_HBXXH_H
  34. #include "lluuid.h"
  35. // HBXXH* classes are to be used where speed matters and cryptographic quality
  36. // is not required (no "one-way" guarantee, though they are likely not worst in
  37. // this respect than MD5 which got busted and is now considered too weak). The
  38. // xxHash code they are built upon is vectorized and about 50 times faster than
  39. // MD5. A 64 bits hash class is also provided for when 128 bits of entropy are
  40. // not needed. The hashes collision rate is similar to MD5's.
  41. // See https://github.com/Cyan4973/xxHash#readme for details.
  42. // 64 bits hashing class
  43. class HBXXH64
  44. {
  45. friend std::ostream& operator<<(std::ostream&, HBXXH64);
  46. protected:
  47. LOG_CLASS(HBXXH64);
  48. public:
  49. LL_INLINE HBXXH64() { init(); }
  50. // Constructors for special circumstances; they all digest the first passed
  51. // parameter. Set 'do_finalize' to false if you do not want to finalize the
  52. // context, which is useful/needed when you want to update() it afterwards.
  53. // Ideally, the compiler should be smart enough to get our clue and
  54. // optimize out the const bool test during inlining...
  55. LL_INLINE HBXXH64(const void* buffer, size_t len,
  56. const bool do_finalize = true)
  57. {
  58. init();
  59. update(buffer, len);
  60. if (do_finalize)
  61. {
  62. finalize();
  63. }
  64. }
  65. LL_INLINE HBXXH64(const std::string& str, const bool do_finalize = true)
  66. {
  67. init();
  68. update(str);
  69. if (do_finalize)
  70. {
  71. finalize();
  72. }
  73. }
  74. LL_INLINE HBXXH64(std::istream& s, const bool do_finalize = true)
  75. {
  76. init();
  77. update(s);
  78. if (do_finalize)
  79. {
  80. finalize();
  81. }
  82. }
  83. LL_INLINE HBXXH64(FILE* file, const bool do_finalize = true)
  84. {
  85. init();
  86. update(file);
  87. if (do_finalize)
  88. {
  89. finalize();
  90. }
  91. }
  92. // Make this class no-copy (it would be possible, with custom copy
  93. // operators, but it is not trivially copyable, because of the mState
  94. // pointer): it does not really make sense to allow copying it anyway,
  95. // since all we care about is the resulting digest (so you should only
  96. // need and care about storing/copying the digest and not a class
  97. // instance).
  98. HBXXH64(const HBXXH64&) noexcept = delete;
  99. HBXXH64& operator=(const HBXXH64&) noexcept = delete;
  100. ~HBXXH64();
  101. void update(const void* buffer, size_t len);
  102. void update(const std::string& str);
  103. void update(std::istream& s);
  104. void update(FILE* file);
  105. // Convenience template to hash other types.
  106. // IMPORTANT: do only use for types represented in memory as a *continuous*
  107. // block making up the value. E.g. LLUUIDs, U32, F64, etc... NOT to be used
  108. // for containers such as std::map, std::set, etc... For structures,
  109. // classes etc, be wary of padding bytes between values and any trailing
  110. // padding bytes (accounted for in sizeof(T)): these *must* have been
  111. // zeroed on construction, or the hash will be random) !
  112. template<typename T>
  113. LL_INLINE void update(const T& value)
  114. {
  115. update((const void*)value, sizeof(T));
  116. }
  117. // Note that unlike what happens with LLMD5, you do not need to finalize()
  118. // HBXXH64 before using digest(), and you may keep updating() it even after
  119. // you got a first digest() (the next digest would of course change after
  120. // any update). It is still useful to use finalize() when you do not want
  121. // to store a final digest() result in a separate U64; after this method
  122. // has been called, digest() simply returns mDigest value.
  123. void finalize();
  124. U64 digest() const;
  125. // Fast static methods. Use them when hashing just one contiguous block of
  126. // data.
  127. static U64 digest(const void* buffer, size_t len);
  128. static U64 digest(const char* str); // str must be NUL-terminated
  129. static U64 digest(const std::string& str);
  130. private:
  131. void init();
  132. private:
  133. // We use a void pointer to avoid including xxhash.h here for XXH3_state_t
  134. // (which cannot either be trivially forward-declared, due to complex API
  135. // related pre-processor macros in xxhash.h).
  136. void* mState;
  137. U64 mDigest;
  138. };
  139. LL_INLINE bool operator==(const HBXXH64& a, const HBXXH64& b)
  140. {
  141. return a.digest() == b.digest();
  142. }
  143. LL_INLINE bool operator!=(const HBXXH64& a, const HBXXH64& b)
  144. {
  145. return a.digest() != b.digest();
  146. }
  147. // 128 bits hashing class
  148. class HBXXH128
  149. {
  150. friend std::ostream& operator<<(std::ostream&, HBXXH128);
  151. protected:
  152. LOG_CLASS(HBXXH128);
  153. public:
  154. LL_INLINE HBXXH128() { init(); }
  155. // Constructors for special circumstances; they all digest the first passed
  156. // parameter. Set 'do_finalize' to false if you do not want to finalize the
  157. // context, which is useful/needed when you want to update() it afterwards.
  158. // Ideally, the compiler should be smart enough to get our clue and
  159. // optimize out the const bool test during inlining...
  160. LL_INLINE HBXXH128(const void* buffer, size_t len,
  161. const bool do_finalize = true)
  162. {
  163. init();
  164. update(buffer, len);
  165. if (do_finalize)
  166. {
  167. finalize();
  168. }
  169. }
  170. LL_INLINE HBXXH128(const std::string& str, const bool do_finalize = true)
  171. {
  172. init();
  173. update(str);
  174. if (do_finalize)
  175. {
  176. finalize();
  177. }
  178. }
  179. LL_INLINE HBXXH128(std::istream& s, const bool do_finalize = true)
  180. {
  181. init();
  182. update(s);
  183. if (do_finalize)
  184. {
  185. finalize();
  186. }
  187. }
  188. LL_INLINE HBXXH128(FILE* file, const bool do_finalize = true)
  189. {
  190. init();
  191. update(file);
  192. if (do_finalize)
  193. {
  194. finalize();
  195. }
  196. }
  197. // Make this class no-copy (it would be possible, with custom copy
  198. // operators, but it is not trivially copyable, because of the mState
  199. // pointer): it does not really make sense to allow copying it anyway,
  200. // since all we care about is the resulting digest (so you should only
  201. // need and care about storing/copying the digest and not a class
  202. // instance).
  203. HBXXH128(const HBXXH128&) noexcept = delete;
  204. HBXXH128& operator=(const HBXXH128&) noexcept = delete;
  205. ~HBXXH128();
  206. void update(const void* buffer, size_t len);
  207. void update(const std::string& str);
  208. void update(std::istream& s);
  209. void update(FILE* file);
  210. // Convenience template to hash other types.
  211. // IMPORTANT: do only use for types represented in memory as a *continuous*
  212. // block making up the value. E.g. LLUUIDs, U32, F64, etc... NOT to be used
  213. // for containers such as std::map, std::set, etc... For structures,
  214. // classes etc, be wary of padding bytes between values and any trailing
  215. // padding bytes (accounted for in sizeof(T)): these *must* have been
  216. // zeroed on construction, or the hash will be random) !
  217. template<typename T>
  218. LL_INLINE void update(const T& value)
  219. {
  220. update((const void*)value, sizeof(T));
  221. }
  222. // Note that unlike what happens with LLMD5, you do not need to finalize()
  223. // HBXXH128 before using digest(), and you may keep updating() it even
  224. // after you got a first digest() (the next digest would of course change
  225. // after any update). It is still useful to use finalize() when you do not
  226. // want to store a final digest() result in a separate LLUUID; after this
  227. // method has been called, digest() simply returns a reference on mDigest.
  228. void finalize();
  229. // We use an LLUUID for the digest, since this is a 128 bits wide native
  230. // type available in the viewer code, making it easy to manipulate. It also
  231. // allows to use HBXXH128 digests efficiently as keys for std, boost or
  232. // phmap containers, since I already provided a very efficient hash_value()
  233. // function override for LLUUID (a simple XOR of the two 64 bits words).
  234. const LLUUID& digest() const;
  235. // Here, we avoid an LLUUID copy whenever we already got one to store the
  236. // result *and* we did not yet call finalize().
  237. void digest(LLUUID& result) const;
  238. // Fast static methods. Use them when hashing just one contiguous block of
  239. // data.
  240. static LLUUID digest(const void* buffer, size_t len);
  241. static LLUUID digest(const char* str); // str must be NUL-terminated
  242. static LLUUID digest(const std::string& str);
  243. // Same as above, but saves you from an LLUUID copy when you already got
  244. // one for storage use.
  245. static void digest(LLUUID& result, const void* buffer, size_t len);
  246. static void digest(LLUUID& result, const char* str); // str NUL-terminated
  247. static void digest(LLUUID& result, const std::string& str);
  248. private:
  249. void init();
  250. private:
  251. // We use a void pointer to avoid including xxhash.h here for XXH3_state_t
  252. // (which cannot either be trivially forward-declared, due to complex API
  253. // related pre-processor macros in xxhash.h).
  254. void* mState;
  255. LLUUID mDigest;
  256. };
  257. LL_INLINE bool operator==(const HBXXH128& a, const HBXXH128& b)
  258. {
  259. return a.digest() == b.digest();
  260. }
  261. LL_INLINE bool operator!=(const HBXXH128& a, const HBXXH128& b)
  262. {
  263. return a.digest() != b.digest();
  264. }
  265. // Utility function to reduce the size of a 64 bits digest to 32 bits while
  266. // preserving as much entropy as possible. HB
  267. LL_INLINE U32 digest64to32(U64 digest64)
  268. {
  269. return U32(digest64) ^ U32(digest64 >> 32);
  270. }
  271. #endif // LL_HBXXH_H