hbxxh.cpp 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. /**
  2. * @file hbxxh.cpp
  3. * @brief High performance vectorized hashing based on xxHash.
  4. *
  5. * $LicenseInfo:firstyear=2023&license=viewergpl$
  6. *
  7. * Copyright (c) 2023, Henri Beauchamp.
  8. *
  9. * Second Life Viewer Source Code
  10. * The source code in this file ("Source Code") is provided by Linden Lab
  11. * to you under the terms of the GNU General Public License, version 2.0
  12. * ("GPL"), unless you have obtained a separate licensing agreement
  13. * ("Other License"), formally executed by you and Linden Lab. Terms of
  14. * the GPL can be found in doc/GPL-license.txt in this distribution, or
  15. * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
  16. *
  17. * There are special exceptions to the terms and conditions of the GPL as
  18. * it is applied to this Source Code. View the full text of the exception
  19. * in the file doc/FLOSS-exception.txt in this software distribution, or
  20. * online at
  21. * http://secondlifegrid.net/programs/open_source/licensing/flossexception
  22. *
  23. * By copying, modifying or distributing this software, you acknowledge
  24. * that you have read and understood your obligations described above,
  25. * and agree to abide by those obligations.
  26. *
  27. * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
  28. * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
  29. * COMPLETENESS OR PERFORMANCE.
  30. * $/LicenseInfo$
  31. */
  #include "linden_common.h"

  #include <cstring>

  // This define ensures that xxHash will be compiled within this module, with
  // vectorized (*) and inlined functions (with no exported API symbol); our
  // xxhash "pre-built library" package actually only contains the xxhash.h
  // header (no library needed at link time).
  // (*) SSE2 is normally used for x86(_64) builds, unless you enabled AVX2
  // in your build, in which case the latter would be used instead. For ARM64
  // builds, this would also automatically enable NEON vectorization.
  #define XXH_INLINE_ALL
  #include "xxhash.h"

  #include "hbxxh.h"
  // Size, in bytes, of the local buffer that gets filled by each read
  // operation while hashing the contents of a file or of a stream.
  constexpr size_t BLOCK_LEN { 4096 };
  45. ///////////////////////////////////////////////////////////////////////////////
  46. // HBXXH64 class
  47. ///////////////////////////////////////////////////////////////////////////////
  48. //static
  49. U64 HBXXH64::digest(const void* buffer, size_t len)
  50. {
  51. return XXH3_64bits(buffer, len);
  52. }
  53. //static
  54. U64 HBXXH64::digest(const char* str)
  55. {
  56. return XXH3_64bits((const void*)str, strlen(str));
  57. }
  58. //static
  59. U64 HBXXH64::digest(const std::string& str)
  60. {
  61. return XXH3_64bits((const void*)str.c_str(), str.size());
  62. }
  63. // Must be called by all constructors.
  64. void HBXXH64::init()
  65. {
  66. mDigest = 0;
  67. mState = (void*)XXH3_createState();
  68. if (!mState || XXH3_64bits_reset((XXH3_state_t*)mState) != XXH_OK)
  69. {
  70. llwarns << "Failed to initialize state !" << llendl;
  71. }
  72. }
  73. HBXXH64::~HBXXH64()
  74. {
  75. if (mState)
  76. {
  77. XXH3_freeState((XXH3_state_t*)mState);
  78. }
  79. }
  80. void HBXXH64::update(const void* buffer, size_t len)
  81. {
  82. if (mState)
  83. {
  84. XXH3_64bits_update((XXH3_state_t*)mState, buffer, len);
  85. }
  86. else
  87. {
  88. llwarns << "Cannot update a finalized digest !" << llendl;
  89. }
  90. }
  91. void HBXXH64::update(const std::string& str)
  92. {
  93. if (mState)
  94. {
  95. XXH3_64bits_update((XXH3_state_t*)mState, (const void*)str.c_str(),
  96. str.length());
  97. }
  98. else
  99. {
  100. llwarns << "Cannot update a finalized digest !" << llendl;
  101. }
  102. }
  103. void HBXXH64::update(std::istream& stream)
  104. {
  105. if (!mState)
  106. {
  107. llwarns << "Cannot update a finalized digest !" << llendl;
  108. return;
  109. }
  110. char buffer[BLOCK_LEN];
  111. size_t len;
  112. while (stream.good())
  113. {
  114. stream.read(buffer, BLOCK_LEN);
  115. len = stream.gcount();
  116. XXH3_64bits_update((XXH3_state_t*)mState, (const void*)buffer, len);
  117. }
  118. }
  119. void HBXXH64::update(FILE* file)
  120. {
  121. if (!mState)
  122. {
  123. llwarns << "Cannot update a finalized digest !" << llendl;
  124. return;
  125. }
  126. char buffer[BLOCK_LEN];
  127. size_t len;
  128. while ((len = fread((void*)buffer, 1, BLOCK_LEN, file)))
  129. {
  130. XXH3_64bits_update((XXH3_state_t*)mState, (const void*)buffer, len);
  131. }
  132. fclose(file);
  133. }
  134. void HBXXH64::finalize()
  135. {
  136. if (!mState)
  137. {
  138. llwarns << "Already finalized !" << llendl;
  139. return;
  140. }
  141. mDigest = XXH3_64bits_digest((XXH3_state_t*)mState);
  142. XXH3_freeState((XXH3_state_t*)mState);
  143. mState = NULL;
  144. }
  145. U64 HBXXH64::digest() const
  146. {
  147. return mState ? XXH3_64bits_digest((XXH3_state_t*)mState) : mDigest;
  148. }
  149. std::ostream& operator<<(std::ostream& stream, HBXXH64 context)
  150. {
  151. stream << context.digest();
  152. return stream;
  153. }
  154. ///////////////////////////////////////////////////////////////////////////////
  155. // HBXXH128 class
  156. ///////////////////////////////////////////////////////////////////////////////
  157. //static
  158. LLUUID HBXXH128::digest(const void* buffer, size_t len)
  159. {
  160. XXH128_hash_t hash = XXH3_128bits(buffer, len);
  161. LLUUID id;
  162. U64* data = (U64*)id.mData;
  163. // Note: we do not check endianness here and we just store in the same
  164. // order as XXH128_hash_t, that is low word "first".
  165. data[0] = hash.low64;
  166. data[1] = hash.high64;
  167. return id;
  168. }
  169. //static
  170. LLUUID HBXXH128::digest(const char* str)
  171. {
  172. XXH128_hash_t hash = XXH3_128bits((const void*)str, strlen(str));
  173. LLUUID id;
  174. U64* data = (U64*)id.mData;
  175. // Note: we do not check endianness here and we just store in the same
  176. // order as XXH128_hash_t, that is low word "first".
  177. data[0] = hash.low64;
  178. data[1] = hash.high64;
  179. return id;
  180. }
  181. //static
  182. LLUUID HBXXH128::digest(const std::string& str)
  183. {
  184. XXH128_hash_t hash = XXH3_128bits((const void*)str.c_str(), str.size());
  185. LLUUID id;
  186. U64* data = (U64*)id.mData;
  187. // Note: we do not check endianness here and we just store in the same
  188. // order as XXH128_hash_t, that is low word "first".
  189. data[0] = hash.low64;
  190. data[1] = hash.high64;
  191. return id;
  192. }
  193. //static
  194. void HBXXH128::digest(LLUUID& result, const void* buffer, size_t len)
  195. {
  196. XXH128_hash_t hash = XXH3_128bits(buffer, len);
  197. U64* data = (U64*)result.mData;
  198. // Note: we do not check endianness here and we just store in the same
  199. // order as XXH128_hash_t, that is low word "first".
  200. data[0] = hash.low64;
  201. data[1] = hash.high64;
  202. }
  203. //static
  204. void HBXXH128::digest(LLUUID& result, const char* str)
  205. {
  206. XXH128_hash_t hash = XXH3_128bits((const void*)str, strlen(str));
  207. U64* data = (U64*)result.mData;
  208. // Note: we do not check endianness here and we just store in the same
  209. // order as XXH128_hash_t, that is low word "first".
  210. data[0] = hash.low64;
  211. data[1] = hash.high64;
  212. }
  213. //static
  214. void HBXXH128::digest(LLUUID& result, const std::string& str)
  215. {
  216. XXH128_hash_t hash = XXH3_128bits((const void*)str.c_str(), str.size());
  217. U64* data = (U64*)result.mData;
  218. // Note: we do not check endianness here and we just store in the same
  219. // order as XXH128_hash_t, that is low word "first".
  220. data[0] = hash.low64;
  221. data[1] = hash.high64;
  222. }
  223. // Must be called by all constructors.
  224. void HBXXH128::init()
  225. {
  226. mState = (void*)XXH3_createState();
  227. if (!mState || XXH3_128bits_reset((XXH3_state_t*)mState) != XXH_OK)
  228. {
  229. llwarns << "Failed to initialize state !" << llendl;
  230. }
  231. }
  232. HBXXH128::~HBXXH128()
  233. {
  234. if (mState)
  235. {
  236. XXH3_freeState((XXH3_state_t*)mState);
  237. }
  238. }
  239. void HBXXH128::update(const void* buffer, size_t len)
  240. {
  241. if (mState)
  242. {
  243. XXH3_128bits_update((XXH3_state_t*)mState, buffer, len);
  244. }
  245. else
  246. {
  247. llwarns << "Cannot update a finalized digest !" << llendl;
  248. }
  249. }
  250. void HBXXH128::update(const std::string& str)
  251. {
  252. if (mState)
  253. {
  254. XXH3_128bits_update((XXH3_state_t*)mState, (const void*)str.c_str(),
  255. str.length());
  256. }
  257. else
  258. {
  259. llwarns << "Cannot update a finalized digest !" << llendl;
  260. }
  261. }
  262. void HBXXH128::update(std::istream& stream)
  263. {
  264. if (!mState)
  265. {
  266. llwarns << "Cannot update a finalized digest !" << llendl;
  267. return;
  268. }
  269. char buffer[BLOCK_LEN];
  270. size_t len;
  271. while (stream.good())
  272. {
  273. stream.read(buffer, BLOCK_LEN);
  274. len = stream.gcount();
  275. XXH3_128bits_update((XXH3_state_t*)mState, (const void*)buffer, len);
  276. }
  277. }
  278. void HBXXH128::update(FILE* file)
  279. {
  280. if (!mState)
  281. {
  282. llwarns << "Cannot update a finalized digest !" << llendl;
  283. return;
  284. }
  285. char buffer[BLOCK_LEN];
  286. size_t len;
  287. while ((len = fread((void*)buffer, 1, BLOCK_LEN, file)))
  288. {
  289. XXH3_128bits_update((XXH3_state_t*)mState, (const void*)buffer, len);
  290. }
  291. fclose(file);
  292. }
  293. void HBXXH128::finalize()
  294. {
  295. if (!mState)
  296. {
  297. llwarns << "Already finalized !" << llendl;
  298. return;
  299. }
  300. XXH128_hash_t hash = XXH3_128bits_digest((XXH3_state_t*)mState);
  301. U64* data = (U64*)mDigest.mData;
  302. // Note: we do not check endianness here and we just store in the same
  303. // order as XXH128_hash_t, that is low word "first".
  304. data[0] = hash.low64;
  305. data[1] = hash.high64;
  306. XXH3_freeState((XXH3_state_t*)mState);
  307. mState = NULL;
  308. }
  309. const LLUUID& HBXXH128::digest() const
  310. {
  311. if (mState)
  312. {
  313. XXH128_hash_t hash = XXH3_128bits_digest((XXH3_state_t*)mState);
  314. // We cheat the const-ness of the method here, but this is OK, since
  315. // mDigest is private and cannot be accessed indirectly by other
  316. // methods than digest() ones, that do check for mState to decide
  317. // whether mDigest's current value may be provided as is or not. This
  318. // cheat saves us a temporary LLLUID copy.
  319. U64* data = (U64*)mDigest.mData;
  320. // Note: we do not check endianness here and we just store in the same
  321. // order as XXH128_hash_t, that is low word "first".
  322. data[0] = hash.low64;
  323. data[1] = hash.high64;
  324. }
  325. return mDigest;
  326. }
  327. void HBXXH128::digest(LLUUID& result) const
  328. {
  329. if (!mState)
  330. {
  331. result = mDigest;
  332. return;
  333. }
  334. XXH128_hash_t hash = XXH3_128bits_digest((XXH3_state_t*)mState);
  335. U64* data = (U64*)result.mData;
  336. // Note: we do not check endianness here and we just store in the same
  337. // order as XXH128_hash_t, that is low word "first".
  338. data[0] = hash.low64;
  339. data[1] = hash.high64;
  340. }
  341. std::ostream& operator<<(std::ostream& stream, HBXXH128 context)
  342. {
  343. stream << context.digest();
  344. return stream;
  345. }