123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317 |
- /**
- * @file hbxxh.h
- * @brief High performances vectorized hashing based on xxHash.
- *
- * $LicenseInfo:firstyear=2023&license=viewergpl$
- *
- * Copyright (c) 2023, Henri Beauchamp.
- *
- * Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab. Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
- *
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
- *
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
- *
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
- * $/LicenseInfo$
- */
- #ifndef LL_HBXXH_H
- #define LL_HBXXH_H
- #include "lluuid.h"
// HBXXH* classes are to be used where speed matters and cryptographic quality
// is not required (no "one-way" guarantee, though they are likely not worse in
// this respect than MD5, which got busted and is now considered too weak). The
// xxHash code they are built upon is vectorized and about 50 times faster than
// MD5. A 64 bits hash class is also provided for when 128 bits of entropy are
// not needed. The hash collision rate is similar to MD5's.
// See https://github.com/Cyan4973/xxHash#readme for details.
- // 64 bits hashing class
- class HBXXH64
- {
- friend std::ostream& operator<<(std::ostream&, HBXXH64);
- protected:
- LOG_CLASS(HBXXH64);
- public:
- LL_INLINE HBXXH64() { init(); }
- // Constructors for special circumstances; they all digest the first passed
- // parameter. Set 'do_finalize' to false if you do not want to finalize the
- // context, which is useful/needed when you want to update() it afterwards.
- // Ideally, the compiler should be smart enough to get our clue and
- // optimize out the const bool test during inlining...
- LL_INLINE HBXXH64(const void* buffer, size_t len,
- const bool do_finalize = true)
- {
- init();
- update(buffer, len);
- if (do_finalize)
- {
- finalize();
- }
- }
- LL_INLINE HBXXH64(const std::string& str, const bool do_finalize = true)
- {
- init();
- update(str);
- if (do_finalize)
- {
- finalize();
- }
- }
- LL_INLINE HBXXH64(std::istream& s, const bool do_finalize = true)
- {
- init();
- update(s);
- if (do_finalize)
- {
- finalize();
- }
- }
- LL_INLINE HBXXH64(FILE* file, const bool do_finalize = true)
- {
- init();
- update(file);
- if (do_finalize)
- {
- finalize();
- }
- }
- // Make this class no-copy (it would be possible, with custom copy
- // operators, but it is not trivially copyable, because of the mState
- // pointer): it does not really make sense to allow copying it anyway,
- // since all we care about is the resulting digest (so you should only
- // need and care about storing/copying the digest and not a class
- // instance).
- HBXXH64(const HBXXH64&) noexcept = delete;
- HBXXH64& operator=(const HBXXH64&) noexcept = delete;
- ~HBXXH64();
- void update(const void* buffer, size_t len);
- void update(const std::string& str);
- void update(std::istream& s);
- void update(FILE* file);
- // Convenience template to hash other types.
- // IMPORTANT: do only use for types represented in memory as a *continuous*
- // block making up the value. E.g. LLUUIDs, U32, F64, etc... NOT to be used
- // for containers such as std::map, std::set, etc... For structures,
- // classes etc, be wary of padding bytes between values and any trailing
- // padding bytes (accounted for in sizeof(T)): these *must* have been
- // zeroed on construction, or the hash will be random) !
- template<typename T>
- LL_INLINE void update(const T& value)
- {
- update((const void*)value, sizeof(T));
- }
- // Note that unlike what happens with LLMD5, you do not need to finalize()
- // HBXXH64 before using digest(), and you may keep updating() it even after
- // you got a first digest() (the next digest would of course change after
- // any update). It is still useful to use finalize() when you do not want
- // to store a final digest() result in a separate U64; after this method
- // has been called, digest() simply returns mDigest value.
- void finalize();
- U64 digest() const;
- // Fast static methods. Use them when hashing just one contiguous block of
- // data.
- static U64 digest(const void* buffer, size_t len);
- static U64 digest(const char* str); // str must be NUL-terminated
- static U64 digest(const std::string& str);
- private:
- void init();
- private:
- // We use a void pointer to avoid including xxhash.h here for XXH3_state_t
- // (which cannot either be trivially forward-declared, due to complex API
- // related pre-processor macros in xxhash.h).
- void* mState;
- U64 mDigest;
- };
- LL_INLINE bool operator==(const HBXXH64& a, const HBXXH64& b)
- {
- return a.digest() == b.digest();
- }
- LL_INLINE bool operator!=(const HBXXH64& a, const HBXXH64& b)
- {
- return a.digest() != b.digest();
- }
- // 128 bits hashing class
- class HBXXH128
- {
- friend std::ostream& operator<<(std::ostream&, HBXXH128);
- protected:
- LOG_CLASS(HBXXH128);
- public:
- LL_INLINE HBXXH128() { init(); }
- // Constructors for special circumstances; they all digest the first passed
- // parameter. Set 'do_finalize' to false if you do not want to finalize the
- // context, which is useful/needed when you want to update() it afterwards.
- // Ideally, the compiler should be smart enough to get our clue and
- // optimize out the const bool test during inlining...
- LL_INLINE HBXXH128(const void* buffer, size_t len,
- const bool do_finalize = true)
- {
- init();
- update(buffer, len);
- if (do_finalize)
- {
- finalize();
- }
- }
- LL_INLINE HBXXH128(const std::string& str, const bool do_finalize = true)
- {
- init();
- update(str);
- if (do_finalize)
- {
- finalize();
- }
- }
- LL_INLINE HBXXH128(std::istream& s, const bool do_finalize = true)
- {
- init();
- update(s);
- if (do_finalize)
- {
- finalize();
- }
- }
- LL_INLINE HBXXH128(FILE* file, const bool do_finalize = true)
- {
- init();
- update(file);
- if (do_finalize)
- {
- finalize();
- }
- }
- // Make this class no-copy (it would be possible, with custom copy
- // operators, but it is not trivially copyable, because of the mState
- // pointer): it does not really make sense to allow copying it anyway,
- // since all we care about is the resulting digest (so you should only
- // need and care about storing/copying the digest and not a class
- // instance).
- HBXXH128(const HBXXH128&) noexcept = delete;
- HBXXH128& operator=(const HBXXH128&) noexcept = delete;
- ~HBXXH128();
- void update(const void* buffer, size_t len);
- void update(const std::string& str);
- void update(std::istream& s);
- void update(FILE* file);
- // Convenience template to hash other types.
- // IMPORTANT: do only use for types represented in memory as a *continuous*
- // block making up the value. E.g. LLUUIDs, U32, F64, etc... NOT to be used
- // for containers such as std::map, std::set, etc... For structures,
- // classes etc, be wary of padding bytes between values and any trailing
- // padding bytes (accounted for in sizeof(T)): these *must* have been
- // zeroed on construction, or the hash will be random) !
- template<typename T>
- LL_INLINE void update(const T& value)
- {
- update((const void*)value, sizeof(T));
- }
- // Note that unlike what happens with LLMD5, you do not need to finalize()
- // HBXXH128 before using digest(), and you may keep updating() it even
- // after you got a first digest() (the next digest would of course change
- // after any update). It is still useful to use finalize() when you do not
- // want to store a final digest() result in a separate LLUUID; after this
- // method has been called, digest() simply returns a reference on mDigest.
- void finalize();
- // We use an LLUUID for the digest, since this is a 128 bits wide native
- // type available in the viewer code, making it easy to manipulate. It also
- // allows to use HBXXH128 digests efficiently as keys for std, boost or
- // phmap containers, since I already provided a very efficient hash_value()
- // function override for LLUUID (a simple XOR of the two 64 bits words).
- const LLUUID& digest() const;
- // Here, we avoid an LLUUID copy whenever we already got one to store the
- // result *and* we did not yet call finalize().
- void digest(LLUUID& result) const;
- // Fast static methods. Use them when hashing just one contiguous block of
- // data.
- static LLUUID digest(const void* buffer, size_t len);
- static LLUUID digest(const char* str); // str must be NUL-terminated
- static LLUUID digest(const std::string& str);
- // Same as above, but saves you from an LLUUID copy when you already got
- // one for storage use.
- static void digest(LLUUID& result, const void* buffer, size_t len);
- static void digest(LLUUID& result, const char* str); // str NUL-terminated
- static void digest(LLUUID& result, const std::string& str);
- private:
- void init();
- private:
- // We use a void pointer to avoid including xxhash.h here for XXH3_state_t
- // (which cannot either be trivially forward-declared, due to complex API
- // related pre-processor macros in xxhash.h).
- void* mState;
- LLUUID mDigest;
- };
- LL_INLINE bool operator==(const HBXXH128& a, const HBXXH128& b)
- {
- return a.digest() == b.digest();
- }
- LL_INLINE bool operator!=(const HBXXH128& a, const HBXXH128& b)
- {
- return a.digest() != b.digest();
- }
- // Utility function to reduce the size of a 64 bits digest to 32 bits while
- // preserving as much entropy as possible. HB
- LL_INLINE U32 digest64to32(U64 digest64)
- {
- return U32(digest64) ^ U32(digest64 >> 32);
- }
- #endif // LL_HBXXH_H
|