mikk_util.hh 4.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. /* SPDX-FileCopyrightText: 2022-2023 Blender Authors
  2. *
  3. * SPDX-License-Identifier: Apache-2.0 */
  4. /** \file
  5. * \ingroup mikktspace
  6. */
  7. #pragma once
  8. #include <cassert>
  9. #include <cmath>
  10. #ifndef M_PI_F
  11. # define M_PI_F (3.1415926535897932f) /* pi */
  12. #endif
  13. namespace mikk {
  14. inline bool not_zero(const float fX)
  15. {
  16. return fabsf(fX) > FLT_MIN;
  17. }
  18. /* Helpers for (un)packing a 2-bit vertex index and a 30-bit face index to one integer. */
  19. static uint pack_index(const uint face, const uint vert)
  20. {
  21. assert((vert & 0x3) == vert);
  22. return (face << 2) | (vert & 0x3);
  23. }
  24. static void unpack_index(uint &face, uint &vert, const uint indexIn)
  25. {
  26. vert = indexIn & 0x3;
  27. face = indexIn >> 2;
  28. }
  29. /* From intern/cycles/util/math_fast.h */
  30. inline float fast_acosf(float x)
  31. {
  32. const float f = fabsf(x);
  33. /* clamp and crush denormals. */
  34. const float m = (f < 1.0f) ? 1.0f - (1.0f - f) : 1.0f;
  35. /* Based on http://www.pouet.net/topic.php?which=9132&page=2
  36. * 85% accurate (ULP 0)
  37. * Examined 2130706434 values of acos:
  38. * 15.2000597 avg ULP diff, 4492 max ULP, 4.51803e-05 max error // without "denormal crush"
  39. * Examined 2130706434 values of acos:
  40. * 15.2007108 avg ULP diff, 4492 max ULP, 4.51803e-05 max error // with "denormal crush"
  41. */
  42. const float a = sqrtf(1.0f - m) *
  43. (1.5707963267f + m * (-0.213300989f + m * (0.077980478f + m * -0.02164095f)));
  44. return x < 0 ? M_PI_F - a : a;
  45. }
  46. static uint rotl(uint x, uint k)
  47. {
  48. return (x << k) | (x >> (32 - k));
  49. }
  50. static uint hash_uint3(uint kx, uint ky, uint kz)
  51. {
  52. uint a, b, c;
  53. a = b = c = 0xdeadbeef + (2 << 2) + 13;
  54. c += kz;
  55. b += ky;
  56. a += kx;
  57. c = (c ^ b) - rotl(b, 14);
  58. a = (a ^ c) - rotl(c, 11);
  59. b = (b ^ a) - rotl(a, 25);
  60. c = (c ^ b) - rotl(b, 16);
  61. return c;
  62. }
  63. static uint hash_uint3_fast(const uint x, const uint y, const uint z)
  64. {
  65. return (x * 73856093) ^ (y * 19349663) ^ (z * 83492791);
  66. }
  67. static uint float_as_uint(const float v)
  68. {
  69. return *((uint *)(&v));
  70. }
  71. static float uint_as_float(const uint v)
  72. {
  73. return *((float *)(&v));
  74. }
  75. static uint hash_float3_fast(const float x, const float y, const float z)
  76. {
  77. return hash_uint3_fast(float_as_uint(x), float_as_uint(y), float_as_uint(z));
  78. }
  79. static uint hash_float3x3(const float3 &x, const float3 &y, const float3 &z)
  80. {
  81. return hash_uint3(hash_float3_fast(x.x, x.y, x.z),
  82. hash_float3_fast(y.x, y.y, y.z),
  83. hash_float3_fast(z.x, z.y, z.z));
  84. }
  85. template<typename T, typename KeyGetter>
  86. void radixsort(std::vector<T> &data, std::vector<T> &data2, KeyGetter getKey)
  87. {
  88. typedef decltype(getKey(data[0])) key_t;
  89. constexpr size_t datasize = sizeof(key_t);
  90. static_assert(datasize % 2 == 0);
  91. static_assert(std::is_integral<key_t>::value);
  92. uint bins[datasize][257] = {{0}};
  93. /* Count number of elements per bin. */
  94. for (const T &item : data) {
  95. key_t key = getKey(item);
  96. for (uint pass = 0; pass < datasize; pass++)
  97. bins[pass][((key >> (8 * pass)) & 0xff) + 1]++;
  98. }
  99. /* Compute prefix sum to find position of each bin in the sorted array. */
  100. for (uint pass = 0; pass < datasize; pass++) {
  101. for (uint i = 2; i < 256; i++) {
  102. bins[pass][i] += bins[pass][i - 1];
  103. }
  104. }
  105. int shift = 0;
  106. for (uint pass = 0; pass < datasize; pass++, shift += 8) {
  107. /* Insert the elements in their correct location based on their bin. */
  108. for (const T &item : data) {
  109. uint pos = bins[pass][(getKey(item) >> shift) & 0xff]++;
  110. data2[pos] = item;
  111. }
  112. /* Swap arrays. */
  113. std::swap(data, data2);
  114. }
  115. }
  116. static void float_add_atomic(float *val, float add)
  117. {
  118. /* Hacky, but atomic floats are only supported from C++20 onward.
  119. * This works in practice since `std::atomic<uint32_t>` is really just an `uint32_t` in memory,
  120. * so this cast lets us do a 32-bit CAS operation (which is used to build the atomic float
  121. * operation) without needing any external libraries or compiler-specific builtins. */
  122. std::atomic<uint32_t> *atomic_val = reinterpret_cast<std::atomic<uint32_t> *>(val);
  123. for (;;) {
  124. uint32_t old_v = atomic_val->load();
  125. uint32_t new_v = float_as_uint(uint_as_float(old_v) + add);
  126. if (atomic_val->compare_exchange_weak(old_v, new_v)) {
  127. return;
  128. }
  129. }
  130. }
  131. } // namespace mikk