digit_comparison.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442
  1. // Copyright 2020-2023 Daniel Lemire
  2. // Copyright 2023 Matt Borland
  3. // Distributed under the Boost Software License, Version 1.0.
  4. // https://www.boost.org/LICENSE_1_0.txt
  5. //
  6. // Derivative of: https://github.com/fastfloat/fast_float
  7. #ifndef BOOST_CHARCONV_DETAIL_FASTFLOAT_DIGIT_COMPARISON_HPP
  8. #define BOOST_CHARCONV_DETAIL_FASTFLOAT_DIGIT_COMPARISON_HPP
  9. #include <boost/charconv/detail/fast_float/float_common.hpp>
  10. #include <boost/charconv/detail/fast_float/bigint.hpp>
  11. #include <boost/charconv/detail/fast_float/ascii_number.hpp>
  12. #include <algorithm>
  13. #include <cstdint>
  14. #include <cstring>
  15. #include <iterator>
  16. namespace boost { namespace charconv { namespace detail { namespace fast_float {
  17. // 1e0 to 1e19
  18. constexpr static uint64_t powers_of_ten_uint64[] = {
  19. 1UL, 10UL, 100UL, 1000UL, 10000UL, 100000UL, 1000000UL, 10000000UL, 100000000UL,
  20. 1000000000UL, 10000000000UL, 100000000000UL, 1000000000000UL, 10000000000000UL,
  21. 100000000000000UL, 1000000000000000UL, 10000000000000000UL, 100000000000000000UL,
  22. 1000000000000000000UL, 10000000000000000000UL};
  23. // calculate the exponent, in scientific notation, of the number.
  24. // this algorithm is not even close to optimized, but it has no practical
  25. // effect on performance: in order to have a faster algorithm, we'd need
  26. // to slow down performance for faster algorithms, and this is still fast.
  27. template <typename UC>
  28. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
  29. int32_t scientific_exponent(parsed_number_string_t<UC> & num) noexcept {
  30. uint64_t mantissa = num.mantissa;
  31. int32_t exponent = int32_t(num.exponent);
  32. while (mantissa >= 10000) {
  33. mantissa /= 10000;
  34. exponent += 4;
  35. }
  36. while (mantissa >= 100) {
  37. mantissa /= 100;
  38. exponent += 2;
  39. }
  40. while (mantissa >= 10) {
  41. mantissa /= 10;
  42. exponent += 1;
  43. }
  44. return exponent;
  45. }
  46. // this converts a native floating-point number to an extended-precision float.
  47. template <typename T>
  48. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  49. adjusted_mantissa to_extended(T value) noexcept {
  50. using equiv_uint = typename binary_format<T>::equiv_uint;
  51. constexpr equiv_uint exponent_mask = binary_format<T>::exponent_mask();
  52. constexpr equiv_uint mantissa_mask = binary_format<T>::mantissa_mask();
  53. constexpr equiv_uint hidden_bit_mask = binary_format<T>::hidden_bit_mask();
  54. adjusted_mantissa am;
  55. int32_t bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
  56. equiv_uint bits;
  57. #if BOOST_CHARCONV_FASTFLOAT_HAS_BIT_CAST
  58. bits = std::bit_cast<equiv_uint>(value);
  59. #else
  60. ::memcpy(&bits, &value, sizeof(T));
  61. #endif
  62. if ((bits & exponent_mask) == 0) {
  63. // denormal
  64. am.power2 = 1 - bias;
  65. am.mantissa = bits & mantissa_mask;
  66. } else {
  67. // normal
  68. am.power2 = int32_t((bits & exponent_mask) >> binary_format<T>::mantissa_explicit_bits());
  69. am.power2 -= bias;
  70. am.mantissa = (bits & mantissa_mask) | hidden_bit_mask;
  71. }
  72. return am;
  73. }
  74. // get the extended precision value of the halfway point between b and b+u.
  75. // we are given a native float that represents b, so we need to adjust it
  76. // halfway between b and b+u.
  77. template <typename T>
  78. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  79. adjusted_mantissa to_extended_halfway(T value) noexcept {
  80. adjusted_mantissa am = to_extended(value);
  81. am.mantissa <<= 1;
  82. am.mantissa += 1;
  83. am.power2 -= 1;
  84. return am;
  85. }
  86. // round an extended-precision float to the nearest machine float.
  87. template <typename T, typename callback>
  88. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
  89. void round(adjusted_mantissa& am, callback cb) noexcept {
  90. int32_t mantissa_shift = 64 - binary_format<T>::mantissa_explicit_bits() - 1;
  91. if (-am.power2 >= mantissa_shift) {
  92. // have a denormal float
  93. int32_t shift = -am.power2 + 1;
  94. cb(am, std::min<int32_t>(shift, 64));
  95. // check for round-up: if rounding-nearest carried us to the hidden bit.
  96. am.power2 = (am.mantissa < (uint64_t(1) << binary_format<T>::mantissa_explicit_bits())) ? 0 : 1;
  97. return;
  98. }
  99. // have a normal float, use the default shift.
  100. cb(am, mantissa_shift);
  101. // check for carry
  102. if (am.mantissa >= (uint64_t(2) << binary_format<T>::mantissa_explicit_bits())) {
  103. am.mantissa = (uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
  104. am.power2++;
  105. }
  106. // check for infinite: we could have carried to an infinite power
  107. am.mantissa &= ~(uint64_t(1) << binary_format<T>::mantissa_explicit_bits());
  108. if (am.power2 >= binary_format<T>::infinite_power()) {
  109. am.power2 = binary_format<T>::infinite_power();
  110. am.mantissa = 0;
  111. }
  112. }
  113. template <typename callback>
  114. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
  115. void round_nearest_tie_even(adjusted_mantissa& am, int32_t shift, callback cb) noexcept {
  116. const uint64_t mask
  117. = (shift == 64)
  118. ? UINT64_MAX
  119. : (uint64_t(1) << shift) - 1;
  120. const uint64_t halfway
  121. = (shift == 0)
  122. ? 0
  123. : uint64_t(1) << (shift - 1);
  124. uint64_t truncated_bits = am.mantissa & mask;
  125. bool is_above = truncated_bits > halfway;
  126. bool is_halfway = truncated_bits == halfway;
  127. // shift digits into position
  128. if (shift == 64) {
  129. am.mantissa = 0;
  130. } else {
  131. am.mantissa >>= shift;
  132. }
  133. am.power2 += shift;
  134. bool is_odd = (am.mantissa & 1) == 1;
  135. am.mantissa += uint64_t(cb(is_odd, is_halfway, is_above));
  136. }
  137. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
  138. void round_down(adjusted_mantissa& am, int32_t shift) noexcept {
  139. if (shift == 64) {
  140. am.mantissa = 0;
  141. } else {
  142. am.mantissa >>= shift;
  143. }
  144. am.power2 += shift;
  145. }
  146. template <typename UC>
  147. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  148. void skip_zeros(UC const * & first, UC const * last) noexcept {
  149. uint64_t val;
  150. while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
  151. ::memcpy(&val, first, sizeof(uint64_t));
  152. if (val != int_cmp_zeros<UC>()) {
  153. break;
  154. }
  155. first += int_cmp_len<UC>();
  156. }
  157. while (first != last) {
  158. if (*first != UC('0')) {
  159. break;
  160. }
  161. first++;
  162. }
  163. }
  164. // determine if any non-zero digits were truncated.
  165. // all characters must be valid digits.
  166. template <typename UC>
  167. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  168. bool is_truncated(UC const * first, UC const * last) noexcept {
  169. // do 8-bit optimizations, can just compare to 8 literal 0s.
  170. uint64_t val;
  171. while (!cpp20_and_in_constexpr() && std::distance(first, last) >= int_cmp_len<UC>()) {
  172. ::memcpy(&val, first, sizeof(uint64_t));
  173. if (val != int_cmp_zeros<UC>()) {
  174. return true;
  175. }
  176. first += int_cmp_len<UC>();
  177. }
  178. while (first != last) {
  179. if (*first != UC('0')) {
  180. return true;
  181. }
  182. ++first;
  183. }
  184. return false;
  185. }
  186. template <typename UC>
  187. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  188. bool is_truncated(span<const UC> s) noexcept {
  189. return is_truncated(s.ptr, s.ptr + s.len());
  190. }
  191. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  192. void parse_eight_digits(const char16_t*& , limb& , size_t& , size_t& ) noexcept {
  193. // currently unused
  194. }
  195. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  196. void parse_eight_digits(const char32_t*& , limb& , size_t& , size_t& ) noexcept {
  197. // currently unused
  198. }
  199. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  200. void parse_eight_digits(const char*& p, limb& value, size_t& counter, size_t& count) noexcept {
  201. value = value * 100000000 + parse_eight_digits_unrolled(p);
  202. p += 8;
  203. counter += 8;
  204. count += 8;
  205. }
  206. template <typename UC>
  207. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR14
  208. void parse_one_digit(UC const *& p, limb& value, size_t& counter, size_t& count) noexcept {
  209. value = value * 10 + limb(*p - UC('0'));
  210. p++;
  211. counter++;
  212. count++;
  213. }
  214. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  215. void add_native(bigint& big, limb power, limb value) noexcept {
  216. big.mul(power);
  217. big.add(value);
  218. }
  219. BOOST_FORCEINLINE BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  220. void round_up_bigint(bigint& big, size_t& count) noexcept {
  221. // need to round-up the digits, but need to avoid rounding
  222. // ....9999 to ...10000, which could cause a false halfway point.
  223. add_native(big, 10, 1);
  224. count++;
  225. }
  226. // parse the significant digits into a big integer
  227. template <typename UC>
  228. inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  229. void parse_mantissa(bigint& result, parsed_number_string_t<UC>& num, size_t max_digits, size_t& digits) noexcept {
  230. // try to minimize the number of big integer and scalar multiplication.
  231. // therefore, try to parse 8 digits at a time, and multiply by the largest
  232. // scalar value (9 or 19 digits) for each step.
  233. size_t counter = 0;
  234. digits = 0;
  235. limb value = 0;
  236. #ifdef BOOST_CHARCONV_FASTFLOAT_64BIT_LIMB
  237. constexpr size_t step = 19;
  238. #else
  239. constexpr size_t step = 9;
  240. #endif
  241. // process all integer digits.
  242. UC const * p = num.integer.ptr;
  243. UC const * pend = p + num.integer.len();
  244. skip_zeros(p, pend);
  245. // process all digits, in increments of step per loop
  246. while (p != pend) {
  247. if (std::is_same<UC,char>::value) {
  248. while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
  249. parse_eight_digits(p, value, counter, digits);
  250. }
  251. }
  252. while (counter < step && p != pend && digits < max_digits) {
  253. parse_one_digit(p, value, counter, digits);
  254. }
  255. if (digits == max_digits) {
  256. // add the temporary value, then check if we've truncated any digits
  257. add_native(result, limb(powers_of_ten_uint64[counter]), value);
  258. bool truncated = is_truncated(p, pend);
  259. if (num.fraction.ptr != nullptr) {
  260. truncated |= is_truncated(num.fraction);
  261. }
  262. if (truncated) {
  263. round_up_bigint(result, digits);
  264. }
  265. return;
  266. } else {
  267. add_native(result, limb(powers_of_ten_uint64[counter]), value);
  268. counter = 0;
  269. value = 0;
  270. }
  271. }
  272. // add our fraction digits, if they're available.
  273. if (num.fraction.ptr != nullptr) {
  274. p = num.fraction.ptr;
  275. pend = p + num.fraction.len();
  276. if (digits == 0) {
  277. skip_zeros(p, pend);
  278. }
  279. // process all digits, in increments of step per loop
  280. while (p != pend) {
  281. if (std::is_same<UC,char>::value) {
  282. while ((std::distance(p, pend) >= 8) && (step - counter >= 8) && (max_digits - digits >= 8)) {
  283. parse_eight_digits(p, value, counter, digits);
  284. }
  285. }
  286. while (counter < step && p != pend && digits < max_digits) {
  287. parse_one_digit(p, value, counter, digits);
  288. }
  289. if (digits == max_digits) {
  290. // add the temporary value, then check if we've truncated any digits
  291. add_native(result, limb(powers_of_ten_uint64[counter]), value);
  292. bool truncated = is_truncated(p, pend);
  293. if (truncated) {
  294. round_up_bigint(result, digits);
  295. }
  296. return;
  297. } else {
  298. add_native(result, limb(powers_of_ten_uint64[counter]), value);
  299. counter = 0;
  300. value = 0;
  301. }
  302. }
  303. }
  304. if (counter != 0) {
  305. add_native(result, limb(powers_of_ten_uint64[counter]), value);
  306. }
  307. }
  308. template <typename T>
  309. inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  310. adjusted_mantissa positive_digit_comp(bigint& bigmant, int32_t exponent) noexcept {
  311. BOOST_CHARCONV_FASTFLOAT_ASSERT(bigmant.pow10(uint32_t(exponent)));
  312. adjusted_mantissa answer;
  313. bool truncated;
  314. answer.mantissa = bigmant.hi64(truncated);
  315. int bias = binary_format<T>::mantissa_explicit_bits() - binary_format<T>::minimum_exponent();
  316. answer.power2 = bigmant.bit_length() - 64 + bias;
  317. round<T>(answer, [truncated](adjusted_mantissa& a, int32_t shift) {
  318. round_nearest_tie_even(a, shift, [truncated](bool is_odd, bool is_halfway, bool is_above) -> bool {
  319. return is_above || (is_halfway && truncated) || (is_odd && is_halfway);
  320. });
  321. });
  322. return answer;
  323. }
  324. // the scaling here is quite simple: we have, for the real digits `m * 10^e`,
  325. // and for the theoretical digits `n * 2^f`. Since `e` is always negative,
  326. // to scale them identically, we do `n * 2^f * 5^-f`, so we now have `m * 2^e`.
  327. // we then need to scale by `2^(f- e)`, and then the two significant digits
  328. // are of the same magnitude.
  329. template <typename T>
  330. inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  331. adjusted_mantissa negative_digit_comp(bigint& bigmant, adjusted_mantissa am, int32_t exponent) noexcept {
  332. bigint& real_digits = bigmant;
  333. int32_t real_exp = exponent;
  334. // get the value of `b`, rounded down, and get a bigint representation of b+h
  335. adjusted_mantissa am_b = am;
  336. // gcc7 buf: use a lambda to remove the noexcept qualifier bug with -Wnoexcept-type.
  337. round<T>(am_b, [](adjusted_mantissa&a, int32_t shift) { round_down(a, shift); });
  338. T b;
  339. to_float(false, am_b, b);
  340. adjusted_mantissa theor = to_extended_halfway(b);
  341. bigint theor_digits(theor.mantissa);
  342. int32_t theor_exp = theor.power2;
  343. // scale real digits and theor digits to be same power.
  344. int32_t pow2_exp = theor_exp - real_exp;
  345. uint32_t pow5_exp = uint32_t(-real_exp);
  346. if (pow5_exp != 0) {
  347. BOOST_CHARCONV_FASTFLOAT_ASSERT(theor_digits.pow5(pow5_exp));
  348. }
  349. if (pow2_exp > 0) {
  350. BOOST_CHARCONV_FASTFLOAT_ASSERT(theor_digits.pow2(uint32_t(pow2_exp)));
  351. } else if (pow2_exp < 0) {
  352. BOOST_CHARCONV_FASTFLOAT_ASSERT(real_digits.pow2(uint32_t(-pow2_exp)));
  353. }
  354. // compare digits, and use it to director rounding
  355. int ord = real_digits.compare(theor_digits);
  356. adjusted_mantissa answer = am;
  357. round<T>(answer, [ord](adjusted_mantissa& a, int32_t shift) {
  358. round_nearest_tie_even(a, shift, [ord](bool is_odd, bool, bool) -> bool {
  359. if (ord > 0) {
  360. return true;
  361. } else if (ord < 0) {
  362. return false;
  363. } else {
  364. return is_odd;
  365. }
  366. });
  367. });
  368. return answer;
  369. }
  370. // parse the significant digits as a big integer to unambiguously round
  371. // the significant digits. here, we are trying to determine how to round
  372. // an extended float representation close to `b+h`, halfway between `b`
  373. // (the float rounded-down) and `b+u`, the next positive float. this
  374. // algorithm is always correct, and uses one of two approaches. when
  375. // the exponent is positive relative to the significant digits (such as
  376. // 1234), we create a big-integer representation, get the high 64-bits,
  377. // determine if any lower bits are truncated, and use that to direct
  378. // rounding. in case of a negative exponent relative to the significant
  379. // digits (such as 1.2345), we create a theoretical representation of
  380. // `b` as a big-integer type, scaled to the same binary exponent as
  381. // the actual digits. we then compare the big integer representations
  382. // of both, and use that to direct rounding.
  383. template <typename T, typename UC>
  384. inline BOOST_CHARCONV_FASTFLOAT_CONSTEXPR20
  385. adjusted_mantissa digit_comp(parsed_number_string_t<UC>& num, adjusted_mantissa am) noexcept {
  386. // remove the invalid exponent bias
  387. am.power2 -= invalid_am_bias;
  388. int32_t sci_exp = scientific_exponent(num);
  389. size_t max_digits = binary_format<T>::max_digits();
  390. size_t digits = 0;
  391. bigint bigmant;
  392. parse_mantissa(bigmant, num, max_digits, digits);
  393. // can't underflow, since digits is at most max_digits.
  394. int32_t exponent = sci_exp + 1 - int32_t(digits);
  395. if (exponent >= 0) {
  396. return positive_digit_comp<T>(bigmant, exponent);
  397. } else {
  398. return negative_digit_comp<T>(bigmant, am, exponent);
  399. }
  400. }
  401. }}}} // namespace fast_float
  402. #endif