// add_unsigned.hpp
  1. ///////////////////////////////////////////////////////////////
  2. // Copyright 2020 Madhur Chauhan.
  3. // Copyright 2020 John Maddock. Distributed under the Boost
  4. // Software License, Version 1.0. (See accompanying file
  5. // LICENSE_1_0.txt or copy at https://www.boost.org/LICENSE_1_0.txt
  6. #ifndef BOOST_MP_ADD_UNSIGNED_HPP
  7. #define BOOST_MP_ADD_UNSIGNED_HPP
  8. #include <boost/multiprecision/cpp_int/intel_intrinsics.hpp>
  9. #include <boost/multiprecision/detail/assert.hpp>
  10. namespace boost { namespace multiprecision { namespace backends {
//
// Computes |a| + |b| into result; the final sign is copied from a.
// result may alias a or b (in-place addition) - the alias checks below
// (pa != pr) and the capture-sign-before-assign ordering depend on that.
//
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void add_unsigned_constexpr(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   using ::boost::multiprecision::std_constexpr::swap;
   //
   // This is the generic, C++ only version of addition.
   // It's also used for all constexpr branches, hence the name.
   // Nothing fancy, just let uintmax_t take the strain:
   //
   double_limb_type carry = 0;
   std::size_t      m(0), x(0); // m = smaller limb count, x = larger limb count
   std::size_t      as = a.size();
   std::size_t      bs = b.size();
   minmax(as, bs, m, x);
   if (x == 1)
   {
      // Single-limb fast path: one double-limb addition absorbs any carry.
      // Read the sign first - the assignment may overwrite a if result
      // aliases it.
      bool s = a.sign();
      result = static_cast<double_limb_type>(*a.limbs()) + static_cast<double_limb_type>(*b.limbs());
      result.sign(s);
      return;
   }
   result.resize(x, x);
   typename CppInt2::const_limb_pointer pa = a.limbs();
   typename CppInt3::const_limb_pointer pb = b.limbs();
   typename CppInt1::limb_pointer       pr = result.limbs();
   typename CppInt1::limb_pointer       pr_end = pr + m;
   // Make pa point at the operand with the most limbs:
   if (as < bs)
      swap(pa, pb);
   // First where a and b overlap:
   while (pr != pr_end)
   {
      carry += static_cast<double_limb_type>(*pa) + static_cast<double_limb_type>(*pb);
#ifdef __MSVC_RUNTIME_CHECKS
      // Explicit mask so MSVC's runtime checks don't flag the (intentional)
      // truncation to limb width:
      *pr = static_cast<limb_type>(carry & ~static_cast<limb_type>(0));
#else
      *pr = static_cast<limb_type>(carry);
#endif
      carry >>= CppInt1::limb_bits;
      ++pr, ++pa, ++pb;
   }
   pr_end += x - m; // extend to the end of the longer operand
   // Now where only a has digits:
   while (pr != pr_end)
   {
      if (!carry)
      {
         // No carry left to propagate: the remaining limbs are unchanged, so
         // bulk-copy them - skipped when this is an aliased in-place add:
         if (pa != pr)
            std_constexpr::copy(pa, pa + (pr_end - pr), pr);
         break;
      }
      carry += static_cast<double_limb_type>(*pa);
#ifdef __MSVC_RUNTIME_CHECKS
      *pr = static_cast<limb_type>(carry & ~static_cast<limb_type>(0));
#else
      *pr = static_cast<limb_type>(carry);
#endif
      carry >>= CppInt1::limb_bits;
      ++pr, ++pa;
   }
   if (carry)
   {
      // We overflowed, need to add one more limb:
      result.resize(x + 1, x + 1);
      // Fixed-precision types may refuse to grow, hence the size check
      // before writing the top limb:
      if (result.size() > x)
         result.limbs()[x] = static_cast<limb_type>(1u);
   }
   result.normalize();
   result.sign(a.sign());
}
  80. //
  81. // Core subtraction routine for all non-trivial cpp_int's:
  82. //
//
// Computes the magnitude |a| - |b| (or |b| - |a| when |b| > |a|) into result;
// the sign is taken from a and negated if the operands had to be swapped.
// result may alias a or b.
//
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void subtract_unsigned_constexpr(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   using ::boost::multiprecision::std_constexpr::swap;
   //
   // This is the generic, C++ only version of subtraction.
   // It's also used for all constexpr branches, hence the name.
   // Nothing fancy, just let uintmax_t take the strain:
   //
   double_limb_type borrow = 0;
   std::size_t      m(0), x(0); // m = smaller limb count, x = larger limb count
   minmax(a.size(), b.size(), m, x);
   //
   // special cases for small limb counts:
   //
   if (x == 1)
   {
      // Single-limb fast path: subtract the smaller from the larger and flip
      // the sign if the operands were swapped.
      bool      s  = a.sign();
      limb_type al = *a.limbs();
      limb_type bl = *b.limbs();
      if (bl > al)
      {
         ::boost::multiprecision::std_constexpr::swap(al, bl);
         s = !s;
      }
      result = al - bl;
      result.sign(s);
      return;
   }
   // This isn't used till later, but comparison has to occur before we resize the result,
   // as that may also resize a or b if this is an inplace operation:
   int c = a.compare_unsigned(b);
   // Set up the result vector:
   result.resize(x, x);
   // Now that a, b, and result are stable, get pointers to their limbs:
   typename CppInt2::const_limb_pointer pa = a.limbs();
   typename CppInt3::const_limb_pointer pb = b.limbs();
   typename CppInt1::limb_pointer       pr = result.limbs();
   bool                                 swapped = false;
   if (c < 0)
   {
      // |a| < |b|: compute |b| - |a| and remember to negate at the end.
      swap(pa, pb);
      swapped = true;
   }
   else if (c == 0)
   {
      result = static_cast<limb_type>(0);
      return;
   }
   std::size_t i = 0;
   // First where a and b overlap:
   while (i < m)
   {
      borrow = static_cast<double_limb_type>(pa[i]) - static_cast<double_limb_type>(pb[i]) - borrow;
      pr[i]  = static_cast<limb_type>(borrow);
      // If the subtraction wrapped, the bit above the limb is set: borrow 1
      // from the next limb.
      borrow = (borrow >> CppInt1::limb_bits) & 1u;
      ++i;
   }
   // Now where only a has digits, only as long as we've borrowed:
   while (borrow && (i < x))
   {
      borrow = static_cast<double_limb_type>(pa[i]) - borrow;
      pr[i]  = static_cast<limb_type>(borrow);
      borrow = (borrow >> CppInt1::limb_bits) & 1u;
      ++i;
   }
   // Any remaining digits are the same as those in pa:
   if ((x != i) && (pa != pr))
      std_constexpr::copy(pa + i, pa + x, pr + i);
   // pa was chosen as the larger magnitude, so the borrow must have cleared:
   BOOST_MP_ASSERT(0 == borrow);
   //
   // We may have lost digits, if so update limb usage count:
   //
   result.normalize();
   result.sign(a.sign());
   if (swapped)
      result.negate();
}
  161. #ifdef BOOST_MP_HAS_IMMINTRIN_H
  162. //
  163. // This is the key addition routine where all the argument types are non-trivial cpp_int's:
  164. //
  165. //
  166. // This optimization is limited to: GCC, LLVM, ICC (Intel), MSVC for x86_64 and i386.
  167. // If your architecture and compiler supports ADC intrinsic, please file a bug
  168. //
  169. // As of May, 2020 major compilers don't recognize carry chain though adc
  170. // intrinsics are used to hint compilers to use ADC and still compilers don't
  171. // unroll the loop efficiently (except LLVM) so manual unrolling is done.
  172. //
  173. // Also note that these intrinsics were only introduced by Intel as part of the
  174. // ADX processor extensions, even though the addc instruction has been available
  175. // for basically all x86 processors. That means gcc-9, clang-9, msvc-14.2 and up
  176. // are required to support these intrinsics.
  177. //
  178. template <class CppInt1, class CppInt2, class CppInt3>
  179. inline BOOST_MP_CXX14_CONSTEXPR void add_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
  180. {
  181. #ifndef BOOST_MP_NO_CONSTEXPR_DETECTION
  182. if (BOOST_MP_IS_CONST_EVALUATED(a.size()))
  183. {
  184. add_unsigned_constexpr(result, a, b);
  185. }
  186. else
  187. #endif
  188. {
  189. using std::swap;
  190. // Nothing fancy, just let uintmax_t take the strain:
  191. std::size_t m(0), x(0);
  192. std::size_t as = a.size();
  193. std::size_t bs = b.size();
  194. minmax(as, bs, m, x);
  195. if (x == 1)
  196. {
  197. bool s = a.sign();
  198. result = static_cast<double_limb_type>(*a.limbs()) + static_cast<double_limb_type>(*b.limbs());
  199. result.sign(s);
  200. return;
  201. }
  202. result.resize(x, x);
  203. typename CppInt2::const_limb_pointer pa = a.limbs();
  204. typename CppInt3::const_limb_pointer pb = b.limbs();
  205. typename CppInt1::limb_pointer pr = result.limbs();
  206. if (as < bs)
  207. swap(pa, pb);
  208. // First where a and b overlap:
  209. std::size_t i = 0;
  210. unsigned char carry = 0;
  211. #if defined(BOOST_MSVC) && !defined(BOOST_HAS_INT128) && defined(_M_X64)
  212. //
  213. // Special case for 32-bit limbs on 64-bit architecture - we can process
  214. // 2 limbs with each instruction.
  215. //
  216. for (; i + 8 <= m; i += 8)
  217. {
  218. carry = _addcarry_u64(carry, *(unsigned long long*)(pa + i + 0), *(unsigned long long*)(pb + i + 0), (unsigned long long*)(pr + i));
  219. carry = _addcarry_u64(carry, *(unsigned long long*)(pa + i + 2), *(unsigned long long*)(pb + i + 2), (unsigned long long*)(pr + i + 2));
  220. carry = _addcarry_u64(carry, *(unsigned long long*)(pa + i + 4), *(unsigned long long*)(pb + i + 4), (unsigned long long*)(pr + i + 4));
  221. carry = _addcarry_u64(carry, *(unsigned long long*)(pa + i + 6), *(unsigned long long*)(pb + i + 6), (unsigned long long*)(pr + i + 6));
  222. }
  223. #else
  224. for (; i + 4 <= m; i += 4)
  225. {
  226. carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 0], pb[i + 0], pr + i);
  227. carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 1], pb[i + 1], pr + i + 1);
  228. carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 2], pb[i + 2], pr + i + 2);
  229. carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i + 3], pb[i + 3], pr + i + 3);
  230. }
  231. #endif
  232. for (; i < m; ++i)
  233. carry = ::boost::multiprecision::detail::addcarry_limb(carry, pa[i], pb[i], pr + i);
  234. for (; i < x && carry; ++i)
  235. // We know carry is 1, so we just need to increment pa[i] (ie add a literal 1) and capture the carry:
  236. carry = ::boost::multiprecision::detail::addcarry_limb(0, pa[i], 1, pr + i);
  237. if (i == x && carry)
  238. {
  239. // We overflowed, need to add one more limb:
  240. result.resize(x + 1, x + 1);
  241. if (result.size() > x)
  242. result.limbs()[x] = static_cast<limb_type>(1u);
  243. }
  244. else if ((x != i) && (pa != pr))
  245. // Copy remaining digits only if we need to:
  246. std_constexpr::copy(pa + i, pa + x, pr + i);
  247. result.normalize();
  248. result.sign(a.sign());
  249. }
  250. }
//
// Computes the magnitude |a| - |b| (or |b| - |a| when |b| > |a|) into result,
// with the sign taken from a and negated if the operands were swapped:
// dispatches to the portable implementation when constant-evaluated,
// otherwise uses the borrow-chain intrinsics.  result may alias a or b.
//
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void subtract_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
#ifndef BOOST_MP_NO_CONSTEXPR_DETECTION
   if (BOOST_MP_IS_CONST_EVALUATED(a.size()))
   {
      subtract_unsigned_constexpr(result, a, b);
   }
   else
#endif
   {
      using std::swap;
      // Nothing fancy, just let uintmax_t take the strain:
      std::size_t m(0), x(0); // m = smaller limb count, x = larger limb count
      minmax(a.size(), b.size(), m, x);
      //
      // special cases for small limb counts:
      //
      if (x == 1)
      {
         // Single-limb fast path: subtract the smaller from the larger and
         // flip the sign if the operands were swapped.
         bool      s  = a.sign();
         limb_type al = *a.limbs();
         limb_type bl = *b.limbs();
         if (bl > al)
         {
            ::boost::multiprecision::std_constexpr::swap(al, bl);
            s = !s;
         }
         result = al - bl;
         result.sign(s);
         return;
      }
      // This isn't used till later, but comparison has to occur before we resize the result,
      // as that may also resize a or b if this is an inplace operation:
      int c = a.compare_unsigned(b);
      // Set up the result vector:
      result.resize(x, x);
      // Now that a, b, and result are stable, get pointers to their limbs:
      typename CppInt2::const_limb_pointer pa = a.limbs();
      typename CppInt3::const_limb_pointer pb = b.limbs();
      typename CppInt1::limb_pointer       pr = result.limbs();
      bool                                 swapped = false;
      if (c < 0)
      {
         // |a| < |b|: compute |b| - |a| and remember to negate at the end.
         swap(pa, pb);
         swapped = true;
      }
      else if (c == 0)
      {
         result = static_cast<limb_type>(0);
         return;
      }
      std::size_t   i      = 0;
      unsigned char borrow = 0;
      // First where a and b overlap:
#if defined(BOOST_MSVC) && !defined(BOOST_HAS_INT128) && defined(_M_X64)
      //
      // Special case for 32-bit limbs on 64-bit architecture - we can process
      // 2 limbs with each instruction (8 limbs per unrolled iteration).
      //
      for (; i + 8 <= m; i += 8)
      {
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i), *reinterpret_cast<const unsigned long long*>(pb + i), reinterpret_cast<unsigned long long*>(pr + i));
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i + 2), *reinterpret_cast<const unsigned long long*>(pb + i + 2), reinterpret_cast<unsigned long long*>(pr + i + 2));
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i + 4), *reinterpret_cast<const unsigned long long*>(pb + i + 4), reinterpret_cast<unsigned long long*>(pr + i + 4));
         borrow = _subborrow_u64(borrow, *reinterpret_cast<const unsigned long long*>(pa + i + 6), *reinterpret_cast<const unsigned long long*>(pb + i + 6), reinterpret_cast<unsigned long long*>(pr + i + 6));
      }
#else
      // Manually unrolled borrow chain - see the file comments above for why:
      for(; i + 4 <= m; i += 4)
      {
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i], pb[i], pr + i);
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i + 1], pb[i + 1], pr + i + 1);
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i + 2], pb[i + 2], pr + i + 2);
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i + 3], pb[i + 3], pr + i + 3);
      }
#endif
      for (; i < m; ++i)
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i], pb[i], pr + i);
      // Now where only a has digits, only as long as we've borrowed:
      while (borrow && (i < x))
      {
         borrow = boost::multiprecision::detail::subborrow_limb(borrow, pa[i], 0, pr + i);
         ++i;
      }
      // Any remaining digits are the same as those in pa:
      if ((x != i) && (pa != pr))
         std_constexpr::copy(pa + i, pa + x, pr + i);
      // pa was chosen as the larger magnitude, so the borrow must have cleared:
      BOOST_MP_ASSERT(0 == borrow);
      //
      // We may have lost digits, if so update limb usage count:
      //
      result.normalize();
      result.sign(a.sign());
      if (swapped)
         result.negate();
   } // constexpr.
}
  348. #else
// No intrinsics available: the portable constexpr implementation is also the
// runtime implementation.
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void add_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   add_unsigned_constexpr(result, a, b);
}
// No intrinsics available: the portable constexpr implementation is also the
// runtime implementation.
template <class CppInt1, class CppInt2, class CppInt3>
inline BOOST_MP_CXX14_CONSTEXPR void subtract_unsigned(CppInt1& result, const CppInt2& a, const CppInt3& b) noexcept(is_non_throwing_cpp_int<CppInt1>::value)
{
   subtract_unsigned_constexpr(result, a, b);
}
  359. #endif
  360. } } } // namespaces
  361. #endif