123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384 |
- /* boost random/detail/polynomial.hpp header file
- *
- * Copyright Steven Watanabe 2014
- * Distributed under the Boost Software License, Version 1.0. (See
- * accompanying file LICENSE_1_0.txt or copy at
- * http://www.boost.org/LICENSE_1_0.txt)
- *
- * See http://www.boost.org for most recent version including documentation.
- *
- * $Id$
- */
- #ifndef BOOST_RANDOM_DETAIL_POLYNOMIAL_HPP
- #define BOOST_RANDOM_DETAIL_POLYNOMIAL_HPP
- #include <cstddef>
- #include <limits>
- #include <vector>
- #include <algorithm>
- #include <boost/assert.hpp>
- #include <boost/cstdint.hpp>
- namespace boost {
- namespace random {
- namespace detail {
- class polynomial_ops {
- public:
- typedef unsigned long digit_t;
- static void add(std::size_t size, const digit_t * lhs,
- const digit_t * rhs, digit_t * output)
- {
- for(std::size_t i = 0; i < size; ++i) {
- output[i] = lhs[i] ^ rhs[i];
- }
- }
- static void add_shifted_inplace(std::size_t size, const digit_t * lhs,
- digit_t * output, std::size_t shift)
- {
- if(shift == 0) {
- add(size, lhs, output, output);
- return;
- }
- std::size_t bits = std::numeric_limits<digit_t>::digits;
- digit_t prev = 0;
- for(std::size_t i = 0; i < size; ++i) {
- digit_t tmp = lhs[i];
- output[i] ^= (tmp << shift) | (prev >> (bits-shift));
- prev = tmp;
- }
- output[size] ^= (prev >> (bits-shift));
- }
- static void multiply_simple(std::size_t size, const digit_t * lhs,
- const digit_t * rhs, digit_t * output)
- {
- std::size_t bits = std::numeric_limits<digit_t>::digits;
- for(std::size_t i = 0; i < 2*size; ++i) {
- output[i] = 0;
- }
- for(std::size_t i = 0; i < size; ++i) {
- for(std::size_t j = 0; j < bits; ++j) {
- if((lhs[i] & (digit_t(1) << j)) != 0) {
- add_shifted_inplace(size, rhs, output + i, j);
- }
- }
- }
- }
- // memory requirements: (size - cutoff) * 4 + next_smaller
- static void multiply_karatsuba(std::size_t size,
- const digit_t * lhs, const digit_t * rhs,
- digit_t * output)
- {
- if(size < 64) {
- multiply_simple(size, lhs, rhs, output);
- return;
- }
- // split in half
- std::size_t cutoff = size/2;
- multiply_karatsuba(cutoff, lhs, rhs, output);
- multiply_karatsuba(size - cutoff, lhs + cutoff, rhs + cutoff,
- output + cutoff*2);
- std::vector<digit_t> local1(size - cutoff);
- std::vector<digit_t> local2(size - cutoff);
- // combine the digits for the inner multiply
- add(cutoff, lhs, lhs + cutoff, &local1[0]);
- if(size & 1) local1[cutoff] = lhs[size - 1];
- add(cutoff, rhs + cutoff, rhs, &local2[0]);
- if(size & 1) local2[cutoff] = rhs[size - 1];
- std::vector<digit_t> local3((size - cutoff) * 2);
- multiply_karatsuba(size - cutoff, &local1[0], &local2[0], &local3[0]);
- add(cutoff * 2, output, &local3[0], &local3[0]);
- add((size - cutoff) * 2, output + cutoff*2, &local3[0], &local3[0]);
- // Finally, add the inner result
- add((size - cutoff) * 2, output + cutoff, &local3[0], output + cutoff);
- }
-
- static void multiply_add_karatsuba(std::size_t size,
- const digit_t * lhs, const digit_t * rhs,
- digit_t * output)
- {
- std::vector<digit_t> buf(size * 2);
- multiply_karatsuba(size, lhs, rhs, &buf[0]);
- add(size * 2, &buf[0], output, output);
- }
- static void multiply(const digit_t * lhs, std::size_t lhs_size,
- const digit_t * rhs, std::size_t rhs_size,
- digit_t * output)
- {
- std::fill_n(output, lhs_size + rhs_size, digit_t(0));
- multiply_add(lhs, lhs_size, rhs, rhs_size, output);
- }
- static void multiply_add(const digit_t * lhs, std::size_t lhs_size,
- const digit_t * rhs, std::size_t rhs_size,
- digit_t * output)
- {
- // split into pieces that can be passed to
- // karatsuba multiply.
- while(lhs_size != 0) {
- if(lhs_size < rhs_size) {
- std::swap(lhs, rhs);
- std::swap(lhs_size, rhs_size);
- }
-
- multiply_add_karatsuba(rhs_size, lhs, rhs, output);
-
- lhs += rhs_size;
- lhs_size -= rhs_size;
- output += rhs_size;
- }
- }
- static void copy_bits(const digit_t * x, std::size_t low, std::size_t high,
- digit_t * out)
- {
- const std::size_t bits = std::numeric_limits<digit_t>::digits;
- std::size_t offset = low/bits;
- x += offset;
- low -= offset*bits;
- high -= offset*bits;
- std::size_t n = (high-low)/bits;
- if(low == 0) {
- for(std::size_t i = 0; i < n; ++i) {
- out[i] = x[i];
- }
- } else {
- for(std::size_t i = 0; i < n; ++i) {
- out[i] = (x[i] >> low) | (x[i+1] << (bits-low));
- }
- }
- if((high-low)%bits) {
- digit_t low_mask = (digit_t(1) << ((high-low)%bits)) - 1;
- digit_t result = (x[n] >> low);
- if(low != 0 && (n+1)*bits < high) {
- result |= (x[n+1] << (bits-low));
- }
- out[n] = (result & low_mask);
- }
- }
- static void shift_left(digit_t * val, std::size_t size, std::size_t shift)
- {
- const std::size_t bits = std::numeric_limits<digit_t>::digits;
- BOOST_ASSERT(shift > 0);
- BOOST_ASSERT(shift < bits);
- digit_t prev = 0;
- for(std::size_t i = 0; i < size; ++i) {
- digit_t tmp = val[i];
- val[i] = (prev >> (bits - shift)) | (val[i] << shift);
- prev = tmp;
- }
- }
- static digit_t sqr(digit_t val) {
- const std::size_t bits = std::numeric_limits<digit_t>::digits;
- digit_t mask = (digit_t(1) << bits/2) - 1;
- for(std::size_t i = bits; i > 1; i /= 2) {
- val = ((val & ~mask) << i/2) | (val & mask);
- mask = mask & (mask >> i/4);
- mask = mask | (mask << i/2);
- }
- return val;
- }
- static void sqr(digit_t * val, std::size_t size)
- {
- const std::size_t bits = std::numeric_limits<digit_t>::digits;
- digit_t mask = (digit_t(1) << bits/2) - 1;
- for(std::size_t i = 0; i < size; ++i) {
- digit_t x = val[size - i - 1];
- val[(size - i - 1) * 2] = sqr(x & mask);
- val[(size - i - 1) * 2 + 1] = sqr(x >> bits/2);
- }
- }
- // optimized for the case when the modulus has few bits set.
- struct sparse_mod {
- sparse_mod(const digit_t * divisor, std::size_t divisor_bits)
- {
- const std::size_t bits = std::numeric_limits<digit_t>::digits;
- _remainder_bits = divisor_bits - 1;
- for(std::size_t i = 0; i < divisor_bits; ++i) {
- if(divisor[i/bits] & (digit_t(1) << i%bits)) {
- _bit_indices.push_back(i);
- }
- }
- BOOST_ASSERT(_bit_indices.back() == divisor_bits - 1);
- _bit_indices.pop_back();
- if(_bit_indices.empty()) {
- _block_bits = divisor_bits;
- _lower_bits = 0;
- } else {
- _block_bits = divisor_bits - _bit_indices.back() - 1;
- _lower_bits = _bit_indices.back() + 1;
- }
-
- _partial_quotient.resize((_block_bits + bits - 1)/bits);
- }
- void operator()(digit_t * dividend, std::size_t dividend_bits)
- {
- const std::size_t bits = std::numeric_limits<digit_t>::digits;
- while(dividend_bits > _remainder_bits) {
- std::size_t block_start = (std::max)(dividend_bits - _block_bits, _remainder_bits);
- std::size_t block_size = (dividend_bits - block_start + bits - 1) / bits;
- copy_bits(dividend, block_start, dividend_bits, &_partial_quotient[0]);
- for(std::size_t i = 0; i < _bit_indices.size(); ++i) {
- std::size_t pos = _bit_indices[i] + block_start - _remainder_bits;
- add_shifted_inplace(block_size, &_partial_quotient[0], dividend + pos/bits, pos%bits);
- }
- add_shifted_inplace(block_size, &_partial_quotient[0], dividend + block_start/bits, block_start%bits);
- dividend_bits = block_start;
- }
- }
- std::vector<digit_t> _partial_quotient;
- std::size_t _remainder_bits;
- std::size_t _block_bits;
- std::size_t _lower_bits;
- std::vector<std::size_t> _bit_indices;
- };
- // base should have the same number of bits as mod
- // base, and mod should both be able to hold a power
- // of 2 >= mod_bits. out needs to be twice as large.
- static void mod_pow_x(boost::uintmax_t exponent, const digit_t * mod, std::size_t mod_bits, digit_t * out)
- {
- const std::size_t bits = std::numeric_limits<digit_t>::digits;
- const std::size_t n = (mod_bits + bits - 1) / bits;
- const std::size_t highbit = mod_bits - 1;
- if(exponent == 0) {
- out[0] = 1;
- std::fill_n(out + 1, n - 1, digit_t(0));
- return;
- }
- boost::uintmax_t i = std::numeric_limits<boost::uintmax_t>::digits - 1;
- while(((boost::uintmax_t(1) << i) & exponent) == 0) {
- --i;
- }
- out[0] = 2;
- std::fill_n(out + 1, n - 1, digit_t(0));
- sparse_mod m(mod, mod_bits);
- while(i--) {
- sqr(out, n);
- m(out, 2 * mod_bits - 1);
- if((boost::uintmax_t(1) << i) & exponent) {
- shift_left(out, n, 1);
- if(out[highbit / bits] & (digit_t(1) << highbit%bits))
- add(n, out, mod, out);
- }
- }
- }
- };
- class polynomial
- {
- typedef polynomial_ops::digit_t digit_t;
- public:
- polynomial() : _size(0) {}
- class reference {
- public:
- reference(digit_t &value, int idx)
- : _value(value), _idx(idx) {}
- operator bool() const { return (_value & (digit_t(1) << _idx)) != 0; }
- reference& operator=(bool b)
- {
- if(b) {
- _value |= (digit_t(1) << _idx);
- } else {
- _value &= ~(digit_t(1) << _idx);
- }
- return *this;
- }
- reference &operator^=(bool b)
- {
- _value ^= (digit_t(b) << _idx);
- return *this;
- }
- reference &operator=(const reference &other)
- {
- return *this = static_cast<bool>(other);
- }
- private:
- digit_t &_value;
- int _idx;
- };
- reference operator[](std::size_t i)
- {
- static const std::size_t bits = std::numeric_limits<digit_t>::digits;
- ensure_bit(i);
- return reference(_storage[i/bits], i%bits);
- }
- bool operator[](std::size_t i) const
- {
- static const std::size_t bits = std::numeric_limits<digit_t>::digits;
- if(i < size())
- return (_storage[i/bits] & (digit_t(1) << (i%bits))) != 0;
- else
- return false;
- }
- std::size_t size() const
- {
- return _size;
- }
- void resize(std::size_t n)
- {
- static const std::size_t bits = std::numeric_limits<digit_t>::digits;
- _storage.resize((n + bits - 1)/bits);
- // clear the high order bits in case we're shrinking.
- if(n%bits) {
- _storage.back() &= ((digit_t(1) << (n%bits)) - 1);
- }
- _size = n;
- }
- friend polynomial operator*(const polynomial &lhs, const polynomial &rhs);
- friend polynomial mod_pow_x(boost::uintmax_t exponent, polynomial mod);
- private:
- std::vector<polynomial_ops::digit_t> _storage;
- std::size_t _size;
- void ensure_bit(std::size_t i)
- {
- if(i >= size()) {
- resize(i + 1);
- }
- }
- void normalize()
- {
- while(size() && (*this)[size() - 1] == 0)
- resize(size() - 1);
- }
- };
- inline polynomial operator*(const polynomial &lhs, const polynomial &rhs)
- {
- polynomial result;
- result._storage.resize(lhs._storage.size() + rhs._storage.size());
- polynomial_ops::multiply(&lhs._storage[0], lhs._storage.size(),
- &rhs._storage[0], rhs._storage.size(),
- &result._storage[0]);
- result._size = lhs._size + rhs._size;
- return result;
- }
- inline polynomial mod_pow_x(boost::uintmax_t exponent, polynomial mod)
- {
- polynomial result;
- mod.normalize();
- std::size_t mod_size = mod.size();
- result._storage.resize(mod._storage.size() * 2);
- result._size = mod.size() * 2;
- polynomial_ops::mod_pow_x(exponent, &mod._storage[0], mod_size, &result._storage[0]);
- result.resize(mod.size() - 1);
- return result;
- }
- }
- }
- }
- #endif // BOOST_RANDOM_DETAIL_POLYNOMIAL_HPP
|