accumulate.hpp 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193
  1. //---------------------------------------------------------------------------//
  2. // Copyright (c) 2013 Kyle Lutz <[email protected]>
  3. //
  4. // Distributed under the Boost Software License, Version 1.0
  5. // See accompanying file LICENSE_1_0.txt or copy at
  6. // http://www.boost.org/LICENSE_1_0.txt
  7. //
  8. // See http://boostorg.github.com/compute for more information.
  9. //---------------------------------------------------------------------------//
  10. #ifndef BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
  11. #define BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP
  12. #include <boost/static_assert.hpp>
  13. #include <boost/preprocessor/seq/for_each.hpp>
  14. #include <boost/compute/system.hpp>
  15. #include <boost/compute/functional.hpp>
  16. #include <boost/compute/command_queue.hpp>
  17. #include <boost/compute/algorithm/reduce.hpp>
  18. #include <boost/compute/algorithm/detail/serial_accumulate.hpp>
  19. #include <boost/compute/container/array.hpp>
  20. #include <boost/compute/container/vector.hpp>
  21. #include <boost/compute/type_traits/is_device_iterator.hpp>
  22. #include <boost/compute/detail/iterator_range_size.hpp>
  23. namespace boost {
  24. namespace compute {
  25. namespace detail {
  26. // Space complexity O(1)
  27. template<class InputIterator, class T, class BinaryFunction>
  28. inline T generic_accumulate(InputIterator first,
  29. InputIterator last,
  30. T init,
  31. BinaryFunction function,
  32. command_queue &queue)
  33. {
  34. const context &context = queue.get_context();
  35. size_t size = iterator_range_size(first, last);
  36. if(size == 0){
  37. return init;
  38. }
  39. // accumulate on device
  40. array<T, 1> device_result(context);
  41. detail::serial_accumulate(
  42. first, last, device_result.begin(), init, function, queue
  43. );
  44. // copy result to host
  45. T result;
  46. ::boost::compute::copy_n(device_result.begin(), 1, &result, queue);
  47. return result;
  48. }
  49. // returns true if we can use reduce() instead of accumulate() when
  50. // accumulate() this is true when the function is commutative (such as
  51. // addition of integers) and the initial value is the identity value
  52. // for the operation (zero for addition, one for multiplication).
  53. template<class T, class F>
  54. inline bool can_accumulate_with_reduce(T init, F function)
  55. {
  56. (void) init;
  57. (void) function;
  58. return false;
  59. }
  60. /// \internal_
  61. #define BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE(r, data, type) \
  62. inline bool can_accumulate_with_reduce(type init, plus<type>) \
  63. { \
  64. return init == type(0); \
  65. } \
  66. inline bool can_accumulate_with_reduce(type init, multiplies<type>) \
  67. { \
  68. return init == type(1); \
  69. }
  70. BOOST_PP_SEQ_FOR_EACH(
  71. BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE,
  72. _,
  73. (char_)(uchar_)(short_)(ushort_)(int_)(uint_)(long_)(ulong_)
  74. )
  75. template<class T>
  76. inline bool can_accumulate_with_reduce(T init, min<T>)
  77. {
  78. return init == (std::numeric_limits<T>::max)();
  79. }
  80. template<class T>
  81. inline bool can_accumulate_with_reduce(T init, max<T>)
  82. {
  83. return init == (std::numeric_limits<T>::min)();
  84. }
  85. #undef BOOST_COMPUTE_DETAIL_DECLARE_CAN_ACCUMULATE_WITH_REDUCE
  86. template<class InputIterator, class T, class BinaryFunction>
  87. inline T dispatch_accumulate(InputIterator first,
  88. InputIterator last,
  89. T init,
  90. BinaryFunction function,
  91. command_queue &queue)
  92. {
  93. size_t size = iterator_range_size(first, last);
  94. if(size == 0){
  95. return init;
  96. }
  97. if(can_accumulate_with_reduce(init, function)){
  98. T result;
  99. reduce(first, last, &result, function, queue);
  100. return result;
  101. }
  102. else {
  103. return generic_accumulate(first, last, init, function, queue);
  104. }
  105. }
  106. } // end detail namespace
  107. /// Returns the result of applying \p function to the elements in the
  108. /// range [\p first, \p last) and \p init.
  109. ///
  110. /// If no function is specified, \c plus will be used.
  111. ///
  112. /// \param first first element in the input range
  113. /// \param last last element in the input range
  114. /// \param init initial value
  115. /// \param function binary reduction function
  116. /// \param queue command queue to perform the operation
  117. ///
  118. /// \return the accumulated result value
  119. ///
  120. /// In specific situations the call to \c accumulate() can be automatically
  121. /// optimized to a call to the more efficient \c reduce() algorithm. This
  122. /// occurs when the binary reduction function is recognized as associative
  123. /// (such as the \c plus<int> function).
  124. ///
  125. /// Note that because floating-point addition is not associative, calling
  126. /// \c accumulate() with \c plus<float> results in a less efficient serial
  127. /// reduction algorithm being executed. If a slight loss in precision is
  128. /// acceptable, the more efficient parallel \c reduce() algorithm should be
  129. /// used instead.
  130. ///
  131. /// For example:
  132. /// \code
  133. /// // with vec = boost::compute::vector<int>
  134. /// accumulate(vec.begin(), vec.end(), 0, plus<int>()); // fast
  135. /// reduce(vec.begin(), vec.end(), &result, plus<int>()); // fast
  136. ///
  137. /// // with vec = boost::compute::vector<float>
  138. /// accumulate(vec.begin(), vec.end(), 0, plus<float>()); // slow
  139. /// reduce(vec.begin(), vec.end(), &result, plus<float>()); // fast
  140. /// \endcode
  141. ///
  142. /// Space complexity: \Omega(1)<br>
  143. /// Space complexity when optimized to \c reduce(): \Omega(n)
  144. ///
  145. /// \see reduce()
  146. template<class InputIterator, class T, class BinaryFunction>
  147. inline T accumulate(InputIterator first,
  148. InputIterator last,
  149. T init,
  150. BinaryFunction function,
  151. command_queue &queue = system::default_queue())
  152. {
  153. BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value);
  154. return detail::dispatch_accumulate(first, last, init, function, queue);
  155. }
  156. /// \overload
  157. template<class InputIterator, class T>
  158. inline T accumulate(InputIterator first,
  159. InputIterator last,
  160. T init,
  161. command_queue &queue = system::default_queue())
  162. {
  163. BOOST_STATIC_ASSERT(is_device_iterator<InputIterator>::value);
  164. typedef typename std::iterator_traits<InputIterator>::value_type IT;
  165. return detail::dispatch_accumulate(first, last, init, plus<IT>(), queue);
  166. }
  167. } // end compute namespace
  168. } // end boost namespace
  169. #endif // BOOST_COMPUTE_ALGORITHM_ACCUMULATE_HPP