// Copyright (C) 2005-2006 Douglas Gregor <doug.gregor -at- gmail.com>.

// Use, modification and distribution is subject to the Boost Software
// License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
// http://www.boost.org/LICENSE_1_0.txt)

// Message Passing Interface 1.1 -- Section 4. MPI Collectives

/** @file collectives.hpp
 *
 * This header contains MPI collective operations, which implement
 * various parallel algorithms that require the coordination of all
 * processes within a communicator. The header @c collectives_fwd.hpp
 * provides forward declarations for each of these operations. To
 * include only specific collective algorithms, use the headers @c
 * boost/mpi/collectives/algorithm_name.hpp.
 */
#ifndef BOOST_MPI_COLLECTIVES_HPP
#define BOOST_MPI_COLLECTIVES_HPP

#include <boost/mpi/communicator.hpp>
#include <boost/mpi/inplace.hpp>
#include <vector>

namespace boost { namespace mpi {
/**
 * @brief Gather the values stored at every process into vectors of
 * values from each process.
 *
 * @c all_gather is a collective algorithm that collects the values
 * stored at each process into a vector of values indexed by the
 * process number they came from. The type @c T of the values may be
 * any type that is serializable or has an associated MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Allgather to gather the values.
 *
 * @param comm The communicator over which the all-gather will
 * occur.
 *
 * @param in_value The value to be transmitted by each process. To
 * gather an array of values, @c in_values points to the @c n local
 * values to be transmitted.
 *
 * @param out_values A vector or pointer to storage that will be
 * populated with the values from each process, indexed by the
 * process ID number. If it is a vector, the vector will be resized
 * accordingly.
 */
template<typename T>
void
all_gather(const communicator& comm, const T& in_value,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T& in_value, T* out_values);

/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void
all_gather(const communicator& comm, const T* in_values, int n, T* out_values);
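
/*
 * Illustrative usage sketch of the simplest all_gather overload: every rank
 * contributes its rank number and every rank receives the full vector. The
 * variable names are arbitrary; MPI is assumed to be initialized via
 * boost::mpi::environment.
 *
 *   #include <boost/mpi.hpp>
 *   #include <vector>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     std::vector<int> all_ranks;
 *     boost::mpi::all_gather(world, world.rank(), all_ranks);
 *     // On every process, all_ranks[i] == i for i in [0, world.size()).
 *   }
 */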

/**
 * \overload
 */
template<typename T>
void
all_gatherv(const communicator& comm, const T& in_value, T* out_values,
            const std::vector<int>& sizes);

/**
 * \overload
 */
template<typename T>
void
all_gatherv(const communicator& comm, const T* in_values, T* out_values,
            const std::vector<int>& sizes);

/**
 * \overload
 */
template<typename T>
void
all_gatherv(const communicator& comm, std::vector<T> const& in_values,
            std::vector<T>& out_values, const std::vector<int>& sizes);

/**
 * \overload
 */
template<typename T>
void
all_gatherv(const communicator& comm, const T& in_value, T* out_values,
            const std::vector<int>& sizes, const std::vector<int>& displs);

/**
 * \overload
 */
template<typename T>
void
all_gatherv(const communicator& comm, const T* in_values, T* out_values,
            const std::vector<int>& sizes, const std::vector<int>& displs);

/**
 * \overload
 */
template<typename T>
void
all_gatherv(const communicator& comm, std::vector<T> const& in_values,
            std::vector<T>& out_values, const std::vector<int>& sizes,
            const std::vector<int>& displs);
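
/*
 * Illustrative usage sketch of the vector overload of all_gatherv, where
 * rank r contributes r+1 elements; `sizes` must list every rank's
 * contribution. It is assumed here that this overload sizes the output
 * vector to the total element count.
 *
 *   #include <boost/mpi.hpp>
 *   #include <vector>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     std::vector<int> in(world.rank() + 1, world.rank());
 *     std::vector<int> sizes(world.size());
 *     for (int r = 0; r < world.size(); ++r) sizes[r] = r + 1;
 *
 *     std::vector<int> out;
 *     boost::mpi::all_gatherv(world, in, out, sizes);
 *     // On every process, out holds 0, 1, 1, 2, 2, 2, ...
 *   }
 */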

/**
 * @brief Combine the values stored by each process into a single
 * value available to all processes.
 *
 * @c all_reduce is a collective algorithm that combines the values
 * stored by each process into a single value available to all
 * processes. The values are combined in a user-defined way,
 * specified via a function object. The type @c T of the values may
 * be any type that is serializable or has an associated MPI data
 * type. One can think of this operation as an @c all_gather, followed
 * by an @c std::accumulate() over the gathered values using the
 * operation @c op.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Allreduce to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c all_reduce()
 * will create a custom @c MPI_Op for the call to @c MPI_Allreduce.
 *
 * @param comm The communicator over which the reduction will
 * occur.
 *
 * @param value The local value to be combined with the local
 * values of every other process. For reducing arrays, @c in_values
 * is a pointer to the local values to be reduced and @c n is the
 * number of values to reduce. See @c reduce for more information.
 *
 * If wrapped in an @c inplace_t object, the parameter serves as both
 * input and output and the local value will be overwritten in place
 * (a convenience function @c inplace is provided for the wrapping).
 *
 * @param out_value Will receive the result of the reduction
 * operation. If this parameter is omitted, the result will instead
 * be returned.
 *
 * @param op The binary operation that combines two values of type
 * @c T and returns a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp): users are
 * encouraged to mark commutative operations as such, because it
 * gives the implementation additional latitude to optimize the
 * reduction operation.
 *
 * @param n Indicates the size of the buffers of array type.
 *
 * @returns If no @p out_value parameter is supplied, returns the
 * result of the reduction operation.
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T* value, int n, T* out_value,
           Op op);

/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, const T& value, T& out_value, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
T all_reduce(const communicator& comm, const T& value, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, inplace_t<T*> value, int n, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
void
all_reduce(const communicator& comm, inplace_t<T> value, Op op);
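
/*
 * Illustrative usage sketch of the value-returning all_reduce overload,
 * using std::plus<int> (which maps to a built-in MPI operation for int).
 * The contributed values are arbitrary.
 *
 *   #include <boost/mpi.hpp>
 *   #include <functional>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     // Every rank contributes rank+1; every rank receives the sum.
 *     int sum = boost::mpi::all_reduce(world, world.rank() + 1,
 *                                      std::plus<int>());
 *     // sum == world.size() * (world.size() + 1) / 2 on every process.
 *   }
 */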

/**
 * @brief Send data from every process to every other process.
 *
 * @c all_to_all is a collective algorithm that transmits @c p values
 * from every process to every other process. On process i, the jth value
 * of the @p in_values vector is sent to process j and placed in the
 * ith position of the @p out_values vector of process j. The type
 * @c T of the values may be any type that is serializable or has an
 * associated MPI data type. If @c n is provided, then arrays of @p n
 * values will be transferred from one process to another.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Alltoall to scatter the values.
 *
 * @param comm The communicator over which the all-to-all
 * communication will occur.
 *
 * @param in_values A vector or pointer to storage that contains
 * the values to send to each process, indexed by the process ID
 * number.
 *
 * @param out_values A vector or pointer to storage that will be
 * updated to contain the values received from other processes. The
 * jth value in @p out_values will come from the process with rank j.
 */
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void all_to_all(const communicator& comm, const T* in_values, T* out_values);

/**
 * \overload
 */
template<typename T>
void
all_to_all(const communicator& comm, const std::vector<T>& in_values, int n,
           std::vector<T>& out_values);

/**
 * \overload
 */
template<typename T>
void
all_to_all(const communicator& comm, const T* in_values, int n, T* out_values);
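
/*
 * Illustrative usage sketch of the vector overload of all_to_all: rank i
 * sends its jth element to rank j and receives one element from every
 * rank. The payload encoding (100 * sender + receiver) is arbitrary.
 *
 *   #include <boost/mpi.hpp>
 *   #include <vector>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     std::vector<int> out_msgs(world.size());
 *     for (int j = 0; j < world.size(); ++j)
 *       out_msgs[j] = 100 * world.rank() + j;   // one message per receiver
 *
 *     std::vector<int> in_msgs;
 *     boost::mpi::all_to_all(world, out_msgs, in_msgs);
 *     // in_msgs[i] == 100 * i + world.rank() on every process.
 *   }
 */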

/**
 * @brief Broadcast a value from a root process to all other
 * processes.
 *
 * @c broadcast is a collective algorithm that transfers a value from
 * an arbitrary @p root process to every other process that is part of
 * the given communicator. The @c broadcast algorithm can transmit any
 * Serializable value, values that have associated MPI data types,
 * packed archives, skeletons, and the content of skeletons; see the
 * @c send primitive for communicators for a complete list. The type
 * @c T shall be the same for all processes that are a part of the
 * communicator @p comm, unless packed archives are being transferred:
 * with packed archives, the root sends a @c packed_oarchive or @c
 * packed_skeleton_oarchive whereas the other processes receive a
 * @c packed_iarchive or @c packed_skeleton_iarchive, respectively.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Bcast to perform the broadcast.
 *
 * @param comm The communicator over which the broadcast will
 * occur.
 *
 * @param value The value (or values, if @p n is provided) to be
 * transmitted (if the rank of @p comm is equal to @p root) or
 * received (if the rank of @p comm is not equal to @p root). When
 * the @p value is a @c skeleton_proxy, only the skeleton of the
 * object will be broadcast. In this case, the @p root will build a
 * skeleton from the object held in the proxy and all of the
 * non-roots will reshape the objects held in their proxies based on
 * the skeleton sent from the root.
 *
 * @param n When supplied, the number of values that the pointer @p
 * values points to, for broadcasting an array of values. The value
 * of @p n must be the same for all processes in @p comm.
 *
 * @param root The rank/process ID of the process that will be
 * transmitting the value.
 */
template<typename T>
void broadcast(const communicator& comm, T& value, int root);

/**
 * \overload
 */
template<typename T>
void broadcast(const communicator& comm, T* values, int n, int root);

/**
 * \overload
 */
template<typename T>
void broadcast(const communicator& comm, skeleton_proxy<T>& value, int root);

/**
 * \overload
 */
template<typename T>
void
broadcast(const communicator& comm, const skeleton_proxy<T>& value, int root);
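
/*
 * Illustrative usage sketch of broadcast: the value is filled in on the
 * root and received by every other rank; the call itself is identical on
 * all ranks. The string content is arbitrary.
 *
 *   #include <boost/mpi.hpp>
 *   #include <string>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     std::string config;
 *     if (world.rank() == 0)
 *       config = "alpha=1,beta=2";          // only the root sets the value
 *     boost::mpi::broadcast(world, config, 0);
 *     // Every rank now holds the root's string.
 *   }
 */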

/**
 * @brief Gather the values stored at every process into a vector at
 * the root process.
 *
 * @c gather is a collective algorithm that collects the values
 * stored at each process into a vector of values at the @p root
 * process. This vector is indexed by the process number that the
 * value came from. The type @c T of the values may be any type that
 * is serializable or has an associated MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Gather to gather the values.
 *
 * @param comm The communicator over which the gather will occur.
 *
 * @param in_value The value to be transmitted by each process. For
 * gathering arrays of values, @c in_values points to the @c n local
 * values to be transmitted.
 *
 * @param out_values A vector or pointer to storage that will be
 * populated with the values from each process, indexed by the
 * process ID number. If it is a vector, it will be resized
 * accordingly. For non-root processes, this parameter may be
 * omitted. If it is still provided, however, it will be unchanged.
 *
 * @param root The process ID number that will collect the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
gather(const communicator& comm, const T& in_value, std::vector<T>& out_values,
       int root);

/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T& in_value, T* out_values, int root);

/**
 * \overload
 */
template<typename T>
void gather(const communicator& comm, const T& in_value, int root);

/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n,
       std::vector<T>& out_values, int root);

/**
 * \overload
 */
template<typename T>
void
gather(const communicator& comm, const T* in_values, int n, T* out_values,
       int root);

/**
 * \overload
 */
template<typename T>
void gather(const communicator& comm, const T* in_values, int n, int root);
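
/*
 * Illustrative usage sketch of gather: only the root passes an output
 * vector; the other ranks use the overload without one. The gathered
 * quantity (the square of the rank) is arbitrary.
 *
 *   #include <boost/mpi.hpp>
 *   #include <vector>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     int local = world.rank() * world.rank();
 *     if (world.rank() == 0) {
 *       std::vector<int> squares;
 *       boost::mpi::gather(world, local, squares, 0);
 *       // squares[i] == i * i for every rank i.
 *     } else {
 *       boost::mpi::gather(world, local, 0);
 *     }
 *   }
 */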

/**
 * @brief Similar to boost::mpi::gather with the difference that the number
 * of values to be sent by non-root processes can vary.
 *
 * @param comm The communicator over which the gather will occur.
 *
 * @param in_values The array of values to be transmitted by each process.
 *
 * @param in_size For each non-root process this specifies the size
 * of @p in_values.
 *
 * @param out_values A pointer to storage that will be populated with
 * the values from each process. For non-root processes, this parameter
 * may be omitted. If it is still provided, however, it will be unchanged.
 *
 * @param sizes A vector containing the number of elements each non-root
 * process will send.
 *
 * @param displs A vector such that the i-th entry specifies the
 * displacement (relative to @p out_values) at which to place the incoming
 * data from process i at the @p root process. Overloaded versions for which
 * @p displs is omitted assume that the data is to be placed contiguously at
 * the root process.
 *
 * @param root The process ID number that will collect the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
gatherv(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, const std::vector<int>& sizes,
        const std::vector<int>& displs, int root);

/**
 * \overload
 */
template<typename T>
void
gatherv(const communicator& comm, const T* in_values, int in_size,
        T* out_values, const std::vector<int>& sizes,
        const std::vector<int>& displs, int root);

/**
 * \overload
 */
template<typename T>
void gatherv(const communicator& comm, const std::vector<T>& in_values, int root);

/**
 * \overload
 */
template<typename T>
void gatherv(const communicator& comm, const T* in_values, int in_size, int root);

/**
 * \overload
 */
template<typename T>
void
gatherv(const communicator& comm, const T* in_values, int in_size,
        T* out_values, const std::vector<int>& sizes, int root);

/**
 * \overload
 */
template<typename T>
void
gatherv(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, const std::vector<int>& sizes, int root);
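
/*
 * Illustrative usage sketch of gatherv without displacements: rank r sends
 * r+1 elements and the root packs them contiguously into out_values. The
 * per-rank sizes and values are arbitrary.
 *
 *   #include <boost/mpi.hpp>
 *   #include <numeric>
 *   #include <vector>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     std::vector<int> in(world.rank() + 1, world.rank());
 *     if (world.rank() == 0) {
 *       std::vector<int> sizes(world.size());
 *       for (int r = 0; r < world.size(); ++r) sizes[r] = r + 1;
 *       std::vector<int> out(std::accumulate(sizes.begin(), sizes.end(), 0));
 *       boost::mpi::gatherv(world, in, &out[0], sizes, 0);
 *       // out holds 0, 1, 1, 2, 2, 2, ... at the root.
 *     } else {
 *       boost::mpi::gatherv(world, in, 0);
 *     }
 *   }
 */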

/**
 * @brief Scatter the values stored at the root to all processes
 * within the communicator.
 *
 * @c scatter is a collective algorithm that scatters the values
 * stored in the @p root process (inside a vector) to all of the
 * processes in the communicator. The vector @p in_values (only
 * significant at the @p root) is indexed by the process number to
 * which the corresponding value will be sent. The type @c T of the
 * values may be any type that is serializable or has an associated
 * MPI data type.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Scatter to scatter the values.
 *
 * @param comm The communicator over which the scatter will occur.
 *
 * @param in_values A vector or pointer to storage that will contain
 * the values to send to each process, indexed by the process rank.
 * For non-root processes, this parameter may be omitted. If it is
 * still provided, however, it will be unchanged.
 *
 * @param out_value The value received by each process. When
 * scattering an array of values, @p out_values points to the @p n
 * values that will be received by each process.
 *
 * @param root The process ID number that will scatter the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values,
        T& out_value, int root);

/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T& out_value, int root);

/**
 * \overload
 */
template<typename T>
void scatter(const communicator& comm, T& out_value, int root);

/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const std::vector<T>& in_values,
        T* out_values, int n, int root);

/**
 * \overload
 */
template<typename T>
void
scatter(const communicator& comm, const T* in_values, T* out_values, int n,
        int root);

/**
 * \overload
 */
template<typename T>
void scatter(const communicator& comm, T* out_values, int n, int root);
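
/*
 * Illustrative usage sketch of scatter: the root supplies one value per
 * rank; non-root ranks call the overload without an input vector. The
 * scattered values are arbitrary.
 *
 *   #include <boost/mpi.hpp>
 *   #include <vector>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     int my_chunk = 0;
 *     if (world.rank() == 0) {
 *       std::vector<int> chunks(world.size());
 *       for (int r = 0; r < world.size(); ++r) chunks[r] = 10 * r;
 *       boost::mpi::scatter(world, chunks, my_chunk, 0);
 *     } else {
 *       boost::mpi::scatter(world, my_chunk, 0);
 *     }
 *     // my_chunk == 10 * world.rank() on every process.
 *   }
 */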

/**
 * @brief Similar to boost::mpi::scatter with the difference that the number
 * of values stored at the root process does not need to be a multiple of
 * the communicator's size.
 *
 * @param comm The communicator over which the scatter will occur.
 *
 * @param in_values A vector or pointer to storage that will contain
 * the values to send to each process, indexed by the process rank.
 * For non-root processes, this parameter may be omitted. If it is
 * still provided, however, it will be unchanged.
 *
 * @param sizes A vector containing the number of elements each non-root
 * process will receive.
 *
 * @param displs A vector such that the i-th entry specifies the
 * displacement (relative to @p in_values) from which to take the outgoing
 * data to process i. Overloaded versions for which @p displs is omitted
 * assume that the data is contiguous at the @p root process.
 *
 * @param out_values The array of values received by each process.
 *
 * @param out_size For each non-root process this will contain the size
 * of @p out_values.
 *
 * @param root The process ID number that will scatter the
 * values. This value must be the same on all processes.
 */
template<typename T>
void
scatterv(const communicator& comm, const std::vector<T>& in_values,
         const std::vector<int>& sizes, const std::vector<int>& displs,
         T* out_values, int out_size, int root);

/**
 * \overload
 */
template<typename T>
void
scatterv(const communicator& comm, const T* in_values,
         const std::vector<int>& sizes, const std::vector<int>& displs,
         T* out_values, int out_size, int root);

/**
 * \overload
 */
template<typename T>
void scatterv(const communicator& comm, T* out_values, int out_size, int root);

/**
 * \overload
 */
template<typename T>
void
scatterv(const communicator& comm, const T* in_values,
         const std::vector<int>& sizes, T* out_values, int root);

/**
 * \overload
 */
template<typename T>
void
scatterv(const communicator& comm, const std::vector<T>& in_values,
         const std::vector<int>& sizes, T* out_values, int root);
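
/*
 * Illustrative usage sketch of scatterv without displacements: the root
 * sends r+1 contiguous elements to rank r, and each receiver states its
 * own element count. The sizes and values are arbitrary.
 *
 *   #include <boost/mpi.hpp>
 *   #include <vector>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     std::vector<int> mine(world.rank() + 1);
 *     if (world.rank() == 0) {
 *       std::vector<int> sizes(world.size());
 *       std::vector<int> all;
 *       for (int r = 0; r < world.size(); ++r) {
 *         sizes[r] = r + 1;
 *         all.insert(all.end(), r + 1, r);   // r+1 copies of r
 *       }
 *       boost::mpi::scatterv(world, all, sizes, &mine[0], 0);
 *     } else {
 *       boost::mpi::scatterv(world, &mine[0], world.rank() + 1, 0);
 *     }
 *     // Rank r now holds r+1 copies of r in `mine`.
 *   }
 */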

/**
 * @brief Combine the values stored by each process into a single
 * value at the root.
 *
 * @c reduce is a collective algorithm that combines the values
 * stored by each process into a single value at the @c root. The
 * values can be combined arbitrarily, specified via a function
 * object. The type @c T of the values may be any type that is
 * serializable or has an associated MPI data type. One can think of
 * this operation as a @c gather to the @p root, followed by an @c
 * std::accumulate() over the gathered values using the operation
 * @c op.
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Reduce to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c reduce() will
 * create a custom @c MPI_Op for the call to @c MPI_Reduce.
 *
 * @param comm The communicator over which the reduction will
 * occur.
 *
 * @param in_value The local value to be combined with the local
 * values of every other process. For reducing arrays, @c in_values
 * contains a pointer to the local values. In this case, @c n is
 * the number of values that will be reduced. Reduction occurs
 * independently for each of the @p n values referenced by @p
 * in_values, e.g., calling reduce on an array of @p n values is
 * like calling @c reduce @p n separate times, one for each
 * location in @p in_values and @p out_values.
 *
 * @param out_value Will receive the result of the reduction
 * operation, but only for the @p root process. Non-root processes
 * may omit this parameter; if they choose to supply it, it will be
 * unchanged. For reducing arrays, @c out_values contains a pointer
 * to the storage for the output values.
 *
 * @param op The binary operation that combines two values of type
 * @c T into a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp): users are
 * encouraged to mark commutative operations as such, because it
 * gives the implementation additional latitude to optimize the
 * reduction operation.
 *
 * @param root The process ID number that will receive the final,
 * combined value. This value must be the same on all processes.
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T& in_value, T& out_value, Op op,
       int root);

/**
 * \overload
 */
template<typename T, typename Op>
void reduce(const communicator& comm, const T& in_value, Op op, int root);

/**
 * \overload
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, T* out_values,
       Op op, int root);

/**
 * \overload
 */
template<typename T, typename Op>
void
reduce(const communicator& comm, const T* in_values, int n, Op op, int root);
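
/*
 * Illustrative usage sketch of reduce: only the root passes an output
 * variable; std::plus<int> maps to a built-in MPI operation. The
 * contributed values are arbitrary.
 *
 *   #include <boost/mpi.hpp>
 *   #include <functional>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     int local = world.rank() + 1;
 *     if (world.rank() == 0) {
 *       int total = 0;
 *       boost::mpi::reduce(world, local, total, std::plus<int>(), 0);
 *       // total == world.size() * (world.size() + 1) / 2 at the root.
 *     } else {
 *       boost::mpi::reduce(world, local, std::plus<int>(), 0);
 *     }
 *   }
 */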

/**
 * @brief Compute a prefix reduction of values from all processes in
 * the communicator.
 *
 * @c scan is a collective algorithm that combines the values stored
 * by each process with the values of all processes with a smaller
 * rank. The values can be arbitrarily combined, specified via a
 * function object @p op. The type @c T of the values may be any type
 * that is serializable or has an associated MPI data type. One can
 * think of this operation as a @c gather to some process, followed
 * by an @c std::partial_sum() over the gathered values using the
 * operation @c op. The ith process returns the ith value emitted by
 * @c std::partial_sum().
 *
 * When the type @c T has an associated MPI data type, this routine
 * invokes @c MPI_Scan to perform the reduction. If possible,
 * built-in MPI operations will be used; otherwise, @c scan() will
 * create a custom @c MPI_Op for the call to @c MPI_Scan.
 *
 * @param comm The communicator over which the prefix reduction
 * will occur.
 *
 * @param in_value The local value to be combined with the local
 * values of other processes. For the array variant, the @c
 * in_values parameter points to the @c n local values that will be
 * combined.
 *
 * @param out_value If provided, the ith process will receive the
 * value @c op(in_value[0], op(in_value[1], op(..., in_value[i])
 * ... )). For the array variant, @c out_values contains a pointer
 * to storage for the @c n output values. The prefix reduction
 * occurs independently for each of the @p n values referenced by
 * @p in_values, e.g., calling scan on an array of @p n values is
 * like calling @c scan @p n separate times, one for each location
 * in @p in_values and @p out_values.
 *
 * @param op The binary operation that combines two values of type
 * @c T into a third value of type @c T. For types @c T that have
 * associated MPI data types, @c op will either be translated into
 * an @c MPI_Op (via @c MPI_Op_create) or, if possible, mapped
 * directly to a built-in MPI operation. See @c is_mpi_op in the @c
 * operations.hpp header for more details on this mapping. For any
 * non-built-in operation, commutativity will be determined by the
 * @c is_commutative trait (also in @c operations.hpp).
 *
 * @returns If no @p out_value parameter is provided, returns the
 * result of the prefix reduction.
 */
template<typename T, typename Op>
void
scan(const communicator& comm, const T& in_value, T& out_value, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
T
scan(const communicator& comm, const T& in_value, Op op);

/**
 * \overload
 */
template<typename T, typename Op>
void
scan(const communicator& comm, const T* in_values, int n, T* out_values, Op op);
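
/*
 * Illustrative usage sketch of the value-returning scan overload: rank i
 * receives the sum of the contributions from ranks 0 through i. The
 * contributed values are arbitrary.
 *
 *   #include <boost/mpi.hpp>
 *   #include <functional>
 *
 *   int main(int argc, char* argv[])
 *   {
 *     boost::mpi::environment env(argc, argv);
 *     boost::mpi::communicator world;
 *
 *     int prefix = boost::mpi::scan(world, world.rank() + 1,
 *                                   std::plus<int>());
 *     // prefix == (world.rank() + 1) * (world.rank() + 2) / 2
 *   }
 */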

} } // end namespace boost::mpi

#endif // BOOST_MPI_COLLECTIVES_HPP

#ifndef BOOST_MPI_COLLECTIVES_FORWARD_ONLY
// Include implementations of each of the collectives
# include <boost/mpi/collectives/all_gather.hpp>
# include <boost/mpi/collectives/all_gatherv.hpp>
# include <boost/mpi/collectives/all_reduce.hpp>
# include <boost/mpi/collectives/all_to_all.hpp>
# include <boost/mpi/collectives/broadcast.hpp>
# include <boost/mpi/collectives/gather.hpp>
# include <boost/mpi/collectives/gatherv.hpp>
# include <boost/mpi/collectives/scatter.hpp>
# include <boost/mpi/collectives/scatterv.hpp>
# include <boost/mpi/collectives/reduce.hpp>
# include <boost/mpi/collectives/scan.hpp>
#endif