bivariate_statistics.hpp 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596
  1. // (C) Copyright Nick Thompson 2018.
  2. // Use, modification and distribution are subject to the
  3. // Boost Software License, Version 1.0. (See accompanying file
  4. // LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  5. #ifndef BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
  6. #define BOOST_MATH_TOOLS_BIVARIATE_STATISTICS_HPP
  7. #include <iterator>
  8. #include <tuple>
  9. #include <limits>
  10. #include <boost/math/tools/assert.hpp>
  11. #include <boost/math/tools/header_deprecated.hpp>
  12. BOOST_MATH_HEADER_DEPRECATED("<boost/math/statistics/bivariate_statistics.hpp>");
  13. namespace boost{ namespace math{ namespace tools {
  14. template<class Container>
  15. auto means_and_covariance(Container const & u, Container const & v)
  16. {
  17. using Real = typename Container::value_type;
  18. using std::size;
  19. BOOST_MATH_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
  20. BOOST_MATH_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least one sample.");
  21. // See Equation III.9 of "Numerically Stable, Single-Pass, Parallel Statistics Algorithms", Bennet et al.
  22. Real cov = 0;
  23. Real mu_u = u[0];
  24. Real mu_v = v[0];
  25. for(size_t i = 1; i < size(u); ++i)
  26. {
  27. Real u_tmp = (u[i] - mu_u)/(i+1);
  28. Real v_tmp = v[i] - mu_v;
  29. cov += i*u_tmp*v_tmp;
  30. mu_u = mu_u + u_tmp;
  31. mu_v = mu_v + v_tmp/(i+1);
  32. }
  33. return std::make_tuple(mu_u, mu_v, cov/size(u));
  34. }
  35. template<class Container>
  36. auto covariance(Container const & u, Container const & v)
  37. {
  38. auto [mu_u, mu_v, cov] = boost::math::tools::means_and_covariance(u, v);
  39. return cov;
  40. }
  41. template<class Container>
  42. auto correlation_coefficient(Container const & u, Container const & v)
  43. {
  44. using Real = typename Container::value_type;
  45. using std::size;
  46. BOOST_MATH_ASSERT_MSG(size(u) == size(v), "The size of each vector must be the same to compute covariance.");
  47. BOOST_MATH_ASSERT_MSG(size(u) > 0, "Computing covariance requires at least two samples.");
  48. Real cov = 0;
  49. Real mu_u = u[0];
  50. Real mu_v = v[0];
  51. Real Qu = 0;
  52. Real Qv = 0;
  53. for(size_t i = 1; i < size(u); ++i)
  54. {
  55. Real u_tmp = u[i] - mu_u;
  56. Real v_tmp = v[i] - mu_v;
  57. Qu = Qu + (i*u_tmp*u_tmp)/(i+1);
  58. Qv = Qv + (i*v_tmp*v_tmp)/(i+1);
  59. cov += i*u_tmp*v_tmp/(i+1);
  60. mu_u = mu_u + u_tmp/(i+1);
  61. mu_v = mu_v + v_tmp/(i+1);
  62. }
  63. // If one dataset is constant, then they have no correlation:
  64. // See https://stats.stackexchange.com/questions/23676/normalized-correlation-with-a-constant-vector
  65. // Thanks to zbjornson for pointing this out.
  66. if (Qu == 0 || Qv == 0)
  67. {
  68. return std::numeric_limits<Real>::quiet_NaN();
  69. }
  70. // Make sure rho in [-1, 1], even in the presence of numerical noise.
  71. Real rho = cov/sqrt(Qu*Qv);
  72. if (rho > 1) {
  73. rho = 1;
  74. }
  75. if (rho < -1) {
  76. rho = -1;
  77. }
  78. return rho;
  79. }
  80. }}}
  81. #endif