primary_transform.hpp 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120
  1. /*
  2. *
  3. * Copyright (c) 1998-2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE: primary_transform.hpp
  14. * VERSION: see <boost/version.hpp>
  15. * DESCRIPTION: Heuristically determines the sort string format in use
  16. * by the current locale.
  17. */
  18. #ifndef BOOST_REGEX_PRIMARY_TRANSFORM
  19. #define BOOST_REGEX_PRIMARY_TRANSFORM
  20. namespace boost{
  21. namespace BOOST_REGEX_DETAIL_NS{
  //
  // Classification of the sort-key layout reported by find_sort_syntax():
  //
  enum
  {
     sort_C       = 0,  // transforming "a" gave back "a" unchanged
     sort_fixed   = 1,  // keys look like fixed width fields; *delim receives a width
     sort_delim   = 2,  // keys look like delimited fields; *delim receives the delimiter
     sort_unknown = 3   // neither heuristic matched; *delim is set to 0
  };
  //
  // count_chars:
  // Returns the number of elements of s that compare equal to c.
  //
  // s must provide size() and operator[]; each element is compared
  // against c with operator==.
  //
  // Used when probing sort keys: if c is the delimiter between
  // collation fields, every sort key should yield the same count.
  //
  template <class S, class charT>
  unsigned count_chars(const S& s, charT c)
  {
     unsigned int hits = 0;
     unsigned idx = 0;
     while(idx < s.size())
     {
        hits += (s[idx] == c) ? 1u : 0u;
        ++idx;
     }
     return hits;
  }
  44. template <class traits, class charT>
  45. unsigned find_sort_syntax(const traits* pt, charT* delim)
  46. {
  47. //
  48. // compare 'a' with 'A' to see how similar they are,
  49. // should really use a-accute but we can't portably do that,
  50. //
  51. typedef typename traits::string_type string_type;
  52. typedef typename traits::char_type char_type;
  53. // Suppress incorrect warning for MSVC
  54. (void)pt;
  55. char_type a[2] = {'a', '\0', };
  56. string_type sa(pt->transform(a, a+1));
  57. if(sa == a)
  58. {
  59. *delim = 0;
  60. return sort_C;
  61. }
  62. char_type A[2] = { 'A', '\0', };
  63. string_type sA(pt->transform(A, A+1));
  64. char_type c[2] = { ';', '\0', };
  65. string_type sc(pt->transform(c, c+1));
  66. int pos = 0;
  67. while((pos <= static_cast<int>(sa.size())) && (pos <= static_cast<int>(sA.size())) && (sa[pos] == sA[pos])) ++pos;
  68. --pos;
  69. if(pos < 0)
  70. {
  71. *delim = 0;
  72. return sort_unknown;
  73. }
  74. //
  75. // at this point sa[pos] is either the end of a fixed width field
  76. // or the character that acts as a delimiter:
  77. //
  78. charT maybe_delim = sa[pos];
  79. if((pos != 0) && (count_chars(sa, maybe_delim) == count_chars(sA, maybe_delim)) && (count_chars(sa, maybe_delim) == count_chars(sc, maybe_delim)))
  80. {
  81. *delim = maybe_delim;
  82. return sort_delim;
  83. }
  84. //
  85. // OK doen't look like a delimiter, try for fixed width field:
  86. //
  87. if((sa.size() == sA.size()) && (sa.size() == sc.size()))
  88. {
  89. // note assumes that the fixed width field is less than
  90. // (numeric_limits<charT>::max)(), should be true for all types
  91. // I can't imagine 127 character fields...
  92. *delim = static_cast<charT>(++pos);
  93. return sort_fixed;
  94. }
  95. //
  96. // don't know what it is:
  97. //
  98. *delim = 0;
  99. return sort_unknown;
  100. }
  101. } // namespace BOOST_REGEX_DETAIL_NS
  102. } // namespace boost
  103. #endif