conversion.hpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // https://www.boost.org/LICENSE_1_0.txt
  6. #ifndef BOOST_LOCALE_CONVERTER_HPP_INCLUDED
  7. #define BOOST_LOCALE_CONVERTER_HPP_INCLUDED
  8. #include <boost/locale/detail/facet_id.hpp>
  9. #include <boost/locale/detail/is_supported_char.hpp>
  10. #include <boost/locale/util/string.hpp>
  11. #include <locale>
  12. #ifdef BOOST_MSVC
  13. # pragma warning(push)
  14. # pragma warning(disable : 4275 4251 4231 4660)
  15. #endif
  16. namespace boost { namespace locale {
  17. /// \defgroup convert Text Conversions
  18. ///
/// This module provides various functions for string manipulation, such as Unicode normalization, case conversion, etc.
  20. /// @{
  21. /// \brief This class provides base flags for text manipulation. It is used as base for converter facet.
  22. class converter_base {
  23. public:
  24. /// The flag used for facet - the type of operation to perform
  25. enum conversion_type {
  26. normalization, ///< Apply Unicode normalization on the text
  27. upper_case, ///< Convert text to upper case
  28. lower_case, ///< Convert text to lower case
  29. case_folding, ///< Fold case in the text
  30. title_case ///< Convert text to title case
  31. };
  32. };
  33. /// \brief The facet that implements text manipulation
  34. ///
  35. /// It is used to perform text conversion operations defined by \ref converter_base::conversion_type.
  36. /// It is implemented for supported character types, at least \c char, \c wchar_t
  37. template<typename Char>
  38. class BOOST_SYMBOL_VISIBLE converter : public converter_base,
  39. public std::locale::facet,
  40. public detail::facet_id<converter<Char>> {
  41. BOOST_LOCALE_ASSERT_IS_SUPPORTED(Char);
  42. public:
  43. /// Standard constructor
  44. converter(size_t refs = 0) : std::locale::facet(refs) {}
  45. /// Convert text in range [\a begin, \a end) according to conversion method \a how. Parameter
  46. /// \a flags is used for specification of normalization method like nfd, nfc etc.
  47. virtual std::basic_string<Char>
  48. convert(conversion_type how, const Char* begin, const Char* end, int flags = 0) const = 0;
  49. };
  50. /// The type that defined <a href="http://unicode.org/reports/tr15/#Norm_Forms">normalization form</a>
  51. enum norm_type {
  52. norm_nfd, ///< Canonical decomposition
  53. norm_nfc, ///< Canonical decomposition followed by canonical composition
  54. norm_nfkd, ///< Compatibility decomposition
  55. norm_nfkc, ///< Compatibility decomposition followed by canonical composition.
  56. norm_default = norm_nfc, ///< Default normalization - canonical decomposition followed by canonical composition
  57. };
  58. /// Normalize Unicode string in range [begin,end) according to \ref norm_type "normalization form" \a n
  59. ///
  60. /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
  61. /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
  62. /// of a Unicode character set.
  63. ///
  64. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  65. template<typename CharType>
  66. std::basic_string<CharType> normalize(const CharType* begin,
  67. const CharType* end,
  68. norm_type n = norm_default,
  69. const std::locale& loc = std::locale())
  70. {
  71. return std::use_facet<converter<CharType>>(loc).convert(converter_base::normalization, begin, end, n);
  72. }
  73. /// Normalize Unicode string \a str according to \ref norm_type "normalization form" \a n
  74. ///
  75. /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
  76. /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
  77. /// of a Unicode character set.
  78. ///
  79. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  80. template<typename CharType>
  81. std::basic_string<CharType> normalize(const std::basic_string<CharType>& str,
  82. norm_type n = norm_default,
  83. const std::locale& loc = std::locale())
  84. {
  85. return normalize(str.data(), str.data() + str.size(), n, loc);
  86. }
  87. /// Normalize NULL terminated Unicode string \a str according to \ref norm_type "normalization form" \a n
  88. ///
  89. /// Note: This function receives only Unicode strings, i.e.: UTF-8, UTF-16 or UTF-32. It does not take
  90. /// in account the locale encoding, because Unicode decomposition and composition are meaningless outside
  91. /// of a Unicode character set.
  92. ///
  93. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  94. template<typename CharType>
  95. std::basic_string<CharType>
  96. normalize(const CharType* str, norm_type n = norm_default, const std::locale& loc = std::locale())
  97. {
  98. return normalize(str, util::str_end(str), n, loc);
  99. }
  100. ///////////////////////////////////////////////////
  101. /// Convert a string in range [begin,end) to upper case according to locale \a loc
  102. ///
  103. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  104. template<typename CharType>
  105. std::basic_string<CharType>
  106. to_upper(const CharType* begin, const CharType* end, const std::locale& loc = std::locale())
  107. {
  108. return std::use_facet<converter<CharType>>(loc).convert(converter_base::upper_case, begin, end);
  109. }
  110. /// Convert a string \a str to upper case according to locale \a loc
  111. ///
  112. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  113. template<typename CharType>
  114. std::basic_string<CharType> to_upper(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
  115. {
  116. return to_upper(str.data(), str.data() + str.size(), loc);
  117. }
  118. /// Convert a NULL terminated string \a str to upper case according to locale \a loc
  119. ///
  120. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  121. template<typename CharType>
  122. std::basic_string<CharType> to_upper(const CharType* str, const std::locale& loc = std::locale())
  123. {
  124. return to_upper(str, util::str_end(str), loc);
  125. }
  126. ///////////////////////////////////////////////////
  127. /// Convert a string in range [begin,end) to lower case according to locale \a loc
  128. ///
  129. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  130. template<typename CharType>
  131. std::basic_string<CharType>
  132. to_lower(const CharType* begin, const CharType* end, const std::locale& loc = std::locale())
  133. {
  134. return std::use_facet<converter<CharType>>(loc).convert(converter_base::lower_case, begin, end);
  135. }
  136. /// Convert a string \a str to lower case according to locale \a loc
  137. ///
  138. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  139. template<typename CharType>
  140. std::basic_string<CharType> to_lower(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
  141. {
  142. return to_lower(str.data(), str.data() + str.size(), loc);
  143. }
  144. /// Convert a NULL terminated string \a str to lower case according to locale \a loc
  145. ///
  146. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  147. template<typename CharType>
  148. std::basic_string<CharType> to_lower(const CharType* str, const std::locale& loc = std::locale())
  149. {
  150. return to_lower(str, util::str_end(str), loc);
  151. }
  152. ///////////////////////////////////////////////////
  153. /// Convert a string in range [begin,end) to title case according to locale \a loc
  154. ///
  155. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  156. template<typename CharType>
  157. std::basic_string<CharType>
  158. to_title(const CharType* begin, const CharType* end, const std::locale& loc = std::locale())
  159. {
  160. return std::use_facet<converter<CharType>>(loc).convert(converter_base::title_case, begin, end);
  161. }
  162. /// Convert a string \a str to title case according to locale \a loc
  163. ///
  164. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  165. template<typename CharType>
  166. std::basic_string<CharType> to_title(const std::basic_string<CharType>& str, const std::locale& loc = std::locale())
  167. {
  168. return to_title(str.data(), str.data() + str.size(), loc);
  169. }
  170. /// Convert a NULL terminated string \a str to title case according to locale \a loc
  171. ///
  172. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  173. template<typename CharType>
  174. std::basic_string<CharType> to_title(const CharType* str, const std::locale& loc = std::locale())
  175. {
  176. return to_title(str, util::str_end(str), loc);
  177. }
  178. ///////////////////////////////////////////////////
  179. /// Fold case of a string in range [begin,end) according to locale \a loc
  180. ///
  181. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  182. template<typename CharType>
  183. std::basic_string<CharType>
  184. fold_case(const CharType* begin, const CharType* end, const std::locale& loc = std::locale())
  185. {
  186. return std::use_facet<converter<CharType>>(loc).convert(converter_base::case_folding, begin, end);
  187. }
  188. /// Fold case of a string \a str according to locale \a loc
  189. ///
  190. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  191. template<typename CharType>
  192. std::basic_string<CharType> fold_case(const std::basic_string<CharType>& str,
  193. const std::locale& loc = std::locale())
  194. {
  195. return fold_case(str.data(), str.data() + str.size(), loc);
  196. }
  197. /// Fold case of a NULL terminated string \a str according to locale \a loc
  198. ///
  199. /// \throws std::bad_cast: \a loc does not have \ref converter facet installed
  200. template<typename CharType>
  201. std::basic_string<CharType> fold_case(const CharType* str, const std::locale& loc = std::locale())
  202. {
  203. return fold_case(str, util::str_end(str), loc);
  204. }
  205. ///@}
  206. }} // namespace boost::locale
  207. #ifdef BOOST_MSVC
  208. # pragma warning(pop)
  209. #endif
  210. /// \example conversions.cpp
  211. ///
  212. /// Example of using various text conversion functions.
  213. ///
  214. /// \example wconversions.cpp
  215. ///
  216. /// Example of using various text conversion functions with wide strings.
  217. #endif