encoding.hpp 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // https://www.boost.org/LICENSE_1_0.txt
  6. #ifndef BOOST_LOCALE_ENCODING_HPP_INCLUDED
  7. #define BOOST_LOCALE_ENCODING_HPP_INCLUDED
  8. #include <boost/locale/config.hpp>
  9. #include <boost/locale/detail/encoding.hpp>
  10. #include <boost/locale/encoding_errors.hpp>
  11. #include <boost/locale/encoding_utf.hpp>
  12. #include <boost/locale/info.hpp>
  13. #include <boost/locale/util/string.hpp>
  14. #include <memory>
  15. #ifdef BOOST_MSVC
  16. # pragma warning(push)
  17. # pragma warning(disable : 4275 4251 4231 4660)
  18. #endif
  19. namespace boost { namespace locale {
  20. /// \brief Namespace that contains all functions related to character set conversion
  21. namespace conv {
  /// \defgroup Charset Charset conversion functions
  ///
  /// @{
  25. /// convert text in range [begin,end) encoded with \a charset to UTF according to policy \a how
  26. ///
  27. /// \throws invalid_charset_error: Character set is not supported
  28. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  29. /// encoded or decoded)
  30. template<typename CharType>
  31. BOOST_LOCALE_DECL std::basic_string<CharType>
  32. to_utf(const char* begin, const char* end, const std::string& charset, method_type how = default_method);
  33. /// convert UTF text in range [begin,end) to text encoded with \a charset according to policy \a how
  34. ///
  35. /// \throws invalid_charset_error: Character set is not supported
  36. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  37. /// encoded or decoded)
  38. template<typename CharType>
  39. BOOST_LOCALE_DECL std::string from_utf(const CharType* begin,
  40. const CharType* end,
  41. const std::string& charset,
  42. method_type how = default_method);
  43. /// convert \a text encoded with \a charset to UTF according to policy \a how
  44. ///
  45. /// \throws invalid_charset_error: Character set is not supported
  46. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  47. /// encoded or decoded)
  48. template<typename CharType>
  49. std::basic_string<CharType>
  50. to_utf(const std::string& text, const std::string& charset, method_type how = default_method)
  51. {
  52. return to_utf<CharType>(text.c_str(), text.c_str() + text.size(), charset, how);
  53. }
  54. /// Convert \a text encoded with \a charset to UTF according to policy \a how
  55. ///
  56. /// \throws invalid_charset_error: Character set is not supported
  57. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  58. /// encoded or decoded)
  59. template<typename CharType>
  60. std::basic_string<CharType>
  61. to_utf(const char* text, const std::string& charset, method_type how = default_method)
  62. {
  63. return to_utf<CharType>(text, util::str_end(text), charset, how);
  64. }
  65. /// convert text in range [begin,end) in locale encoding given by \a loc to UTF according to
  66. /// policy \a how
  67. ///
  68. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  69. /// \throws invalid_charset_error: Character set is not supported
  70. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  71. /// encoded or decoded)
  72. template<typename CharType>
  73. std::basic_string<CharType>
  74. to_utf(const char* begin, const char* end, const std::locale& loc, method_type how = default_method)
  75. {
  76. return to_utf<CharType>(begin, end, std::use_facet<info>(loc).encoding(), how);
  77. }
  78. /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how
  79. ///
  80. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  81. /// \throws invalid_charset_error: Character set is not supported
  82. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  83. /// encoded or decoded)
  84. template<typename CharType>
  85. std::basic_string<CharType>
  86. to_utf(const std::string& text, const std::locale& loc, method_type how = default_method)
  87. {
  88. return to_utf<CharType>(text, std::use_facet<info>(loc).encoding(), how);
  89. }
  90. /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how
  91. ///
  92. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  93. /// \throws invalid_charset_error: Character set is not supported
  94. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  95. /// encoded or decoded)
  96. template<typename CharType>
  97. std::basic_string<CharType> to_utf(const char* text, const std::locale& loc, method_type how = default_method)
  98. {
  99. return to_utf<CharType>(text, std::use_facet<info>(loc).encoding(), how);
  100. }
  101. /// convert \a text from UTF to text encoded with \a charset according to policy \a how
  102. ///
  103. /// \throws invalid_charset_error: Character set is not supported
  104. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  105. /// encoded or decoded)
  106. template<typename CharType>
  107. std::string
  108. from_utf(const std::basic_string<CharType>& text, const std::string& charset, method_type how = default_method)
  109. {
  110. return from_utf(text.c_str(), text.c_str() + text.size(), charset, how);
  111. }
  112. /// Convert \a text from UTF to \a charset according to policy \a how
  113. ///
  114. /// \throws invalid_charset_error: Character set is not supported
  115. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  116. /// encoded or decoded)
  117. template<typename CharType>
  118. std::string from_utf(const CharType* text, const std::string& charset, method_type how = default_method)
  119. {
  120. return from_utf(text, util::str_end(text), charset, how);
  121. }
  122. /// Convert UTF text in range [begin,end) to text in locale encoding given by \a loc according to policy \a how
  123. ///
  124. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  125. /// \throws invalid_charset_error: Character set is not supported
  126. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  127. /// encoded or decoded)
  128. template<typename CharType>
  129. std::string
  130. from_utf(const CharType* begin, const CharType* end, const std::locale& loc, method_type how = default_method)
  131. {
  132. return from_utf(begin, end, std::use_facet<info>(loc).encoding(), how);
  133. }
  134. /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how
  135. ///
  136. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  137. /// \throws invalid_charset_error: Character set is not supported
  138. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  139. /// encoded or decoded)
  140. template<typename CharType>
  141. std::string
  142. from_utf(const std::basic_string<CharType>& text, const std::locale& loc, method_type how = default_method)
  143. {
  144. return from_utf(text, std::use_facet<info>(loc).encoding(), how);
  145. }
  146. /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how
  147. ///
  148. /// \throws std::bad_cast: \a loc does not have \ref info facet installed
  149. /// \throws invalid_charset_error: Character set is not supported
  150. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  151. /// encoded or decoded)
  152. template<typename CharType>
  153. std::string from_utf(const CharType* text, const std::locale& loc, method_type how = default_method)
  154. {
  155. return from_utf(text, std::use_facet<info>(loc).encoding(), how);
  156. }
  157. /// Convert a text in range [begin,end) to \a to_encoding from \a from_encoding according to
  158. /// policy \a how
  159. ///
  160. /// \throws invalid_charset_error: Either character set is not supported
  161. /// \throws conversion_error: when the conversion fails (e.g. \a how is \c stop and any character cannot be
  162. /// encoded or decoded)
  163. BOOST_LOCALE_DECL
  164. std::string between(const char* begin,
  165. const char* end,
  166. const std::string& to_encoding,
  167. const std::string& from_encoding,
  168. method_type how = default_method);
  169. /// Convert \a text to \a to_encoding from \a from_encoding according to
  170. /// policy \a how
  171. ///
  172. /// \throws invalid_charset_error: Either character set is not supported
  173. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  174. /// encoded or decoded)
  175. inline std::string between(const char* text,
  176. const std::string& to_encoding,
  177. const std::string& from_encoding,
  178. method_type how = default_method)
  179. {
  180. return between(text, util::str_end(text), to_encoding, from_encoding, how);
  181. }
  182. /// Convert \a text to \a to_encoding from \a from_encoding according to
  183. /// policy \a how
  184. ///
  185. /// \throws invalid_charset_error: Either character set is not supported
  186. /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
  187. /// encoded or decoded)
  188. inline std::string between(const std::string& text,
  189. const std::string& to_encoding,
  190. const std::string& from_encoding,
  191. method_type how = default_method)
  192. {
  193. return between(text.c_str(), text.c_str() + text.size(), to_encoding, from_encoding, how);
  194. }
  195. /// @}
  196. /// Converter class to decode a narrow string using a local encoding and encode it with UTF
  197. template<typename CharType>
  198. class utf_encoder {
  199. std::unique_ptr<detail::utf_encoder<CharType>> impl_;
  200. public:
  201. using char_type = CharType;
  202. using string_type = std::basic_string<CharType>;
  203. /// Create an instance to convert text encoded with \a charset to UTF according to policy \a how
  204. ///
  205. /// Note: When converting only a single text \ref to_utf is likely faster.
  206. /// \throws invalid_charset_error: Character set is not supported
  207. utf_encoder(const std::string& charset, method_type how = default_method) :
  208. impl_(detail::make_utf_encoder<CharType>(charset, how))
  209. {}
  210. /// Convert text in range [begin,end) to UTF
  211. ///
  212. /// \throws conversion_error: Conversion failed
  213. string_type convert(const char* begin, const char* end) const { return impl_->convert(begin, end); }
  214. /// Convert \a text to UTF
  215. ///
  216. /// \throws conversion_error: Conversion failed
  217. string_type convert(const boost::string_view& text) const { return impl_->convert(text); }
  218. /// Convert \a text to UTF
  219. ///
  220. /// \throws conversion_error: Conversion failed
  221. string_type operator()(const boost::string_view& text) const { return convert(text); }
  222. };
  223. /// Converter class to decode an UTF string and encode it using a local encoding
  224. template<typename CharType>
  225. class utf_decoder {
  226. std::unique_ptr<detail::utf_decoder<CharType>> impl_;
  227. public:
  228. using char_type = CharType;
  229. using stringview_type = boost::basic_string_view<CharType>;
  230. /// Create an instance to convert UTF text to text encoded with \a charset according to policy \a how
  231. ///
  232. /// Note: When converting only a single text \ref from_utf is likely faster.
  233. /// \throws invalid_charset_error: Character set is not supported
  234. utf_decoder(const std::string& charset, method_type how = default_method) :
  235. impl_(detail::make_utf_decoder<CharType>(charset, how))
  236. {}
  237. /// Convert UTF text in range [begin,end) to local encoding
  238. ///
  239. /// \throws conversion_error: Conversion failed
  240. std::string convert(const CharType* begin, const CharType* end) const { return impl_->convert(begin, end); }
  241. /// Convert \a text from UTF to local encoding
  242. ///
  243. /// \throws conversion_error: Conversion failed
  244. std::string convert(const stringview_type& text) const { return impl_->convert(text); }
  245. /// Convert \a text from UTF to local encoding
  246. ///
  247. /// \throws conversion_error: Conversion failed
  248. std::string operator()(const stringview_type& text) const { return convert(text); }
  249. };
  250. class narrow_converter {
  251. std::unique_ptr<detail::narrow_converter> impl_;
  252. public:
  253. /// Create converter to convert text from \a src_encoding to \a target_encoding according to policy \a how
  254. ///
  255. /// \throws invalid_charset_error: Either character set is not supported
  256. narrow_converter(const std::string& src_encoding,
  257. const std::string& target_encoding,
  258. method_type how = default_method) :
  259. impl_(detail::make_narrow_converter(src_encoding, target_encoding, how))
  260. {}
  261. /// Convert text in range [begin,end)
  262. ///
  263. /// \throws conversion_error: Conversion failed
  264. std::string convert(const char* begin, const char* end) const { return impl_->convert(begin, end); }
  265. /// Convert \a text
  266. ///
  267. /// \throws conversion_error: Conversion failed
  268. std::string convert(const boost::string_view& text) const { return impl_->convert(text); }
  269. /// Convert \a text
  270. ///
  271. /// \throws conversion_error: Conversion failed
  272. std::string operator()(const boost::string_view& text) const { return convert(text); }
  273. };
  274. } // namespace conv
  275. }} // namespace boost::locale
  276. #ifdef BOOST_MSVC
  277. # pragma warning(pop)
  278. #endif
  279. #endif