123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314 |
- //
- // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
- //
- // Distributed under the Boost Software License, Version 1.0.
- // https://www.boost.org/LICENSE_1_0.txt
- #ifndef BOOST_LOCALE_ENCODING_HPP_INCLUDED
- #define BOOST_LOCALE_ENCODING_HPP_INCLUDED
- #include <boost/locale/config.hpp>
- #include <boost/locale/detail/encoding.hpp>
- #include <boost/locale/encoding_errors.hpp>
- #include <boost/locale/encoding_utf.hpp>
- #include <boost/locale/info.hpp>
- #include <boost/locale/util/string.hpp>
- #include <memory>
- #ifdef BOOST_MSVC
- # pragma warning(push)
- # pragma warning(disable : 4275 4251 4231 4660)
- #endif
- namespace boost { namespace locale {
- /// \brief Namespace that contains all functions related to character set conversion
- namespace conv {
- /// \defgroup Charset conversion functions
- ///
- /// @{
- /// convert text in range [begin,end) encoded with \a charset to UTF according to policy \a how
- ///
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- BOOST_LOCALE_DECL std::basic_string<CharType>
- to_utf(const char* begin, const char* end, const std::string& charset, method_type how = default_method);
- /// convert UTF text in range [begin,end) to text encoded with \a charset according to policy \a how
- ///
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- BOOST_LOCALE_DECL std::string from_utf(const CharType* begin,
- const CharType* end,
- const std::string& charset,
- method_type how = default_method);
- /// convert \a text encoded with \a charset to UTF according to policy \a how
- ///
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- std::basic_string<CharType>
- to_utf(const std::string& text, const std::string& charset, method_type how = default_method)
- {
- return to_utf<CharType>(text.c_str(), text.c_str() + text.size(), charset, how);
- }
- /// Convert \a text encoded with \a charset to UTF according to policy \a how
- ///
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- std::basic_string<CharType>
- to_utf(const char* text, const std::string& charset, method_type how = default_method)
- {
- return to_utf<CharType>(text, util::str_end(text), charset, how);
- }
- /// convert text in range [begin,end) in locale encoding given by \a loc to UTF according to
- /// policy \a how
- ///
- /// \throws std::bad_cast: \a loc does not have \ref info facet installed
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- std::basic_string<CharType>
- to_utf(const char* begin, const char* end, const std::locale& loc, method_type how = default_method)
- {
- return to_utf<CharType>(begin, end, std::use_facet<info>(loc).encoding(), how);
- }
- /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how
- ///
- /// \throws std::bad_cast: \a loc does not have \ref info facet installed
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- std::basic_string<CharType>
- to_utf(const std::string& text, const std::locale& loc, method_type how = default_method)
- {
- return to_utf<CharType>(text, std::use_facet<info>(loc).encoding(), how);
- }
- /// Convert \a text in locale encoding given by \a loc to UTF according to policy \a how
- ///
- /// \throws std::bad_cast: \a loc does not have \ref info facet installed
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- std::basic_string<CharType> to_utf(const char* text, const std::locale& loc, method_type how = default_method)
- {
- return to_utf<CharType>(text, std::use_facet<info>(loc).encoding(), how);
- }
- /// convert \a text from UTF to text encoded with \a charset according to policy \a how
- ///
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- std::string
- from_utf(const std::basic_string<CharType>& text, const std::string& charset, method_type how = default_method)
- {
- return from_utf(text.c_str(), text.c_str() + text.size(), charset, how);
- }
- /// Convert \a text from UTF to \a charset according to policy \a how
- ///
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- std::string from_utf(const CharType* text, const std::string& charset, method_type how = default_method)
- {
- return from_utf(text, util::str_end(text), charset, how);
- }
- /// Convert UTF text in range [begin,end) to text in locale encoding given by \a loc according to policy \a how
- ///
- /// \throws std::bad_cast: \a loc does not have \ref info facet installed
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- std::string
- from_utf(const CharType* begin, const CharType* end, const std::locale& loc, method_type how = default_method)
- {
- return from_utf(begin, end, std::use_facet<info>(loc).encoding(), how);
- }
- /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how
- ///
- /// \throws std::bad_cast: \a loc does not have \ref info facet installed
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- std::string
- from_utf(const std::basic_string<CharType>& text, const std::locale& loc, method_type how = default_method)
- {
- return from_utf(text, std::use_facet<info>(loc).encoding(), how);
- }
- /// Convert \a text from UTF to locale encoding given by \a loc according to policy \a how
- ///
- /// \throws std::bad_cast: \a loc does not have \ref info facet installed
- /// \throws invalid_charset_error: Character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- template<typename CharType>
- std::string from_utf(const CharType* text, const std::locale& loc, method_type how = default_method)
- {
- return from_utf(text, std::use_facet<info>(loc).encoding(), how);
- }
- /// Convert a text in range [begin,end) to \a to_encoding from \a from_encoding according to
- /// policy \a how
- ///
- /// \throws invalid_charset_error: Either character set is not supported
- /// \throws conversion_error: when the conversion fails (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- BOOST_LOCALE_DECL
- std::string between(const char* begin,
- const char* end,
- const std::string& to_encoding,
- const std::string& from_encoding,
- method_type how = default_method);
- /// Convert \a text to \a to_encoding from \a from_encoding according to
- /// policy \a how
- ///
- /// \throws invalid_charset_error: Either character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- inline std::string between(const char* text,
- const std::string& to_encoding,
- const std::string& from_encoding,
- method_type how = default_method)
- {
- return between(text, util::str_end(text), to_encoding, from_encoding, how);
- }
- /// Convert \a text to \a to_encoding from \a from_encoding according to
- /// policy \a how
- ///
- /// \throws invalid_charset_error: Either character set is not supported
- /// \throws conversion_error: Conversion failed (e.g. \a how is \c stop and any character cannot be
- /// encoded or decoded)
- inline std::string between(const std::string& text,
- const std::string& to_encoding,
- const std::string& from_encoding,
- method_type how = default_method)
- {
- return between(text.c_str(), text.c_str() + text.size(), to_encoding, from_encoding, how);
- }
- /// @}
- /// Converter class to decode a narrow string using a local encoding and encode it with UTF
- template<typename CharType>
- class utf_encoder {
- std::unique_ptr<detail::utf_encoder<CharType>> impl_;
- public:
- using char_type = CharType;
- using string_type = std::basic_string<CharType>;
- /// Create an instance to convert text encoded with \a charset to UTF according to policy \a how
- ///
- /// Note: When converting only a single text \ref to_utf is likely faster.
- /// \throws invalid_charset_error: Character set is not supported
- utf_encoder(const std::string& charset, method_type how = default_method) :
- impl_(detail::make_utf_encoder<CharType>(charset, how))
- {}
- /// Convert text in range [begin,end) to UTF
- ///
- /// \throws conversion_error: Conversion failed
- string_type convert(const char* begin, const char* end) const { return impl_->convert(begin, end); }
- /// Convert \a text to UTF
- ///
- /// \throws conversion_error: Conversion failed
- string_type convert(const boost::string_view& text) const { return impl_->convert(text); }
- /// Convert \a text to UTF
- ///
- /// \throws conversion_error: Conversion failed
- string_type operator()(const boost::string_view& text) const { return convert(text); }
- };
- /// Converter class to decode an UTF string and encode it using a local encoding
- template<typename CharType>
- class utf_decoder {
- std::unique_ptr<detail::utf_decoder<CharType>> impl_;
- public:
- using char_type = CharType;
- using stringview_type = boost::basic_string_view<CharType>;
- /// Create an instance to convert UTF text to text encoded with \a charset according to policy \a how
- ///
- /// Note: When converting only a single text \ref from_utf is likely faster.
- /// \throws invalid_charset_error: Character set is not supported
- utf_decoder(const std::string& charset, method_type how = default_method) :
- impl_(detail::make_utf_decoder<CharType>(charset, how))
- {}
- /// Convert UTF text in range [begin,end) to local encoding
- ///
- /// \throws conversion_error: Conversion failed
- std::string convert(const CharType* begin, const CharType* end) const { return impl_->convert(begin, end); }
- /// Convert \a text from UTF to local encoding
- ///
- /// \throws conversion_error: Conversion failed
- std::string convert(const stringview_type& text) const { return impl_->convert(text); }
- /// Convert \a text from UTF to local encoding
- ///
- /// \throws conversion_error: Conversion failed
- std::string operator()(const stringview_type& text) const { return convert(text); }
- };
- class narrow_converter {
- std::unique_ptr<detail::narrow_converter> impl_;
- public:
- /// Create converter to convert text from \a src_encoding to \a target_encoding according to policy \a how
- ///
- /// \throws invalid_charset_error: Either character set is not supported
- narrow_converter(const std::string& src_encoding,
- const std::string& target_encoding,
- method_type how = default_method) :
- impl_(detail::make_narrow_converter(src_encoding, target_encoding, how))
- {}
- /// Convert text in range [begin,end)
- ///
- /// \throws conversion_error: Conversion failed
- std::string convert(const char* begin, const char* end) const { return impl_->convert(begin, end); }
- /// Convert \a text
- ///
- /// \throws conversion_error: Conversion failed
- std::string convert(const boost::string_view& text) const { return impl_->convert(text); }
- /// Convert \a text
- ///
- /// \throws conversion_error: Conversion failed
- std::string operator()(const boost::string_view& text) const { return convert(text); }
- };
- } // namespace conv
- }} // namespace boost::locale
- #ifdef BOOST_MSVC
- # pragma warning(pop)
- #endif
- #endif
|