utf8_codecvt.hpp 1.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556
  1. //
  2. // Copyright (c) 2015 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // https://www.boost.org/LICENSE_1_0.txt
  6. #ifndef BOOST_LOCALE_UTF8_CODECVT_HPP
  7. #define BOOST_LOCALE_UTF8_CODECVT_HPP
  8. #include <boost/locale/generic_codecvt.hpp>
  9. #include <boost/locale/utf.hpp>
  10. #include <boost/assert.hpp>
  11. #include <cstdint>
  12. #include <locale>
  13. namespace boost { namespace locale {
  14. /// \brief Generic utf8 codecvt facet, it allows to convert UTF-8 strings to UTF-16 and UTF-32 using wchar_t,
  15. /// char32_t and char16_t
  16. template<typename CharType>
  17. class utf8_codecvt : public generic_codecvt<CharType, utf8_codecvt<CharType>> {
  18. public:
  19. struct state_type {};
  20. utf8_codecvt(size_t refs = 0) : generic_codecvt<CharType, utf8_codecvt<CharType>>(refs) {}
  21. static int max_encoding_length() { return 4; }
  22. static state_type initial_state(generic_codecvt_base::initial_convertion_state /* unused */)
  23. {
  24. return state_type();
  25. }
  26. static utf::code_point to_unicode(state_type&, const char*& begin, const char* end)
  27. {
  28. const char* p = begin;
  29. utf::code_point c = utf::utf_traits<char>::decode(p, end);
  30. if(c != utf::illegal && c != utf::incomplete)
  31. begin = p;
  32. return c;
  33. }
  34. static utf::len_or_error from_unicode(state_type&, utf::code_point u, char* begin, const char* end)
  35. {
  36. BOOST_ASSERT(utf::is_valid_codepoint(u));
  37. const auto width = utf::utf_traits<char>::width(u);
  38. if(width > end - begin)
  39. return utf::incomplete;
  40. utf::utf_traits<char>::encode(u, begin);
  41. return width;
  42. }
  43. };
  44. }} // namespace boost::locale
  45. #endif