convert.hpp 4.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108
  1. //
  2. // Copyright (c) 2012 Artyom Beilis (Tonkikh)
  3. // Copyright (c) 2020 Alexander Grund
  4. //
  5. // Distributed under the Boost Software License, Version 1.0.
  6. // https://www.boost.org/LICENSE_1_0.txt
  7. #ifndef BOOST_NOWIDE_UTF_CONVERT_HPP_INCLUDED
  8. #define BOOST_NOWIDE_UTF_CONVERT_HPP_INCLUDED
  9. #include <boost/nowide/detail/is_string_container.hpp>
  10. #include <boost/nowide/replacement.hpp>
  11. #include <boost/nowide/utf/utf.hpp>
  12. #include <iterator>
  13. #include <string>
  14. namespace boost {
  15. namespace nowide {
  16. namespace utf {
  /// Count the code units of a zero-terminated string.
  ///
  /// Works like `std::strlen(s)`, but accepts any character type (e.g. wide strings).
  ///
  /// \param s Pointer to the first element of a zero-terminated sequence of \a Char
  /// \return Number of elements of type \a Char that precede the first zero element
  template<typename Char>
  size_t strlen(const Char* s)
  {
      size_t count = 0;
      // Advance until the terminating zero element is reached; the terminator itself is not counted
      while(s[count])
          ++count;
      return count;
  }
  28. /// Convert a buffer of UTF sequences in the range [source_begin, source_end)
  29. /// from \a CharIn to \a CharOut to the output \a buffer of size \a buffer_size.
  30. ///
  31. /// \return original buffer containing the NULL terminated string or NULL
  32. ///
  33. /// If there is not enough room in the buffer NULL is returned, and the content of the buffer is undefined.
  34. /// Any illegal sequences are replaced with the replacement character, see #BOOST_NOWIDE_REPLACEMENT_CHARACTER
  35. template<typename CharOut, typename CharIn>
  36. CharOut*
  37. convert_buffer(CharOut* buffer, size_t buffer_size, const CharIn* source_begin, const CharIn* source_end)
  38. {
  39. CharOut* rv = buffer;
  40. if(buffer_size == 0)
  41. return nullptr;
  42. buffer_size--;
  43. while(source_begin != source_end)
  44. {
  45. code_point c = utf_traits<CharIn>::decode(source_begin, source_end);
  46. if(c == illegal || c == incomplete)
  47. {
  48. c = BOOST_NOWIDE_REPLACEMENT_CHARACTER;
  49. }
  50. size_t width = utf_traits<CharOut>::width(c);
  51. if(buffer_size < width)
  52. {
  53. rv = nullptr;
  54. break;
  55. }
  56. buffer = utf_traits<CharOut>::encode(c, buffer);
  57. buffer_size -= width;
  58. }
  59. *buffer++ = 0;
  60. return rv;
  61. }
  62. /// Convert the UTF sequences in range [begin, end) from \a CharIn to \a CharOut
  63. /// and return it as a string
  64. ///
  65. /// Any illegal sequences are replaced with the replacement character, see #BOOST_NOWIDE_REPLACEMENT_CHARACTER
  66. /// \tparam CharOut Output character type
  67. template<typename CharOut, typename CharIn>
  68. std::basic_string<CharOut> convert_string(const CharIn* begin, const CharIn* end)
  69. {
  70. std::basic_string<CharOut> result;
  71. result.reserve(end - begin);
  72. using inserter_type = std::back_insert_iterator<std::basic_string<CharOut>>;
  73. inserter_type inserter(result);
  74. code_point c;
  75. while(begin != end)
  76. {
  77. c = utf_traits<CharIn>::decode(begin, end);
  78. if(c == illegal || c == incomplete)
  79. {
  80. c = BOOST_NOWIDE_REPLACEMENT_CHARACTER;
  81. }
  82. utf_traits<CharOut>::encode(c, inserter);
  83. }
  84. return result;
  85. }
  /// Transcode the UTF sequences held by the string \a s from \a CharIn to \a CharOut
  /// and return the result as a new string.
  ///
  /// Forwards to the pointer-range overload using the full contents of \a s.
  /// Any illegal sequences are replaced with the replacement character, see #BOOST_NOWIDE_REPLACEMENT_CHARACTER
  ///
  /// \tparam CharOut Output character type
  /// \param s Input string to convert
  /// \return String of \a CharOut holding the converted text
  template<typename CharOut, typename CharIn>
  std::basic_string<CharOut> convert_string(const std::basic_string<CharIn>& s)
  {
      const CharIn* const first = s.data();
      const CharIn* const last = first + s.size();
      return convert_string<CharOut>(first, last);
  }
  96. } // namespace utf
  97. } // namespace nowide
  98. } // namespace boost
  99. #endif