segment.hpp 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. //
  2. // Copyright (c) 2009-2011 Artyom Beilis (Tonkikh)
  3. //
  4. // Distributed under the Boost Software License, Version 1.0.
  5. // https://www.boost.org/LICENSE_1_0.txt
  6. #ifndef BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
  7. #define BOOST_LOCALE_BOUNDARY_SEGMENT_HPP_INCLUDED
  8. #include <boost/locale/util/string.hpp>
  9. #include <iosfwd>
  10. #include <iterator>
  11. #include <locale>
  12. #include <string>
  13. #ifdef BOOST_MSVC
  14. # pragma warning(push)
  15. # pragma warning(disable : 4275 4251 4231 4660)
  16. #endif
  17. namespace boost { namespace locale { namespace boundary {
  18. /// \cond INTERNAL
  19. namespace detail {
  /// Three-way lexicographic comparison of [l_begin, l_end) against [r_begin, r_end).
  ///
  /// Characters are compared with std::char_traits of the LEFT iterator's value type;
  /// every character read from the right range is converted to that type first.
  ///
  /// \returns -1 if the left range orders first (or is a proper prefix of the right one),
  ///          1 if the right range orders first (or is a proper prefix of the left one),
  ///          0 if both ranges have the same length and all characters compare equal.
  template<typename LeftIterator, typename RightIterator>
  int compare_text(LeftIterator l_begin, LeftIterator l_end, RightIterator r_begin, RightIterator r_end)
  {
      typedef typename std::iterator_traits<LeftIterator>::value_type char_type;
      typedef std::char_traits<char_type> traits;

      while(l_begin != l_end && r_begin != r_end) {
          // Read then advance, left side first
          const char_type lhs = *l_begin;
          ++l_begin;
          const char_type rhs = *r_begin;
          ++r_begin;
          if(!traits::eq(lhs, rhs))
              return traits::lt(lhs, rhs) ? -1 : 1;
      }
      // The loop only stops once at least one range is exhausted, so if the left
      // range still has characters the right one is a proper prefix of it.
      if(l_begin != l_end)
          return 1;
      return (r_begin == r_end) ? 0 : -1;
  }
  /// Range form of compare_text(): compares the characters of \a l and \a r, both of which
  /// must provide begin() and end(), by forwarding to the iterator overload.
  ///
  /// \returns the -1 / 0 / 1 result of the iterator overload
  template<typename Left, typename Right>
  int compare_text(const Left& l, const Right& r)
  {
      const int order = compare_text(l.begin(), l.end(), r.begin(), r.end());
      return order;
  }
  48. template<typename Left, typename Char>
  49. int compare_string(const Left& l, const Char* begin)
  50. {
  51. return compare_text(l.begin(), l.end(), begin, util::str_end(begin));
  52. }
  53. template<typename Right, typename Char>
  54. int compare_string(const Char* begin, const Right& r)
  55. {
  56. return compare_text(begin, util::str_end(begin), r.begin(), r.end());
  57. }
  58. } // namespace detail
  59. /// \endcond
  60. /// \addtogroup boundary
  61. /// @{
  62. /// \brief a segment object that represents a pair of two iterators that define the range where
  63. /// this segment exists and a rule that defines it.
  64. ///
  65. /// This type of object is dereferenced by the iterators of segment_index. Using a rule() member function
  66. /// you can get a specific rule this segment was selected with. For example, when you use
  67. /// word boundary analysis, you can check if the specific word contains Kana letters by checking (rule() & \ref
  68. /// word_kana)!=0. For a sentence analysis you can check if the sentence is selected because a sentence terminator is
  69. /// found (\ref sentence_term) or there is a line break (\ref sentence_sep).
  70. ///
  71. /// This object can be automatically converted to std::basic_string with the same type of character. It is also
  72. /// a valid range that has begin() and end() member functions returning iterators on the location of the segment.
  73. ///
  74. /// \see
  75. ///
  76. /// - \ref segment_index
  77. /// - \ref boundary_point
  78. /// - \ref boundary_point_index
  79. template<typename IteratorType>
  80. class segment : public std::pair<IteratorType, IteratorType> {
  81. public:
  82. /// The type of the underlying character
  83. typedef typename std::iterator_traits<IteratorType>::value_type char_type;
  84. /// The type of the string it is converted to
  85. typedef std::basic_string<char_type> string_type;
  86. /// The value that iterators return - the character itself
  87. typedef char_type value_type;
  88. /// The iterator that allows to iterate the range
  89. typedef IteratorType iterator;
  90. /// The iterator that allows to iterate the range
  91. typedef IteratorType const_iterator;
  92. /// The type that represent a difference between two iterators
  93. typedef typename std::iterator_traits<IteratorType>::difference_type difference_type;
  94. /// Default constructor
  95. segment() : rule_(0) {}
  96. /// Create a segment using two iterators and a rule that represents this point
  97. segment(iterator b, iterator e, rule_type r) : std::pair<IteratorType, IteratorType>(b, e), rule_(r) {}
  98. /// Set the start of the range
  99. void begin(const iterator& v) { this->first = v; }
  100. /// Set the end of the range
  101. void end(const iterator& v) { this->second = v; }
  102. /// Get the start of the range
  103. IteratorType begin() const { return this->first; }
  104. /// Set the end of the range
  105. IteratorType end() const { return this->second; }
  106. /// Convert the range to a string automatically
  107. template<class T, class A>
  108. operator std::basic_string<char_type, T, A>() const
  109. {
  110. return std::basic_string<char_type, T, A>(this->first, this->second);
  111. }
  112. /// Create a string from the range explicitly
  113. string_type str() const { return string_type(begin(), end()); }
  114. /// Get the length of the text chunk
  115. size_t length() const { return std::distance(begin(), end()); }
  116. /// Check if the segment is empty
  117. bool empty() const { return begin() == end(); }
  118. /// Get the rule that is used for selection of this segment.
  119. rule_type rule() const { return rule_; }
  120. /// Set a rule that is used for segment selection
  121. void rule(rule_type r) { rule_ = r; }
  122. // make sure we override std::pair's operator==
  123. /// Compare two segments
  124. bool operator==(const segment& other) const { return detail::compare_text(*this, other) == 0; }
  125. /// Compare two segments
  126. bool operator!=(const segment& other) const { return detail::compare_text(*this, other) != 0; }
  127. private:
  128. rule_type rule_;
  129. };
  130. /// Compare two segments
  131. template<typename IteratorL, typename IteratorR>
  132. bool operator==(const segment<IteratorL>& l, const segment<IteratorR>& r)
  133. {
  134. return detail::compare_text(l, r) == 0;
  135. }
  136. /// Compare two segments
  137. template<typename IteratorL, typename IteratorR>
  138. bool operator!=(const segment<IteratorL>& l, const segment<IteratorR>& r)
  139. {
  140. return detail::compare_text(l, r) != 0;
  141. }
  142. /// Compare two segments
  143. template<typename IteratorL, typename IteratorR>
  144. bool operator<(const segment<IteratorL>& l, const segment<IteratorR>& r)
  145. {
  146. return detail::compare_text(l, r) < 0;
  147. }
  148. /// Compare two segments
  149. template<typename IteratorL, typename IteratorR>
  150. bool operator<=(const segment<IteratorL>& l, const segment<IteratorR>& r)
  151. {
  152. return detail::compare_text(l, r) <= 0;
  153. }
  154. /// Compare two segments
  155. template<typename IteratorL, typename IteratorR>
  156. bool operator>(const segment<IteratorL>& l, const segment<IteratorR>& r)
  157. {
  158. return detail::compare_text(l, r) > 0;
  159. }
  160. /// Compare two segments
  161. template<typename IteratorL, typename IteratorR>
  162. bool operator>=(const segment<IteratorL>& l, const segment<IteratorR>& r)
  163. {
  164. return detail::compare_text(l, r) >= 0;
  165. }
  166. /// Compare string and segment
  167. template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
  168. bool operator==(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
  169. {
  170. return detail::compare_text(l, r) == 0;
  171. }
  172. /// Compare string and segment
  173. template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
  174. bool operator!=(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
  175. {
  176. return detail::compare_text(l, r) != 0;
  177. }
  178. /// Compare string and segment
  179. template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
  180. bool operator<(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
  181. {
  182. return detail::compare_text(l, r) < 0;
  183. }
  184. /// Compare string and segment
  185. template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
  186. bool operator<=(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
  187. {
  188. return detail::compare_text(l, r) <= 0;
  189. }
  190. /// Compare string and segment
  191. template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
  192. bool operator>(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
  193. {
  194. return detail::compare_text(l, r) > 0;
  195. }
  196. /// Compare string and segment
  197. template<typename CharType, typename Traits, typename Alloc, typename IteratorR>
  198. bool operator>=(const std::basic_string<CharType, Traits, Alloc>& l, const segment<IteratorR>& r)
  199. {
  200. return detail::compare_text(l, r) >= 0;
  201. }
  202. /// Compare string and segment
  203. template<typename Iterator, typename CharType, typename Traits, typename Alloc>
  204. bool operator==(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
  205. {
  206. return detail::compare_text(l, r) == 0;
  207. }
  208. /// Compare string and segment
  209. template<typename Iterator, typename CharType, typename Traits, typename Alloc>
  210. bool operator!=(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
  211. {
  212. return detail::compare_text(l, r) != 0;
  213. }
  214. /// Compare string and segment
  215. template<typename Iterator, typename CharType, typename Traits, typename Alloc>
  216. bool operator<(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
  217. {
  218. return detail::compare_text(l, r) < 0;
  219. }
  220. /// Compare string and segment
  221. template<typename Iterator, typename CharType, typename Traits, typename Alloc>
  222. bool operator<=(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
  223. {
  224. return detail::compare_text(l, r) <= 0;
  225. }
  226. /// Compare string and segment
  227. template<typename Iterator, typename CharType, typename Traits, typename Alloc>
  228. bool operator>(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
  229. {
  230. return detail::compare_text(l, r) > 0;
  231. }
  232. /// Compare string and segment
  233. template<typename Iterator, typename CharType, typename Traits, typename Alloc>
  234. bool operator>=(const segment<Iterator>& l, const std::basic_string<CharType, Traits, Alloc>& r)
  235. {
  236. return detail::compare_text(l, r) >= 0;
  237. }
  238. /// Compare C string and segment
  239. template<typename CharType, typename IteratorR>
  240. bool operator==(const CharType* l, const segment<IteratorR>& r)
  241. {
  242. return detail::compare_string(l, r) == 0;
  243. }
  244. /// Compare C string and segment
  245. template<typename CharType, typename IteratorR>
  246. bool operator!=(const CharType* l, const segment<IteratorR>& r)
  247. {
  248. return detail::compare_string(l, r) != 0;
  249. }
  250. /// Compare C string and segment
  251. template<typename CharType, typename IteratorR>
  252. bool operator<(const CharType* l, const segment<IteratorR>& r)
  253. {
  254. return detail::compare_string(l, r) < 0;
  255. }
  256. /// Compare C string and segment
  257. template<typename CharType, typename IteratorR>
  258. bool operator<=(const CharType* l, const segment<IteratorR>& r)
  259. {
  260. return detail::compare_string(l, r) <= 0;
  261. }
  262. /// Compare C string and segment
  263. template<typename CharType, typename IteratorR>
  264. bool operator>(const CharType* l, const segment<IteratorR>& r)
  265. {
  266. return detail::compare_string(l, r) > 0;
  267. }
  268. /// Compare C string and segment
  269. template<typename CharType, typename IteratorR>
  270. bool operator>=(const CharType* l, const segment<IteratorR>& r)
  271. {
  272. return detail::compare_string(l, r) >= 0;
  273. }
  274. /// Compare C string and segment
  275. template<typename Iterator, typename CharType>
  276. bool operator==(const segment<Iterator>& l, const CharType* r)
  277. {
  278. return detail::compare_string(l, r) == 0;
  279. }
  280. /// Compare C string and segment
  281. template<typename Iterator, typename CharType>
  282. bool operator!=(const segment<Iterator>& l, const CharType* r)
  283. {
  284. return detail::compare_string(l, r) != 0;
  285. }
  286. /// Compare C string and segment
  287. template<typename Iterator, typename CharType>
  288. bool operator<(const segment<Iterator>& l, const CharType* r)
  289. {
  290. return detail::compare_string(l, r) < 0;
  291. }
  292. /// Compare C string and segment
  293. template<typename Iterator, typename CharType>
  294. bool operator<=(const segment<Iterator>& l, const CharType* r)
  295. {
  296. return detail::compare_string(l, r) <= 0;
  297. }
  298. /// Compare C string and segment
  299. template<typename Iterator, typename CharType>
  300. bool operator>(const segment<Iterator>& l, const CharType* r)
  301. {
  302. return detail::compare_string(l, r) > 0;
  303. }
  304. /// Compare C string and segment
  305. template<typename Iterator, typename CharType>
  306. bool operator>=(const segment<Iterator>& l, const CharType* r)
  307. {
  308. return detail::compare_string(l, r) >= 0;
  309. }
  310. typedef segment<std::string::const_iterator> ssegment; ///< convenience typedef
  311. typedef segment<std::wstring::const_iterator> wssegment; ///< convenience typedef
  312. #ifndef BOOST_LOCALE_NO_CXX20_STRING8
  313. typedef segment<std::u8string::const_iterator> u8ssegment; ///< convenience typedef
  314. #endif
  315. #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
  316. typedef segment<std::u16string::const_iterator> u16ssegment; ///< convenience typedef
  317. #endif
  318. #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
  319. typedef segment<std::u32string::const_iterator> u32ssegment; ///< convenience typedef
  320. #endif
  321. typedef segment<const char*> csegment; ///< convenience typedef
  322. typedef segment<const wchar_t*> wcsegment; ///< convenience typedef
  323. #ifdef __cpp_char8_t
  324. typedef segment<const char8_t*> u8csegment; ///< convenience typedef
  325. #endif
  326. #ifdef BOOST_LOCALE_ENABLE_CHAR16_T
  327. typedef segment<const char16_t*> u16csegment; ///< convenience typedef
  328. #endif
  329. #ifdef BOOST_LOCALE_ENABLE_CHAR32_T
  330. typedef segment<const char32_t*> u32csegment; ///< convenience typedef
  331. #endif
  332. /// Write the segment to the stream character by character
  333. template<typename CharType, typename TraitsType, typename Iterator>
  334. std::basic_ostream<CharType, TraitsType>& operator<<(std::basic_ostream<CharType, TraitsType>& out,
  335. const segment<Iterator>& seg)
  336. {
  337. for(const auto& p : seg)
  338. out << p;
  339. return out;
  340. }
  341. /// @}
  342. }}} // namespace boost::locale::boundary
  343. #ifdef BOOST_MSVC
  344. # pragma warning(pop)
  345. #endif
  346. #endif