string_parse_tree.hpp 8.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
  2. #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
  3. /* Copyright (c) 2004-2005 CrystalClear Software, Inc.
  4. * Use, modification and distribution is subject to the
  5. * Boost Software License, Version 1.0. (See accompanying
  6. * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
  7. * Author: Jeff Garland, Bart Garst
  8. * $Date$
  9. */
  10. #include <boost/algorithm/string/case_conv.hpp>
  11. #include <cctype>
  12. #include <map>
  13. #include <string>
  14. #include <vector>
  15. #include <ostream>
  16. #include <iterator>
  17. #include <algorithm>
  18. namespace boost { namespace date_time {
  19. template<typename charT>
  20. struct parse_match_result
  21. {
  22. parse_match_result() :
  23. match_depth(0),
  24. current_match(PARSE_ERROR)
  25. {}
  26. typedef std::basic_string<charT> string_type;
  27. string_type remaining() const
  28. {
  29. if (match_depth == cache.size()) {
  30. return string_type();
  31. }
  32. if (current_match == PARSE_ERROR) {
  33. return cache;
  34. }
  35. //some of the cache was used return the rest
  36. return string_type(cache, match_depth);
  37. }
  38. charT last_char() const
  39. {
  40. return cache[cache.size()-1];
  41. }
  42. //! Returns true if more characters were parsed than was necessary
  43. /*! Should be used in conjunction with last_char()
  44. * to get the remaining character.
  45. */
  46. bool has_remaining() const
  47. {
  48. return (cache.size() > match_depth);
  49. }
  50. // cache will hold characters that have been read from the stream
  51. string_type cache;
  52. unsigned short match_depth;
  53. short current_match;
  54. enum PARSE_STATE { PARSE_ERROR = -1 };
  55. };
  56. //for debug -- really only char streams...
  57. template<typename charT>
  58. std::basic_ostream<charT>&
  59. operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr)
  60. {
  61. os << "cm: " << mr.current_match
  62. << " C: '" << mr.cache
  63. << "' md: " << mr.match_depth
  64. << " R: " << mr.remaining();
  65. return os;
  66. }
  67. //! Recursive data structure to allow efficient parsing of various strings
  68. /*! This class provides a quick lookup by building what amounts to a
  69. * tree data structure. It also features a match function which can
  70. * can handle nasty input interators by caching values as it recurses
  71. * the tree so that it can backtrack as needed.
  72. */
  73. template<typename charT>
  74. struct string_parse_tree
  75. {
  76. #if BOOST_WORKAROUND( BOOST_BORLANDC, BOOST_TESTED_AT(0x581) )
  77. typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll;
  78. #else
  79. typedef std::multimap<charT, string_parse_tree > ptree_coll;
  80. #endif
  81. typedef typename ptree_coll::value_type value_type;
  82. typedef typename ptree_coll::iterator iterator;
  83. typedef typename ptree_coll::const_iterator const_iterator;
  84. typedef std::basic_string<charT> string_type;
  85. typedef std::vector<std::basic_string<charT> > collection_type;
  86. typedef parse_match_result<charT> parse_match_result_type;
  87. /*! Parameter "starting_point" designates where the numbering begins.
  88. * A starting_point of zero will start the numbering at zero
  89. * (Sun=0, Mon=1, ...) were a starting_point of one starts the
  90. * numbering at one (Jan=1, Feb=2, ...). The default is zero,
  91. * negative vaules are not allowed */
  92. string_parse_tree(collection_type names, unsigned int starting_point=0) :
  93. m_value(parse_match_result_type::PARSE_ERROR)
  94. {
  95. // iterate thru all the elements and build the tree
  96. unsigned short index = 0;
  97. while (index != names.size() ) {
  98. string_type s = boost::algorithm::to_lower_copy(names[index]);
  99. insert(s, static_cast<unsigned short>(index + starting_point));
  100. index++;
  101. }
  102. //set the last tree node = index+1 indicating a value
  103. index++;
  104. }
  105. string_parse_tree(short value = parse_match_result_type::PARSE_ERROR) :
  106. m_value(value)
  107. {}
  108. ptree_coll m_next_chars;
  109. short m_value;
  110. void insert(const string_type& s, unsigned short value)
  111. {
  112. unsigned int i = 0;
  113. iterator ti;
  114. while(i < s.size()) {
  115. if (i==0) {
  116. if (i == (s.size()-1)) {
  117. ti = m_next_chars.insert(value_type(s[i],
  118. string_parse_tree<charT>(value)));
  119. }
  120. else {
  121. ti = m_next_chars.insert(value_type(s[i],
  122. string_parse_tree<charT>()));
  123. }
  124. }
  125. else {
  126. if (i == (s.size()-1)) {
  127. ti = ti->second.m_next_chars.insert(value_type(s[i],
  128. string_parse_tree<charT>(value)));
  129. }
  130. else {
  131. ti = ti->second.m_next_chars.insert(value_type(s[i],
  132. string_parse_tree<charT>()));
  133. }
  134. }
  135. i++;
  136. }
  137. }
  138. //! Recursive function that finds a matching string in the tree.
  139. /*! Must check match_results::has_remaining() after match() is
  140. * called. This is required so the user can determine if
  141. * stream iterator is already pointing to the expected
  142. * character or not (match() might advance sitr to next char in stream).
  143. *
  144. * A parse_match_result that has been returned from a failed match
  145. * attempt can be sent in to the match function of a different
  146. * string_parse_tree to attempt a match there. Use the iterators
  147. * for the partially consumed stream, the parse_match_result object,
  148. * and '0' for the level parameter. */
  149. short
  150. match(std::istreambuf_iterator<charT>& sitr,
  151. std::istreambuf_iterator<charT>& stream_end,
  152. parse_match_result_type& result,
  153. unsigned int& level) const
  154. {
  155. level++;
  156. charT c;
  157. // if we conditionally advance sitr, we won't have
  158. // to consume the next character past the input
  159. bool adv_itr = true;
  160. if (level > result.cache.size()) {
  161. if (sitr == stream_end) return 0; //bail - input exhausted
  162. c = static_cast<charT>(std::tolower(*sitr));
  163. //result.cache += c;
  164. //sitr++;
  165. }
  166. else {
  167. // if we're looking for characters from the cache,
  168. // we don't want to increment sitr
  169. adv_itr = false;
  170. c = static_cast<charT>(std::tolower(result.cache[level-1]));
  171. }
  172. const_iterator litr = m_next_chars.lower_bound(c);
  173. const_iterator uitr = m_next_chars.upper_bound(c);
  174. while (litr != uitr) { // equal if not found
  175. if(adv_itr) {
  176. sitr++;
  177. result.cache += c;
  178. }
  179. if (litr->second.m_value != -1) { // -1 is default value
  180. if (result.match_depth < level) {
  181. result.current_match = litr->second.m_value;
  182. result.match_depth = static_cast<unsigned short>(level);
  183. }
  184. litr->second.match(sitr, stream_end,
  185. result, level);
  186. level--;
  187. }
  188. else {
  189. litr->second.match(sitr, stream_end,
  190. result, level);
  191. level--;
  192. }
  193. if(level <= result.cache.size()) {
  194. adv_itr = false;
  195. }
  196. litr++;
  197. }
  198. return result.current_match;
  199. }
  200. /*! Must check match_results::has_remaining() after match() is
  201. * called. This is required so the user can determine if
  202. * stream iterator is already pointing to the expected
  203. * character or not (match() might advance sitr to next char in stream).
  204. */
  205. parse_match_result_type
  206. match(std::istreambuf_iterator<charT>& sitr,
  207. std::istreambuf_iterator<charT>& stream_end) const
  208. {
  209. // lookup to_lower of char in tree.
  210. unsigned int level = 0;
  211. // string_type cache;
  212. parse_match_result_type result;
  213. match(sitr, stream_end, result, level);
  214. return result;
  215. }
  216. void printme(std::ostream& os, int& level)
  217. {
  218. level++;
  219. iterator itr = m_next_chars.begin();
  220. iterator end = m_next_chars.end();
  221. // os << "starting level: " << level << std::endl;
  222. while (itr != end) {
  223. os << "level: " << level
  224. << " node: " << itr->first
  225. << " value: " << itr->second.m_value
  226. << std::endl;
  227. itr->second.printme(os, level);
  228. itr++;
  229. }
  230. level--;
  231. }
  232. void print(std::ostream& os)
  233. {
  234. int level = 0;
  235. printme(os, level);
  236. }
  237. void printmatch(std::ostream& os, charT c)
  238. {
  239. iterator litr = m_next_chars.lower_bound(c);
  240. iterator uitr = m_next_chars.upper_bound(c);
  241. os << "matches for: " << c << std::endl;
  242. while (litr != uitr) {
  243. os << " node: " << litr->first
  244. << " value: " << litr->second.m_value
  245. << std::endl;
  246. litr++;
  247. }
  248. }
  249. };
  250. } } //namespace
  251. #endif