perl_matcher_common.hpp 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921
  1. /*
  2. *
  3. * Copyright (c) 2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE perl_matcher_common.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Definitions of perl_matcher member functions that are
  16. * common to both the recursive and non-recursive versions.
  17. */
  18. #ifndef BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP
  19. #define BOOST_REGEX_V5_PERL_MATCHER_COMMON_HPP
  20. #ifdef BOOST_REGEX_MSVC
  21. # pragma warning(push)
  22. #pragma warning(disable:4459)
  23. #if BOOST_REGEX_MSVC < 1910
  24. #pragma warning(disable:4800)
  25. #endif
  26. #endif
  27. namespace boost{
  28. namespace BOOST_REGEX_DETAIL_NS{
  29. #ifdef BOOST_REGEX_MSVC
  30. # pragma warning(push)
  31. #pragma warning(disable:26812)
  32. #endif
  33. template <class BidiIterator, class Allocator, class traits>
  34. void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
  35. {
  36. typedef typename std::iterator_traits<BidiIterator>::iterator_category category;
  37. typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
  38. if(e.empty())
  39. {
  40. // precondition failure: e is not a valid regex.
  41. std::invalid_argument ex("Invalid regular expression object");
  42. #ifndef BOOST_REGEX_STANDALONE
  43. boost::throw_exception(ex);
  44. #else
  45. throw e;
  46. #endif
  47. }
  48. pstate = 0;
  49. m_match_flags = f;
  50. estimate_max_state_count(static_cast<category*>(0));
  51. expression_flag_type re_f = re.flags();
  52. icase = re_f & regex_constants::icase;
  53. if(!(m_match_flags & (match_perl|match_posix)))
  54. {
  55. if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
  56. m_match_flags |= match_perl;
  57. else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  58. m_match_flags |= match_perl;
  59. else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal))
  60. m_match_flags |= match_perl;
  61. else
  62. m_match_flags |= match_posix;
  63. }
  64. if(m_match_flags & match_posix)
  65. {
  66. m_temp_match.reset(new match_results<BidiIterator, Allocator>());
  67. m_presult = m_temp_match.get();
  68. }
  69. else
  70. m_presult = &m_result;
  71. m_stack_base = 0;
  72. m_backup_state = 0;
  73. // find the value to use for matching word boundaries:
  74. m_word_mask = re.get_data().m_word_mask;
  75. // find bitmask to use for matching '.':
  76. match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline);
  77. // Disable match_any if requested in the state machine:
  78. if(e.get_data().m_disable_match_any)
  79. m_match_flags &= regex_constants::match_not_any;
  80. }
  81. #ifdef BOOST_REGEX_MSVC
  82. # pragma warning(pop)
  83. #endif
  84. template <class BidiIterator, class Allocator, class traits>
  85. void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
  86. {
  87. //
  88. // How many states should we allow our machine to visit before giving up?
  89. // This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
  90. // where N is the length of the string, and S is the number of states
  91. // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
  92. // but these take unreasonably amounts of time to bale out in pathological
  93. // cases.
  94. //
  95. // Calculate NS^2 first:
  96. //
  97. static const std::ptrdiff_t k = 100000;
  98. std::ptrdiff_t dist = std::distance(base, last);
  99. if(dist == 0)
  100. dist = 1;
  101. std::ptrdiff_t states = re.size();
  102. if(states == 0)
  103. states = 1;
  104. if ((std::numeric_limits<std::ptrdiff_t>::max)() / states < states)
  105. {
  106. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  107. return;
  108. }
  109. states *= states;
  110. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  111. {
  112. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  113. return;
  114. }
  115. states *= dist;
  116. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  117. {
  118. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  119. return;
  120. }
  121. states += k;
  122. max_state_count = states;
  123. //
  124. // Now calculate N^2:
  125. //
  126. states = dist;
  127. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  128. {
  129. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  130. return;
  131. }
  132. states *= dist;
  133. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  134. {
  135. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  136. return;
  137. }
  138. states += k;
  139. //
  140. // N^2 can be a very large number indeed, to prevent things getting out
  141. // of control, cap the max states:
  142. //
  143. if(states > BOOST_REGEX_MAX_STATE_COUNT)
  144. states = BOOST_REGEX_MAX_STATE_COUNT;
  145. //
  146. // If (the possibly capped) N^2 is larger than our first estimate,
  147. // use this instead:
  148. //
  149. if(states > max_state_count)
  150. max_state_count = states;
  151. }
  152. template <class BidiIterator, class Allocator, class traits>
  153. inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
  154. {
  155. // we don't know how long the sequence is:
  156. max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
  157. }
  158. template <class BidiIterator, class Allocator, class traits>
  159. inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
  160. {
  161. return match_imp();
  162. }
  163. template <class BidiIterator, class Allocator, class traits>
  164. bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
  165. {
  166. // initialise our stack if we are non-recursive:
  167. save_state_init init(&m_stack_base, &m_backup_state);
  168. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  169. #if !defined(BOOST_NO_EXCEPTIONS)
  170. try{
  171. #endif
  172. // reset our state machine:
  173. position = base;
  174. search_base = base;
  175. state_count = 0;
  176. m_match_flags |= regex_constants::match_all;
  177. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
  178. m_presult->set_base(base);
  179. m_presult->set_named_subs(this->re.get_named_subs());
  180. if(m_match_flags & match_posix)
  181. m_result = *m_presult;
  182. verify_options(re.flags(), m_match_flags);
  183. if(0 == match_prefix())
  184. return false;
  185. return (m_result[0].second == last) && (m_result[0].first == base);
  186. #if !defined(BOOST_NO_EXCEPTIONS)
  187. }
  188. catch(...)
  189. {
  190. // unwind all pushed states, apart from anything else this
  191. // ensures that all the states are correctly destructed
  192. // not just the memory freed.
  193. while(unwind(true)){}
  194. throw;
  195. }
  196. #endif
  197. }
  198. template <class BidiIterator, class Allocator, class traits>
  199. inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
  200. {
  201. return find_imp();
  202. }
  203. template <class BidiIterator, class Allocator, class traits>
  204. bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
  205. {
  206. static matcher_proc_type const s_find_vtable[7] =
  207. {
  208. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_any,
  209. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_word,
  210. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_line,
  211. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf,
  212. &perl_matcher<BidiIterator, Allocator, traits>::match_prefix,
  213. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  214. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  215. };
  216. // initialise our stack if we are non-recursive:
  217. save_state_init init(&m_stack_base, &m_backup_state);
  218. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  219. #if !defined(BOOST_NO_EXCEPTIONS)
  220. try{
  221. #endif
  222. state_count = 0;
  223. if((m_match_flags & regex_constants::match_init) == 0)
  224. {
  225. // reset our state machine:
  226. search_base = position = base;
  227. pstate = re.get_first_state();
  228. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
  229. m_presult->set_base(base);
  230. m_presult->set_named_subs(this->re.get_named_subs());
  231. m_match_flags |= regex_constants::match_init;
  232. }
  233. else
  234. {
  235. // start again:
  236. search_base = position = m_result[0].second;
  237. // If last match was null and match_not_null was not set then increment
  238. // our start position, otherwise we go into an infinite loop:
  239. if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
  240. {
  241. if(position == last)
  242. return false;
  243. else
  244. ++position;
  245. }
  246. // reset $` start:
  247. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
  248. //if((base != search_base) && (base == backstop))
  249. // m_match_flags |= match_prev_avail;
  250. }
  251. if(m_match_flags & match_posix)
  252. {
  253. m_result.set_size(static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
  254. m_result.set_base(base);
  255. }
  256. verify_options(re.flags(), m_match_flags);
  257. // find out what kind of expression we have:
  258. unsigned type = (m_match_flags & match_continuous) ?
  259. static_cast<unsigned int>(regbase::restart_continue)
  260. : static_cast<unsigned int>(re.get_restart_type());
  261. // call the appropriate search routine:
  262. matcher_proc_type proc = s_find_vtable[type];
  263. return (this->*proc)();
  264. #if !defined(BOOST_NO_EXCEPTIONS)
  265. }
  266. catch(...)
  267. {
  268. // unwind all pushed states, apart from anything else this
  269. // ensures that all the states are correctly destructed
  270. // not just the memory freed.
  271. while(unwind(true)){}
  272. throw;
  273. }
  274. #endif
  275. }
  276. template <class BidiIterator, class Allocator, class traits>
  277. bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
  278. {
  279. m_has_partial_match = false;
  280. m_has_found_match = false;
  281. pstate = re.get_first_state();
  282. m_presult->set_first(position);
  283. restart = position;
  284. match_all_states();
  285. if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial))
  286. {
  287. m_has_found_match = true;
  288. m_presult->set_second(last, 0, false);
  289. position = last;
  290. if((m_match_flags & match_posix) == match_posix)
  291. {
  292. m_result.maybe_assign(*m_presult);
  293. }
  294. }
  295. #ifdef BOOST_REGEX_MATCH_EXTRA
  296. if(m_has_found_match && (match_extra & m_match_flags))
  297. {
  298. //
  299. // we have a match, reverse the capture information:
  300. //
  301. for(unsigned i = 0; i < m_presult->size(); ++i)
  302. {
  303. typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures();
  304. std::reverse(seq.begin(), seq.end());
  305. }
  306. }
  307. #endif
  308. if(!m_has_found_match)
  309. position = restart; // reset search postion
  310. return m_has_found_match;
  311. }
  312. template <class BidiIterator, class Allocator, class traits>
  313. bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
  314. {
  315. unsigned int len = static_cast<const re_literal*>(pstate)->length;
  316. const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
  317. //
  318. // compare string with what we stored in
  319. // our records:
  320. for(unsigned int i = 0; i < len; ++i, ++position)
  321. {
  322. if((position == last) || (traits_inst.translate(*position, icase) != what[i]))
  323. return false;
  324. }
  325. pstate = pstate->next.p;
  326. return true;
  327. }
  328. template <class BidiIterator, class Allocator, class traits>
  329. bool perl_matcher<BidiIterator, Allocator, traits>::match_start_line()
  330. {
  331. if(position == backstop)
  332. {
  333. if((m_match_flags & match_prev_avail) == 0)
  334. {
  335. if((m_match_flags & match_not_bol) == 0)
  336. {
  337. pstate = pstate->next.p;
  338. return true;
  339. }
  340. return false;
  341. }
  342. }
  343. else if(m_match_flags & match_single_line)
  344. return false;
  345. // check the previous value character:
  346. BidiIterator t(position);
  347. --t;
  348. if(position != last)
  349. {
  350. if(is_separator(*t) && !((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) )
  351. {
  352. pstate = pstate->next.p;
  353. return true;
  354. }
  355. }
  356. else if(is_separator(*t))
  357. {
  358. pstate = pstate->next.p;
  359. return true;
  360. }
  361. return false;
  362. }
  363. template <class BidiIterator, class Allocator, class traits>
  364. bool perl_matcher<BidiIterator, Allocator, traits>::match_end_line()
  365. {
  366. if(position != last)
  367. {
  368. if(m_match_flags & match_single_line)
  369. return false;
  370. // we're not yet at the end so *first is always valid:
  371. if(is_separator(*position))
  372. {
  373. if((position != backstop) || (m_match_flags & match_prev_avail))
  374. {
  375. // check that we're not in the middle of \r\n sequence
  376. BidiIterator t(position);
  377. --t;
  378. if((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n')))
  379. {
  380. return false;
  381. }
  382. }
  383. pstate = pstate->next.p;
  384. return true;
  385. }
  386. }
  387. else if((m_match_flags & match_not_eol) == 0)
  388. {
  389. pstate = pstate->next.p;
  390. return true;
  391. }
  392. return false;
  393. }
  394. template <class BidiIterator, class Allocator, class traits>
  395. bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
  396. {
  397. if(position == last)
  398. return false;
  399. if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0))
  400. return false;
  401. if((*position == char_type(0)) && (m_match_flags & match_not_dot_null))
  402. return false;
  403. pstate = pstate->next.p;
  404. ++position;
  405. return true;
  406. }
  407. template <class BidiIterator, class Allocator, class traits>
  408. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
  409. {
  410. bool b; // indcates whether next character is a word character
  411. if(position != last)
  412. {
  413. // prev and this character must be opposites:
  414. b = traits_inst.isctype(*position, m_word_mask);
  415. }
  416. else
  417. {
  418. if (m_match_flags & match_not_eow)
  419. return false;
  420. b = false;
  421. }
  422. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  423. {
  424. if(m_match_flags & match_not_bow)
  425. return false;
  426. else
  427. b ^= false;
  428. }
  429. else
  430. {
  431. --position;
  432. b ^= traits_inst.isctype(*position, m_word_mask);
  433. ++position;
  434. }
  435. if(b)
  436. {
  437. pstate = pstate->next.p;
  438. return true;
  439. }
  440. return false; // no match if we get to here...
  441. }
  442. template <class BidiIterator, class Allocator, class traits>
  443. bool perl_matcher<BidiIterator, Allocator, traits>::match_within_word()
  444. {
  445. bool b = !match_word_boundary();
  446. if(b)
  447. pstate = pstate->next.p;
  448. return b;
  449. /*
  450. if(position == last)
  451. return false;
  452. // both prev and this character must be m_word_mask:
  453. bool prev = traits_inst.isctype(*position, m_word_mask);
  454. {
  455. bool b;
  456. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  457. return false;
  458. else
  459. {
  460. --position;
  461. b = traits_inst.isctype(*position, m_word_mask);
  462. ++position;
  463. }
  464. if(b == prev)
  465. {
  466. pstate = pstate->next.p;
  467. return true;
  468. }
  469. }
  470. return false;
  471. */
  472. }
  473. template <class BidiIterator, class Allocator, class traits>
  474. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_start()
  475. {
  476. if(position == last)
  477. return false; // can't be starting a word if we're already at the end of input
  478. if(!traits_inst.isctype(*position, m_word_mask))
  479. return false; // next character isn't a word character
  480. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  481. {
  482. if(m_match_flags & match_not_bow)
  483. return false; // no previous input
  484. }
  485. else
  486. {
  487. // otherwise inside buffer:
  488. BidiIterator t(position);
  489. --t;
  490. if(traits_inst.isctype(*t, m_word_mask))
  491. return false; // previous character not non-word
  492. }
  493. // OK we have a match:
  494. pstate = pstate->next.p;
  495. return true;
  496. }
  497. template <class BidiIterator, class Allocator, class traits>
  498. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_end()
  499. {
  500. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  501. return false; // start of buffer can't be end of word
  502. BidiIterator t(position);
  503. --t;
  504. if(traits_inst.isctype(*t, m_word_mask) == false)
  505. return false; // previous character wasn't a word character
  506. if(position == last)
  507. {
  508. if(m_match_flags & match_not_eow)
  509. return false; // end of buffer but not end of word
  510. }
  511. else
  512. {
  513. // otherwise inside buffer:
  514. if(traits_inst.isctype(*position, m_word_mask))
  515. return false; // next character is a word character
  516. }
  517. pstate = pstate->next.p;
  518. return true; // if we fall through to here then we've succeeded
  519. }
  520. template <class BidiIterator, class Allocator, class traits>
  521. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start()
  522. {
  523. if((position != backstop) || (m_match_flags & match_not_bob))
  524. return false;
  525. // OK match:
  526. pstate = pstate->next.p;
  527. return true;
  528. }
  529. template <class BidiIterator, class Allocator, class traits>
  530. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end()
  531. {
  532. if((position != last) || (m_match_flags & match_not_eob))
  533. return false;
  534. // OK match:
  535. pstate = pstate->next.p;
  536. return true;
  537. }
  538. template <class BidiIterator, class Allocator, class traits>
  539. bool perl_matcher<BidiIterator, Allocator, traits>::match_backref()
  540. {
  541. //
  542. // Compare with what we previously matched.
  543. // Note that this succeeds if the backref did not partisipate
  544. // in the match, this is in line with ECMAScript, but not Perl
  545. // or PCRE.
  546. //
  547. int index = static_cast<const re_brace*>(pstate)->index;
  548. if(index >= hash_value_mask)
  549. {
  550. named_subexpressions::range_type r = re.get_data().equal_range(index);
  551. BOOST_REGEX_ASSERT(r.first != r.second);
  552. do
  553. {
  554. index = r.first->index;
  555. ++r.first;
  556. }while((r.first != r.second) && ((*m_presult)[index].matched != true));
  557. }
  558. if((m_match_flags & match_perl) && !(*m_presult)[index].matched)
  559. return false;
  560. BidiIterator i = (*m_presult)[index].first;
  561. BidiIterator j = (*m_presult)[index].second;
  562. while(i != j)
  563. {
  564. if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase)))
  565. return false;
  566. ++i;
  567. ++position;
  568. }
  569. pstate = pstate->next.p;
  570. return true;
  571. }
  572. template <class BidiIterator, class Allocator, class traits>
  573. bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
  574. {
  575. typedef typename traits::char_class_type char_class_type;
  576. // let the traits class do the work:
  577. if(position == last)
  578. return false;
  579. BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase);
  580. if(t != position)
  581. {
  582. pstate = pstate->next.p;
  583. position = t;
  584. return true;
  585. }
  586. return false;
  587. }
  588. template <class BidiIterator, class Allocator, class traits>
  589. bool perl_matcher<BidiIterator, Allocator, traits>::match_set()
  590. {
  591. if(position == last)
  592. return false;
  593. if(static_cast<const re_set*>(pstate)->_map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  594. {
  595. pstate = pstate->next.p;
  596. ++position;
  597. return true;
  598. }
  599. return false;
  600. }
  601. template <class BidiIterator, class Allocator, class traits>
  602. bool perl_matcher<BidiIterator, Allocator, traits>::match_jump()
  603. {
  604. pstate = static_cast<const re_jump*>(pstate)->alt.p;
  605. return true;
  606. }
  607. template <class BidiIterator, class Allocator, class traits>
  608. bool perl_matcher<BidiIterator, Allocator, traits>::match_combining()
  609. {
  610. if(position == last)
  611. return false;
  612. if(is_combining(traits_inst.translate(*position, icase)))
  613. return false;
  614. ++position;
  615. while((position != last) && is_combining(traits_inst.translate(*position, icase)))
  616. ++position;
  617. pstate = pstate->next.p;
  618. return true;
  619. }
  620. template <class BidiIterator, class Allocator, class traits>
  621. bool perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end()
  622. {
  623. if(m_match_flags & match_not_eob)
  624. return false;
  625. BidiIterator p(position);
  626. while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p;
  627. if(p != last)
  628. return false;
  629. pstate = pstate->next.p;
  630. return true;
  631. }
  632. template <class BidiIterator, class Allocator, class traits>
  633. bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue()
  634. {
  635. if(position == search_base)
  636. {
  637. pstate = pstate->next.p;
  638. return true;
  639. }
  640. return false;
  641. }
  642. template <class BidiIterator, class Allocator, class traits>
  643. bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
  644. {
  645. #ifdef BOOST_REGEX_MSVC
  646. #pragma warning(push)
  647. #pragma warning(disable:4127)
  648. #endif
  649. if( ::boost::is_random_access_iterator<BidiIterator>::value)
  650. {
  651. std::ptrdiff_t maxlen = std::distance(backstop, position);
  652. if(maxlen < static_cast<const re_brace*>(pstate)->index)
  653. return false;
  654. std::advance(position, -static_cast<const re_brace*>(pstate)->index);
  655. }
  656. else
  657. {
  658. int c = static_cast<const re_brace*>(pstate)->index;
  659. while(c--)
  660. {
  661. if(position == backstop)
  662. return false;
  663. --position;
  664. }
  665. }
  666. pstate = pstate->next.p;
  667. return true;
  668. #ifdef BOOST_REGEX_MSVC
  669. #pragma warning(pop)
  670. #endif
  671. }
  672. template <class BidiIterator, class Allocator, class traits>
  673. inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
  674. {
  675. // return true if marked sub-expression N has been matched:
  676. int index = static_cast<const re_brace*>(pstate)->index;
  677. bool result = false;
  678. if(index == 9999)
  679. {
  680. // Magic value for a (DEFINE) block:
  681. return false;
  682. }
  683. else if(index > 0)
  684. {
  685. // Have we matched subexpression "index"?
  686. // Check if index is a hash value:
  687. if(index >= hash_value_mask)
  688. {
  689. named_subexpressions::range_type r = re.get_data().equal_range(index);
  690. while(r.first != r.second)
  691. {
  692. if((*m_presult)[r.first->index].matched)
  693. {
  694. result = true;
  695. break;
  696. }
  697. ++r.first;
  698. }
  699. }
  700. else
  701. {
  702. result = (*m_presult)[index].matched;
  703. }
  704. pstate = pstate->next.p;
  705. }
  706. else
  707. {
  708. // Have we recursed into subexpression "index"?
  709. // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
  710. int idx = -(index+1);
  711. if(idx >= hash_value_mask)
  712. {
  713. named_subexpressions::range_type r = re.get_data().equal_range(idx);
  714. int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx;
  715. while(r.first != r.second)
  716. {
  717. result |= (stack_index == r.first->index);
  718. if(result)break;
  719. ++r.first;
  720. }
  721. }
  722. else
  723. {
  724. result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0));
  725. }
  726. pstate = pstate->next.p;
  727. }
  728. return result;
  729. }
  730. template <class BidiIterator, class Allocator, class traits>
  731. bool perl_matcher<BidiIterator, Allocator, traits>::match_fail()
  732. {
  733. // Just force a backtrack:
  734. return false;
  735. }
  736. template <class BidiIterator, class Allocator, class traits>
  737. bool perl_matcher<BidiIterator, Allocator, traits>::match_accept()
  738. {
  739. if(!recursion_stack.empty())
  740. {
  741. return skip_until_paren(recursion_stack.back().idx);
  742. }
  743. else
  744. {
  745. return skip_until_paren(INT_MAX);
  746. }
  747. }
  748. template <class BidiIterator, class Allocator, class traits>
  749. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
  750. {
  751. #ifdef BOOST_REGEX_MSVC
  752. #pragma warning(push)
  753. #pragma warning(disable:4127)
  754. #endif
  755. const unsigned char* _map = re.get_map();
  756. while(true)
  757. {
  758. // skip everything we can't match:
  759. while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) )
  760. ++position;
  761. if(position == last)
  762. {
  763. // run out of characters, try a null match if possible:
  764. if(re.can_be_null())
  765. return match_prefix();
  766. break;
  767. }
  768. // now try and obtain a match:
  769. if(match_prefix())
  770. return true;
  771. if(position == last)
  772. return false;
  773. ++position;
  774. }
  775. return false;
  776. #ifdef BOOST_REGEX_MSVC
  777. #pragma warning(pop)
  778. #endif
  779. }
  780. template <class BidiIterator, class Allocator, class traits>
  781. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_word()
  782. {
  783. #ifdef BOOST_REGEX_MSVC
  784. #pragma warning(push)
  785. #pragma warning(disable:4127)
  786. #endif
  787. // do search optimised for word starts:
  788. const unsigned char* _map = re.get_map();
  789. if((m_match_flags & match_prev_avail) || (position != base))
  790. --position;
  791. else if(match_prefix())
  792. return true;
  793. do
  794. {
  795. while((position != last) && traits_inst.isctype(*position, m_word_mask))
  796. ++position;
  797. while((position != last) && !traits_inst.isctype(*position, m_word_mask))
  798. ++position;
  799. if(position == last)
  800. break;
  801. if(can_start(*position, _map, (unsigned char)mask_any) )
  802. {
  803. if(match_prefix())
  804. return true;
  805. }
  806. if(position == last)
  807. break;
  808. } while(true);
  809. return false;
  810. #ifdef BOOST_REGEX_MSVC
  811. #pragma warning(pop)
  812. #endif
  813. }
  814. template <class BidiIterator, class Allocator, class traits>
  815. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line()
  816. {
  817. // do search optimised for line starts:
  818. const unsigned char* _map = re.get_map();
  819. if(match_prefix())
  820. return true;
  821. while(position != last)
  822. {
  823. while((position != last) && !is_separator(*position))
  824. ++position;
  825. if(position == last)
  826. return false;
  827. ++position;
  828. if(position == last)
  829. {
  830. if(re.can_be_null() && match_prefix())
  831. return true;
  832. return false;
  833. }
  834. if( can_start(*position, _map, (unsigned char)mask_any) )
  835. {
  836. if(match_prefix())
  837. return true;
  838. }
  839. if(position == last)
  840. return false;
  841. //++position;
  842. }
  843. return false;
  844. }
  845. template <class BidiIterator, class Allocator, class traits>
  846. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf()
  847. {
  848. if((position == base) && ((m_match_flags & match_not_bob) == 0))
  849. return match_prefix();
  850. return false;
  851. }
  852. template <class BidiIterator, class Allocator, class traits>
  853. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit()
  854. {
  855. return false;
  856. }
  857. } // namespace BOOST_REGEX_DETAIL_NS
  858. } // namespace boost
  859. #ifdef BOOST_REGEX_MSVC
  860. # pragma warning(pop)
  861. #endif
  862. #endif