perl_matcher_common.hpp 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030
  1. /*
  2. *
  3. * Copyright (c) 2002
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE perl_matcher_common.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Definitions of perl_matcher member functions that are
  16. * common to both the recursive and non-recursive versions.
  17. */
  18. #ifndef BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
  19. #define BOOST_REGEX_V4_PERL_MATCHER_COMMON_HPP
  20. #ifdef BOOST_MSVC
  21. #pragma warning(push)
  22. #pragma warning(disable: 4103)
  23. #if BOOST_MSVC >= 1800
  24. #pragma warning(disable: 26812)
  25. #endif
  26. #endif
  27. #ifdef BOOST_HAS_ABI_HEADERS
  28. # include BOOST_ABI_PREFIX
  29. #endif
  30. #ifdef BOOST_MSVC
  31. #pragma warning(pop)
  32. #endif
  33. #ifdef BOOST_BORLANDC
  34. # pragma option push -w-8008 -w-8066
  35. #endif
  36. #ifdef BOOST_MSVC
  37. # pragma warning(push)
  38. #if BOOST_MSVC < 1910
  39. #pragma warning(disable:4800)
  40. #endif
  41. #endif
  42. namespace boost{
  43. namespace BOOST_REGEX_DETAIL_NS{
  44. #ifdef BOOST_MSVC
  45. # pragma warning(push)
  46. #pragma warning(disable:26812)
  47. #endif
  48. template <class BidiIterator, class Allocator, class traits>
  49. void perl_matcher<BidiIterator, Allocator, traits>::construct_init(const basic_regex<char_type, traits>& e, match_flag_type f)
  50. {
  51. typedef typename regex_iterator_traits<BidiIterator>::iterator_category category;
  52. typedef typename basic_regex<char_type, traits>::flag_type expression_flag_type;
  53. if(e.empty())
  54. {
  55. // precondition failure: e is not a valid regex.
  56. std::invalid_argument ex("Invalid regular expression object");
  57. boost::throw_exception(ex);
  58. }
  59. pstate = 0;
  60. m_match_flags = f;
  61. estimate_max_state_count(static_cast<category*>(0));
  62. expression_flag_type re_f = re.flags();
  63. icase = re_f & regex_constants::icase;
  64. if(!(m_match_flags & (match_perl|match_posix)))
  65. {
  66. if((re_f & (regbase::main_option_type|regbase::no_perl_ex)) == 0)
  67. m_match_flags |= match_perl;
  68. else if((re_f & (regbase::main_option_type|regbase::emacs_ex)) == (regbase::basic_syntax_group|regbase::emacs_ex))
  69. m_match_flags |= match_perl;
  70. else if((re_f & (regbase::main_option_type|regbase::literal)) == (regbase::literal))
  71. m_match_flags |= match_perl;
  72. else
  73. m_match_flags |= match_posix;
  74. }
  75. if(m_match_flags & match_posix)
  76. {
  77. m_temp_match.reset(new match_results<BidiIterator, Allocator>());
  78. m_presult = m_temp_match.get();
  79. }
  80. else
  81. m_presult = &m_result;
  82. #ifdef BOOST_REGEX_NON_RECURSIVE
  83. m_stack_base = 0;
  84. m_backup_state = 0;
  85. #elif defined(BOOST_REGEX_RECURSIVE)
  86. m_can_backtrack = true;
  87. m_have_accept = false;
  88. #endif
  89. // find the value to use for matching word boundaries:
  90. m_word_mask = re.get_data().m_word_mask;
  91. // find bitmask to use for matching '.':
  92. match_any_mask = static_cast<unsigned char>((f & match_not_dot_newline) ? BOOST_REGEX_DETAIL_NS::test_not_newline : BOOST_REGEX_DETAIL_NS::test_newline);
  93. // Disable match_any if requested in the state machine:
  94. if(e.get_data().m_disable_match_any)
  95. m_match_flags &= regex_constants::match_not_any;
  96. }
  97. #ifdef BOOST_MSVC
  98. # pragma warning(pop)
  99. #endif
  100. template <class BidiIterator, class Allocator, class traits>
  101. void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(std::random_access_iterator_tag*)
  102. {
  103. //
  104. // How many states should we allow our machine to visit before giving up?
  105. // This is a heuristic: it takes the greater of O(N^2) and O(NS^2)
  106. // where N is the length of the string, and S is the number of states
  107. // in the machine. It's tempting to up this to O(N^2S) or even O(N^2S^2)
  108. // but these take unreasonably amounts of time to bale out in pathological
  109. // cases.
  110. //
  111. // Calculate NS^2 first:
  112. //
  113. static const std::ptrdiff_t k = 100000;
  114. std::ptrdiff_t dist = boost::BOOST_REGEX_DETAIL_NS::distance(base, last);
  115. if(dist == 0)
  116. dist = 1;
  117. std::ptrdiff_t states = re.size();
  118. if(states == 0)
  119. states = 1;
  120. if ((std::numeric_limits<std::ptrdiff_t>::max)() / states < states)
  121. {
  122. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  123. return;
  124. }
  125. states *= states;
  126. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  127. {
  128. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  129. return;
  130. }
  131. states *= dist;
  132. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  133. {
  134. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  135. return;
  136. }
  137. states += k;
  138. max_state_count = states;
  139. //
  140. // Now calculate N^2:
  141. //
  142. states = dist;
  143. if((std::numeric_limits<std::ptrdiff_t>::max)() / dist < states)
  144. {
  145. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  146. return;
  147. }
  148. states *= dist;
  149. if((std::numeric_limits<std::ptrdiff_t>::max)() - k < states)
  150. {
  151. max_state_count = (std::min)((std::ptrdiff_t)BOOST_REGEX_MAX_STATE_COUNT, (std::numeric_limits<std::ptrdiff_t>::max)() - 2);
  152. return;
  153. }
  154. states += k;
  155. //
  156. // N^2 can be a very large number indeed, to prevent things getting out
  157. // of control, cap the max states:
  158. //
  159. if(states > BOOST_REGEX_MAX_STATE_COUNT)
  160. states = BOOST_REGEX_MAX_STATE_COUNT;
  161. //
  162. // If (the possibly capped) N^2 is larger than our first estimate,
  163. // use this instead:
  164. //
  165. if(states > max_state_count)
  166. max_state_count = states;
  167. }
  168. template <class BidiIterator, class Allocator, class traits>
  169. inline void perl_matcher<BidiIterator, Allocator, traits>::estimate_max_state_count(void*)
  170. {
  171. // we don't know how long the sequence is:
  172. max_state_count = BOOST_REGEX_MAX_STATE_COUNT;
  173. }
  174. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  175. template <class BidiIterator, class Allocator, class traits>
  176. inline bool perl_matcher<BidiIterator, Allocator, traits>::protected_call(
  177. protected_proc_type proc)
  178. {
  179. ::boost::BOOST_REGEX_DETAIL_NS::concrete_protected_call
  180. <perl_matcher<BidiIterator, Allocator, traits> >
  181. obj(this, proc);
  182. return obj.execute();
  183. }
  184. #endif
  185. template <class BidiIterator, class Allocator, class traits>
  186. inline bool perl_matcher<BidiIterator, Allocator, traits>::match()
  187. {
  188. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  189. return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::match_imp);
  190. #else
  191. return match_imp();
  192. #endif
  193. }
  194. template <class BidiIterator, class Allocator, class traits>
  195. bool perl_matcher<BidiIterator, Allocator, traits>::match_imp()
  196. {
  197. // initialise our stack if we are non-recursive:
  198. #ifdef BOOST_REGEX_NON_RECURSIVE
  199. save_state_init init(&m_stack_base, &m_backup_state);
  200. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  201. #if !defined(BOOST_NO_EXCEPTIONS)
  202. try{
  203. #endif
  204. #endif
  205. // reset our state machine:
  206. position = base;
  207. search_base = base;
  208. state_count = 0;
  209. m_match_flags |= regex_constants::match_all;
  210. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
  211. m_presult->set_base(base);
  212. m_presult->set_named_subs(this->re.get_named_subs());
  213. if(m_match_flags & match_posix)
  214. m_result = *m_presult;
  215. verify_options(re.flags(), m_match_flags);
  216. if(0 == match_prefix())
  217. return false;
  218. return (m_result[0].second == last) && (m_result[0].first == base);
  219. #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
  220. }
  221. catch(...)
  222. {
  223. // unwind all pushed states, apart from anything else this
  224. // ensures that all the states are correctly destructed
  225. // not just the memory freed.
  226. while(unwind(true)){}
  227. throw;
  228. }
  229. #endif
  230. }
  231. template <class BidiIterator, class Allocator, class traits>
  232. inline bool perl_matcher<BidiIterator, Allocator, traits>::find()
  233. {
  234. #ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
  235. return protected_call(&perl_matcher<BidiIterator, Allocator, traits>::find_imp);
  236. #else
  237. return find_imp();
  238. #endif
  239. }
  240. template <class BidiIterator, class Allocator, class traits>
  241. bool perl_matcher<BidiIterator, Allocator, traits>::find_imp()
  242. {
  243. static matcher_proc_type const s_find_vtable[7] =
  244. {
  245. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_any,
  246. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_word,
  247. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_line,
  248. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf,
  249. &perl_matcher<BidiIterator, Allocator, traits>::match_prefix,
  250. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  251. &perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit,
  252. };
  253. // initialise our stack if we are non-recursive:
  254. #ifdef BOOST_REGEX_NON_RECURSIVE
  255. save_state_init init(&m_stack_base, &m_backup_state);
  256. used_block_count = BOOST_REGEX_MAX_BLOCKS;
  257. #if !defined(BOOST_NO_EXCEPTIONS)
  258. try{
  259. #endif
  260. #endif
  261. state_count = 0;
  262. if((m_match_flags & regex_constants::match_init) == 0)
  263. {
  264. // reset our state machine:
  265. search_base = position = base;
  266. pstate = re.get_first_state();
  267. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
  268. m_presult->set_base(base);
  269. m_presult->set_named_subs(this->re.get_named_subs());
  270. m_match_flags |= regex_constants::match_init;
  271. }
  272. else
  273. {
  274. // start again:
  275. search_base = position = m_result[0].second;
  276. // If last match was null and match_not_null was not set then increment
  277. // our start position, otherwise we go into an infinite loop:
  278. if(((m_match_flags & match_not_null) == 0) && (m_result.length() == 0))
  279. {
  280. if(position == last)
  281. return false;
  282. else
  283. ++position;
  284. }
  285. // reset $` start:
  286. m_presult->set_size((m_match_flags & match_nosubs) ? 1u : static_cast<typename results_type::size_type>(1u + re.mark_count()), search_base, last);
  287. //if((base != search_base) && (base == backstop))
  288. // m_match_flags |= match_prev_avail;
  289. }
  290. if(m_match_flags & match_posix)
  291. {
  292. m_result.set_size(static_cast<typename results_type::size_type>(1u + re.mark_count()), base, last);
  293. m_result.set_base(base);
  294. }
  295. verify_options(re.flags(), m_match_flags);
  296. // find out what kind of expression we have:
  297. unsigned type = (m_match_flags & match_continuous) ?
  298. static_cast<unsigned int>(regbase::restart_continue)
  299. : static_cast<unsigned int>(re.get_restart_type());
  300. // call the appropriate search routine:
  301. matcher_proc_type proc = s_find_vtable[type];
  302. return (this->*proc)();
  303. #if defined(BOOST_REGEX_NON_RECURSIVE) && !defined(BOOST_NO_EXCEPTIONS)
  304. }
  305. catch(...)
  306. {
  307. // unwind all pushed states, apart from anything else this
  308. // ensures that all the states are correctly destructed
  309. // not just the memory freed.
  310. while(unwind(true)){}
  311. throw;
  312. }
  313. #endif
  314. }
  315. template <class BidiIterator, class Allocator, class traits>
  316. bool perl_matcher<BidiIterator, Allocator, traits>::match_prefix()
  317. {
  318. m_has_partial_match = false;
  319. m_has_found_match = false;
  320. pstate = re.get_first_state();
  321. m_presult->set_first(position);
  322. restart = position;
  323. match_all_states();
  324. if(!m_has_found_match && m_has_partial_match && (m_match_flags & match_partial))
  325. {
  326. m_has_found_match = true;
  327. m_presult->set_second(last, 0, false);
  328. position = last;
  329. if((m_match_flags & match_posix) == match_posix)
  330. {
  331. m_result.maybe_assign(*m_presult);
  332. }
  333. }
  334. #ifdef BOOST_REGEX_MATCH_EXTRA
  335. if(m_has_found_match && (match_extra & m_match_flags))
  336. {
  337. //
  338. // we have a match, reverse the capture information:
  339. //
  340. for(unsigned i = 0; i < m_presult->size(); ++i)
  341. {
  342. typename sub_match<BidiIterator>::capture_sequence_type & seq = ((*m_presult)[i]).get_captures();
  343. std::reverse(seq.begin(), seq.end());
  344. }
  345. }
  346. #endif
  347. if(!m_has_found_match)
  348. position = restart; // reset search postion
  349. #ifdef BOOST_REGEX_RECURSIVE
  350. m_can_backtrack = true; // reset for further searches
  351. #endif
  352. return m_has_found_match;
  353. }
  354. template <class BidiIterator, class Allocator, class traits>
  355. bool perl_matcher<BidiIterator, Allocator, traits>::match_literal()
  356. {
  357. unsigned int len = static_cast<const re_literal*>(pstate)->length;
  358. const char_type* what = reinterpret_cast<const char_type*>(static_cast<const re_literal*>(pstate) + 1);
  359. //
  360. // compare string with what we stored in
  361. // our records:
  362. for(unsigned int i = 0; i < len; ++i, ++position)
  363. {
  364. if((position == last) || (traits_inst.translate(*position, icase) != what[i]))
  365. return false;
  366. }
  367. pstate = pstate->next.p;
  368. return true;
  369. }
  370. template <class BidiIterator, class Allocator, class traits>
  371. bool perl_matcher<BidiIterator, Allocator, traits>::match_start_line()
  372. {
  373. if(position == backstop)
  374. {
  375. if((m_match_flags & match_prev_avail) == 0)
  376. {
  377. if((m_match_flags & match_not_bol) == 0)
  378. {
  379. pstate = pstate->next.p;
  380. return true;
  381. }
  382. return false;
  383. }
  384. }
  385. else if(m_match_flags & match_single_line)
  386. return false;
  387. // check the previous value character:
  388. BidiIterator t(position);
  389. --t;
  390. if(position != last)
  391. {
  392. if(is_separator(*t) && !((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n'))) )
  393. {
  394. pstate = pstate->next.p;
  395. return true;
  396. }
  397. }
  398. else if(is_separator(*t))
  399. {
  400. pstate = pstate->next.p;
  401. return true;
  402. }
  403. return false;
  404. }
  405. template <class BidiIterator, class Allocator, class traits>
  406. bool perl_matcher<BidiIterator, Allocator, traits>::match_end_line()
  407. {
  408. if(position != last)
  409. {
  410. if(m_match_flags & match_single_line)
  411. return false;
  412. // we're not yet at the end so *first is always valid:
  413. if(is_separator(*position))
  414. {
  415. if((position != backstop) || (m_match_flags & match_prev_avail))
  416. {
  417. // check that we're not in the middle of \r\n sequence
  418. BidiIterator t(position);
  419. --t;
  420. if((*t == static_cast<char_type>('\r')) && (*position == static_cast<char_type>('\n')))
  421. {
  422. return false;
  423. }
  424. }
  425. pstate = pstate->next.p;
  426. return true;
  427. }
  428. }
  429. else if((m_match_flags & match_not_eol) == 0)
  430. {
  431. pstate = pstate->next.p;
  432. return true;
  433. }
  434. return false;
  435. }
  436. template <class BidiIterator, class Allocator, class traits>
  437. bool perl_matcher<BidiIterator, Allocator, traits>::match_wild()
  438. {
  439. if(position == last)
  440. return false;
  441. if(is_separator(*position) && ((match_any_mask & static_cast<const re_dot*>(pstate)->mask) == 0))
  442. return false;
  443. if((*position == char_type(0)) && (m_match_flags & match_not_dot_null))
  444. return false;
  445. pstate = pstate->next.p;
  446. ++position;
  447. return true;
  448. }
  449. template <class BidiIterator, class Allocator, class traits>
  450. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_boundary()
  451. {
  452. bool b; // indcates whether next character is a word character
  453. if(position != last)
  454. {
  455. // prev and this character must be opposites:
  456. b = traits_inst.isctype(*position, m_word_mask);
  457. }
  458. else
  459. {
  460. if (m_match_flags & match_not_eow)
  461. return false;
  462. b = false;
  463. }
  464. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  465. {
  466. if(m_match_flags & match_not_bow)
  467. return false;
  468. else
  469. b ^= false;
  470. }
  471. else
  472. {
  473. --position;
  474. b ^= traits_inst.isctype(*position, m_word_mask);
  475. ++position;
  476. }
  477. if(b)
  478. {
  479. pstate = pstate->next.p;
  480. return true;
  481. }
  482. return false; // no match if we get to here...
  483. }
  484. template <class BidiIterator, class Allocator, class traits>
  485. bool perl_matcher<BidiIterator, Allocator, traits>::match_within_word()
  486. {
  487. if(position == last)
  488. return false;
  489. // both prev and this character must be m_word_mask:
  490. bool prev = traits_inst.isctype(*position, m_word_mask);
  491. {
  492. bool b;
  493. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  494. return false;
  495. else
  496. {
  497. --position;
  498. b = traits_inst.isctype(*position, m_word_mask);
  499. ++position;
  500. }
  501. if(b == prev)
  502. {
  503. pstate = pstate->next.p;
  504. return true;
  505. }
  506. }
  507. return false;
  508. }
  509. template <class BidiIterator, class Allocator, class traits>
  510. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_start()
  511. {
  512. if(position == last)
  513. return false; // can't be starting a word if we're already at the end of input
  514. if(!traits_inst.isctype(*position, m_word_mask))
  515. return false; // next character isn't a word character
  516. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  517. {
  518. if(m_match_flags & match_not_bow)
  519. return false; // no previous input
  520. }
  521. else
  522. {
  523. // otherwise inside buffer:
  524. BidiIterator t(position);
  525. --t;
  526. if(traits_inst.isctype(*t, m_word_mask))
  527. return false; // previous character not non-word
  528. }
  529. // OK we have a match:
  530. pstate = pstate->next.p;
  531. return true;
  532. }
  533. template <class BidiIterator, class Allocator, class traits>
  534. bool perl_matcher<BidiIterator, Allocator, traits>::match_word_end()
  535. {
  536. if((position == backstop) && ((m_match_flags & match_prev_avail) == 0))
  537. return false; // start of buffer can't be end of word
  538. BidiIterator t(position);
  539. --t;
  540. if(traits_inst.isctype(*t, m_word_mask) == false)
  541. return false; // previous character wasn't a word character
  542. if(position == last)
  543. {
  544. if(m_match_flags & match_not_eow)
  545. return false; // end of buffer but not end of word
  546. }
  547. else
  548. {
  549. // otherwise inside buffer:
  550. if(traits_inst.isctype(*position, m_word_mask))
  551. return false; // next character is a word character
  552. }
  553. pstate = pstate->next.p;
  554. return true; // if we fall through to here then we've succeeded
  555. }
  556. template <class BidiIterator, class Allocator, class traits>
  557. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_start()
  558. {
  559. if((position != backstop) || (m_match_flags & match_not_bob))
  560. return false;
  561. // OK match:
  562. pstate = pstate->next.p;
  563. return true;
  564. }
  565. template <class BidiIterator, class Allocator, class traits>
  566. bool perl_matcher<BidiIterator, Allocator, traits>::match_buffer_end()
  567. {
  568. if((position != last) || (m_match_flags & match_not_eob))
  569. return false;
  570. // OK match:
  571. pstate = pstate->next.p;
  572. return true;
  573. }
  574. template <class BidiIterator, class Allocator, class traits>
  575. bool perl_matcher<BidiIterator, Allocator, traits>::match_backref()
  576. {
  577. //
  578. // Compare with what we previously matched.
  579. // Note that this succeeds if the backref did not partisipate
  580. // in the match, this is in line with ECMAScript, but not Perl
  581. // or PCRE.
  582. //
  583. int index = static_cast<const re_brace*>(pstate)->index;
  584. if(index >= hash_value_mask)
  585. {
  586. named_subexpressions::range_type r = re.get_data().equal_range(index);
  587. BOOST_REGEX_ASSERT(r.first != r.second);
  588. do
  589. {
  590. index = r.first->index;
  591. ++r.first;
  592. }while((r.first != r.second) && ((*m_presult)[index].matched != true));
  593. }
  594. if((m_match_flags & match_perl) && !(*m_presult)[index].matched)
  595. return false;
  596. BidiIterator i = (*m_presult)[index].first;
  597. BidiIterator j = (*m_presult)[index].second;
  598. while(i != j)
  599. {
  600. if((position == last) || (traits_inst.translate(*position, icase) != traits_inst.translate(*i, icase)))
  601. return false;
  602. ++i;
  603. ++position;
  604. }
  605. pstate = pstate->next.p;
  606. return true;
  607. }
  608. template <class BidiIterator, class Allocator, class traits>
  609. bool perl_matcher<BidiIterator, Allocator, traits>::match_long_set()
  610. {
  611. typedef typename traits::char_class_type char_class_type;
  612. // let the traits class do the work:
  613. if(position == last)
  614. return false;
  615. BidiIterator t = re_is_set_member(position, last, static_cast<const re_set_long<char_class_type>*>(pstate), re.get_data(), icase);
  616. if(t != position)
  617. {
  618. pstate = pstate->next.p;
  619. position = t;
  620. return true;
  621. }
  622. return false;
  623. }
  624. template <class BidiIterator, class Allocator, class traits>
  625. bool perl_matcher<BidiIterator, Allocator, traits>::match_set()
  626. {
  627. if(position == last)
  628. return false;
  629. if(static_cast<const re_set*>(pstate)->_map[static_cast<unsigned char>(traits_inst.translate(*position, icase))])
  630. {
  631. pstate = pstate->next.p;
  632. ++position;
  633. return true;
  634. }
  635. return false;
  636. }
  637. template <class BidiIterator, class Allocator, class traits>
  638. bool perl_matcher<BidiIterator, Allocator, traits>::match_jump()
  639. {
  640. pstate = static_cast<const re_jump*>(pstate)->alt.p;
  641. return true;
  642. }
  643. template <class BidiIterator, class Allocator, class traits>
  644. bool perl_matcher<BidiIterator, Allocator, traits>::match_combining()
  645. {
  646. if(position == last)
  647. return false;
  648. if(is_combining(traits_inst.translate(*position, icase)))
  649. return false;
  650. ++position;
  651. while((position != last) && is_combining(traits_inst.translate(*position, icase)))
  652. ++position;
  653. pstate = pstate->next.p;
  654. return true;
  655. }
  656. template <class BidiIterator, class Allocator, class traits>
  657. bool perl_matcher<BidiIterator, Allocator, traits>::match_soft_buffer_end()
  658. {
  659. if(m_match_flags & match_not_eob)
  660. return false;
  661. BidiIterator p(position);
  662. while((p != last) && is_separator(traits_inst.translate(*p, icase)))++p;
  663. if(p != last)
  664. return false;
  665. pstate = pstate->next.p;
  666. return true;
  667. }
  668. template <class BidiIterator, class Allocator, class traits>
  669. bool perl_matcher<BidiIterator, Allocator, traits>::match_restart_continue()
  670. {
  671. if(position == search_base)
  672. {
  673. pstate = pstate->next.p;
  674. return true;
  675. }
  676. return false;
  677. }
  678. template <class BidiIterator, class Allocator, class traits>
  679. bool perl_matcher<BidiIterator, Allocator, traits>::match_backstep()
  680. {
  681. #ifdef BOOST_MSVC
  682. #pragma warning(push)
  683. #pragma warning(disable:4127)
  684. #endif
  685. if( ::boost::is_random_access_iterator<BidiIterator>::value)
  686. {
  687. std::ptrdiff_t maxlen = ::boost::BOOST_REGEX_DETAIL_NS::distance(backstop, position);
  688. if(maxlen < static_cast<const re_brace*>(pstate)->index)
  689. return false;
  690. std::advance(position, -static_cast<const re_brace*>(pstate)->index);
  691. }
  692. else
  693. {
  694. int c = static_cast<const re_brace*>(pstate)->index;
  695. while(c--)
  696. {
  697. if(position == backstop)
  698. return false;
  699. --position;
  700. }
  701. }
  702. pstate = pstate->next.p;
  703. return true;
  704. #ifdef BOOST_MSVC
  705. #pragma warning(pop)
  706. #endif
  707. }
  708. template <class BidiIterator, class Allocator, class traits>
  709. inline bool perl_matcher<BidiIterator, Allocator, traits>::match_assert_backref()
  710. {
  711. // return true if marked sub-expression N has been matched:
  712. int index = static_cast<const re_brace*>(pstate)->index;
  713. bool result = false;
  714. if(index == 9999)
  715. {
  716. // Magic value for a (DEFINE) block:
  717. return false;
  718. }
  719. else if(index > 0)
  720. {
  721. // Have we matched subexpression "index"?
  722. // Check if index is a hash value:
  723. if(index >= hash_value_mask)
  724. {
  725. named_subexpressions::range_type r = re.get_data().equal_range(index);
  726. while(r.first != r.second)
  727. {
  728. if((*m_presult)[r.first->index].matched)
  729. {
  730. result = true;
  731. break;
  732. }
  733. ++r.first;
  734. }
  735. }
  736. else
  737. {
  738. result = (*m_presult)[index].matched;
  739. }
  740. pstate = pstate->next.p;
  741. }
  742. else
  743. {
  744. // Have we recursed into subexpression "index"?
  745. // If index == 0 then check for any recursion at all, otherwise for recursion to -index-1.
  746. int idx = -(index+1);
  747. if(idx >= hash_value_mask)
  748. {
  749. named_subexpressions::range_type r = re.get_data().equal_range(idx);
  750. int stack_index = recursion_stack.empty() ? -1 : recursion_stack.back().idx;
  751. while(r.first != r.second)
  752. {
  753. result |= (stack_index == r.first->index);
  754. if(result)break;
  755. ++r.first;
  756. }
  757. }
  758. else
  759. {
  760. result = !recursion_stack.empty() && ((recursion_stack.back().idx == idx) || (index == 0));
  761. }
  762. pstate = pstate->next.p;
  763. }
  764. return result;
  765. }
  766. template <class BidiIterator, class Allocator, class traits>
  767. bool perl_matcher<BidiIterator, Allocator, traits>::match_fail()
  768. {
  769. // Just force a backtrack:
  770. return false;
  771. }
  772. template <class BidiIterator, class Allocator, class traits>
  773. bool perl_matcher<BidiIterator, Allocator, traits>::match_accept()
  774. {
  775. if(!recursion_stack.empty())
  776. {
  777. return skip_until_paren(recursion_stack.back().idx);
  778. }
  779. else
  780. {
  781. return skip_until_paren(INT_MAX);
  782. }
  783. }
  784. template <class BidiIterator, class Allocator, class traits>
  785. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_any()
  786. {
  787. #ifdef BOOST_MSVC
  788. #pragma warning(push)
  789. #pragma warning(disable:4127)
  790. #endif
  791. const unsigned char* _map = re.get_map();
  792. while(true)
  793. {
  794. // skip everything we can't match:
  795. while((position != last) && !can_start(*position, _map, (unsigned char)mask_any) )
  796. ++position;
  797. if(position == last)
  798. {
  799. // run out of characters, try a null match if possible:
  800. if(re.can_be_null())
  801. return match_prefix();
  802. break;
  803. }
  804. // now try and obtain a match:
  805. if(match_prefix())
  806. return true;
  807. if(position == last)
  808. return false;
  809. ++position;
  810. }
  811. return false;
  812. #ifdef BOOST_MSVC
  813. #pragma warning(pop)
  814. #endif
  815. }
  816. template <class BidiIterator, class Allocator, class traits>
  817. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_word()
  818. {
  819. #ifdef BOOST_MSVC
  820. #pragma warning(push)
  821. #pragma warning(disable:4127)
  822. #endif
  823. // do search optimised for word starts:
  824. const unsigned char* _map = re.get_map();
  825. if((m_match_flags & match_prev_avail) || (position != base))
  826. --position;
  827. else if(match_prefix())
  828. return true;
  829. do
  830. {
  831. while((position != last) && traits_inst.isctype(*position, m_word_mask))
  832. ++position;
  833. while((position != last) && !traits_inst.isctype(*position, m_word_mask))
  834. ++position;
  835. if(position == last)
  836. break;
  837. if(can_start(*position, _map, (unsigned char)mask_any) )
  838. {
  839. if(match_prefix())
  840. return true;
  841. }
  842. if(position == last)
  843. break;
  844. } while(true);
  845. return false;
  846. #ifdef BOOST_MSVC
  847. #pragma warning(pop)
  848. #endif
  849. }
  850. template <class BidiIterator, class Allocator, class traits>
  851. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_line()
  852. {
  853. // do search optimised for line starts:
  854. const unsigned char* _map = re.get_map();
  855. if(match_prefix())
  856. return true;
  857. while(position != last)
  858. {
  859. while((position != last) && !is_separator(*position))
  860. ++position;
  861. if(position == last)
  862. return false;
  863. ++position;
  864. if(position == last)
  865. {
  866. if(re.can_be_null() && match_prefix())
  867. return true;
  868. return false;
  869. }
  870. if( can_start(*position, _map, (unsigned char)mask_any) )
  871. {
  872. if(match_prefix())
  873. return true;
  874. }
  875. if(position == last)
  876. return false;
  877. //++position;
  878. }
  879. return false;
  880. }
  881. template <class BidiIterator, class Allocator, class traits>
  882. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_buf()
  883. {
  884. if((position == base) && ((m_match_flags & match_not_bob) == 0))
  885. return match_prefix();
  886. return false;
  887. }
  888. template <class BidiIterator, class Allocator, class traits>
  889. bool perl_matcher<BidiIterator, Allocator, traits>::find_restart_lit()
  890. {
  891. #if 0
  892. if(position == last)
  893. return false; // can't possibly match if we're at the end already
  894. unsigned type = (m_match_flags & match_continuous) ?
  895. static_cast<unsigned int>(regbase::restart_continue)
  896. : static_cast<unsigned int>(re.get_restart_type());
  897. const kmp_info<char_type>* info = access::get_kmp(re);
  898. int len = info->len;
  899. const char_type* x = info->pstr;
  900. int j = 0;
  901. while (position != last)
  902. {
  903. while((j > -1) && (x[j] != traits_inst.translate(*position, icase)))
  904. j = info->kmp_next[j];
  905. ++position;
  906. ++j;
  907. if(j >= len)
  908. {
  909. if(type == regbase::restart_fixed_lit)
  910. {
  911. std::advance(position, -j);
  912. restart = position;
  913. std::advance(restart, len);
  914. m_result.set_first(position);
  915. m_result.set_second(restart);
  916. position = restart;
  917. return true;
  918. }
  919. else
  920. {
  921. restart = position;
  922. std::advance(position, -j);
  923. if(match_prefix())
  924. return true;
  925. else
  926. {
  927. for(int k = 0; (restart != position) && (k < j); ++k, --restart)
  928. {} // dwa 10/20/2000 - warning suppression for MWCW
  929. if(restart != last)
  930. ++restart;
  931. position = restart;
  932. j = 0; //we could do better than this...
  933. }
  934. }
  935. }
  936. }
  937. if((m_match_flags & match_partial) && (position == last) && j)
  938. {
  939. // we need to check for a partial match:
  940. restart = position;
  941. std::advance(position, -j);
  942. return match_prefix();
  943. }
  944. #endif
  945. return false;
  946. }
  947. } // namespace BOOST_REGEX_DETAIL_NS
  948. } // namespace boost
  949. #ifdef BOOST_MSVC
  950. # pragma warning(pop)
  951. #endif
  952. #ifdef BOOST_BORLANDC
  953. # pragma option pop
  954. #endif
  955. #ifdef BOOST_MSVC
  956. #pragma warning(push)
  957. #pragma warning(disable: 4103)
  958. #endif
  959. #ifdef BOOST_HAS_ABI_HEADERS
  960. # include BOOST_ABI_SUFFIX
  961. #endif
  962. #ifdef BOOST_MSVC
  963. #pragma warning(pop)
  964. #endif
  965. #endif