regex_traits_defaults.hpp 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997
  1. /*
  2. *
  3. * Copyright (c) 2004
  4. * John Maddock
  5. *
  6. * Use, modification and distribution are subject to the
  7. * Boost Software License, Version 1.0. (See accompanying file
  8. * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  9. *
  10. */
  11. /*
  12. * LOCATION: see http://www.boost.org for most recent version.
  13. * FILE regex_traits_defaults.hpp
  14. * VERSION see <boost/version.hpp>
  15. * DESCRIPTION: Declares APIs for access to regex_traits default properties.
  16. */
  17. #ifndef BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
  18. #define BOOST_REGEX_TRAITS_DEFAULTS_HPP_INCLUDED
  19. #ifdef BOOST_MSVC
  20. #pragma warning(push)
  21. #pragma warning(disable: 4103)
  22. #endif
  23. #ifdef BOOST_HAS_ABI_HEADERS
  24. # include BOOST_ABI_PREFIX
  25. #endif
  26. #ifdef BOOST_MSVC
  27. #pragma warning(pop)
  28. #endif
  29. #include <boost/regex/config.hpp>
  30. #include <boost/cstdint.hpp>
  31. #include <cctype>
  32. #include <cwctype>
  33. #include <locale>
  34. #ifndef BOOST_REGEX_SYNTAX_TYPE_HPP
  35. #include <boost/regex/v4/syntax_type.hpp>
  36. #endif
  37. #ifndef BOOST_REGEX_ERROR_TYPE_HPP
  38. #include <boost/regex/v4/error_type.hpp>
  39. #endif
  40. #include <boost/regex/v4/regex_workaround.hpp>
  41. #include <boost/type_traits/make_unsigned.hpp>
  42. #include <boost/utility/enable_if.hpp>
  43. #ifdef BOOST_NO_STDC_NAMESPACE
  44. namespace std{
  45. using ::strlen;
  46. }
  47. #endif
  48. namespace boost{ namespace BOOST_REGEX_DETAIL_NS{
  49. //
  50. // helpers to suppress warnings:
  51. //
  52. template <class charT>
  53. inline bool is_extended(charT c)
  54. {
  55. typedef typename make_unsigned<charT>::type unsigned_type;
  56. return (sizeof(charT) > 1) && (static_cast<unsigned_type>(c) >= 256u);
  57. }
  // Overload for plain char: a narrow character is never "extended",
  // so the argument is not even inspected.
  inline bool is_extended(char)
  {
     return false;
  }
  60. inline const char* BOOST_REGEX_CALL get_default_syntax(regex_constants::syntax_type n)
  61. {
  62. // if the user hasn't supplied a message catalog, then this supplies
  63. // default "messages" for us to load in the range 1-100.
  64. const char* messages[] = {
  65. "",
  66. "(",
  67. ")",
  68. "$",
  69. "^",
  70. ".",
  71. "*",
  72. "+",
  73. "?",
  74. "[",
  75. "]",
  76. "|",
  77. "\\",
  78. "#",
  79. "-",
  80. "{",
  81. "}",
  82. "0123456789",
  83. "b",
  84. "B",
  85. "<",
  86. ">",
  87. "",
  88. "",
  89. "A`",
  90. "z'",
  91. "\n",
  92. ",",
  93. "a",
  94. "f",
  95. "n",
  96. "r",
  97. "t",
  98. "v",
  99. "x",
  100. "c",
  101. ":",
  102. "=",
  103. "e",
  104. "",
  105. "",
  106. "",
  107. "",
  108. "",
  109. "",
  110. "",
  111. "",
  112. "E",
  113. "Q",
  114. "X",
  115. "C",
  116. "Z",
  117. "G",
  118. "!",
  119. "p",
  120. "P",
  121. "N",
  122. "gk",
  123. "K",
  124. "R",
  125. };
  126. return ((n >= (sizeof(messages) / sizeof(messages[1]))) ? "" : messages[n]);
  127. }
  128. inline const char* BOOST_REGEX_CALL get_default_error_string(regex_constants::error_type n)
  129. {
  130. static const char* const s_default_error_messages[] = {
  131. "Success", /* REG_NOERROR 0 error_ok */
  132. "No match", /* REG_NOMATCH 1 error_no_match */
  133. "Invalid regular expression.", /* REG_BADPAT 2 error_bad_pattern */
  134. "Invalid collation character.", /* REG_ECOLLATE 3 error_collate */
  135. "Invalid character class name, collating name, or character range.", /* REG_ECTYPE 4 error_ctype */
  136. "Invalid or unterminated escape sequence.", /* REG_EESCAPE 5 error_escape */
  137. "Invalid back reference: specified capturing group does not exist.", /* REG_ESUBREG 6 error_backref */
  138. "Unmatched [ or [^ in character class declaration.", /* REG_EBRACK 7 error_brack */
  139. "Unmatched marking parenthesis ( or \\(.", /* REG_EPAREN 8 error_paren */
  140. "Unmatched quantified repeat operator { or \\{.", /* REG_EBRACE 9 error_brace */
  141. "Invalid content of repeat range.", /* REG_BADBR 10 error_badbrace */
  142. "Invalid range end in character class", /* REG_ERANGE 11 error_range */
  143. "Out of memory.", /* REG_ESPACE 12 error_space NOT USED */
  144. "Invalid preceding regular expression prior to repetition operator.", /* REG_BADRPT 13 error_badrepeat */
  145. "Premature end of regular expression", /* REG_EEND 14 error_end NOT USED */
  146. "Regular expression is too large.", /* REG_ESIZE 15 error_size NOT USED */
  147. "Unmatched ) or \\)", /* REG_ERPAREN 16 error_right_paren NOT USED */
  148. "Empty regular expression.", /* REG_EMPTY 17 error_empty */
  149. "The complexity of matching the regular expression exceeded predefined bounds. "
  150. "Try refactoring the regular expression to make each choice made by the state machine unambiguous. "
  151. "This exception is thrown to prevent \"eternal\" matches that take an "
  152. "indefinite period time to locate.", /* REG_ECOMPLEXITY 18 error_complexity */
  153. "Ran out of stack space trying to match the regular expression.", /* REG_ESTACK 19 error_stack */
  154. "Invalid or unterminated Perl (?...) sequence.", /* REG_E_PERL 20 error_perl */
  155. "Unknown error.", /* REG_E_UNKNOWN 21 error_unknown */
  156. };
  157. return (n > ::boost::regex_constants::error_unknown) ? s_default_error_messages[::boost::regex_constants::error_unknown] : s_default_error_messages[n];
  158. }
  159. inline regex_constants::syntax_type BOOST_REGEX_CALL get_default_syntax_type(char c)
  160. {
  161. //
  162. // char_syntax determines how the compiler treats a given character
  163. // in a regular expression.
  164. //
  165. static regex_constants::syntax_type char_syntax[] = {
  166. regex_constants::syntax_char, /**/
  167. regex_constants::syntax_char, /**/
  168. regex_constants::syntax_char, /**/
  169. regex_constants::syntax_char, /**/
  170. regex_constants::syntax_char, /**/
  171. regex_constants::syntax_char, /**/
  172. regex_constants::syntax_char, /**/
  173. regex_constants::syntax_char, /**/
  174. regex_constants::syntax_char, /**/
  175. regex_constants::syntax_char, /**/
  176. regex_constants::syntax_newline, /**/
  177. regex_constants::syntax_char, /**/
  178. regex_constants::syntax_char, /**/
  179. regex_constants::syntax_char, /**/
  180. regex_constants::syntax_char, /**/
  181. regex_constants::syntax_char, /**/
  182. regex_constants::syntax_char, /**/
  183. regex_constants::syntax_char, /**/
  184. regex_constants::syntax_char, /**/
  185. regex_constants::syntax_char, /**/
  186. regex_constants::syntax_char, /**/
  187. regex_constants::syntax_char, /**/
  188. regex_constants::syntax_char, /**/
  189. regex_constants::syntax_char, /**/
  190. regex_constants::syntax_char, /**/
  191. regex_constants::syntax_char, /**/
  192. regex_constants::syntax_char, /**/
  193. regex_constants::syntax_char, /**/
  194. regex_constants::syntax_char, /**/
  195. regex_constants::syntax_char, /**/
  196. regex_constants::syntax_char, /**/
  197. regex_constants::syntax_char, /**/
  198. regex_constants::syntax_char, /* */ // 32
  199. regex_constants::syntax_not, /*!*/
  200. regex_constants::syntax_char, /*"*/
  201. regex_constants::syntax_hash, /*#*/
  202. regex_constants::syntax_dollar, /*$*/
  203. regex_constants::syntax_char, /*%*/
  204. regex_constants::syntax_char, /*&*/
  205. regex_constants::escape_type_end_buffer, /*'*/
  206. regex_constants::syntax_open_mark, /*(*/
  207. regex_constants::syntax_close_mark, /*)*/
  208. regex_constants::syntax_star, /***/
  209. regex_constants::syntax_plus, /*+*/
  210. regex_constants::syntax_comma, /*,*/
  211. regex_constants::syntax_dash, /*-*/
  212. regex_constants::syntax_dot, /*.*/
  213. regex_constants::syntax_char, /*/*/
  214. regex_constants::syntax_digit, /*0*/
  215. regex_constants::syntax_digit, /*1*/
  216. regex_constants::syntax_digit, /*2*/
  217. regex_constants::syntax_digit, /*3*/
  218. regex_constants::syntax_digit, /*4*/
  219. regex_constants::syntax_digit, /*5*/
  220. regex_constants::syntax_digit, /*6*/
  221. regex_constants::syntax_digit, /*7*/
  222. regex_constants::syntax_digit, /*8*/
  223. regex_constants::syntax_digit, /*9*/
  224. regex_constants::syntax_colon, /*:*/
  225. regex_constants::syntax_char, /*;*/
  226. regex_constants::escape_type_left_word, /*<*/
  227. regex_constants::syntax_equal, /*=*/
  228. regex_constants::escape_type_right_word, /*>*/
  229. regex_constants::syntax_question, /*?*/
  230. regex_constants::syntax_char, /*@*/
  231. regex_constants::syntax_char, /*A*/
  232. regex_constants::syntax_char, /*B*/
  233. regex_constants::syntax_char, /*C*/
  234. regex_constants::syntax_char, /*D*/
  235. regex_constants::syntax_char, /*E*/
  236. regex_constants::syntax_char, /*F*/
  237. regex_constants::syntax_char, /*G*/
  238. regex_constants::syntax_char, /*H*/
  239. regex_constants::syntax_char, /*I*/
  240. regex_constants::syntax_char, /*J*/
  241. regex_constants::syntax_char, /*K*/
  242. regex_constants::syntax_char, /*L*/
  243. regex_constants::syntax_char, /*M*/
  244. regex_constants::syntax_char, /*N*/
  245. regex_constants::syntax_char, /*O*/
  246. regex_constants::syntax_char, /*P*/
  247. regex_constants::syntax_char, /*Q*/
  248. regex_constants::syntax_char, /*R*/
  249. regex_constants::syntax_char, /*S*/
  250. regex_constants::syntax_char, /*T*/
  251. regex_constants::syntax_char, /*U*/
  252. regex_constants::syntax_char, /*V*/
  253. regex_constants::syntax_char, /*W*/
  254. regex_constants::syntax_char, /*X*/
  255. regex_constants::syntax_char, /*Y*/
  256. regex_constants::syntax_char, /*Z*/
  257. regex_constants::syntax_open_set, /*[*/
  258. regex_constants::syntax_escape, /*\*/
  259. regex_constants::syntax_close_set, /*]*/
  260. regex_constants::syntax_caret, /*^*/
  261. regex_constants::syntax_char, /*_*/
  262. regex_constants::syntax_char, /*`*/
  263. regex_constants::syntax_char, /*a*/
  264. regex_constants::syntax_char, /*b*/
  265. regex_constants::syntax_char, /*c*/
  266. regex_constants::syntax_char, /*d*/
  267. regex_constants::syntax_char, /*e*/
  268. regex_constants::syntax_char, /*f*/
  269. regex_constants::syntax_char, /*g*/
  270. regex_constants::syntax_char, /*h*/
  271. regex_constants::syntax_char, /*i*/
  272. regex_constants::syntax_char, /*j*/
  273. regex_constants::syntax_char, /*k*/
  274. regex_constants::syntax_char, /*l*/
  275. regex_constants::syntax_char, /*m*/
  276. regex_constants::syntax_char, /*n*/
  277. regex_constants::syntax_char, /*o*/
  278. regex_constants::syntax_char, /*p*/
  279. regex_constants::syntax_char, /*q*/
  280. regex_constants::syntax_char, /*r*/
  281. regex_constants::syntax_char, /*s*/
  282. regex_constants::syntax_char, /*t*/
  283. regex_constants::syntax_char, /*u*/
  284. regex_constants::syntax_char, /*v*/
  285. regex_constants::syntax_char, /*w*/
  286. regex_constants::syntax_char, /*x*/
  287. regex_constants::syntax_char, /*y*/
  288. regex_constants::syntax_char, /*z*/
  289. regex_constants::syntax_open_brace, /*{*/
  290. regex_constants::syntax_or, /*|*/
  291. regex_constants::syntax_close_brace, /*}*/
  292. regex_constants::syntax_char, /*~*/
  293. regex_constants::syntax_char, /**/
  294. regex_constants::syntax_char, /**/
  295. regex_constants::syntax_char, /**/
  296. regex_constants::syntax_char, /**/
  297. regex_constants::syntax_char, /**/
  298. regex_constants::syntax_char, /**/
  299. regex_constants::syntax_char, /**/
  300. regex_constants::syntax_char, /**/
  301. regex_constants::syntax_char, /**/
  302. regex_constants::syntax_char, /**/
  303. regex_constants::syntax_char, /**/
  304. regex_constants::syntax_char, /**/
  305. regex_constants::syntax_char, /**/
  306. regex_constants::syntax_char, /**/
  307. regex_constants::syntax_char, /**/
  308. regex_constants::syntax_char, /**/
  309. regex_constants::syntax_char, /**/
  310. regex_constants::syntax_char, /**/
  311. regex_constants::syntax_char, /**/
  312. regex_constants::syntax_char, /**/
  313. regex_constants::syntax_char, /**/
  314. regex_constants::syntax_char, /**/
  315. regex_constants::syntax_char, /**/
  316. regex_constants::syntax_char, /**/
  317. regex_constants::syntax_char, /**/
  318. regex_constants::syntax_char, /**/
  319. regex_constants::syntax_char, /**/
  320. regex_constants::syntax_char, /**/
  321. regex_constants::syntax_char, /**/
  322. regex_constants::syntax_char, /**/
  323. regex_constants::syntax_char, /**/
  324. regex_constants::syntax_char, /**/
  325. regex_constants::syntax_char, /**/
  326. regex_constants::syntax_char, /**/
  327. regex_constants::syntax_char, /**/
  328. regex_constants::syntax_char, /**/
  329. regex_constants::syntax_char, /**/
  330. regex_constants::syntax_char, /**/
  331. regex_constants::syntax_char, /**/
  332. regex_constants::syntax_char, /**/
  333. regex_constants::syntax_char, /**/
  334. regex_constants::syntax_char, /**/
  335. regex_constants::syntax_char, /**/
  336. regex_constants::syntax_char, /**/
  337. regex_constants::syntax_char, /**/
  338. regex_constants::syntax_char, /**/
  339. regex_constants::syntax_char, /**/
  340. regex_constants::syntax_char, /**/
  341. regex_constants::syntax_char, /**/
  342. regex_constants::syntax_char, /**/
  343. regex_constants::syntax_char, /**/
  344. regex_constants::syntax_char, /**/
  345. regex_constants::syntax_char, /**/
  346. regex_constants::syntax_char, /**/
  347. regex_constants::syntax_char, /**/
  348. regex_constants::syntax_char, /**/
  349. };
  350. return char_syntax[(unsigned char)c];
  351. }
  352. inline regex_constants::escape_syntax_type BOOST_REGEX_CALL get_default_escape_syntax_type(char c)
  353. {
  354. //
  355. // char_syntax determines how the compiler treats a given character
  356. // in a regular expression.
  357. //
  358. static regex_constants::escape_syntax_type char_syntax[] = {
  359. regex_constants::escape_type_identity, /**/
  360. regex_constants::escape_type_identity, /**/
  361. regex_constants::escape_type_identity, /**/
  362. regex_constants::escape_type_identity, /**/
  363. regex_constants::escape_type_identity, /**/
  364. regex_constants::escape_type_identity, /**/
  365. regex_constants::escape_type_identity, /**/
  366. regex_constants::escape_type_identity, /**/
  367. regex_constants::escape_type_identity, /**/
  368. regex_constants::escape_type_identity, /**/
  369. regex_constants::escape_type_identity, /**/
  370. regex_constants::escape_type_identity, /**/
  371. regex_constants::escape_type_identity, /**/
  372. regex_constants::escape_type_identity, /**/
  373. regex_constants::escape_type_identity, /**/
  374. regex_constants::escape_type_identity, /**/
  375. regex_constants::escape_type_identity, /**/
  376. regex_constants::escape_type_identity, /**/
  377. regex_constants::escape_type_identity, /**/
  378. regex_constants::escape_type_identity, /**/
  379. regex_constants::escape_type_identity, /**/
  380. regex_constants::escape_type_identity, /**/
  381. regex_constants::escape_type_identity, /**/
  382. regex_constants::escape_type_identity, /**/
  383. regex_constants::escape_type_identity, /**/
  384. regex_constants::escape_type_identity, /**/
  385. regex_constants::escape_type_identity, /**/
  386. regex_constants::escape_type_identity, /**/
  387. regex_constants::escape_type_identity, /**/
  388. regex_constants::escape_type_identity, /**/
  389. regex_constants::escape_type_identity, /**/
  390. regex_constants::escape_type_identity, /**/
  391. regex_constants::escape_type_identity, /* */ // 32
  392. regex_constants::escape_type_identity, /*!*/
  393. regex_constants::escape_type_identity, /*"*/
  394. regex_constants::escape_type_identity, /*#*/
  395. regex_constants::escape_type_identity, /*$*/
  396. regex_constants::escape_type_identity, /*%*/
  397. regex_constants::escape_type_identity, /*&*/
  398. regex_constants::escape_type_end_buffer, /*'*/
  399. regex_constants::syntax_open_mark, /*(*/
  400. regex_constants::syntax_close_mark, /*)*/
  401. regex_constants::escape_type_identity, /***/
  402. regex_constants::syntax_plus, /*+*/
  403. regex_constants::escape_type_identity, /*,*/
  404. regex_constants::escape_type_identity, /*-*/
  405. regex_constants::escape_type_identity, /*.*/
  406. regex_constants::escape_type_identity, /*/*/
  407. regex_constants::escape_type_decimal, /*0*/
  408. regex_constants::escape_type_backref, /*1*/
  409. regex_constants::escape_type_backref, /*2*/
  410. regex_constants::escape_type_backref, /*3*/
  411. regex_constants::escape_type_backref, /*4*/
  412. regex_constants::escape_type_backref, /*5*/
  413. regex_constants::escape_type_backref, /*6*/
  414. regex_constants::escape_type_backref, /*7*/
  415. regex_constants::escape_type_backref, /*8*/
  416. regex_constants::escape_type_backref, /*9*/
  417. regex_constants::escape_type_identity, /*:*/
  418. regex_constants::escape_type_identity, /*;*/
  419. regex_constants::escape_type_left_word, /*<*/
  420. regex_constants::escape_type_identity, /*=*/
  421. regex_constants::escape_type_right_word, /*>*/
  422. regex_constants::syntax_question, /*?*/
  423. regex_constants::escape_type_identity, /*@*/
  424. regex_constants::escape_type_start_buffer, /*A*/
  425. regex_constants::escape_type_not_word_assert, /*B*/
  426. regex_constants::escape_type_C, /*C*/
  427. regex_constants::escape_type_not_class, /*D*/
  428. regex_constants::escape_type_E, /*E*/
  429. regex_constants::escape_type_not_class, /*F*/
  430. regex_constants::escape_type_G, /*G*/
  431. regex_constants::escape_type_not_class, /*H*/
  432. regex_constants::escape_type_not_class, /*I*/
  433. regex_constants::escape_type_not_class, /*J*/
  434. regex_constants::escape_type_reset_start_mark, /*K*/
  435. regex_constants::escape_type_not_class, /*L*/
  436. regex_constants::escape_type_not_class, /*M*/
  437. regex_constants::escape_type_named_char, /*N*/
  438. regex_constants::escape_type_not_class, /*O*/
  439. regex_constants::escape_type_not_property, /*P*/
  440. regex_constants::escape_type_Q, /*Q*/
  441. regex_constants::escape_type_line_ending, /*R*/
  442. regex_constants::escape_type_not_class, /*S*/
  443. regex_constants::escape_type_not_class, /*T*/
  444. regex_constants::escape_type_not_class, /*U*/
  445. regex_constants::escape_type_not_class, /*V*/
  446. regex_constants::escape_type_not_class, /*W*/
  447. regex_constants::escape_type_X, /*X*/
  448. regex_constants::escape_type_not_class, /*Y*/
  449. regex_constants::escape_type_Z, /*Z*/
  450. regex_constants::escape_type_identity, /*[*/
  451. regex_constants::escape_type_identity, /*\*/
  452. regex_constants::escape_type_identity, /*]*/
  453. regex_constants::escape_type_identity, /*^*/
  454. regex_constants::escape_type_identity, /*_*/
  455. regex_constants::escape_type_start_buffer, /*`*/
  456. regex_constants::escape_type_control_a, /*a*/
  457. regex_constants::escape_type_word_assert, /*b*/
  458. regex_constants::escape_type_ascii_control, /*c*/
  459. regex_constants::escape_type_class, /*d*/
  460. regex_constants::escape_type_e, /*e*/
  461. regex_constants::escape_type_control_f, /*f*/
  462. regex_constants::escape_type_extended_backref, /*g*/
  463. regex_constants::escape_type_class, /*h*/
  464. regex_constants::escape_type_class, /*i*/
  465. regex_constants::escape_type_class, /*j*/
  466. regex_constants::escape_type_extended_backref, /*k*/
  467. regex_constants::escape_type_class, /*l*/
  468. regex_constants::escape_type_class, /*m*/
  469. regex_constants::escape_type_control_n, /*n*/
  470. regex_constants::escape_type_class, /*o*/
  471. regex_constants::escape_type_property, /*p*/
  472. regex_constants::escape_type_class, /*q*/
  473. regex_constants::escape_type_control_r, /*r*/
  474. regex_constants::escape_type_class, /*s*/
  475. regex_constants::escape_type_control_t, /*t*/
  476. regex_constants::escape_type_class, /*u*/
  477. regex_constants::escape_type_control_v, /*v*/
  478. regex_constants::escape_type_class, /*w*/
  479. regex_constants::escape_type_hex, /*x*/
  480. regex_constants::escape_type_class, /*y*/
  481. regex_constants::escape_type_end_buffer, /*z*/
  482. regex_constants::syntax_open_brace, /*{*/
  483. regex_constants::syntax_or, /*|*/
  484. regex_constants::syntax_close_brace, /*}*/
  485. regex_constants::escape_type_identity, /*~*/
  486. regex_constants::escape_type_identity, /**/
  487. regex_constants::escape_type_identity, /**/
  488. regex_constants::escape_type_identity, /**/
  489. regex_constants::escape_type_identity, /**/
  490. regex_constants::escape_type_identity, /**/
  491. regex_constants::escape_type_identity, /**/
  492. regex_constants::escape_type_identity, /**/
  493. regex_constants::escape_type_identity, /**/
  494. regex_constants::escape_type_identity, /**/
  495. regex_constants::escape_type_identity, /**/
  496. regex_constants::escape_type_identity, /**/
  497. regex_constants::escape_type_identity, /**/
  498. regex_constants::escape_type_identity, /**/
  499. regex_constants::escape_type_identity, /**/
  500. regex_constants::escape_type_identity, /**/
  501. regex_constants::escape_type_identity, /**/
  502. regex_constants::escape_type_identity, /**/
  503. regex_constants::escape_type_identity, /**/
  504. regex_constants::escape_type_identity, /**/
  505. regex_constants::escape_type_identity, /**/
  506. regex_constants::escape_type_identity, /**/
  507. regex_constants::escape_type_identity, /**/
  508. regex_constants::escape_type_identity, /**/
  509. regex_constants::escape_type_identity, /**/
  510. regex_constants::escape_type_identity, /**/
  511. regex_constants::escape_type_identity, /**/
  512. regex_constants::escape_type_identity, /**/
  513. regex_constants::escape_type_identity, /**/
  514. regex_constants::escape_type_identity, /**/
  515. regex_constants::escape_type_identity, /**/
  516. regex_constants::escape_type_identity, /**/
  517. regex_constants::escape_type_identity, /**/
  518. regex_constants::escape_type_identity, /**/
  519. regex_constants::escape_type_identity, /**/
  520. regex_constants::escape_type_identity, /**/
  521. regex_constants::escape_type_identity, /**/
  522. regex_constants::escape_type_identity, /**/
  523. regex_constants::escape_type_identity, /**/
  524. regex_constants::escape_type_identity, /**/
  525. regex_constants::escape_type_identity, /**/
  526. regex_constants::escape_type_identity, /**/
  527. regex_constants::escape_type_identity, /**/
  528. regex_constants::escape_type_identity, /**/
  529. regex_constants::escape_type_identity, /**/
  530. regex_constants::escape_type_identity, /**/
  531. regex_constants::escape_type_identity, /**/
  532. regex_constants::escape_type_identity, /**/
  533. regex_constants::escape_type_identity, /**/
  534. regex_constants::escape_type_identity, /**/
  535. regex_constants::escape_type_identity, /**/
  536. regex_constants::escape_type_identity, /**/
  537. regex_constants::escape_type_identity, /**/
  538. regex_constants::escape_type_identity, /**/
  539. regex_constants::escape_type_identity, /**/
  540. regex_constants::escape_type_identity, /**/
  541. regex_constants::escape_type_identity, /**/
  542. };
  543. return char_syntax[(unsigned char)c];
  544. }
  545. // is charT c a combining character?
  546. inline bool BOOST_REGEX_CALL is_combining_implementation(boost::uint_least16_t c)
  547. {
  548. const boost::uint_least16_t combining_ranges[] = { 0x0300, 0x0361,
  549. 0x0483, 0x0486,
  550. 0x0903, 0x0903,
  551. 0x093E, 0x0940,
  552. 0x0949, 0x094C,
  553. 0x0982, 0x0983,
  554. 0x09BE, 0x09C0,
  555. 0x09C7, 0x09CC,
  556. 0x09D7, 0x09D7,
  557. 0x0A3E, 0x0A40,
  558. 0x0A83, 0x0A83,
  559. 0x0ABE, 0x0AC0,
  560. 0x0AC9, 0x0ACC,
  561. 0x0B02, 0x0B03,
  562. 0x0B3E, 0x0B3E,
  563. 0x0B40, 0x0B40,
  564. 0x0B47, 0x0B4C,
  565. 0x0B57, 0x0B57,
  566. 0x0B83, 0x0B83,
  567. 0x0BBE, 0x0BBF,
  568. 0x0BC1, 0x0BCC,
  569. 0x0BD7, 0x0BD7,
  570. 0x0C01, 0x0C03,
  571. 0x0C41, 0x0C44,
  572. 0x0C82, 0x0C83,
  573. 0x0CBE, 0x0CBE,
  574. 0x0CC0, 0x0CC4,
  575. 0x0CC7, 0x0CCB,
  576. 0x0CD5, 0x0CD6,
  577. 0x0D02, 0x0D03,
  578. 0x0D3E, 0x0D40,
  579. 0x0D46, 0x0D4C,
  580. 0x0D57, 0x0D57,
  581. 0x0F7F, 0x0F7F,
  582. 0x20D0, 0x20E1,
  583. 0x3099, 0x309A,
  584. 0xFE20, 0xFE23,
  585. 0xffff, 0xffff, };
  586. const boost::uint_least16_t* p = combining_ranges + 1;
  587. while (*p < c) p += 2;
  588. --p;
  589. if ((c >= *p) && (c <= *(p + 1)))
  590. return true;
  591. return false;
  592. }
  593. template <class charT>
  594. inline bool is_combining(charT c)
  595. {
  596. return (c <= static_cast<charT>(0)) ? false : ((c >= static_cast<charT>((std::numeric_limits<uint_least16_t>::max)())) ? false : is_combining_implementation(static_cast<unsigned short>(c)));
  597. }
  598. template <>
  599. inline bool is_combining<char>(char)
  600. {
  601. return false;
  602. }
  603. template <>
  604. inline bool is_combining<signed char>(signed char)
  605. {
  606. return false;
  607. }
  608. template <>
  609. inline bool is_combining<unsigned char>(unsigned char)
  610. {
  611. return false;
  612. }
  #if !defined(__hpux) && !defined(__WINSCW__) // can't use WCHAR_MAX/MIN in pp-directives
  #ifdef _MSC_VER
  // MSVC: the wchar_t value is converted to unsigned short and looked up
  // directly, with no range check.
  template<>
  inline bool is_combining<wchar_t>(wchar_t c)
  {
     const unsigned short code = static_cast<unsigned short>(c);
     return is_combining_implementation(code);
  }
  #elif !defined(__DECCXX) && !defined(__osf__) && !defined(__OSF__) && defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
  #if defined(WCHAR_MAX) && (WCHAR_MAX <= USHRT_MAX)
  // Unsigned wchar_t no wider than unsigned short: every value can be
  // looked up directly.
  template<>
  inline bool is_combining<wchar_t>(wchar_t c)
  {
     const unsigned short code = static_cast<unsigned short>(c);
     return is_combining_implementation(code);
  }
  #else
  // Unsigned wchar_t that may exceed the uint_least16_t range: values at
  // or above the largest uint_least16_t value are rejected before the
  // lookup.
  template<>
  inline bool is_combining<wchar_t>(wchar_t c)
  {
     if (c >= (std::numeric_limits<uint_least16_t>::max)())
        return false;
     return is_combining_implementation(static_cast<unsigned short>(c));
  }
  #endif
  #endif
  #endif
  636. //
  637. // is a charT c a line separator?
  638. //
  639. template <class charT>
  640. inline bool is_separator(charT c)
  641. {
  642. return BOOST_REGEX_MAKE_BOOL(
  643. (c == static_cast<charT>('\n'))
  644. || (c == static_cast<charT>('\r'))
  645. || (c == static_cast<charT>('\f'))
  646. || (static_cast<boost::uint16_t>(c) == 0x2028u)
  647. || (static_cast<boost::uint16_t>(c) == 0x2029u)
  648. || (static_cast<boost::uint16_t>(c) == 0x85u));
  649. }
  650. template <>
  651. inline bool is_separator<char>(char c)
  652. {
  653. return BOOST_REGEX_MAKE_BOOL((c == '\n') || (c == '\r') || (c == '\f'));
  654. }
  655. //
  656. // get a default collating element:
  657. //
  658. inline std::string BOOST_REGEX_CALL lookup_default_collate_name(const std::string& name)
  659. {
  660. //
  661. // these are the POSIX collating names:
  662. //
  663. static const char* def_coll_names[] = {
  664. "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline",
  665. "vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK",
  666. "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark",
  667. "quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe",
  668. "left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen",
  669. "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine",
  670. "colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign",
  671. "question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
  672. "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash",
  673. "right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f",
  674. "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket",
  675. "vertical-line", "right-curly-bracket", "tilde", "DEL", "",
  676. };
  677. // these multi-character collating elements
  678. // should keep most Western-European locales
  679. // happy - we should really localise these a
  680. // little more - but this will have to do for
  681. // now:
  682. static const char* def_multi_coll[] = {
  683. "ae",
  684. "Ae",
  685. "AE",
  686. "ch",
  687. "Ch",
  688. "CH",
  689. "ll",
  690. "Ll",
  691. "LL",
  692. "ss",
  693. "Ss",
  694. "SS",
  695. "nj",
  696. "Nj",
  697. "NJ",
  698. "dz",
  699. "Dz",
  700. "DZ",
  701. "lj",
  702. "Lj",
  703. "LJ",
  704. "",
  705. };
  706. unsigned int i = 0;
  707. while (*def_coll_names[i])
  708. {
  709. if (def_coll_names[i] == name)
  710. {
  711. return std::string(1, char(i));
  712. }
  713. ++i;
  714. }
  715. i = 0;
  716. while (*def_multi_coll[i])
  717. {
  718. if (def_multi_coll[i] == name)
  719. {
  720. return def_multi_coll[i];
  721. }
  722. ++i;
  723. }
  724. return std::string();
  725. }
  726. //
  727. // get the state_id of a character classification, the individual
  728. // traits classes then transform that state_id into a bitmask:
  729. //
  730. template <class charT>
  731. struct character_pointer_range
  732. {
  733. const charT* p1;
  734. const charT* p2;
  735. bool operator < (const character_pointer_range& r)const
  736. {
  737. return std::lexicographical_compare(p1, p2, r.p1, r.p2);
  738. }
  739. bool operator == (const character_pointer_range& r)const
  740. {
  741. // Not only do we check that the ranges are of equal size before
  742. // calling std::equal, but there is no other algorithm available:
  743. // not even a non-standard MS one. So forward to unchecked_equal
  744. // in the MS case.
  745. return ((p2 - p1) == (r.p2 - r.p1)) && BOOST_REGEX_DETAIL_NS::equal(p1, p2, r.p1);
  746. }
  747. };
  748. template <class charT>
  749. int get_default_class_id(const charT* p1, const charT* p2)
  750. {
  751. static const charT data[73] = {
  752. 'a', 'l', 'n', 'u', 'm',
  753. 'a', 'l', 'p', 'h', 'a',
  754. 'b', 'l', 'a', 'n', 'k',
  755. 'c', 'n', 't', 'r', 'l',
  756. 'd', 'i', 'g', 'i', 't',
  757. 'g', 'r', 'a', 'p', 'h',
  758. 'l', 'o', 'w', 'e', 'r',
  759. 'p', 'r', 'i', 'n', 't',
  760. 'p', 'u', 'n', 'c', 't',
  761. 's', 'p', 'a', 'c', 'e',
  762. 'u', 'n', 'i', 'c', 'o', 'd', 'e',
  763. 'u', 'p', 'p', 'e', 'r',
  764. 'v',
  765. 'w', 'o', 'r', 'd',
  766. 'x', 'd', 'i', 'g', 'i', 't',
  767. };
  768. static const character_pointer_range<charT> ranges[21] =
  769. {
  770. {data+0, data+5,}, // alnum
  771. {data+5, data+10,}, // alpha
  772. {data+10, data+15,}, // blank
  773. {data+15, data+20,}, // cntrl
  774. {data+20, data+21,}, // d
  775. {data+20, data+25,}, // digit
  776. {data+25, data+30,}, // graph
  777. {data+29, data+30,}, // h
  778. {data+30, data+31,}, // l
  779. {data+30, data+35,}, // lower
  780. {data+35, data+40,}, // print
  781. {data+40, data+45,}, // punct
  782. {data+45, data+46,}, // s
  783. {data+45, data+50,}, // space
  784. {data+57, data+58,}, // u
  785. {data+50, data+57,}, // unicode
  786. {data+57, data+62,}, // upper
  787. {data+62, data+63,}, // v
  788. {data+63, data+64,}, // w
  789. {data+63, data+67,}, // word
  790. {data+67, data+73,}, // xdigit
  791. };
  792. const character_pointer_range<charT>* ranges_begin = ranges;
  793. const character_pointer_range<charT>* ranges_end = ranges + (sizeof(ranges)/sizeof(ranges[0]));
  794. character_pointer_range<charT> t = { p1, p2, };
  795. const character_pointer_range<charT>* p = std::lower_bound(ranges_begin, ranges_end, t);
  796. if((p != ranges_end) && (t == *p))
  797. return static_cast<int>(p - ranges);
  798. return -1;
  799. }
  800. //
  801. // helper functions:
  802. //
  // Number of characters in the null-terminated string p, not counting the
  // terminator.  Generic version: walk the string until the terminator.
  template <class charT>
  std::ptrdiff_t global_length(const charT* p)
  {
     const charT* last = p;
     while(*last)
        ++last;
     return last - p;
  }
  // Narrow strings: defer to the C library.
  template<>
  inline std::ptrdiff_t global_length<char>(const char* p)
  {
     return static_cast<std::ptrdiff_t>((std::strlen)(p));
  }
  #ifndef BOOST_NO_WREGEX
  // Wide strings: defer to the C library.
  template<>
  inline std::ptrdiff_t global_length<wchar_t>(const wchar_t* p)
  {
     return static_cast<std::ptrdiff_t>((std::wcslen)(p));
  }
  #endif
  826. template <class charT>
  827. inline charT BOOST_REGEX_CALL global_lower(charT c)
  828. {
  829. return c;
  830. }
  831. template <class charT>
  832. inline charT BOOST_REGEX_CALL global_upper(charT c)
  833. {
  834. return c;
  835. }
  836. inline char BOOST_REGEX_CALL do_global_lower(char c)
  837. {
  838. return static_cast<char>((std::tolower)((unsigned char)c));
  839. }
  840. inline char BOOST_REGEX_CALL do_global_upper(char c)
  841. {
  842. return static_cast<char>((std::toupper)((unsigned char)c));
  843. }
  844. #ifndef BOOST_NO_WREGEX
  845. inline wchar_t BOOST_REGEX_CALL do_global_lower(wchar_t c)
  846. {
  847. return (std::towlower)(c);
  848. }
  849. inline wchar_t BOOST_REGEX_CALL do_global_upper(wchar_t c)
  850. {
  851. return (std::towupper)(c);
  852. }
  853. #endif
  854. //
  855. // This sucks: declare template specialisations of global_lower/global_upper
  856. // that just forward to the non-template implementation functions. We do
  857. // this because there is one compiler (Compaq Tru64 C++) that doesn't seem
  858. // to differentiate between templates and non-template overloads....
  859. // what's more, the primary template, plus all overloads have to be
  860. // defined in the same translation unit (if one is inline they all must be)
  861. // otherwise the "local template instantiation" compiler option can pick
  862. // the wrong instantiation when linking:
  863. //
  864. template<> inline char BOOST_REGEX_CALL global_lower<char>(char c) { return do_global_lower(c); }
  865. template<> inline char BOOST_REGEX_CALL global_upper<char>(char c) { return do_global_upper(c); }
  866. #ifndef BOOST_NO_WREGEX
  867. template<> inline wchar_t BOOST_REGEX_CALL global_lower<wchar_t>(wchar_t c) { return do_global_lower(c); }
  868. template<> inline wchar_t BOOST_REGEX_CALL global_upper<wchar_t>(wchar_t c) { return do_global_upper(c); }
  869. #endif
  // Numeric value of the digit character c: 0-9 for '0'..'9' and 10-15 for
  // 'a'..'f' or 'A'..'F'.  Any other character yields -1.
  template <class charT>
  int global_value(charT c)
  {
     static const charT digit_first = '0';
     static const charT digit_last = '9';
     static const charT lower_first = 'a';
     static const charT lower_last = 'f';
     static const charT upper_first = 'A';
     static const charT upper_last = 'F';

     int value = -1;
     // The three bands are tested from the highest character code downwards;
     // anything that falls between two bands keeps the default of -1.
     if(c <= lower_last)
     {
        if(c >= lower_first)
        {
           value = 10 + (c - lower_first);
        }
        else if(c <= upper_last)
        {
           if(c >= upper_first)
           {
              value = 10 + (c - upper_first);
           }
           else if((c <= digit_last) && (c >= digit_first))
           {
              value = c - digit_first;
           }
        }
     }
     return value;
  }
  887. template <class charT, class traits>
  888. boost::intmax_t global_toi(const charT*& p1, const charT* p2, int radix, const traits& t)
  889. {
  890. (void)t; // warning suppression
  891. boost::intmax_t limit = (std::numeric_limits<boost::intmax_t>::max)() / radix;
  892. boost::intmax_t next_value = t.value(*p1, radix);
  893. if((p1 == p2) || (next_value < 0) || (next_value >= radix))
  894. return -1;
  895. boost::intmax_t result = 0;
  896. while(p1 != p2)
  897. {
  898. next_value = t.value(*p1, radix);
  899. if((next_value < 0) || (next_value >= radix))
  900. break;
  901. result *= radix;
  902. result += next_value;
  903. ++p1;
  904. if (result > limit)
  905. return -1;
  906. }
  907. return result;
  908. }
  909. template <class charT>
  910. inline typename boost::enable_if_c<(sizeof(charT) > 1), const charT*>::type get_escape_R_string()
  911. {
  912. #ifdef BOOST_MSVC
  913. # pragma warning(push)
  914. # pragma warning(disable:4309 4245)
  915. #endif
  916. static const charT e1[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?',
  917. '|', '[', '\x0A', '\x0B', '\x0C', static_cast<charT>(0x85), static_cast<charT>(0x2028),
  918. static_cast<charT>(0x2029), ']', ')', ')', '\0' };
  919. static const charT e2[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?',
  920. '|', '[', '\x0A', '\x0B', '\x0C', static_cast<charT>(0x85), ']', ')', ')', '\0' };
  921. charT c = static_cast<charT>(0x2029u);
  922. bool b = (static_cast<unsigned>(c) == 0x2029u);
  923. return (b ? e1 : e2);
  924. #ifdef BOOST_MSVC
  925. # pragma warning(pop)
  926. #endif
  927. }
  928. template <class charT>
  929. inline typename boost::disable_if_c<(sizeof(charT) > 1), const charT*>::type get_escape_R_string()
  930. {
  931. #ifdef BOOST_MSVC
  932. # pragma warning(push)
  933. # pragma warning(disable:4309)
  934. #endif
  935. static const charT e2[] = { '(', '?', '-', 'x', ':', '(', '?', '>', '\x0D', '\x0A', '?',
  936. '|', '[', '\x0A', '\x0B', '\x0C', '\x85', ']', ')', ')', '\0' };
  937. return e2;
  938. #ifdef BOOST_MSVC
  939. # pragma warning(pop)
  940. #endif
  941. }
  942. } // BOOST_REGEX_DETAIL_NS
  943. } // boost
  944. #ifdef BOOST_MSVC
  945. #pragma warning(push)
  946. #pragma warning(disable: 4103)
  947. #endif
  948. #ifdef BOOST_HAS_ABI_HEADERS
  949. # include BOOST_ABI_SUFFIX
  950. #endif
  951. #ifdef BOOST_MSVC
  952. #pragma warning(pop)
  953. #endif
  954. #endif