cpp_re.hpp 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423
  1. /*=============================================================================
  2. Boost.Wave: A Standard compliant C++ preprocessor library
  3. Re2C based C++ lexer
  4. http://www.boost.org/
  5. Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
  6. Software License, Version 1.0. (See accompanying file
  7. LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. =============================================================================*/
  9. #if !defined(BOOST_CPP_RE_HPP_B76C4F5E_63E9_4B8A_9975_EC32FA6BF027_INCLUDED)
  10. #define BOOST_CPP_RE_HPP_B76C4F5E_63E9_4B8A_9975_EC32FA6BF027_INCLUDED
  11. #include <boost/assert.hpp>
  12. #include <boost/wave/wave_config.hpp>
  13. #include <boost/wave/token_ids.hpp>
  14. #include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
  15. #include <boost/wave/cpplexer/re2clex/aq.hpp>
  16. #include <boost/wave/cpplexer/re2clex/scanner.hpp>
  17. // this must occur after all of the includes and before any code appears
  18. #ifdef BOOST_HAS_ABI_HEADERS
  19. #include BOOST_ABI_PREFIX
  20. #endif
  21. // suppress warnings about dependent classes not being exported from the dll
  22. #ifdef BOOST_MSVC
  23. #pragma warning(push)
  24. #pragma warning(disable : 4251 4231 4660)
  25. #endif
  26. ///////////////////////////////////////////////////////////////////////////////
  // Names expected by the re2c scanner code that is #included into scan()
  // below: the character type and the local variables holding the current
  // read position, the end of the buffered input and the backtrack marker.
  27. #define YYCTYPE uchar
  28. #define YYCURSOR cursor
  29. #define YYLIMIT limit
  30. #define YYMARKER marker
  // YYFILL(n) refills the buffer through fill(). The marker is stored into
  // s->ptr first so that fill() can relocate it together with the buffer; the
  // three local wrappers are then rebuilt from the values fill() left behind
  // (the cursor keeps its current column). The argument n is not used.
  31. #define YYFILL(n) \
  32. { \
  33. s->ptr = marker; \
  34. cursor = uchar_wrapper(fill(s, cursor), cursor.column); \
  35. limit = uchar_wrapper (s->lim); \
  36. marker = uchar_wrapper(s->ptr); \
  37. } \
  38. /**/
  39. #include <iosfwd>
  40. ///////////////////////////////////////////////////////////////////////////////
  // Write the scanner's local position state (cursor, limit, marker) back into
  // the Scanner object. s->line is advanced by the number of erased
  // backslash-newline sequences lying at or before the cursor, as reported by
  // count_backslash_newlines() (which also removes them from s->eol_offsets).
  41. #define BOOST_WAVE_UPDATE_CURSOR() \
  42. { \
  43. s->line += count_backslash_newlines(s, cursor); \
  44. s->curr_column = cursor.column; \
  45. s->cur = cursor; \
  46. s->lim = limit; \
  47. s->ptr = marker; \
  48. } \
  49. /**/
  50. ///////////////////////////////////////////////////////////////////////////////
  // Leave the enclosing function with token id (i): the local position state
  // is first stored back via BOOST_WAVE_UPDATE_CURSOR(); if the current
  // position ended up beyond the buffer limit, T_EOF is returned instead.
  51. #define BOOST_WAVE_RET(i) \
  52. { \
  53. BOOST_WAVE_UPDATE_CURSOR() \
  54. if (s->cur > s->lim) \
  55. return T_EOF; /* may happen for empty files */ \
  56. return (i); \
  57. } \
  58. /**/
  59. ///////////////////////////////////////////////////////////////////////////////
  60. namespace boost {
  61. namespace wave {
  62. namespace cpplexer {
  63. namespace re2clex {
  64. template<typename Iterator>
  65. struct Scanner;
  66. ///////////////////////////////////////////////////////////////////////////////
  67. // The scanner function to call whenever a new token is requested
  68. template<typename Iterator>
  69. BOOST_WAVE_DECL boost::wave::token_id scan(Scanner<Iterator> *s);
  70. ///////////////////////////////////////////////////////////////////////////////
  71. ///////////////////////////////////////////////////////////////////////////////
  72. // Utility functions
  73. #define RE2C_ASSERT BOOST_ASSERT
  74. template<typename Iterator>
  75. int get_one_char(Scanner<Iterator> *s)
  76. {
  77. RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
  78. if (s->act < s->last)
  79. return *(s->act)++;
  80. return -1;
  81. }
  82. template<typename Iterator>
  83. std::ptrdiff_t rewind_stream (Scanner<Iterator> *s, int cnt)
  84. {
  85. std::advance(s->act, cnt);
  86. RE2C_ASSERT(s->first <= s->act && s->act <= s->last);
  87. return std::distance(s->first, s->act);
  88. }
  89. template<typename Iterator>
  90. std::size_t get_first_eol_offset(Scanner<Iterator>* s)
  91. {
  92. if (!AQ_EMPTY(s->eol_offsets))
  93. {
  94. return s->eol_offsets->queue[s->eol_offsets->head];
  95. }
  96. else
  97. {
  98. return (unsigned int)-1;
  99. }
  100. }
  101. template<typename Iterator>
  102. void adjust_eol_offsets(Scanner<Iterator>* s, std::size_t adjustment)
  103. {
  104. aq_queue q;
  105. std::size_t i;
  106. if (!s->eol_offsets)
  107. s->eol_offsets = aq_create();
  108. q = s->eol_offsets;
  109. if (AQ_EMPTY(q))
  110. return;
  111. i = q->head;
  112. while (i != q->tail)
  113. {
  114. if (adjustment > q->queue[i])
  115. q->queue[i] = 0;
  116. else
  117. q->queue[i] -= adjustment;
  118. ++i;
  119. if (i == q->max_size)
  120. i = 0;
  121. }
  122. if (adjustment > q->queue[i])
  123. q->queue[i] = 0;
  124. else
  125. q->queue[i] -= adjustment;
  126. }
  127. template<typename Iterator>
  128. int count_backslash_newlines(Scanner<Iterator> *s, uchar *cursor)
  129. {
  130. std::size_t diff, offset;
  131. int skipped = 0;
  132. /* figure out how many backslash-newlines skipped over unknowingly. */
  133. diff = cursor - s->bot;
  134. offset = get_first_eol_offset(s);
  135. while (offset <= diff && offset != (unsigned int)-1)
  136. {
  137. skipped++;
  138. aq_pop(s->eol_offsets);
  139. offset = get_first_eol_offset(s);
  140. }
  141. return skipped;
  142. }
  // Defined elsewhere. fill() uses it to test whether a backslash starts at p
  // (with 'end' bounding the readable range) and reads the result from 'len'.
  // NOTE(review): presumably len is 1 for '\' and 3 for its trigraph spelling;
  // confirm against the definition.
  143. BOOST_WAVE_DECL bool is_backslash(uchar *p, uchar *end, int &len);
  // Maximum number of characters fill() reads from the input per call; also
  // the amount of free space it keeps available behind s->lim.
  144. #define BOOST_WAVE_BSIZE 196608
  145. template<typename Iterator>
  146. uchar *fill(Scanner<Iterator> *s, uchar *cursor)
  147. {
  148. using namespace std; // some systems have memcpy etc. in namespace std
  149. if(!s->eof)
  150. {
  151. uchar* p;
  152. std::ptrdiff_t cnt = s->tok - s->bot;
  153. if(cnt)
  154. {
  155. if (NULL == s->lim)
  156. s->lim = s->top;
  157. size_t length = s->lim - s->tok;
  158. if(length > 0){
  159. memmove(s->bot, s->tok, length);
  160. }
  161. s->tok = s->cur = s->bot;
  162. s->ptr -= cnt;
  163. cursor -= cnt;
  164. s->lim -= cnt;
  165. adjust_eol_offsets(s, cnt);
  166. }
  167. if((s->top - s->lim) < BOOST_WAVE_BSIZE)
  168. {
  169. uchar *buf = (uchar*) malloc(((s->lim - s->bot) + BOOST_WAVE_BSIZE)*sizeof(uchar));
  170. if (buf == 0)
  171. {
  172. (*s->error_proc)(s, lexing_exception::unexpected_error,
  173. "Out of memory!");
  174. /* get the scanner to stop */
  175. *cursor = 0;
  176. return cursor;
  177. }
  178. size_t length = s->lim - s->tok;
  179. if(length > 0){
  180. memmove(buf, s->tok, length);
  181. }
  182. s->tok = s->cur = buf;
  183. s->ptr = &buf[s->ptr - s->bot];
  184. cursor = &buf[cursor - s->bot];
  185. s->lim = &buf[s->lim - s->bot];
  186. s->top = &s->lim[BOOST_WAVE_BSIZE];
  187. free(s->bot);
  188. s->bot = buf;
  189. }
  190. cnt = std::distance(s->act, s->last);
  191. if (cnt > BOOST_WAVE_BSIZE)
  192. cnt = BOOST_WAVE_BSIZE;
  193. uchar * dst = s->lim;
  194. for (std::ptrdiff_t idx = 0; idx < cnt; ++idx)
  195. {
  196. *dst++ = *s->act++;
  197. }
  198. if (cnt != BOOST_WAVE_BSIZE)
  199. {
  200. s->eof = &s->lim[cnt]; *(s->eof)++ = '\0';
  201. }
  202. /* backslash-newline erasing time */
  203. /* first scan for backslash-newline and erase them */
  204. /* a backslash-newline combination can be 2 (regular) or 4 (trigraph backslash) chars */
  205. /* start checking 3 chars within the old buffer, if possible */
  206. for (p = (std::max)(s->lim - 3, s->cur); p < s->lim + cnt - 2; ++p)
  207. {
  208. int len = 0;
  209. /* is there a backslash, and room afterwards for a newline? */
  210. if (is_backslash(p, s->lim + cnt, len) && ((p + len) < (s->lim + cnt)))
  211. {
  212. if (*(p+len) == '\n')
  213. {
  214. int offset = len + 1;
  215. memmove(p, p + offset, s->lim + cnt - p - offset);
  216. cnt -= offset;
  217. --p;
  218. aq_enqueue(s->eol_offsets, p - s->bot + 1);
  219. }
  220. else if (*(p+len) == '\r')
  221. {
  222. /* is there also room for a newline, and is one present? */
  223. if (((p + len + 1) < s->lim + cnt) && (*(p+len+1) == '\n'))
  224. {
  225. int offset = len + 2;
  226. memmove(p, p + offset, s->lim + cnt - p - offset);
  227. cnt -= offset;
  228. --p;
  229. }
  230. else
  231. {
  232. int offset = len + 1;
  233. memmove(p, p + offset, s->lim + cnt - p - offset);
  234. cnt -= offset;
  235. --p;
  236. }
  237. aq_enqueue(s->eol_offsets, p - s->bot + 1);
  238. }
  239. }
  240. }
  241. /* FIXME: the following code should be fixed to recognize correctly the
  242. trigraph backslash token */
  243. /* check to see if what we just read ends in a backslash */
  244. if (cnt >= 2)
  245. {
  246. uchar last = s->lim[cnt-1];
  247. uchar last2 = s->lim[cnt-2];
  248. /* check \ EOB */
  249. if (last == '\\')
  250. {
  251. int next = get_one_char(s);
  252. /* check for \ \n or \ \r or \ \r \n straddling the border */
  253. if (next == '\n')
  254. {
  255. --cnt; /* chop the final \, we've already read the \n. */
  256. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  257. }
  258. else if (next == '\r')
  259. {
  260. int next2 = get_one_char(s);
  261. if (next2 == '\n')
  262. {
  263. --cnt; /* skip the backslash */
  264. }
  265. else
  266. {
  267. /* rewind one, and skip one char */
  268. rewind_stream(s, -1);
  269. --cnt;
  270. }
  271. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  272. }
  273. else if (next != -1) /* -1 means end of file */
  274. {
  275. /* next was something else, so rewind the stream */
  276. rewind_stream(s, -1);
  277. }
  278. }
  279. /* check \ \r EOB */
  280. else if (last == '\r' && last2 == '\\')
  281. {
  282. int next = get_one_char(s);
  283. if (next == '\n')
  284. {
  285. cnt -= 2; /* skip the \ \r */
  286. }
  287. else
  288. {
  289. /* rewind one, and skip two chars */
  290. rewind_stream(s, -1);
  291. cnt -= 2;
  292. }
  293. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  294. }
  295. /* check \ \n EOB */
  296. else if (last == '\n' && last2 == '\\')
  297. {
  298. cnt -= 2;
  299. aq_enqueue(s->eol_offsets, cnt + (s->lim - s->bot));
  300. }
  301. }
  302. s->lim += cnt;
  303. if (s->eof) /* eof needs adjusting if we erased backslash-newlines */
  304. {
  305. s->eof = s->lim;
  306. *(s->eof)++ = '\0';
  307. }
  308. }
  309. return cursor;
  310. }
  311. ///////////////////////////////////////////////////////////////////////////////
  312. // Special wrapper class holding the current cursor position
  // Pairs a raw buffer pointer with a column number. Only the declarations
  // are visible here; the member functions are defined elsewhere.
  313. struct BOOST_WAVE_DECL uchar_wrapper
  314. {
  // Wrap the given buffer position; the column defaults to 1.
  315. uchar_wrapper (uchar *base_cursor, std::size_t column = 1);
  // Step forwards / backwards.
  // NOTE(review): presumably these keep 'column' in sync with the pointer;
  // confirm against the definitions.
  316. uchar_wrapper& operator++();
  317. uchar_wrapper& operator--();
  // Access the character at the wrapped position.
  318. uchar operator* () const;
  // Implicit conversion to the raw pointer, used wherever a plain uchar* is
  // expected (e.g. when a wrapper is passed to fill()).
  319. operator uchar *() const;
  // Difference between two wrapped positions.
  320. friend BOOST_WAVE_DECL std::ptrdiff_t
  321. operator- (uchar_wrapper const& lhs, uchar_wrapper const& rhs);
  // The wrapped buffer position and the column associated with it.
  322. uchar *base_cursor;
  323. std::size_t column;
  324. };
  325. ///////////////////////////////////////////////////////////////////////////////
  // Scan the next token starting at s->cur and return its token id.
  // The function body proper is the re2c scanner code textually included
  // below; it operates on the local variables declared here through the
  // YYCURSOR/YYLIMIT/YYMARKER/YYFILL macros, so their names must not change.
  326. template<typename Iterator>
  327. boost::wave::token_id scan(Scanner<Iterator> *s)
  328. {
  329. BOOST_ASSERT(0 != s->error_proc); // error handler must be given
  // The new token starts at the current position: s->tok and s->column are
  // set as a side effect of initializing the cursor.
  330. uchar_wrapper cursor (s->tok = s->cur, s->column = s->curr_column);
  331. uchar_wrapper marker (s->ptr);
  332. uchar_wrapper limit (s->lim);
  333. typedef BOOST_WAVE_STRINGTYPE string_type;
  334. string_type rawstringdelim; // for use with C++11 raw string literals
  335. // include the correct Re2C token definition rules
  // Some BSD flavours may have T_DIVIDE defined as a macro already; get rid of
  // it before the scanner code is included.
  336. #if (defined (__FreeBSD__) || defined (__DragonFly__) || defined (__OpenBSD__)) && defined (T_DIVIDE)
  337. #undef T_DIVIDE
  338. #endif
  // BOOST_WAVE_USE_STRICT_LEXER selects which scanner code is used.
  339. #if BOOST_WAVE_USE_STRICT_LEXER != 0
  340. #include "strict_cpp_re.inc"
  341. #else
  342. #include "cpp_re.inc"
  343. #endif
  344. } /* end of scan */
  345. ///////////////////////////////////////////////////////////////////////////////
  346. } // namespace re2clex
  347. } // namespace cpplexer
  348. } // namespace wave
  349. } // namespace boost
  350. #ifdef BOOST_MSVC
  351. #pragma warning(pop)
  352. #endif
  353. #undef BOOST_WAVE_RET
  354. #undef YYCTYPE
  355. #undef YYCURSOR
  356. #undef YYLIMIT
  357. #undef YYMARKER
  358. #undef YYFILL
  359. // the suffix header occurs after all of the code
  360. #ifdef BOOST_HAS_ABI_HEADERS
  361. #include BOOST_ABI_SUFFIX
  362. #endif
  363. #endif // !defined(BOOST_CPP_RE_HPP_B76C4F5E_63E9_4B8A_9975_EC32FA6BF027_INCLUDED)