basic_parser.hpp 21 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715
  1. //
  2. // Copyright (c) 2019 Vinnie Falco ([email protected])
  3. // Copyright (c) 2020 Krystian Stasiowski ([email protected])
  4. //
  5. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  6. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. // Official repository: https://github.com/boostorg/json
  9. //
  10. #ifndef BOOST_JSON_BASIC_PARSER_HPP
  11. #define BOOST_JSON_BASIC_PARSER_HPP
  12. #include <boost/json/detail/config.hpp>
  13. #include <boost/json/detail/except.hpp>
  14. #include <boost/json/error.hpp>
  15. #include <boost/json/kind.hpp>
  16. #include <boost/json/parse_options.hpp>
  17. #include <boost/json/detail/stack.hpp>
  18. #include <boost/json/detail/stream.hpp>
  19. #include <boost/json/detail/utf8.hpp>
  20. #include <boost/json/detail/sbo_buffer.hpp>
  21. namespace boost {
  22. namespace json {
  23. /** An incremental SAX parser for serialized JSON.
  24. This implements a SAX-style parser, invoking a
  25. caller-supplied handler with each parsing event.
  26. To use, first declare a variable of type
  27. `basic_parser<T>` where `T` meets the handler
  28. requirements specified below. Then call
  29. @ref write_some one or more times with the input,
  30. setting `more = false` on the final buffer.
  31. The parsing events are realized through member
  32. function calls on the handler, which exists
  33. as a data member of the parser.
  34. \n
  35. The parser may dynamically allocate intermediate
  36. storage as needed to accommodate the nesting level
  37. of the input JSON. On subsequent invocations, the
  38. parser can cheaply re-use this memory, improving
  39. performance. This storage is freed when the
  40. parser is destroyed.
  41. @par Usage
  42. To get the declaration and function definitions
  43. for this class it is necessary to include this
  44. file instead:
  45. @code
  46. #include <boost/json/basic_parser_impl.hpp>
  47. @endcode
  48. Users who wish to parse JSON into the DOM container
  49. @ref value will not use this class directly; instead
  50. they will create an instance of @ref parser or
  51. @ref stream_parser and use that instead. Alternatively,
  52. they may call the function @ref parse. This class is
  53. designed for users who wish to perform custom actions
  54. instead of building a @ref value, for example to
  55. produce a DOM from an external library.
  56. \n
  57. @note
  58. By default, only conforming JSON using UTF-8
  59. encoding is accepted. However, select non-compliant
  60. syntax can be allowed by construction using a
  61. @ref parse_options set to desired values.
  62. @par Handler
  63. The handler provided must be implemented as an
  64. object of class type which defines each of the
  65. required event member functions below. The event
  66. functions return a `bool` where `true` indicates
  67. success, and `false` indicates failure. If the
  68. member function returns `false`, it must set
  69. the error code to a suitable value. This error
  70. code will be returned by the write function to
  71. the caller.
  72. \n
  73. Handlers are required to declare the maximum
  74. limits on various elements. If these limits
  75. are exceeded during parsing, then parsing
  76. fails with an error.
  77. \n
  78. The following declaration meets the parser's
  79. handler requirements:
  80. @code
  81. struct handler
  82. {
  83. /// The maximum number of elements allowed in an array
  84. static constexpr std::size_t max_array_size = -1;
  85. /// The maximum number of elements allowed in an object
  86. static constexpr std::size_t max_object_size = -1;
  87. /// The maximum number of characters allowed in a string
  88. static constexpr std::size_t max_string_size = -1;
  89. /// The maximum number of characters allowed in a key
  90. static constexpr std::size_t max_key_size = -1;
  91. /// Called once when the JSON parsing begins.
  92. ///
  93. /// @return `true` on success.
  94. /// @param ec Set to the error, if any occurred.
  95. ///
  96. bool on_document_begin( error_code& ec );
  97. /// Called when the JSON parsing is done.
  98. ///
  99. /// @return `true` on success.
  100. /// @param ec Set to the error, if any occurred.
  101. ///
  102. bool on_document_end( error_code& ec );
  103. /// Called when the beginning of an array is encountered.
  104. ///
  105. /// @return `true` on success.
  106. /// @param ec Set to the error, if any occurred.
  107. ///
  108. bool on_array_begin( error_code& ec );
  109. /// Called when the end of the current array is encountered.
  110. ///
  111. /// @return `true` on success.
  112. /// @param n The number of elements in the array.
  113. /// @param ec Set to the error, if any occurred.
  114. ///
  115. bool on_array_end( std::size_t n, error_code& ec );
  116. /// Called when the beginning of an object is encountered.
  117. ///
  118. /// @return `true` on success.
  119. /// @param ec Set to the error, if any occurred.
  120. ///
  121. bool on_object_begin( error_code& ec );
  122. /// Called when the end of the current object is encountered.
  123. ///
  124. /// @return `true` on success.
  125. /// @param n The number of elements in the object.
  126. /// @param ec Set to the error, if any occurred.
  127. ///
  128. bool on_object_end( std::size_t n, error_code& ec );
  129. /// Called with characters corresponding to part of the current string.
  130. ///
  131. /// @return `true` on success.
  132. /// @param s The partial characters
  133. /// @param n The total size of the string thus far
  134. /// @param ec Set to the error, if any occurred.
  135. ///
  136. bool on_string_part( string_view s, std::size_t n, error_code& ec );
  137. /// Called with the last characters corresponding to the current string.
  138. ///
  139. /// @return `true` on success.
  140. /// @param s The remaining characters
  141. /// @param n The total size of the string
  142. /// @param ec Set to the error, if any occurred.
  143. ///
  144. bool on_string( string_view s, std::size_t n, error_code& ec );
  145. /// Called with characters corresponding to part of the current key.
  146. ///
  147. /// @return `true` on success.
  148. /// @param s The partial characters
  149. /// @param n The total size of the key thus far
  150. /// @param ec Set to the error, if any occurred.
  151. ///
  152. bool on_key_part( string_view s, std::size_t n, error_code& ec );
  153. /// Called with the last characters corresponding to the current key.
  154. ///
  155. /// @return `true` on success.
  156. /// @param s The remaining characters
  157. /// @param n The total size of the key
  158. /// @param ec Set to the error, if any occurred.
  159. ///
  160. bool on_key( string_view s, std::size_t n, error_code& ec );
  161. /// Called with the characters corresponding to part of the current number.
  162. ///
  163. /// @return `true` on success.
  164. /// @param s The partial characters
  165. /// @param ec Set to the error, if any occurred.
  166. ///
  167. bool on_number_part( string_view s, error_code& ec );
  168. /// Called when a signed integer is parsed.
  169. ///
  170. /// @return `true` on success.
  171. /// @param i The value
  172. /// @param s The remaining characters
  173. /// @param ec Set to the error, if any occurred.
  174. ///
  175. bool on_int64( int64_t i, string_view s, error_code& ec );
  176. /// Called when an unsigned integer is parsed.
  177. ///
  178. /// @return `true` on success.
  179. /// @param u The value
  180. /// @param s The remaining characters
  181. /// @param ec Set to the error, if any occurred.
  182. ///
  183. bool on_uint64( uint64_t u, string_view s, error_code& ec );
  184. /// Called when a double is parsed.
  185. ///
  186. /// @return `true` on success.
  187. /// @param d The value
  188. /// @param s The remaining characters
  189. /// @param ec Set to the error, if any occurred.
  190. ///
  191. bool on_double( double d, string_view s, error_code& ec );
  192. /// Called when a boolean is parsed.
  193. ///
  194. /// @return `true` on success.
  195. /// @param b The value
  197. /// @param ec Set to the error, if any occurred.
  198. ///
  199. bool on_bool( bool b, error_code& ec );
  200. /// Called when a null is parsed.
  201. ///
  202. /// @return `true` on success.
  203. /// @param ec Set to the error, if any occurred.
  204. ///
  205. bool on_null( error_code& ec );
  206. /// Called with characters corresponding to part of the current comment.
  207. ///
  208. /// @return `true` on success.
  209. /// @param s The partial characters.
  210. /// @param ec Set to the error, if any occurred.
  211. ///
  212. bool on_comment_part( string_view s, error_code& ec );
  213. /// Called with the last characters corresponding to the current comment.
  214. ///
  215. /// @return `true` on success.
  216. /// @param s The remaining characters
  217. /// @param ec Set to the error, if any occurred.
  218. ///
  219. bool on_comment( string_view s, error_code& ec );
  220. };
  221. @endcode
  222. @see
  223. @ref parse,
  224. @ref stream_parser,
  225. [Validating parser example](../../doc/html/json/examples.html#json.examples.validate).
  226. @headerfile <boost/json/basic_parser.hpp>
  227. */
  228. template<class Handler>
  229. class basic_parser // incremental SAX-style JSON parser; parsing events are delivered to the owned Handler (h_)
  230. {
  231. enum class state : char // tags accepted by suspend()/maybe_suspend()/suspend_or_fail(); presumably the resume points of the parse_* routines (their definitions are not in this header) — confirm
  232. {
  233. doc1, doc3,
  234. com1, com2, com3, com4,
  235. lit1,
  236. str1, str2, str3, str4,
  237. str5, str6, str7, str8,
  238. sur1, sur2, sur3,
  239. sur4, sur5, sur6,
  240. obj1, obj2, obj3, obj4,
  241. obj5, obj6, obj7, obj8,
  242. obj9, obj10, obj11,
  243. arr1, arr2, arr3,
  244. arr4, arr5, arr6,
  245. num1, num2, num3, num4,
  246. num5, num6, num7, num8,
  247. exp1, exp2, exp3,
  248. val1, val2, val3
  249. };
  250. struct number // intermediate number state; handed to suspend()/maybe_suspend() by const reference
  251. {
  252. uint64_t mant; // presumably the mantissa digits accumulated so far — confirm in the implementation
  253. int bias; // NOTE(review): looks like a decimal exponent adjustment — confirm
  254. int exp; // presumably the explicit exponent value — confirm
  255. bool frac; // presumably set once a fractional part has been seen — confirm
  256. bool neg; // presumably set for a negative number — confirm
  257. };
  258. template< bool StackEmpty_, char First_ >
  259. struct parse_number_helper; // declared only; no definition appears in this header
  260. // optimization: the handler must be the first data member
  261. Handler h_; // the handler instance exposed through handler()
  262. number num_;
  263. system::error_code ec_; // the value returned by last_error()
  264. detail::stack st_;
  265. detail::utf8_sequence seq_;
  266. unsigned u1_;
  267. unsigned u2_;
  268. bool more_; // false for final buffer
  269. bool done_ = false; // true on complete parse; the value returned by done()
  270. bool clean_ = true; // write_some exited cleanly
  271. const char* end_;
  272. detail::sbo_buffer<16 + 16 + 1 + 1> num_buf_;
  273. parse_options opt_; // must stay declared before depth_, whose initializer reads opt_.max_depth
  274. // how many levels deeper the parser can go
  275. std::size_t depth_ = opt_.max_depth;
  276. unsigned char cur_lit_ = 0;
  277. unsigned char lit_offset_ = 0;
  278. inline void reserve();
  279. inline const char* sentinel();
  280. inline bool incomplete(
  281. const detail::const_stream_wrapper& cs);
  282. #ifdef __INTEL_COMPILER
  283. #pragma warning push
  284. #pragma warning disable 2196
  285. #endif
  286. BOOST_NOINLINE // NOTE(review): BOOST_NOINLINE combined with `inline` is presumably what Intel warning 2196 (disabled above) reports — confirm
  287. inline
  288. const char*
  289. suspend_or_fail(state st);
  290. BOOST_NOINLINE
  291. inline
  292. const char*
  293. suspend_or_fail(
  294. state st,
  295. std::size_t n);
  296. BOOST_NOINLINE
  297. inline
  298. const char*
  299. fail(const char* p) noexcept; // private overload; distinct from the public fail(system::error_code)
  300. BOOST_NOINLINE
  301. inline
  302. const char*
  303. fail(
  304. const char* p,
  305. error ev,
  306. source_location const* loc) noexcept; // private overload taking an explicit error value and source location
  307. BOOST_NOINLINE
  308. inline
  309. const char*
  310. maybe_suspend(
  311. const char* p,
  312. state st);
  313. BOOST_NOINLINE
  314. inline
  315. const char*
  316. maybe_suspend(
  317. const char* p,
  318. state st,
  319. std::size_t n);
  320. BOOST_NOINLINE
  321. inline
  322. const char*
  323. maybe_suspend(
  324. const char* p,
  325. state st,
  326. const number& num);
  327. BOOST_NOINLINE
  328. inline
  329. const char*
  330. suspend(
  331. const char* p,
  332. state st);
  333. BOOST_NOINLINE
  334. inline
  335. const char*
  336. suspend(
  337. const char* p,
  338. state st,
  339. const number& num);
  340. #ifdef __INTEL_COMPILER
  341. #pragma warning pop
  342. #endif
  343. template<bool StackEmpty_/*, bool Terminal_*/> // the commented-out Terminal_ template parameter is passed as the runtime argument `terminal` instead
  344. const char* parse_comment(const char* p,
  345. std::integral_constant<bool, StackEmpty_> stack_empty,
  346. /*std::integral_constant<bool, Terminal_>*/ bool terminal);
  347. template<bool StackEmpty_>
  348. const char* parse_document(const char* p,
  349. std::integral_constant<bool, StackEmpty_> stack_empty);
  350. template<bool StackEmpty_, bool AllowComments_/*,
  351. bool AllowTrailing_, bool AllowBadUTF8_*/>
  352. const char* parse_value(const char* p, // AllowTrailing_/AllowBadUTF8_ are commented out as template parameters and passed as runtime bools below
  353. std::integral_constant<bool, StackEmpty_> stack_empty,
  354. std::integral_constant<bool, AllowComments_> allow_comments,
  355. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  356. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  357. template<bool AllowComments_/*,
  358. bool AllowTrailing_, bool AllowBadUTF8_*/>
  359. const char* resume_value(const char* p, // unlike parse_value, takes no stack_empty tag
  360. std::integral_constant<bool, AllowComments_> allow_comments,
  361. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  362. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  363. template<bool StackEmpty_, bool AllowComments_/*,
  364. bool AllowTrailing_, bool AllowBadUTF8_*/>
  365. const char* parse_object(const char* p, // same parameter scheme as parse_value
  366. std::integral_constant<bool, StackEmpty_> stack_empty,
  367. std::integral_constant<bool, AllowComments_> allow_comments,
  368. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  369. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  370. template<bool StackEmpty_, bool AllowComments_/*,
  371. bool AllowTrailing_, bool AllowBadUTF8_*/>
  372. const char* parse_array(const char* p, // same parameter scheme as parse_value
  373. std::integral_constant<bool, StackEmpty_> stack_empty,
  374. std::integral_constant<bool, AllowComments_> allow_comments,
  375. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  376. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  377. template<int Literal>
  378. const char* parse_literal(const char* p, // the literal to parse is selected at compile time through the Literal tag
  379. std::integral_constant<int, Literal> literal);
  380. template<bool StackEmpty_, bool IsKey_/*,
  381. bool AllowBadUTF8_*/>
  382. const char* parse_string(const char* p, // IsKey_ is a compile-time tag here; AllowBadUTF8_ is passed as a runtime bool
  383. std::integral_constant<bool, StackEmpty_> stack_empty,
  384. std::integral_constant<bool, IsKey_> is_key,
  385. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  386. template<bool StackEmpty_, char First_, number_precision Numbers_>
  387. const char* parse_number(const char* p, // all three options are compile-time tags
  388. std::integral_constant<bool, StackEmpty_> stack_empty,
  389. std::integral_constant<char, First_> first,
  390. std::integral_constant<number_precision, Numbers_> numbers);
  391. template<bool StackEmpty_, bool IsKey_/*,
  392. bool AllowBadUTF8_*/>
  393. const char* parse_unescaped(const char* p, // same parameter scheme as parse_string
  394. std::integral_constant<bool, StackEmpty_> stack_empty,
  395. std::integral_constant<bool, IsKey_> is_key,
  396. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  397. template<bool StackEmpty_/*, bool IsKey_,
  398. bool AllowBadUTF8_*/>
  399. const char* parse_escaped( // here both is_key and allow_bad_utf8 are runtime bools
  400. const char* p,
  401. std::size_t total,
  402. std::integral_constant<bool, StackEmpty_> stack_empty,
  403. /*std::integral_constant<bool, IsKey_>*/ bool is_key,
  404. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
  405. // intentionally private: the number of nesting levels currently in use
  406. std::size_t
  407. depth() const noexcept
  408. {
  409. return opt_.max_depth - depth_; // depth_ counts the levels still available, so the difference is the levels consumed
  410. }
  411. public:
  412. /// Copy constructor (deleted)
  413. basic_parser(
  414. basic_parser const&) = delete;
  415. /// Copy assignment (deleted)
  416. basic_parser& operator=(
  417. basic_parser const&) = delete;
  418. /** Destructor.
  419. All dynamically allocated internal memory is freed.
  420. @par Effects
  421. @code
  422. this->handler().~Handler()
  423. @endcode
  424. @par Complexity
  425. Same as `~Handler()`.
  426. @par Exception Safety
  427. Same as `~Handler()`.
  428. */
  429. ~basic_parser() = default;
  430. /** Constructor.
  431. This function constructs the parser with
  432. the specified options, with any additional
  433. arguments forwarded to the handler's constructor.
  434. @par Complexity
  435. Same as `Handler( std::forward< Args >( args )... )`.
  436. @par Exception Safety
  437. Same as `Handler( std::forward< Args >( args )... )`.
  438. @param opt Configuration settings for the parser.
  439. If this structure is default constructed, the
  440. parser will accept only standard JSON.
  441. @param args Optional additional arguments
  442. forwarded to the handler's constructor.
  443. @see parse_options
  444. */
  445. template<class... Args>
  446. explicit
  447. basic_parser(
  448. parse_options const& opt,
  449. Args&&... args);
  450. /** Return a reference to the handler.
  451. This function provides access to the constructed
  452. instance of the handler owned by the parser.
  453. @par Complexity
  454. Constant.
  455. @par Exception Safety
  456. No-throw guarantee.
  457. */
  458. Handler&
  459. handler() noexcept
  460. {
  461. return h_;
  462. }
  463. /** Return a const reference to the handler.
  464. This function provides read-only access to the
  465. constructed instance of the handler owned by the parser.
  466. @par Complexity
  467. Constant.
  468. @par Exception Safety
  469. No-throw guarantee.
  470. */
  471. Handler const&
  472. handler() const noexcept
  473. {
  474. return h_;
  475. }
  476. /** Return the last error.
  477. This returns the last error code which
  478. was generated in the most recent call
  479. to @ref write_some.
  480. @par Complexity
  481. Constant.
  482. @par Exception Safety
  483. No-throw guarantee.
  484. */
  485. system::error_code
  486. last_error() const noexcept
  487. {
  488. return ec_;
  489. }
  490. /** Return true if a complete JSON has been parsed.
  491. This function returns `true` when all of these
  492. conditions are met:
  493. @li A complete serialized JSON has been
  494. presented to the parser, and
  495. @li No error or exception has occurred since the
  496. parser was constructed, or since the last call
  497. to @ref reset.
  498. @par Complexity
  499. Constant.
  500. @par Exception Safety
  501. No-throw guarantee.
  502. */
  503. bool
  504. done() const noexcept
  505. {
  506. return done_;
  507. }
  508. /** Reset the state, to parse a new document.
  509. This function discards the current parsing
  510. state, to prepare for parsing a new document.
  511. Dynamically allocated temporary memory used
  512. by the implementation is not deallocated.
  513. @par Complexity
  514. Constant.
  515. @par Exception Safety
  516. No-throw guarantee.
  517. */
  518. void
  519. reset() noexcept;
  520. /** Indicate a parsing failure.
  521. This changes the state of the parser to indicate
  522. that the parse has failed. A parser implementation
  523. can use this to fail the parser if needed due to
  524. external inputs.
  525. @note
  526. If `!ec`, the stored error code is unspecified.
  527. @par Complexity
  528. Constant.
  529. @par Exception Safety
  530. No-throw guarantee.
  531. @param ec The error code to set. If the code does
  532. not indicate failure, an implementation-defined
  533. error code that indicates failure will be stored
  534. instead.
  535. */
  536. void
  537. fail(system::error_code ec) noexcept;
  538. /** Parse some of an input string as JSON, incrementally.
  539. This function parses the JSON in the specified
  540. buffer, calling the handler to emit each SAX
  541. parsing event. The parse proceeds from the
  542. current state, which is at the beginning of a
  543. new JSON or in the middle of the current JSON
  544. if any characters were already parsed.
  545. \n
  546. The characters in the buffer are processed
  547. starting from the beginning, until one of the
  548. following conditions is met:
  549. @li All of the characters in the buffer
  550. have been parsed, or
  551. @li Some of the characters in the buffer
  552. have been parsed and the JSON is complete, or
  553. @li A parsing error occurs.
  554. The supplied buffer does not need to contain the
  555. entire JSON. Subsequent calls can provide more
  556. serialized data, allowing JSON to be processed
  557. incrementally. The end of the serialized JSON
  558. can be indicated by passing `more = false`.
  559. @par Complexity
  560. Linear in `size`.
  561. @par Exception Safety
  562. Basic guarantee.
  563. Calls to the handler may throw.
  564. Upon error or exception, subsequent calls will
  565. fail until @ref reset is called to parse a new JSON.
  566. @return The number of characters successfully
  567. parsed, which may be smaller than `size`.
  568. @param more `true` if there are possibly more
  569. buffers in the current JSON, otherwise `false`.
  570. @param data A pointer to a buffer of `size`
  571. characters to parse.
  572. @param size The number of characters pointed to
  573. by `data`.
  574. @param ec Set to the error, if any occurred.
  575. */
  576. /** @{ */
  577. std::size_t
  578. write_some(
  579. bool more,
  580. char const* data,
  581. std::size_t size,
  582. system::error_code& ec); // overload reporting the error through a system::error_code
  583. std::size_t
  584. write_some(
  585. bool more,
  586. char const* data,
  587. std::size_t size,
  588. std::error_code& ec); // overload reporting the error through a std::error_code; documented jointly with the one above
  589. /** @} */
  590. };
  591. } // namespace json
  592. } // namespace boost
  593. #endif