12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715 |
- //
- // Copyright (c) 2019 Vinnie Falco ([email protected])
- // Copyright (c) 2020 Krystian Stasiowski ([email protected])
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- //
- // Official repository: https://github.com/boostorg/json
- //
- #ifndef BOOST_JSON_BASIC_PARSER_HPP
- #define BOOST_JSON_BASIC_PARSER_HPP
- #include <boost/json/detail/config.hpp>
- #include <boost/json/detail/except.hpp>
- #include <boost/json/error.hpp>
- #include <boost/json/kind.hpp>
- #include <boost/json/parse_options.hpp>
- #include <boost/json/detail/stack.hpp>
- #include <boost/json/detail/stream.hpp>
- #include <boost/json/detail/utf8.hpp>
- #include <boost/json/detail/sbo_buffer.hpp>
- namespace boost {
- namespace json {
- /** An incremental SAX parser for serialized JSON.
- This implements a SAX-style parser, invoking a
- caller-supplied handler with each parsing event.
- To use, first declare a variable of type
- `basic_parser<T>` where `T` meets the handler
- requirements specified below. Then call
- @ref write_some one or more times with the input,
- setting `more = false` on the final buffer.
- The parsing events are realized through member
- function calls on the handler, which exists
- as a data member of the parser.
- \n
- The parser may dynamically allocate intermediate
- storage as needed to accommodate the nesting level
- of the input JSON. On subsequent invocations, the
- parser can cheaply re-use this memory, improving
- performance. This storage is freed when the
- parser is destroyed.
- @par Usage
- To get the declaration and function definitions
- for this class it is necessary to include this
- file instead:
- @code
- #include <boost/json/basic_parser_impl.hpp>
- @endcode
- Users who wish to parse JSON into the DOM container
- @ref value will not use this class directly; instead
- they will create an instance of @ref parser or
- @ref stream_parser and use that instead. Alternatively,
- they may call the function @ref parse. This class is
- designed for users who wish to perform custom actions
- instead of building a @ref value. For example, to
- produce a DOM from an external library.
- \n
- @note
- By default, only conforming JSON using UTF-8
- encoding is accepted. However, select non-compliant
- syntax can be allowed by construction using a
- @ref parse_options set to desired values.
- @par Handler
- The handler provided must be implemented as an
- object of class type which defines each of the
- required event member functions below. The event
- functions return a `bool` where `true` indicates
- success, and `false` indicates failure. If the
- member function returns `false`, it must set
- the error code to a suitable value. This error
- code will be returned by the write function to
- the caller.
- \n
- Handlers are required to declare the maximum
- limits on various elements. If these limits
- are exceeded during parsing, then parsing
- fails with an error.
- \n
- The following declaration meets the parser's
- handler requirements:
- @code
- struct handler
- {
- /// The maximum number of elements allowed in an array
- static constexpr std::size_t max_array_size = -1;
- /// The maximum number of elements allowed in an object
- static constexpr std::size_t max_object_size = -1;
- /// The maximum number of characters allowed in a string
- static constexpr std::size_t max_string_size = -1;
- /// The maximum number of characters allowed in a key
- static constexpr std::size_t max_key_size = -1;
- /// Called once when the JSON parsing begins.
- ///
- /// @return `true` on success.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_document_begin( error_code& ec );
- /// Called when the JSON parsing is done.
- ///
- /// @return `true` on success.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_document_end( error_code& ec );
- /// Called when the beginning of an array is encountered.
- ///
- /// @return `true` on success.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_array_begin( error_code& ec );
- /// Called when the end of the current array is encountered.
- ///
- /// @return `true` on success.
- /// @param n The number of elements in the array.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_array_end( std::size_t n, error_code& ec );
- /// Called when the beginning of an object is encountered.
- ///
- /// @return `true` on success.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_object_begin( error_code& ec );
- /// Called when the end of the current object is encountered.
- ///
- /// @return `true` on success.
- /// @param n The number of elements in the object.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_object_end( std::size_t n, error_code& ec );
- /// Called with characters corresponding to part of the current string.
- ///
- /// @return `true` on success.
- /// @param s The partial characters
- /// @param n The total size of the string thus far
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_string_part( string_view s, std::size_t n, error_code& ec );
- /// Called with the last characters corresponding to the current string.
- ///
- /// @return `true` on success.
- /// @param s The remaining characters
- /// @param n The total size of the string
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_string( string_view s, std::size_t n, error_code& ec );
- /// Called with characters corresponding to part of the current key.
- ///
- /// @return `true` on success.
- /// @param s The partial characters
- /// @param n The total size of the key thus far
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_key_part( string_view s, std::size_t n, error_code& ec );
- /// Called with the last characters corresponding to the current key.
- ///
- /// @return `true` on success.
- /// @param s The remaining characters
- /// @param n The total size of the key
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_key( string_view s, std::size_t n, error_code& ec );
- /// Called with the characters corresponding to part of the current number.
- ///
- /// @return `true` on success.
- /// @param s The partial characters
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_number_part( string_view s, error_code& ec );
- /// Called when a signed integer is parsed.
- ///
- /// @return `true` on success.
- /// @param i The value
- /// @param s The remaining characters
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_int64( int64_t i, string_view s, error_code& ec );
- /// Called when an unsigned integer is parsed.
- ///
- /// @return `true` on success.
- /// @param u The value
- /// @param s The remaining characters
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_uint64( uint64_t u, string_view s, error_code& ec );
- /// Called when a double is parsed.
- ///
- /// @return `true` on success.
- /// @param d The value
- /// @param s The remaining characters
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_double( double d, string_view s, error_code& ec );
- /// Called when a boolean is parsed.
- ///
- /// @return `true` on success.
- /// @param b The value
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_bool( bool b, error_code& ec );
- /// Called when a null is parsed.
- ///
- /// @return `true` on success.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_null( error_code& ec );
- /// Called with characters corresponding to part of the current comment.
- ///
- /// @return `true` on success.
- /// @param s The partial characters.
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_comment_part( string_view s, error_code& ec );
- /// Called with the last characters corresponding to the current comment.
- ///
- /// @return `true` on success.
- /// @param s The remaining characters
- /// @param ec Set to the error, if any occurred.
- ///
- bool on_comment( string_view s, error_code& ec );
- };
- @endcode
- @see
- @ref parse,
- @ref stream_parser,
- [Validating parser example](../../doc/html/json/examples.html#json.examples.validate).
- @headerfile <boost/json/basic_parser.hpp>
- */
- template<class Handler>
- class basic_parser // SAX-style incremental parser; owns a Handler and reports events to it
- {
- enum class state : char // parser state identifiers, stored in a single char
- {
- doc1, doc3,
- com1, com2, com3, com4,
- lit1,
- str1, str2, str3, str4,
- str5, str6, str7, str8,
- sur1, sur2, sur3,
- sur4, sur5, sur6,
- obj1, obj2, obj3, obj4,
- obj5, obj6, obj7, obj8,
- obj9, obj10, obj11,
- arr1, arr2, arr3,
- arr4, arr5, arr6,
- num1, num2, num3, num4,
- num5, num6, num7, num8,
- exp1, exp2, exp3,
- val1, val2, val3
- };
- struct number // intermediate fields of a number; passed to the number overloads of suspend/maybe_suspend
- {
- uint64_t mant; // NOTE(review): presumably the mantissa digits accumulated so far - confirm in the impl
- int bias; // NOTE(review): presumably an exponent adjustment - confirm in the impl
- int exp; // NOTE(review): presumably the explicit exponent value - confirm in the impl
- bool frac; // NOTE(review): presumably set when a fractional part is present - confirm
- bool neg; // NOTE(review): presumably set for a negative number - confirm
- };
- template< bool StackEmpty_, char First_ >
- struct parse_number_helper; // declared only; defined outside this header section
- // optimization: must come first
- Handler h_; // the handler instance exposed through handler()
- number num_;
- system::error_code ec_; // value returned by last_error()
- detail::stack st_;
- detail::utf8_sequence seq_;
- unsigned u1_; // not initialized here
- unsigned u2_; // not initialized here
- bool more_; // false for final buffer
- bool done_ = false; // true on complete parse
- bool clean_ = true; // write_some exited cleanly
- const char* end_;
- detail::sbo_buffer<16 + 16 + 1 + 1> num_buf_; // NOTE(review): meaning of the capacity terms is not shown here - confirm
- parse_options opt_;
- // how many levels deeper the parser can go
- std::size_t depth_ = opt_.max_depth; // starts at the configured limit; opt_ is declared first so it is initialized first
- unsigned char cur_lit_ = 0; // NOTE(review): presumably identifies the literal being parsed - confirm
- unsigned char lit_offset_ = 0; // NOTE(review): presumably the position reached inside that literal - confirm
- inline void reserve();
- inline const char* sentinel();
- inline bool incomplete(
- const detail::const_stream_wrapper& cs);
- #ifdef __INTEL_COMPILER // NOTE(review): warning 2196 presumably concerns combining noinline with inline - confirm
- #pragma warning push
- #pragma warning disable 2196
- #endif
- BOOST_NOINLINE
- inline
- const char*
- suspend_or_fail(state st);
- BOOST_NOINLINE
- inline
- const char*
- suspend_or_fail(
- state st,
- std::size_t n);
- BOOST_NOINLINE
- inline
- const char*
- fail(const char* p) noexcept;
- BOOST_NOINLINE
- inline
- const char*
- fail(
- const char* p,
- error ev,
- source_location const* loc) noexcept;
- BOOST_NOINLINE
- inline
- const char*
- maybe_suspend(
- const char* p,
- state st);
- BOOST_NOINLINE
- inline
- const char*
- maybe_suspend(
- const char* p,
- state st,
- std::size_t n);
- BOOST_NOINLINE
- inline
- const char*
- maybe_suspend(
- const char* p,
- state st,
- const number& num);
- BOOST_NOINLINE
- inline
- const char*
- suspend(
- const char* p,
- state st);
- BOOST_NOINLINE
- inline
- const char*
- suspend(
- const char* p,
- state st,
- const number& num);
- #ifdef __INTEL_COMPILER
- #pragma warning pop
- #endif
- template<bool StackEmpty_/*, bool Terminal_*/>
- const char* parse_comment(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- /*std::integral_constant<bool, Terminal_>*/ bool terminal); // option is a runtime bool; the compile-time form is commented out
- template<bool StackEmpty_>
- const char* parse_document(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty);
- template<bool StackEmpty_, bool AllowComments_/*,
- bool AllowTrailing_, bool AllowBadUTF8_*/>
- const char* parse_value(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<bool, AllowComments_> allow_comments,
- /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
- template<bool AllowComments_/*,
- bool AllowTrailing_, bool AllowBadUTF8_*/>
- const char* resume_value(const char* p,
- std::integral_constant<bool, AllowComments_> allow_comments,
- /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
- template<bool StackEmpty_, bool AllowComments_/*,
- bool AllowTrailing_, bool AllowBadUTF8_*/>
- const char* parse_object(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<bool, AllowComments_> allow_comments,
- /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
- template<bool StackEmpty_, bool AllowComments_/*,
- bool AllowTrailing_, bool AllowBadUTF8_*/>
- const char* parse_array(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<bool, AllowComments_> allow_comments,
- /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
- template<int Literal>
- const char* parse_literal(const char* p,
- std::integral_constant<int, Literal> literal);
- template<bool StackEmpty_, bool IsKey_/*,
- bool AllowBadUTF8_*/>
- const char* parse_string(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<bool, IsKey_> is_key,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
- template<bool StackEmpty_, char First_, number_precision Numbers_>
- const char* parse_number(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<char, First_> first,
- std::integral_constant<number_precision, Numbers_> numbers);
- template<bool StackEmpty_, bool IsKey_/*,
- bool AllowBadUTF8_*/>
- const char* parse_unescaped(const char* p,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- std::integral_constant<bool, IsKey_> is_key,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
- template<bool StackEmpty_/*, bool IsKey_,
- bool AllowBadUTF8_*/>
- const char* parse_escaped(
- const char* p,
- std::size_t total,
- std::integral_constant<bool, StackEmpty_> stack_empty,
- /*std::integral_constant<bool, IsKey_>*/ bool is_key,
- /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8);
- // intentionally private
- std::size_t
- depth() const noexcept
- {
- return opt_.max_depth - depth_; // configured limit minus the levels still available
- }
- public:
- /// Copy constructor (deleted)
- basic_parser(
- basic_parser const&) = delete;
- /// Copy assignment (deleted)
- basic_parser& operator=(
- basic_parser const&) = delete;
- /** Destructor.
- All dynamically allocated internal memory is freed.
- @par Effects
- @code
- this->handler().~Handler()
- @endcode
- @par Complexity
- Same as `~Handler()`.
- @par Exception Safety
- Same as `~Handler()`.
- */
- ~basic_parser() = default;
- /** Constructor.
- This function constructs the parser with
- the specified options, with any additional
- arguments forwarded to the handler's constructor.
- @par Complexity
- Same as `Handler( std::forward< Args >( args )... )`.
- @par Exception Safety
- Same as `Handler( std::forward< Args >( args )... )`.
- @param opt Configuration settings for the parser.
- If this structure is default constructed, the
- parser will accept only standard JSON.
- @param args Optional additional arguments
- forwarded to the handler's constructor.
- @see parse_options
- */
- template<class... Args>
- explicit
- basic_parser(
- parse_options const& opt,
- Args&&... args);
- /** Return a reference to the handler.
- This function provides access to the constructed
- instance of the handler owned by the parser.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- @return A mutable reference to the owned handler.
- */
- Handler&
- handler() noexcept
- {
- return h_;
- }
- /** Return a reference to the handler.
- This function provides access to the constructed
- instance of the handler owned by the parser.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- @return A const reference to the owned handler.
- */
- Handler const&
- handler() const noexcept
- {
- return h_;
- }
- /** Return the last error.
- This returns the last error code which
- was generated in the most recent call
- to @ref write_some.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- @return A copy of the stored error code.
- */
- system::error_code
- last_error() const noexcept
- {
- return ec_;
- }
- /** Return true if a complete JSON has been parsed.
- This function returns `true` when all of these
- conditions are met:
- @li A complete serialized JSON has been
- presented to the parser, and
- @li No error or exception has occurred since the
- parser was constructed, or since the last call
- to @ref reset.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- */
- bool
- done() const noexcept
- {
- return done_;
- }
- /** Reset the state, to parse a new document.
- This function discards the current parsing
- state, to prepare for parsing a new document.
- Dynamically allocated temporary memory used
- by the implementation is not deallocated.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- */
- void
- reset() noexcept;
- /** Indicate a parsing failure.
- This changes the state of the parser to indicate
- that the parse has failed. A parser implementation
- can use this to fail the parser if needed due to
- external inputs.
- @note
- If `!ec`, the stored error code is unspecified.
- @par Complexity
- Constant.
- @par Exception Safety
- No-throw guarantee.
- @param ec The error code to set. If the code does
- not indicate failure, an implementation-defined
- error code that indicates failure will be stored
- instead.
- */
- void
- fail(system::error_code ec) noexcept;
- /** Parse some of an input string as JSON, incrementally.
- This function parses the JSON in the specified
- buffer, calling the handler to emit each SAX
- parsing event. The parse proceeds from the
- current state, which is at the beginning of a
- new JSON or in the middle of the current JSON
- if any characters were already parsed.
- \n
- The characters in the buffer are processed
- starting from the beginning, until one of the
- following conditions is met:
- @li All of the characters in the buffer
- have been parsed, or
- @li Some of the characters in the buffer
- have been parsed and the JSON is complete, or
- @li A parsing error occurs.
- The supplied buffer does not need to contain the
- entire JSON. Subsequent calls can provide more
- serialized data, allowing JSON to be processed
- incrementally. The end of the serialized JSON
- can be indicated by passing `more = false`.
- @par Complexity
- Linear in `size`.
- @par Exception Safety
- Basic guarantee.
- Calls to the handler may throw.
- Upon error or exception, subsequent calls will
- fail until @ref reset is called to parse a new JSON.
- @return The number of characters successfully
- parsed, which may be smaller than `size`.
- @param more `true` if there are possibly more
- buffers in the current JSON, otherwise `false`.
- @param data A pointer to a buffer of `size`
- characters to parse.
- @param size The number of characters pointed to
- by `data`.
- @param ec Set to the error, if any occurred.
- */
- /** @{ */
- std::size_t
- write_some(
- bool more,
- char const* data,
- std::size_t size,
- system::error_code& ec); // overload reporting through system::error_code
- std::size_t
- write_some(
- bool more,
- char const* data,
- std::size_t size,
- std::error_code& ec); // overload reporting through std::error_code
- /** @} */
- };
- } // namespace json
- } // namespace boost
- #endif
|