123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410 |
- //
- // Copyright (c) 2021 Vinnie Falco (vinnie dot falco at gmail dot com)
- //
- // Distributed under the Boost Software License, Version 1.0. (See accompanying
- // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
- //
- // Official repository: https://github.com/boostorg/url
- //
- #ifndef BOOST_URL_GRAMMAR_LUT_CHARS_HPP
- #define BOOST_URL_GRAMMAR_LUT_CHARS_HPP
- #include <boost/url/detail/config.hpp>
- #include <boost/url/grammar/detail/charset.hpp>
- #include <cstdint>
- #include <type_traits>
- // Credit to Peter Dimov for ideas regarding
- // SIMD constexpr, and character set masks.
- namespace boost {
- namespace urls {
- namespace grammar {
- #ifndef BOOST_URL_DOCS
- namespace detail {
- template<class T, class = void>
- struct is_pred : std::false_type {};
- template<class T>
- struct is_pred<T, void_t<
- decltype(
- std::declval<bool&>() =
- std::declval<T const&>().operator()(
- std::declval<char>())
- ) > > : std::true_type
- {
- };
- } // detail
- #endif
- /** A set of characters
- The characters defined by instances of
- this set are provided upon construction.
- The `constexpr` implementation allows
- these to become compile-time constants.
- @par Example
- Character sets are used with rules and the
- functions @ref find_if and @ref find_if_not.
- @code
- constexpr lut_chars vowel_chars = "AEIOU" "aeiou";
- system::result< core::string_view > rv = parse( "Aiea", token_rule( vowel_chars ) );
- @endcode
- @see
- @ref find_if,
- @ref find_if_not,
- @ref parse,
- @ref token_rule.
- */
- class lut_chars
- {
- std::uint64_t mask_[4] = {};
- constexpr
- static
- std::uint64_t
- lo(char c) noexcept
- {
- return static_cast<
- unsigned char>(c) & 3;
- }
- constexpr
- static
- std::uint64_t
- hi(char c) noexcept
- {
- return 1ULL << (static_cast<
- unsigned char>(c) >> 2);
- }
- constexpr
- static
- lut_chars
- construct(
- char const* s) noexcept
- {
- return *s
- ? lut_chars(*s) +
- construct(s+1)
- : lut_chars();
- }
- constexpr
- static
- lut_chars
- construct(
- unsigned char ch,
- bool b) noexcept
- {
- return b
- ? lut_chars(ch)
- : lut_chars();
- }
- template<class Pred>
- constexpr
- static
- lut_chars
- construct(
- Pred pred,
- unsigned char ch) noexcept
- {
- return ch == 255
- ? construct(ch, pred(static_cast<char>(ch)))
- : construct(ch, pred(static_cast<char>(ch))) +
- construct(pred, ch + 1);
- }
- constexpr
- lut_chars() = default;
- constexpr
- lut_chars(
- std::uint64_t m0,
- std::uint64_t m1,
- std::uint64_t m2,
- std::uint64_t m3) noexcept
- : mask_{ m0, m1, m2, m3 }
- {
- }
- public:
- /** Constructor
- This function constructs a character
- set which has as a single member,
- the character `ch`.
- @par Example
- @code
- constexpr lut_chars asterisk( '*' );
- @endcode
- @par Complexity
- Constant.
- @par Exception Safety
- Throws nothing.
- @param ch A character.
- */
- constexpr
- lut_chars(char ch) noexcept
- : mask_ {
- lo(ch) == 0 ? hi(ch) : 0,
- lo(ch) == 1 ? hi(ch) : 0,
- lo(ch) == 2 ? hi(ch) : 0,
- lo(ch) == 3 ? hi(ch) : 0 }
- {
- }
- /** Constructor
- This function constructs a character
- set which has as members, all of the
- characters present in the null-terminated
- string `s`.
- @par Example
- @code
- constexpr lut_chars digits = "0123456789";
- @endcode
- @par Complexity
- Linear in `::strlen(s)`, or constant
- if `s` is a constant expression.
- @par Exception Safety
- Throws nothing.
- @param s A null-terminated string.
- */
- constexpr
- lut_chars(
- char const* s) noexcept
- : lut_chars(construct(s))
- {
- }
- /** Constructor.
- This function constructs a character
- set which has as members, every value
- of `char ch` for which the expression
- `pred(ch)` returns `true`.
- @par Example
- @code
- struct is_digit
- {
- constexpr bool
- operator()(char c ) const noexcept
- {
- return c >= '0' && c <= '9';
- }
- };
- constexpr lut_chars digits( is_digit{} );
- @endcode
- @par Complexity
- Linear in `pred`, or constant if
- `pred(ch)` is a constant expression.
- @par Exception Safety
- Throws nothing.
- @param pred The function object to
- use for determining membership in
- the character set.
- */
- template<class Pred
- #ifndef BOOST_URL_DOCS
- ,class = typename std::enable_if<
- detail::is_pred<Pred>::value &&
- ! std::is_base_of<
- lut_chars, Pred>::value>::type
- #endif
- >
- constexpr
- lut_chars(Pred const& pred) noexcept
- : lut_chars(
- construct(pred, 0))
- {
- }
- /** Return true if ch is in the character set.
- This function returns true if the
- character `ch` is in the set, otherwise
- it returns false.
- @par Complexity
- Constant.
- @par Exception Safety
- Throws nothing.
- @param ch The character to test.
- */
- constexpr
- bool
- operator()(
- unsigned char ch) const noexcept
- {
- return operator()(static_cast<char>(ch));
- }
- /// @copydoc operator()(unsigned char) const
- constexpr
- bool
- operator()(char ch) const noexcept
- {
- return mask_[lo(ch)] & hi(ch);
- }
- /** Return the union of two character sets.
- This function returns a new character
- set which contains all of the characters
- in `cs0` as well as all of the characters
- in `cs`.
- @par Example
- This creates a character set which
- includes all letters and numbers
- @code
- constexpr lut_chars alpha_chars(
- "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
- "abcdefghijklmnopqrstuvwxyz");
- constexpr lut_chars alnum_chars = alpha_chars + "0123456789";
- @endcode
- @par Complexity
- Constant.
- @return The new character set.
- @param cs0 A character to join
-
- @param cs1 A character to join
- */
- friend
- constexpr
- lut_chars
- operator+(
- lut_chars const& cs0,
- lut_chars const& cs1) noexcept
- {
- return lut_chars(
- cs0.mask_[0] | cs1.mask_[0],
- cs0.mask_[1] | cs1.mask_[1],
- cs0.mask_[2] | cs1.mask_[2],
- cs0.mask_[3] | cs1.mask_[3]);
- }
- /** Return a new character set by subtracting
- This function returns a new character
- set which is formed from all of the
- characters in `cs0` which are not in `cs`.
- @par Example
- This statement declares a character set
- containing all the lowercase letters
- which are not vowels:
- @code
- constexpr lut_chars consonants = lut_chars("abcdefghijklmnopqrstuvwxyz") - "aeiou";
- @endcode
- @par Complexity
- Constant.
- @return The new character set.
- @param cs0 A character set to join.
-
- @param cs1 A character set to join.
- */
- friend
- constexpr
- lut_chars
- operator-(
- lut_chars const& cs0,
- lut_chars const& cs1) noexcept
- {
- return lut_chars(
- cs0.mask_[0] & ~cs1.mask_[0],
- cs0.mask_[1] & ~cs1.mask_[1],
- cs0.mask_[2] & ~cs1.mask_[2],
- cs0.mask_[3] & ~cs1.mask_[3]);
- }
- /** Return a new character set which is the complement of another character set.
- This function returns a new character
- set which contains all of the characters
- that are not in `*this`.
- @par Example
- This statement declares a character set
- containing everything but vowels:
- @code
- constexpr lut_chars not_vowels = ~lut_chars( "AEIOU" "aeiou" );
- @endcode
- @par Complexity
- Constant.
- @par Exception Safety
- Throws nothing.
- @return The new character set.
- */
- constexpr
- lut_chars
- operator~() const noexcept
- {
- return lut_chars(
- ~mask_[0],
- ~mask_[1],
- ~mask_[2],
- ~mask_[3]
- );
- }
- #ifndef BOOST_URL_DOCS
- #ifdef BOOST_URL_USE_SSE2
- char const*
- find_if(
- char const* first,
- char const* last) const noexcept
- {
- return detail::find_if_pred(
- *this, first, last);
- }
- char const*
- find_if_not(
- char const* first,
- char const* last) const noexcept
- {
- return detail::find_if_not_pred(
- *this, first, last);
- }
- #endif
- #endif
- };
- } // grammar
- } // urls
- } // boost
- #endif
|