sse2.hpp 11 KB


  1. //
  2. // Copyright (c) 2019 Peter Dimov (pdimov at gmail dot com),
  3. // Vinnie Falco ([email protected])
  4. // Copyright (c) 2020 Krystian Stasiowski ([email protected])
  5. //
  6. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  7. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  8. //
  9. // Official repository: https://github.com/boostorg/json
  10. //
  11. #ifndef BOOST_JSON_DETAIL_SSE2_HPP
  12. #define BOOST_JSON_DETAIL_SSE2_HPP
  13. #include <boost/json/detail/config.hpp>
  14. #include <boost/json/detail/utf8.hpp>
  15. #include <cstddef>
  16. #include <cstring>
  17. #ifdef BOOST_JSON_USE_SSE2
  18. # include <emmintrin.h>
  19. # include <xmmintrin.h>
  20. # ifdef _MSC_VER
  21. # include <intrin.h>
  22. # endif
  23. #endif
  24. namespace boost {
  25. namespace json {
  26. namespace detail {
  27. #ifdef BOOST_JSON_USE_SSE2
  // Scans [p, end) and returns a pointer to the first character that is a
  // double quote, a backslash, or a control character (value below 0x20).
  // Returns `end` when no such character is present.
  //
  // The template parameter is not consulted by this primary template;
  // bytes >= 0x80 are stepped over without any further inspection.
  template<bool AllowBadUTF8>
  inline
  const char*
  count_valid(
      char const* p,
      const char* end) noexcept
  {
      __m128i const quote_v = _mm_set1_epi8( '\x22' ); // '"'
      __m128i const slash_v = _mm_set1_epi8( '\\' );
      __m128i const ctrl_max = _mm_set1_epi8( 0x1F );

      // Vector part: examine 16 bytes per iteration.
      for( ; end - p >= 16; p += 16 )
      {
          __m128i const chunk =
              _mm_loadu_si128( reinterpret_cast<__m128i const*>( p ) );

          __m128i const is_quote_or_slash = _mm_or_si128(
              _mm_cmpeq_epi8( chunk, quote_v ),
              _mm_cmpeq_epi8( chunk, slash_v ) );

          // Unsigned "chunk <= 0x1F": min(chunk, 0x1F) equals chunk
          // exactly for the bytes that are control characters.
          __m128i const is_control = _mm_cmpeq_epi8(
              _mm_min_epu8( chunk, ctrl_max ), chunk );

          int const mask = _mm_movemask_epi8(
              _mm_or_si128( is_quote_or_slash, is_control ) );
          if( mask == 0 )
              continue;

          // The lowest set bit is the offset of the first flagged byte.
  #if defined(__GNUC__) || defined(__clang__)
          return p + ( __builtin_ffs( mask ) - 1 );
  #else
          unsigned long index;
          _BitScanForward( &index, mask );
          return p + static_cast<int>( index );
  #endif
      }

      // Scalar part: fewer than 16 bytes remain.
      for( ; p != end; ++p )
      {
          unsigned char const c = *p;
          if( c == '\x22' || c == '\\' || c < 0x20 )
              break;
      }
      return p;
  }
  71. template<>
  72. inline
  73. const char*
  74. count_valid<false>(
  75. char const* p,
  76. const char* end) noexcept
  77. {
  78. __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
  79. __m128i const q2 = _mm_set1_epi8( '\\' );
  80. __m128i const q3 = _mm_set1_epi8( 0x20 );
  81. while(end - p >= 16)
  82. {
  83. __m128i v1 = _mm_loadu_si128( (__m128i const*)p );
  84. __m128i v2 = _mm_cmpeq_epi8( v1, q1 );
  85. __m128i v3 = _mm_cmpeq_epi8( v1, q2 );
  86. __m128i v4 = _mm_cmplt_epi8( v1, q3 );
  87. __m128i v5 = _mm_or_si128( v2, v3 );
  88. __m128i v6 = _mm_or_si128( v5, v4 );
  89. int w = _mm_movemask_epi8( v6 );
  90. if( w != 0 )
  91. {
  92. int m;
  93. #if defined(__GNUC__) || defined(__clang__)
  94. m = __builtin_ffs( w ) - 1;
  95. #else
  96. unsigned long index;
  97. _BitScanForward( &index, w );
  98. m = index;
  99. #endif
  100. p += m;
  101. break;
  102. }
  103. p += 16;
  104. }
  105. while(p != end)
  106. {
  107. const unsigned char c = *p;
  108. if(c == '\x22' || c == '\\' || c < 0x20)
  109. break;
  110. if(c < 0x80)
  111. {
  112. ++p;
  113. continue;
  114. }
  115. // validate utf-8
  116. uint16_t first = classify_utf8(c);
  117. uint8_t len = first & 0xFF;
  118. if(BOOST_JSON_UNLIKELY(end - p < len))
  119. break;
  120. if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
  121. break;
  122. p += len;
  123. }
  124. return p;
  125. }
  126. #else
  // Portable version used when SSE2 is not enabled.
  //
  // Returns a pointer to the first character in [p, end) that is a double
  // quote, a backslash, or a control character (value below 0x20), or
  // `end` if there is none. The template parameter is not consulted here.
  template<bool AllowBadUTF8>
  char const*
  count_valid(
      char const* p,
      char const* end) noexcept
  {
      for( ; p != end; ++p )
      {
          unsigned char const c = *p;
          bool const stop = c < 0x20 || c == '\\' || c == '\x22';
          if( stop )
              break;
      }
      return p;
  }
  142. template<>
  143. inline
  144. char const*
  145. count_valid<false>(
  146. char const* p,
  147. char const* end) noexcept
  148. {
  149. while(p != end)
  150. {
  151. const unsigned char c = *p;
  152. if(c == '\x22' || c == '\\' || c < 0x20)
  153. break;
  154. if(c < 0x80)
  155. {
  156. ++p;
  157. continue;
  158. }
  159. // validate utf-8
  160. uint16_t first = classify_utf8(c);
  161. uint8_t len = first & 0xFF;
  162. if(BOOST_JSON_UNLIKELY(end - p < len))
  163. break;
  164. if(BOOST_JSON_UNLIKELY(! is_valid_utf8(p, first)))
  165. break;
  166. p += len;
  167. }
  168. return p;
  169. }
  170. #endif
  // KRYSTIAN NOTE: count_unescaped performs no UTF-8 validation;
  // non-ASCII bytes do not stop the scan
  // count_unescaped
  173. #ifdef BOOST_JSON_USE_SSE2
  // Counts the leading characters of s[0..n) that contain no double quote,
  // no backslash and no control character (value <= 0x1F).
  //
  // Only whole 16-byte chunks are examined: the scan ends at the first
  // flagged byte, or as soon as fewer than 16 bytes remain. The result can
  // therefore be smaller than the true length of the unescaped run (it is
  // 0 whenever n < 16). Bytes >= 0x80 are not flagged.
  //
  // s: start of the character buffer; n: number of readable characters.
  // Returns the number of characters skipped from the start of s.
  inline
  std::size_t
  count_unescaped(
      char const* s,
      std::size_t n) noexcept
  {
      __m128i const q1 = _mm_set1_epi8( '\x22' ); // '"'
      __m128i const q2 = _mm_set1_epi8( '\\' ); // '\\'
      __m128i const q3 = _mm_set1_epi8( 0x1F );

      char const* const s0 = s;

      while( n >= 16 )
      {
          __m128i const v1 =
              _mm_loadu_si128( reinterpret_cast<__m128i const*>( s ) );
          __m128i const v2 = _mm_cmpeq_epi8( v1, q1 ); // quote
          __m128i const v3 = _mm_cmpeq_epi8( v1, q2 ); // backslash
          __m128i const v4 = _mm_or_si128( v2, v3 ); // combine quotes and backslash
          // unsigned "v1 <= 0x1F": min(v1, 0x1F) == v1 only for controls
          __m128i const v5 = _mm_min_epu8( v1, q3 );
          __m128i const v6 = _mm_cmpeq_epi8( v5, v1 ); // controls
          __m128i const v7 = _mm_or_si128( v4, v6 ); // combine with control

          int const w = _mm_movemask_epi8( v7 );
          if( w != 0 )
          {
              // the lowest set bit is the offset of the first flagged byte
              int m;
  #if defined(__GNUC__) || defined(__clang__)
              m = __builtin_ffs( w ) - 1;
  #else
              unsigned long index;
              _BitScanForward( &index, w );
              m = static_cast<int>( index );
  #endif
              s += m;
              break;
          }

          s += 16;
          n -= 16;
      }

      // s never moves backwards, so the difference is non-negative
      return static_cast<std::size_t>( s - s0 );
  }
  212. #else
  // Stand-in used when SSE2 is not enabled: ignores both arguments and
  // always returns 0.
  inline
  std::size_t
  count_unescaped(
      char const* /*s*/,
      std::size_t /*n*/) noexcept
  {
      return std::size_t( 0 );
  }
  221. #endif
  222. // count_digits
  223. #ifdef BOOST_JSON_USE_SSE2
  // Returns how many of the 16 bytes starting at p form an unbroken run
  // of decimal digits '0'..'9' beginning at p[0] (a value in 0..16).
  // All 16 bytes p[0]..p[15] are loaded, so they must be readable.
  inline int count_digits( char const* p ) noexcept
  {
      __m128i const chunk =
          _mm_loadu_si128( reinterpret_cast<__m128i const*>( p ) );

      // Adding 70 moves '0'..'9' (48..57) onto 118..127, the top of the
      // signed 8-bit range. Every other byte value ends up below 118
      // (directly or after wrapping), so a signed "< 118" marks exactly
      // the non-digits.
      __m128i const shifted = _mm_add_epi8( chunk, _mm_set1_epi8( 70 ) );
      __m128i const non_digit =
          _mm_cmplt_epi8( shifted, _mm_set1_epi8( 118 ) );

      int const mask = _mm_movemask_epi8( non_digit );
      if( mask == 0 )
          return 16; // all sixteen bytes are digits

      // The lowest set bit is the offset of the first non-digit.
  #if defined(__GNUC__) || defined(__clang__)
      return __builtin_ffs( mask ) - 1;
  #else
      unsigned long index;
      _BitScanForward( &index, mask );
      return static_cast<int>( index );
  #endif
  }
  248. #else
  // Portable version: returns how many of the (at most 16) bytes starting
  // at p form an unbroken run of decimal digits beginning at p[0].
  // Reads stop at the first non-digit; up to p[15] may be read.
  inline int count_digits( char const* p ) noexcept
  {
      int n = 0;
      // Converting (byte - '0') to unsigned char maps anything outside
      // '0'..'9' to a value greater than 9.
      while( n != 16 && static_cast<unsigned char>( p[n] - '0' ) <= 9 )
          ++n;
      return n;
  }
  260. #endif
  261. // parse_unsigned
  262. inline uint64_t parse_unsigned( uint64_t r, char const * p, std::size_t n ) noexcept
  263. {
  264. while( n >= 4 )
  265. {
  266. // faster on on clang for x86,
  267. // slower on gcc
  268. #ifdef __clang__
  269. r = r * 10 + p[0] - '0';
  270. r = r * 10 + p[1] - '0';
  271. r = r * 10 + p[2] - '0';
  272. r = r * 10 + p[3] - '0';
  273. #else
  274. uint32_t v;
  275. std::memcpy( &v, p, 4 );
  276. endian::native_to_little_inplace(v);
  277. v -= 0x30303030;
  278. unsigned w0 = v & 0xFF;
  279. unsigned w1 = (v >> 8) & 0xFF;
  280. unsigned w2 = (v >> 16) & 0xFF;
  281. unsigned w3 = (v >> 24);
  282. r = (((r * 10 + w0) * 10 + w1) * 10 + w2) * 10 + w3;
  283. #endif
  284. p += 4;
  285. n -= 4;
  286. }
  287. switch( n )
  288. {
  289. case 0:
  290. break;
  291. case 1:
  292. r = r * 10 + p[0] - '0';
  293. break;
  294. case 2:
  295. r = r * 10 + p[0] - '0';
  296. r = r * 10 + p[1] - '0';
  297. break;
  298. case 3:
  299. r = r * 10 + p[0] - '0';
  300. r = r * 10 + p[1] - '0';
  301. r = r * 10 + p[2] - '0';
  302. break;
  303. }
  304. return r;
  305. }
  306. // KRYSTIAN: this function is unused
  307. // count_leading
  308. /*
  309. #ifdef BOOST_JSON_USE_SSE2
  310. // assumes p..p+15
  311. inline std::size_t count_leading( char const * p, char ch ) noexcept
  312. {
  313. __m128i const q1 = _mm_set1_epi8( ch );
  314. __m128i v = _mm_loadu_si128( (__m128i const*)p );
  315. __m128i w = _mm_cmpeq_epi8( v, q1 );
  316. int m = _mm_movemask_epi8( w ) ^ 0xFFFF;
  317. std::size_t n;
  318. if( m == 0 )
  319. {
  320. n = 16;
  321. }
  322. else
  323. {
  324. #if defined(__GNUC__) || defined(__clang__)
  325. n = __builtin_ffs( m ) - 1;
  326. #else
  327. unsigned long index;
  328. _BitScanForward( &index, m );
  329. n = index;
  330. #endif
  331. }
  332. return n;
  333. }
  334. #else
  335. // assumes p..p+15
  336. inline std::size_t count_leading( char const * p, char ch ) noexcept
  337. {
  338. std::size_t n = 0;
  339. for( ; n < 16 && *p == ch; ++p, ++n );
  340. return n;
  341. }
  342. #endif
  343. */
  344. // count_whitespace
  345. #ifdef BOOST_JSON_USE_SSE2
  // Returns a pointer to the first character in [p, end) that is not one
  // of ' ', '\t', '\r', '\n', or `end` if every character is one of those.
  inline const char* count_whitespace( char const* p, const char* end ) noexcept
  {
      // Quick exit: empty range, or a first character above 0x20, which
      // cannot be any of the four whitespace characters.
      if( p == end || static_cast<unsigned char>( *p ) > 0x20 )
          return p;

      __m128i const space_v = _mm_set1_epi8( ' ' );
      __m128i const newline_v = _mm_set1_epi8( '\n' );
      __m128i const bit4_v = _mm_set1_epi8( 4 ); // '\t' | 4 == '\r'
      __m128i const cr_v = _mm_set1_epi8( '\r' );

      // Vector part: examine 16 bytes per iteration.
      for( ; end - p >= 16; p += 16 )
      {
          __m128i const chunk =
              _mm_loadu_si128( reinterpret_cast<__m128i const*>( p ) );

          __m128i const sp_or_nl = _mm_or_si128(
              _mm_cmpeq_epi8( chunk, space_v ),
              _mm_cmpeq_epi8( chunk, newline_v ) );

          // OR-ing in bit 2 turns '\t' into '\r', so one comparison
          // recognises both.
          __m128i const tab_or_cr = _mm_cmpeq_epi8(
              _mm_or_si128( chunk, bit4_v ), cr_v );

          // Invert the 16-bit mask so that set bits mark non-whitespace.
          int const non_ws = _mm_movemask_epi8(
              _mm_or_si128( sp_or_nl, tab_or_cr ) ) ^ 0xFFFF;
          if( non_ws == 0 )
              continue;

  #if defined(__GNUC__) || defined(__clang__)
          return p + static_cast<std::size_t>( __builtin_ffs( non_ws ) - 1 );
  #else
          unsigned long index;
          _BitScanForward( &index, non_ws );
          return p + static_cast<std::size_t>( index );
  #endif
      }

      // Scalar part: fewer than 16 bytes remain.
      for( ; p != end; ++p )
      {
          char const c = *p;
          if( c != ' ' && c != '\t' && c != '\r' && c != '\n' )
              break;
      }
      return p;
  }
  394. /*
  395. // slightly faster on msvc-14.2, slightly slower on clang-win
  396. inline std::size_t count_whitespace( char const * p, std::size_t n ) noexcept
  397. {
  398. char const * p0 = p;
  399. while( n > 0 )
  400. {
  401. char ch = *p;
  402. if( ch == '\n' || ch == '\r' )
  403. {
  404. ++p;
  405. --n;
  406. continue;
  407. }
  408. if( ch != ' ' && ch != '\t' )
  409. {
  410. break;
  411. }
  412. ++p;
  413. --n;
  414. while( n >= 16 )
  415. {
  416. std::size_t n2 = count_leading( p, ch );
  417. p += n2;
  418. n -= n2;
  419. if( n2 < 16 )
  420. {
  421. break;
  422. }
  423. }
  424. }
  425. return p - p0;
  426. }
  427. */
  428. #else
  // Portable version: returns a pointer to the first character in
  // [p, end) that is not one of ' ', '\n', '\r', '\t', or `end` if every
  // character is one of those.
  inline const char* count_whitespace( char const* p, const char* end ) noexcept
  {
      while( p != end )
      {
          switch( *p )
          {
          case ' ':
          case '\n':
          case '\r':
          case '\t':
              ++p;
              continue; // next iteration of the enclosing while
          default:
              return p;
          }
      }
      return p;
  }
  438. #endif
  439. } // detail
  440. } // namespace json
  441. } // namespace boost
  442. #endif