basic_parser_impl.hpp 89 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949
  1. //
  2. // Copyright (c) 2019 Vinnie Falco ([email protected])
  3. // Copyright (c) 2020 Krystian Stasiowski ([email protected])
  4. //
  5. // Distributed under the Boost Software License, Version 1.0. (See accompanying
  6. // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
  7. //
  8. // Official repository: https://github.com/boostorg/json
  9. //
  10. #ifndef BOOST_JSON_BASIC_PARSER_IMPL_HPP
  11. #define BOOST_JSON_BASIC_PARSER_IMPL_HPP
  12. #include <boost/json/detail/config.hpp>
  13. #include <boost/json/basic_parser.hpp>
  14. #include <boost/json/error.hpp>
  15. #include <boost/json/detail/buffer.hpp>
  16. #include <boost/json/detail/charconv/from_chars.hpp>
  17. #include <boost/json/detail/sse2.hpp>
  18. #include <boost/mp11/algorithm.hpp>
  19. #include <boost/mp11/integral.hpp>
  20. #include <cmath>
  21. #include <limits>
  22. #include <cstring>
  23. #ifdef _MSC_VER
  24. #pragma warning(push)
  25. #pragma warning(disable: 4702) // unreachable code
  26. #pragma warning(disable: 4127) // conditional expression is constant
  27. #endif
  28. /* This file must be manually included to get the
  29. function template definitions for basic_parser.
  30. */
  31. /* Reference:
  32. https://www.json.org/
  33. RFC 7159: The JavaScript Object Notation (JSON) Data Interchange Format
  34. https://tools.ietf.org/html/rfc7159
  35. https://ampl.com/netlib/fp/dtoa.c
  36. */
  37. #ifndef BOOST_JSON_DOCS
  38. namespace boost {
  39. namespace json {
  40. namespace detail {
  // Returns 10 raised to the power `exp` as a double, using a lookup table.
  //
  // exp >  308 : returns +infinity
  // exp < -308 : returns 0.0
  // otherwise  : returns the tabulated value 1e<exp>
  inline
  double
  pow10(int exp) noexcept
  {
      // One entry for every decimal exponent in [-308, 308];
      // tab[i] holds 1e(i - 308). The size is deduced from the
      // initializer so that no zero-filled padding element can exist.
      static double const tab[] = {
                          1e-308, 1e-307, 1e-306, 1e-305, 1e-304, 1e-303, 1e-302, 1e-301,
          1e-300, 1e-299, 1e-298, 1e-297, 1e-296, 1e-295, 1e-294, 1e-293, 1e-292, 1e-291,
          1e-290, 1e-289, 1e-288, 1e-287, 1e-286, 1e-285, 1e-284, 1e-283, 1e-282, 1e-281,
          1e-280, 1e-279, 1e-278, 1e-277, 1e-276, 1e-275, 1e-274, 1e-273, 1e-272, 1e-271,
          1e-270, 1e-269, 1e-268, 1e-267, 1e-266, 1e-265, 1e-264, 1e-263, 1e-262, 1e-261,
          1e-260, 1e-259, 1e-258, 1e-257, 1e-256, 1e-255, 1e-254, 1e-253, 1e-252, 1e-251,
          1e-250, 1e-249, 1e-248, 1e-247, 1e-246, 1e-245, 1e-244, 1e-243, 1e-242, 1e-241,
          1e-240, 1e-239, 1e-238, 1e-237, 1e-236, 1e-235, 1e-234, 1e-233, 1e-232, 1e-231,
          1e-230, 1e-229, 1e-228, 1e-227, 1e-226, 1e-225, 1e-224, 1e-223, 1e-222, 1e-221,
          1e-220, 1e-219, 1e-218, 1e-217, 1e-216, 1e-215, 1e-214, 1e-213, 1e-212, 1e-211,
          1e-210, 1e-209, 1e-208, 1e-207, 1e-206, 1e-205, 1e-204, 1e-203, 1e-202, 1e-201,
          1e-200, 1e-199, 1e-198, 1e-197, 1e-196, 1e-195, 1e-194, 1e-193, 1e-192, 1e-191,
          1e-190, 1e-189, 1e-188, 1e-187, 1e-186, 1e-185, 1e-184, 1e-183, 1e-182, 1e-181,
          1e-180, 1e-179, 1e-178, 1e-177, 1e-176, 1e-175, 1e-174, 1e-173, 1e-172, 1e-171,
          1e-170, 1e-169, 1e-168, 1e-167, 1e-166, 1e-165, 1e-164, 1e-163, 1e-162, 1e-161,
          1e-160, 1e-159, 1e-158, 1e-157, 1e-156, 1e-155, 1e-154, 1e-153, 1e-152, 1e-151,
          1e-150, 1e-149, 1e-148, 1e-147, 1e-146, 1e-145, 1e-144, 1e-143, 1e-142, 1e-141,
          1e-140, 1e-139, 1e-138, 1e-137, 1e-136, 1e-135, 1e-134, 1e-133, 1e-132, 1e-131,
          1e-130, 1e-129, 1e-128, 1e-127, 1e-126, 1e-125, 1e-124, 1e-123, 1e-122, 1e-121,
          1e-120, 1e-119, 1e-118, 1e-117, 1e-116, 1e-115, 1e-114, 1e-113, 1e-112, 1e-111,
          1e-110, 1e-109, 1e-108, 1e-107, 1e-106, 1e-105, 1e-104, 1e-103, 1e-102, 1e-101,
          1e-100, 1e-099, 1e-098, 1e-097, 1e-096, 1e-095, 1e-094, 1e-093, 1e-092, 1e-091,
          1e-090, 1e-089, 1e-088, 1e-087, 1e-086, 1e-085, 1e-084, 1e-083, 1e-082, 1e-081,
          1e-080, 1e-079, 1e-078, 1e-077, 1e-076, 1e-075, 1e-074, 1e-073, 1e-072, 1e-071,
          1e-070, 1e-069, 1e-068, 1e-067, 1e-066, 1e-065, 1e-064, 1e-063, 1e-062, 1e-061,
          1e-060, 1e-059, 1e-058, 1e-057, 1e-056, 1e-055, 1e-054, 1e-053, 1e-052, 1e-051,
          1e-050, 1e-049, 1e-048, 1e-047, 1e-046, 1e-045, 1e-044, 1e-043, 1e-042, 1e-041,
          1e-040, 1e-039, 1e-038, 1e-037, 1e-036, 1e-035, 1e-034, 1e-033, 1e-032, 1e-031,
          1e-030, 1e-029, 1e-028, 1e-027, 1e-026, 1e-025, 1e-024, 1e-023, 1e-022, 1e-021,
          1e-020, 1e-019, 1e-018, 1e-017, 1e-016, 1e-015, 1e-014, 1e-013, 1e-012, 1e-011,
          1e-010, 1e-009, 1e-008, 1e-007, 1e-006, 1e-005, 1e-004, 1e-003, 1e-002, 1e-001,
          1e+000, 1e+001, 1e+002, 1e+003, 1e+004, 1e+005, 1e+006, 1e+007, 1e+008, 1e+009,
          1e+010, 1e+011, 1e+012, 1e+013, 1e+014, 1e+015, 1e+016, 1e+017, 1e+018, 1e+019,
          1e+020, 1e+021, 1e+022, 1e+023, 1e+024, 1e+025, 1e+026, 1e+027, 1e+028, 1e+029,
          1e+030, 1e+031, 1e+032, 1e+033, 1e+034, 1e+035, 1e+036, 1e+037, 1e+038, 1e+039,
          1e+040, 1e+041, 1e+042, 1e+043, 1e+044, 1e+045, 1e+046, 1e+047, 1e+048, 1e+049,
          1e+050, 1e+051, 1e+052, 1e+053, 1e+054, 1e+055, 1e+056, 1e+057, 1e+058, 1e+059,
          1e+060, 1e+061, 1e+062, 1e+063, 1e+064, 1e+065, 1e+066, 1e+067, 1e+068, 1e+069,
          1e+070, 1e+071, 1e+072, 1e+073, 1e+074, 1e+075, 1e+076, 1e+077, 1e+078, 1e+079,
          1e+080, 1e+081, 1e+082, 1e+083, 1e+084, 1e+085, 1e+086, 1e+087, 1e+088, 1e+089,
          1e+090, 1e+091, 1e+092, 1e+093, 1e+094, 1e+095, 1e+096, 1e+097, 1e+098, 1e+099,
          1e+100, 1e+101, 1e+102, 1e+103, 1e+104, 1e+105, 1e+106, 1e+107, 1e+108, 1e+109,
          1e+110, 1e+111, 1e+112, 1e+113, 1e+114, 1e+115, 1e+116, 1e+117, 1e+118, 1e+119,
          1e+120, 1e+121, 1e+122, 1e+123, 1e+124, 1e+125, 1e+126, 1e+127, 1e+128, 1e+129,
          1e+130, 1e+131, 1e+132, 1e+133, 1e+134, 1e+135, 1e+136, 1e+137, 1e+138, 1e+139,
          1e+140, 1e+141, 1e+142, 1e+143, 1e+144, 1e+145, 1e+146, 1e+147, 1e+148, 1e+149,
          1e+150, 1e+151, 1e+152, 1e+153, 1e+154, 1e+155, 1e+156, 1e+157, 1e+158, 1e+159,
          1e+160, 1e+161, 1e+162, 1e+163, 1e+164, 1e+165, 1e+166, 1e+167, 1e+168, 1e+169,
          1e+170, 1e+171, 1e+172, 1e+173, 1e+174, 1e+175, 1e+176, 1e+177, 1e+178, 1e+179,
          1e+180, 1e+181, 1e+182, 1e+183, 1e+184, 1e+185, 1e+186, 1e+187, 1e+188, 1e+189,
          1e+190, 1e+191, 1e+192, 1e+193, 1e+194, 1e+195, 1e+196, 1e+197, 1e+198, 1e+199,
          1e+200, 1e+201, 1e+202, 1e+203, 1e+204, 1e+205, 1e+206, 1e+207, 1e+208, 1e+209,
          1e+210, 1e+211, 1e+212, 1e+213, 1e+214, 1e+215, 1e+216, 1e+217, 1e+218, 1e+219,
          1e+220, 1e+221, 1e+222, 1e+223, 1e+224, 1e+225, 1e+226, 1e+227, 1e+228, 1e+229,
          1e+230, 1e+231, 1e+232, 1e+233, 1e+234, 1e+235, 1e+236, 1e+237, 1e+238, 1e+239,
          1e+240, 1e+241, 1e+242, 1e+243, 1e+244, 1e+245, 1e+246, 1e+247, 1e+248, 1e+249,
          1e+250, 1e+251, 1e+252, 1e+253, 1e+254, 1e+255, 1e+256, 1e+257, 1e+258, 1e+259,
          1e+260, 1e+261, 1e+262, 1e+263, 1e+264, 1e+265, 1e+266, 1e+267, 1e+268, 1e+269,
          1e+270, 1e+271, 1e+272, 1e+273, 1e+274, 1e+275, 1e+276, 1e+277, 1e+278, 1e+279,
          1e+280, 1e+281, 1e+282, 1e+283, 1e+284, 1e+285, 1e+286, 1e+287, 1e+288, 1e+289,
          1e+290, 1e+291, 1e+292, 1e+293, 1e+294, 1e+295, 1e+296, 1e+297, 1e+298, 1e+299,
          1e+300, 1e+301, 1e+302, 1e+303, 1e+304, 1e+305, 1e+306, 1e+307, 1e+308 };

      // 308 negative exponents + zero + 308 positive exponents.
      static_assert(
          sizeof(tab) / sizeof(tab[0]) == 617,
          "pow10 table must cover exactly the exponents -308..308");

      if( exp > 308 )
      {
          return std::numeric_limits<double>::infinity();
      }
      if( exp < -308 )
      {
          // due to the way pow10 is used by dec_to_float,
          // we can afford to return 0.0 here
          return 0.0;
      }

      // Here -308 <= exp <= 308, so the index is within [0, 616],
      // which the static_assert above guarantees is in bounds.
      return tab[exp + 308];
  }
  125. inline
  126. double
  127. dec_to_float(
  128. std::uint64_t m,
  129. std::int32_t e,
  130. bool neg) noexcept
  131. {
  132. // convert to double explicitly to silence warnings
  133. double x = static_cast<double>(m);
  134. if(neg)
  135. x = -x;
  136. if(e < -305)
  137. {
  138. x *= 1e-305 ;
  139. e += 305;
  140. }
  141. if(e >= -22 && e < 0)
  142. return x / pow10(-e);
  143. return x * pow10(e);
  144. }
  // Returns true when `c`, interpreted as an unsigned byte,
  // has a value below 32 (0x20).
  inline
  bool
  is_control(char c) noexcept
  {
      // go through unsigned char so that bytes >= 0x80 do not
      // compare as negative on platforms where char is signed
      unsigned char const byte = static_cast<unsigned char>(c);
      return byte < 0x20;
  }
  // Converts one hexadecimal digit character to its value.
  //
  // Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F',
  // and -1 for every other input.
  inline
  int
  hex_digit(unsigned char c) noexcept
  {
      int const decimal = c - '0';
      if( decimal >= 0 && decimal <= 9 )
          return decimal;

      // clearing bit 0x20 maps 'a'-'f' onto 'A'-'F'
      int const letter = (c & ~0x20) - 'A';
      if( letter >= 0 && letter <= 5 )
          return 10 + letter;

      return -1;
  }
  // Tags for the literal tokens handled by the parser. The values
  // are wrapped in mp11::mp_int<> and passed to parse_literal to
  // pick the literal at compile time.
  enum json_literal
  {
      // NOTE(review): presumably selects resuming a suspended
      // literal parse -- confirm against parse_literal
      resume_literal       = -1,

      null_literal         = 0,
      true_literal         = 1,
      false_literal        = 2,
      infinity_literal     = 3,
      neg_infinity_literal = 4,
      nan_literal          = 5
  };
  173. } // detail
  174. //----------------------------------------------------------
  175. template< class Handler >
  176. template< bool StackEmpty_, char First_ >
  177. struct basic_parser<Handler>::
  178. parse_number_helper
  179. {
  180. basic_parser* parser;
  181. char const* p;
  182. template< std::size_t N >
  183. char const*
  184. operator()( mp11::mp_size_t<N> ) const
  185. {
  186. return parser->parse_number(
  187. p,
  188. std::integral_constant<bool, StackEmpty_>(),
  189. std::integral_constant<char, First_>(),
  190. std::integral_constant<
  191. number_precision, static_cast<number_precision>(N)>() );
  192. }
  193. };
  194. //----------------------------------------------------------
  195. template<class Handler>
  196. void
  197. basic_parser<Handler>::
  198. reserve()
  199. {
  200. if(BOOST_JSON_LIKELY(
  201. ! st_.empty()))
  202. return;
  203. // Reserve the largest stack we need,
  204. // to avoid reallocation during suspend.
  205. st_.reserve(
  206. sizeof(state) + // document parsing state
  207. (sizeof(state) +
  208. sizeof(std::size_t)) * depth() + // array and object state + size
  209. sizeof(state) + // value parsing state
  210. sizeof(std::size_t) + // string size
  211. sizeof(state)); // comment state
  212. }
  213. //----------------------------------------------------------
  214. //
  215. // The sentinel value is returned by parse functions
  216. // to indicate that the parser failed or suspended.
  217. // It is used because it is distinct from all valid
  218. // pointer values for the data passed to write.
  219. template<class Handler>
  220. const char*
  221. basic_parser<Handler>::
  222. sentinel()
  223. {
  224. // the "+1" ensures that the returned pointer is unique even if
  225. // the given input buffer borders on this object
  226. return reinterpret_cast<
  227. const char*>(this) + 1;
  228. }
  229. template<class Handler>
  230. bool
  231. basic_parser<Handler>::
  232. incomplete(
  233. const detail::const_stream_wrapper& cs)
  234. {
  235. return cs.begin() == sentinel();
  236. }
  237. //----------------------------------------------------------
  238. //
  239. // These functions are declared with the BOOST_NOINLINE
  240. // attribute to avoid polluting the parser's hot path.
  241. // They return the sentinel value to indicate suspension
  242. // or failure.
  243. template<class Handler>
  244. const char*
  245. basic_parser<Handler>::
  246. suspend_or_fail(state st)
  247. {
  248. if(BOOST_JSON_LIKELY(
  249. ! ec_ && more_))
  250. {
  251. // suspend
  252. reserve();
  253. st_.push_unchecked(st);
  254. }
  255. return sentinel();
  256. }
  257. template<class Handler>
  258. const char*
  259. basic_parser<Handler>::
  260. suspend_or_fail(
  261. state st,
  262. std::size_t n)
  263. {
  264. if(BOOST_JSON_LIKELY(
  265. ! ec_ && more_))
  266. {
  267. // suspend
  268. reserve();
  269. st_.push_unchecked(n);
  270. st_.push_unchecked(st);
  271. }
  272. return sentinel();
  273. }
  274. template<class Handler>
  275. const char*
  276. basic_parser<Handler>::
  277. fail(const char* p) noexcept
  278. {
  279. BOOST_ASSERT( p != sentinel() );
  280. end_ = p;
  281. return sentinel();
  282. }
  283. template<class Handler>
  284. const char*
  285. basic_parser<Handler>::
  286. fail(
  287. const char* p,
  288. error ev,
  289. source_location const* loc) noexcept
  290. {
  291. BOOST_ASSERT( p != sentinel() );
  292. end_ = p;
  293. ec_.assign(ev, loc);
  294. return sentinel();
  295. }
  296. template<class Handler>
  297. const char*
  298. basic_parser<Handler>::
  299. maybe_suspend(
  300. const char* p,
  301. state st)
  302. {
  303. if( p != sentinel() )
  304. end_ = p;
  305. if(BOOST_JSON_LIKELY(more_))
  306. {
  307. // suspend
  308. reserve();
  309. st_.push_unchecked(st);
  310. }
  311. return sentinel();
  312. }
  313. template<class Handler>
  314. const char*
  315. basic_parser<Handler>::
  316. maybe_suspend(
  317. const char* p,
  318. state st,
  319. std::size_t n)
  320. {
  321. BOOST_ASSERT( p != sentinel() );
  322. end_ = p;
  323. if(BOOST_JSON_LIKELY(more_))
  324. {
  325. // suspend
  326. reserve();
  327. st_.push_unchecked(n);
  328. st_.push_unchecked(st);
  329. }
  330. return sentinel();
  331. }
  332. template<class Handler>
  333. const char*
  334. basic_parser<Handler>::
  335. maybe_suspend(
  336. const char* p,
  337. state st,
  338. const number& num)
  339. {
  340. BOOST_ASSERT( p != sentinel() );
  341. end_ = p;
  342. if(BOOST_JSON_LIKELY(more_))
  343. {
  344. // suspend
  345. num_ = num;
  346. reserve();
  347. st_.push_unchecked(st);;
  348. }
  349. return sentinel();
  350. }
  351. template<class Handler>
  352. const char*
  353. basic_parser<Handler>::
  354. suspend(
  355. const char* p,
  356. state st)
  357. {
  358. BOOST_ASSERT( p != sentinel() );
  359. end_ = p;
  360. // suspend
  361. reserve();
  362. st_.push_unchecked(st);
  363. return sentinel();
  364. }
  365. template<class Handler>
  366. const char*
  367. basic_parser<Handler>::
  368. suspend(
  369. const char* p,
  370. state st,
  371. const number& num)
  372. {
  373. BOOST_ASSERT( p != sentinel() );
  374. end_ = p;
  375. // suspend
  376. num_ = num;
  377. reserve();
  378. st_.push_unchecked(st);
  379. return sentinel();
  380. }
  // Parses one comment, either `// ...` up to a newline or `/* ... */`,
  // and reports it to the handler.
  //
  // p           - current input position; on a fresh entry it must point
  //               at the leading '/' (asserted below).
  // stack_empty - compile-time flag; when false and st_ is not empty, the
  //               saved comment state (com1..com4) is popped and parsing
  //               resumes at the matching label.
  // terminal    - when true, a line comment that reaches the end of the
  //               input while more_ is false is treated as complete.
  //
  // Returns the position just past the comment on success, or the
  // sentinel when the parse failed or was suspended. Complete comments
  // are delivered through h_.on_comment, partial ones through
  // h_.on_comment_part; a false return from either handler fails the parse.
  381. template<class Handler>
  382. template<
  383. bool StackEmpty_/*,
  384. bool Terminal_*/>
  385. const char*
  386. basic_parser<Handler>::
  387. parse_comment(const char* p,
  388. std::integral_constant<bool, StackEmpty_> stack_empty,
  389. /*std::integral_constant<bool, Terminal_>*/ bool terminal)
  390. {
  391. detail::const_stream_wrapper cs(p, end_);
  // start of the text handed to the handler callbacks in this call
  392. const char* start = cs.begin();
  393. std::size_t remain;
  // resuming: pop the saved comment state and jump back into the machine
  394. if(! stack_empty && ! st_.empty())
  395. {
  396. state st;
  397. st_.pop(st);
  398. switch(st)
  399. {
  400. default: BOOST_JSON_UNREACHABLE();
  401. case state::com1: goto do_com1;
  402. case state::com2: goto do_com2;
  403. case state::com3: goto do_com3;
  404. case state::com4: goto do_com4;
  405. }
  406. }
  407. BOOST_ASSERT(*cs == '/');
  408. ++cs;
  // com1: the character after the first '/' selects the comment kind
  409. do_com1:
  410. if(BOOST_JSON_UNLIKELY(! cs))
  411. return maybe_suspend(cs.begin(), state::com1);
  412. switch(*cs)
  413. {
  414. default:
  415. {
  416. BOOST_STATIC_CONSTEXPR source_location loc
  417. = BOOST_CURRENT_LOCATION;
  418. return fail(cs.begin(), error::syntax, &loc);
  419. }
  // line comment: runs up to and including the next '\n'
  420. case '/':
  421. ++cs;
  422. do_com2:
  423. // KRYSTIAN TODO: this is a mess, we have to fix this
  // cs becomes the sentinel when no input remains or memchr finds no '\n'
  424. remain = cs.remain();
  425. cs = remain ? static_cast<const char*>(
  426. std::memchr(cs.begin(), '\n', remain)) : sentinel();
  427. if(! cs.begin())
  428. cs = sentinel();
  429. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  430. {
  431. // if the doc does not terminate
  432. // with a newline, treat it as the
  433. // end of the comment
  434. if(terminal && ! more_)
  435. {
  436. if(BOOST_JSON_UNLIKELY(! h_.on_comment(
  437. {start, cs.remain(start)}, ec_)))
  438. return fail(cs.end());
  439. return cs.end();
  440. }
  441. if(BOOST_JSON_UNLIKELY(! h_.on_comment_part(
  442. {start, cs.remain(start)}, ec_)))
  443. return fail(cs.end());
  // a terminal comment always suspends; otherwise only if more_ is set
  444. if(terminal)
  445. return suspend(cs.end(), state::com2);
  446. return maybe_suspend(cs.end(), state::com2);
  447. }
  448. break;
  // block comment: scan for '*' and then check for a following '/'
  449. case '*':
  450. do
  451. {
  452. ++cs;
  453. do_com3:
  454. // KRYSTIAN TODO: this is a mess, we have to fix this
  // cs becomes the sentinel when no input remains or memchr finds no '*'
  455. remain = cs.remain();
  456. cs = remain ? static_cast<const char*>(
  457. std::memchr(cs.begin(), '*', remain)) : sentinel();
  458. if(! cs.begin())
  459. cs = sentinel();
  460. // stopped inside a c comment
  461. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  462. {
  463. if(BOOST_JSON_UNLIKELY(! h_.on_comment_part(
  464. {start, cs.remain(start)}, ec_)))
  465. return fail(cs.end());
  466. return maybe_suspend(cs.end(), state::com3);
  467. }
  468. // found an asterisk, check if the next char is a slash
  469. ++cs;
  470. do_com4:
  471. if(BOOST_JSON_UNLIKELY(! cs))
  472. {
  473. if(BOOST_JSON_UNLIKELY(! h_.on_comment_part(
  474. {start, cs.used(start)}, ec_)))
  475. return fail(cs.begin());
  476. return maybe_suspend(cs.begin(), state::com4);
  477. }
  478. }
  479. while(*cs != '/');
  480. }
  // step over the terminating character ('\n' or '/') and report the comment
  481. ++cs;
  482. if(BOOST_JSON_UNLIKELY(! h_.on_comment(
  483. {start, cs.used(start)}, ec_)))
  484. return fail(cs.begin());
  485. return cs.begin();
  486. }
  // Parses a whole document: optional leading whitespace, one value, then
  // trailing whitespace and (when opt_.allow_comments is set) comments.
  //
  // p           - current input position.
  // stack_empty - compile-time flag; when false and st_ is not empty, the
  //               state on top of the stack decides where to resume.
  //
  // Returns the position reached after the trailing whitespace/comments,
  // or the sentinel when the parse failed or was suspended.
  487. template<class Handler>
  488. template<bool StackEmpty_>
  489. const char*
  490. basic_parser<Handler>::
  491. parse_document(const char* p,
  492. std::integral_constant<bool, StackEmpty_> stack_empty)
  493. {
  494. detail::const_stream_wrapper cs(p, end_);
  // resuming: peek rather than pop, because only doc1/doc3 belong to this
  // function; any other state is left on the stack for the callee
  495. if(! stack_empty && ! st_.empty())
  496. {
  497. state st;
  498. st_.peek(st);
  499. switch(st)
  500. {
  501. default: goto do_doc2;
  502. case state::doc1:
  503. st_.pop(st);
  504. goto do_doc1;
  505. case state::doc3:
  506. st_.pop(st);
  507. goto do_doc3;
  508. case state::com1: case state::com2:
  509. case state::com3: case state::com4:
  510. goto do_doc4;
  511. }
  512. }
  // doc1: skip whitespace in front of the value
  513. do_doc1:
  514. cs = detail::count_whitespace(cs.begin(), cs.end());
  515. if(BOOST_JSON_UNLIKELY(! cs))
  516. return maybe_suspend(cs.begin(), state::doc1);
  // doc2: parse the value; the run-time options are folded into a 3-bit
  // mask (bit 0 comments, bit 1 trailing commas, bit 2 invalid UTF-8)
  // and each case passes the matching compile-time tags to parse_value
  517. do_doc2:
  518. switch(+opt_.allow_comments |
  519. (opt_.allow_trailing_commas << 1) |
  520. (opt_.allow_invalid_utf8 << 2))
  521. {
  522. // no extensions
  523. default:
  524. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::false_type(), std::false_type());
  525. break;
  526. // comments
  527. case 1:
  528. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::false_type(), std::false_type());
  529. break;
  530. // trailing
  531. case 2:
  532. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::true_type(), std::false_type());
  533. break;
  534. // comments & trailing
  535. case 3:
  536. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::true_type(), std::false_type());
  537. break;
  538. // skip validation
  539. case 4:
  540. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::false_type(), std::true_type());
  541. break;
  542. // comments & skip validation
  543. case 5:
  544. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::false_type(), std::true_type());
  545. break;
  546. // trailing & skip validation
  547. case 6:
  548. cs = parse_value(cs.begin(), stack_empty, std::false_type(), std::true_type(), std::true_type());
  549. break;
  550. // comments & trailing & skip validation
  551. case 7:
  552. cs = parse_value(cs.begin(), stack_empty, std::true_type(), std::true_type(), std::true_type());
  553. break;
  554. }
  555. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  556. // the appropriate state has already been pushed into stack
  557. return sentinel();
  // doc3: skip whitespace after the value
  558. do_doc3:
  559. cs = detail::count_whitespace(cs.begin(), cs.end());
  560. if(BOOST_JSON_UNLIKELY(! cs))
  561. {
  // input exhausted: suspend only if more_ is set, otherwise fall
  // through to the final return
  562. if(more_)
  563. return suspend(cs.begin(), state::doc3);
  564. }
  565. else if(opt_.allow_comments && *cs == '/')
  566. {
  // doc4: parse a trailing comment (terminal = true), then loop back
  // to skip more whitespace
  567. do_doc4:
  568. cs = parse_comment(cs.begin(), stack_empty, std::true_type());
  569. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  570. return sentinel();
  571. goto do_doc3;
  572. }
  573. return cs.begin();
  574. }
  // Parses a single JSON value starting at p by dispatching on its first
  // character to the matching parse function.
  //
  // p              - current input position.
  // stack_empty    - compile-time flag; when false and st_ is not empty,
  //                  control goes to resume_value instead.
  // allow_comments - compile-time flag enabling '/' comments before the value.
  // allow_trailing - forwarded to parse_array / parse_object / resume_value.
  // allow_bad_utf8 - forwarded to parse_unescaped / parse_array /
  //                  parse_object / resume_value.
  //
  // Returns whatever the selected parse function returns, or the sentinel
  // on a syntax error or suspension.
  //
  // NOTE(review): *p is read without a bounds check on first entry; the
  // caller visible in this file (parse_document) checks for remaining
  // input beforehand -- confirm for other callers.
  575. template<class Handler>
  576. template<
  577. bool StackEmpty_,
  578. bool AllowComments_/*,
  579. bool AllowTrailing_,
  580. bool AllowBadUTF8_*/>
  581. const char*
  582. basic_parser<Handler>::
  583. parse_value(const char* p,
  584. std::integral_constant<bool, StackEmpty_> stack_empty,
  585. std::integral_constant<bool, AllowComments_> allow_comments,
  586. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  587. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8)
  588. {
  589. if(stack_empty || st_.empty())
  590. {
  // re-entered after whitespace or a comment has been skipped
  591. loop:
  592. switch(*p)
  593. {
  // numbers: mp_with_index<3> turns the run-time value of opt_.numbers
  // into a compile-time index for parse_number_helper; the char
  // template argument records how the number starts ('0', '-', or
  // '+' for a leading digit 1-9)
  594. case '0':
  595. return mp11::mp_with_index<3>(
  596. static_cast<unsigned char>(opt_.numbers),
  597. parse_number_helper<true, '0'>{ this, p });
  598. case '-':
  599. return mp11::mp_with_index<3>(
  600. static_cast<unsigned char>(opt_.numbers),
  601. parse_number_helper<true, '-'>{ this, p });
  602. case '1': case '2': case '3':
  603. case '4': case '5': case '6':
  604. case '7': case '8': case '9':
  605. return mp11::mp_with_index<3>(
  606. static_cast<unsigned char>(opt_.numbers),
  607. parse_number_helper<true, '+'>{ this, p });
  // literals: the first character selects which literal to match
  608. case 'n':
  609. return parse_literal( p, mp11::mp_int<detail::null_literal>() );
  610. case 't':
  611. return parse_literal( p, mp11::mp_int<detail::true_literal>() );
  612. case 'f':
  613. return parse_literal( p, mp11::mp_int<detail::false_literal>() );
  // 'I' and 'N' are accepted only when opt_.allow_infinity_and_nan is set
  614. case 'I':
  615. if( !opt_.allow_infinity_and_nan )
  616. {
  617. BOOST_STATIC_CONSTEXPR source_location loc
  618. = BOOST_CURRENT_LOCATION;
  619. return fail(p, error::syntax, &loc);
  620. }
  621. return parse_literal( p, mp11::mp_int<detail::infinity_literal>() );
  622. case 'N':
  623. if( !opt_.allow_infinity_and_nan )
  624. {
  625. BOOST_STATIC_CONSTEXPR source_location loc
  626. = BOOST_CURRENT_LOCATION;
  627. return fail(p, error::syntax, &loc);
  628. }
  629. return parse_literal( p, mp11::mp_int<detail::nan_literal>() );
  // strings, arrays and objects
  630. case '"':
  631. return parse_unescaped(p, std::true_type(), std::false_type(), allow_bad_utf8);
  632. case '[':
  633. return parse_array(p, std::true_type(), allow_comments, allow_trailing, allow_bad_utf8);
  634. case '{':
  635. return parse_object(p, std::true_type(), allow_comments, allow_trailing, allow_bad_utf8);
  // a comment in front of the value (non-terminal), if comments are allowed
  636. case '/':
  637. if(! allow_comments)
  638. {
  639. BOOST_STATIC_CONSTEXPR source_location loc
  640. = BOOST_CURRENT_LOCATION;
  641. return fail(p, error::syntax, &loc);
  642. }
  643. p = parse_comment(p, stack_empty, std::false_type());
  644. // KRYSTIAN NOTE: incomplete takes const_stream, we either
  645. // can add an overload, change the existing one to take a pointer,
  646. // or just leave it as is
  // p is the sentinel here, which this maybe_suspend overload accepts
  647. if(BOOST_JSON_UNLIKELY(p == sentinel()))
  648. return maybe_suspend(p, state::val2);
  649. // intentional fallthrough
  // whitespace: skip it and dispatch again on the next character
  650. case ' ':
  651. case '\t':
  652. case '\n':
  653. case '\r':
  654. p = detail::count_whitespace(p, end_);
  655. if(BOOST_JSON_UNLIKELY(p == end_))
  656. return maybe_suspend(p, state::val1);
  657. goto loop;
  // any other character cannot start a value
  658. default:
  659. {
  660. BOOST_STATIC_CONSTEXPR source_location loc
  661. = BOOST_CURRENT_LOCATION;
  662. return fail(p, error::syntax, &loc);
  663. }
  664. }
  665. }
  // a suspended value parse is on the stack: continue it
  666. return resume_value(p, allow_comments, allow_trailing, allow_bad_utf8);
  667. }
  668. template<class Handler>
  669. template<
  670. bool AllowComments_/*,
  671. bool AllowTrailing_,
  672. bool AllowBadUTF8_*/>
  673. const char*
  674. basic_parser<Handler>::
  675. resume_value(const char* p,
  676. std::integral_constant<bool, AllowComments_> allow_comments,
  677. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  678. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8)
  679. {
  680. state st;
  681. st_.peek(st);
  682. switch(st)
  683. {
  684. default: BOOST_JSON_UNREACHABLE();
  685. case state::lit1:
  686. return parse_literal(p, mp11::mp_int<detail::resume_literal>() );
  687. case state::str1:
  688. return parse_unescaped(p, std::false_type(), std::false_type(), allow_bad_utf8);
  689. case state::str2: case state::str3:
  690. case state::str4: case state::str5:
  691. case state::str6: case state::str7:
  692. case state::str8:
  693. case state::sur1: case state::sur2:
  694. case state::sur3: case state::sur4:
  695. case state::sur5: case state::sur6:
  696. return parse_escaped(p, 0, std::false_type(), std::false_type(), allow_bad_utf8);
  697. case state::arr1: case state::arr2:
  698. case state::arr3: case state::arr4:
  699. case state::arr5: case state::arr6:
  700. return parse_array(p, std::false_type(), allow_comments, allow_trailing, allow_bad_utf8);
  701. case state::obj1: case state::obj2:
  702. case state::obj3: case state::obj4:
  703. case state::obj5: case state::obj6:
  704. case state::obj7: case state::obj8:
  705. case state::obj9: case state::obj10:
  706. case state::obj11:
  707. return parse_object(p, std::false_type(), allow_comments, allow_trailing, allow_bad_utf8);
  708. case state::num1: case state::num2:
  709. case state::num3: case state::num4:
  710. case state::num5: case state::num6:
  711. case state::num7: case state::num8:
  712. case state::exp1: case state::exp2:
  713. case state::exp3:
  714. return mp11::mp_with_index<3>(
  715. static_cast<unsigned char>(opt_.numbers),
  716. parse_number_helper<false, 0>{ this, p });
  717. // KRYSTIAN NOTE: these are special cases
  718. case state::val1:
  719. {
  720. st_.pop(st);
  721. BOOST_ASSERT(st_.empty());
  722. p = detail::count_whitespace(p, end_);
  723. if(BOOST_JSON_UNLIKELY(p == end_))
  724. return maybe_suspend(p, state::val1);
  725. return parse_value(p, std::true_type(), allow_comments, allow_trailing, allow_bad_utf8);
  726. }
  727. case state::val2:
  728. {
  729. st_.pop(st);
  730. p = parse_comment(p, std::false_type(), std::false_type());
  731. if(BOOST_JSON_UNLIKELY(p == sentinel()))
  732. return maybe_suspend(p, state::val2);
  733. if(BOOST_JSON_UNLIKELY( p == end_ ))
  734. return maybe_suspend(p, state::val3);
  735. BOOST_ASSERT(st_.empty());
  736. return parse_value(p, std::true_type(), std::true_type(), allow_trailing, allow_bad_utf8);
  737. }
  738. case state::val3:
  739. {
  740. st_.pop(st);
  741. return parse_value(p, std::true_type(), std::true_type(), allow_trailing, allow_bad_utf8);
  742. }
  743. }
  744. }
  745. template<class Handler>
  746. template<int Literal>
  747. const char*
  748. basic_parser<Handler>::
  749. parse_literal(const char* p,
  750. std::integral_constant<int, Literal> literal)
  751. {
  752. constexpr char const* literals[] = {
  753. "null",
  754. "true",
  755. "false",
  756. "Infinity",
  757. "-Infinity",
  758. "NaN",
  759. };
  760. constexpr std::size_t literal_sizes[] = {
  761. 4,
  762. 4,
  763. 5,
  764. 8,
  765. 9,
  766. 3,
  767. };
  768. std::size_t cur_lit;
  769. std::size_t offset;
  770. detail::const_stream_wrapper cs(p, end_);
  771. BOOST_IF_CONSTEXPR( literal != detail::resume_literal )
  772. {
  773. BOOST_ASSERT( literal >= 0 );
  774. if(BOOST_JSON_LIKELY( cs.remain() >= literal_sizes[literal] ))
  775. {
  776. int const cmp = std::memcmp(
  777. cs.begin(), literals[literal], literal_sizes[literal] );
  778. if( cmp != 0 )
  779. {
  780. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  781. return fail(cs.begin(), error::syntax, &loc);
  782. }
  783. BOOST_IF_CONSTEXPR( literal == detail::null_literal )
  784. {
  785. if(BOOST_JSON_UNLIKELY(
  786. ! h_.on_null(ec_)))
  787. return fail(cs.begin());
  788. }
  789. else BOOST_IF_CONSTEXPR( literal == detail::true_literal )
  790. {
  791. if(BOOST_JSON_UNLIKELY(
  792. ! h_.on_bool(true, ec_)))
  793. return fail(cs.begin());
  794. }
  795. else BOOST_IF_CONSTEXPR( literal == detail::false_literal )
  796. {
  797. if(BOOST_JSON_UNLIKELY(
  798. ! h_.on_bool(false, ec_)))
  799. return fail(cs.begin());
  800. }
  801. else BOOST_IF_CONSTEXPR( literal == detail::infinity_literal )
  802. {
  803. if(BOOST_JSON_UNLIKELY(
  804. ! h_.on_double(
  805. std::numeric_limits<double>::infinity(),
  806. string_view(
  807. literals[detail::infinity_literal],
  808. literal_sizes[detail::infinity_literal]),
  809. ec_)))
  810. return fail(cs.begin());
  811. }
  812. else BOOST_IF_CONSTEXPR( literal == detail::neg_infinity_literal )
  813. {
  814. if(BOOST_JSON_UNLIKELY(
  815. ! h_.on_double(
  816. -std::numeric_limits<double>::infinity(),
  817. string_view(
  818. literals[detail::neg_infinity_literal],
  819. literal_sizes[detail::neg_infinity_literal]),
  820. ec_)))
  821. return fail(cs.begin());
  822. }
  823. else BOOST_IF_CONSTEXPR( literal == detail::nan_literal )
  824. {
  825. if(BOOST_JSON_UNLIKELY(
  826. ! h_.on_double(
  827. std::numeric_limits<double>::quiet_NaN(),
  828. string_view(
  829. literals[detail::nan_literal],
  830. literal_sizes[detail::nan_literal]),
  831. ec_)))
  832. return fail(cs.begin());
  833. }
  834. else
  835. {
  836. BOOST_JSON_UNREACHABLE();
  837. }
  838. cs += literal_sizes[literal];
  839. return cs.begin();
  840. }
  841. offset = 0;
  842. cur_lit = literal;
  843. }
  844. else
  845. {
  846. state st;
  847. st_.pop(st);
  848. BOOST_ASSERT( st == state::lit1 );
  849. cur_lit = cur_lit_;
  850. offset = lit_offset_;
  851. }
  852. std::size_t const size = (std::min)(
  853. literal_sizes[cur_lit] - offset, cs.remain() );
  854. int cmp = 0;
  855. if(BOOST_JSON_LIKELY( cs.begin() ))
  856. cmp = std::memcmp( cs.begin(), literals[cur_lit] + offset, size );
  857. if( cmp != 0 )
  858. {
  859. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  860. return fail(cs.begin(), error::syntax, &loc);
  861. }
  862. if(BOOST_JSON_UNLIKELY( offset + size < literal_sizes[cur_lit] ))
  863. {
  864. BOOST_ASSERT( cur_lit < 256 );
  865. cur_lit_ = static_cast<unsigned char>( cur_lit );
  866. BOOST_ASSERT( offset + size < 256 );
  867. lit_offset_ = static_cast<unsigned char>( offset + size );
  868. return maybe_suspend(cs.begin() + size, state::lit1);
  869. }
  870. switch( cur_lit )
  871. {
  872. case detail::null_literal:
  873. if(BOOST_JSON_UNLIKELY(
  874. ! h_.on_null(ec_)))
  875. return fail(cs.begin());
  876. break;
  877. case detail::true_literal:
  878. if(BOOST_JSON_UNLIKELY(
  879. ! h_.on_bool(true, ec_)))
  880. return fail(cs.begin());
  881. break;
  882. case detail::false_literal:
  883. if(BOOST_JSON_UNLIKELY(
  884. ! h_.on_bool(false, ec_)))
  885. return fail(cs.begin());
  886. break;
  887. case detail::infinity_literal:
  888. if(BOOST_JSON_UNLIKELY(
  889. ! h_.on_double(
  890. std::numeric_limits<double>::infinity(),
  891. string_view(
  892. literals[detail::infinity_literal],
  893. literal_sizes[detail::infinity_literal]),
  894. ec_)))
  895. return fail(cs.begin());
  896. break;
  897. case detail::neg_infinity_literal:
  898. if(BOOST_JSON_UNLIKELY(
  899. ! h_.on_double(
  900. -std::numeric_limits<double>::infinity(),
  901. string_view(
  902. literals[detail::neg_infinity_literal],
  903. literal_sizes[detail::neg_infinity_literal]),
  904. ec_)))
  905. return fail(cs.begin());
  906. break;
  907. case detail::nan_literal:
  908. if(BOOST_JSON_UNLIKELY(
  909. ! h_.on_double(
  910. std::numeric_limits<double>::quiet_NaN(),
  911. string_view(
  912. literals[detail::nan_literal],
  913. literal_sizes[detail::nan_literal]),
  914. ec_)))
  915. return fail(cs.begin());
  916. break;
  917. default: BOOST_JSON_UNREACHABLE();
  918. }
  919. cs += size;
  920. return cs.begin();
  921. }
  922. //----------------------------------------------------------
  923. template<class Handler>
  924. template<
  925. bool StackEmpty_,
  926. bool IsKey_/*,
  927. bool AllowBadUTF8_*/>
  928. const char*
  929. basic_parser<Handler>::
  930. parse_string(const char* p,
  931. std::integral_constant<bool, StackEmpty_> stack_empty,
  932. std::integral_constant<bool, IsKey_> is_key,
  933. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8)
  934. {
  935. if(! stack_empty && ! st_.empty())
  936. {
  937. state st;
  938. st_.peek(st);
  939. switch(st)
  940. {
  941. default: BOOST_JSON_UNREACHABLE();
  942. case state::str1:
  943. return parse_unescaped(p, stack_empty, is_key, allow_bad_utf8);
  944. case state::str2: case state::str3:
  945. case state::str4: case state::str5:
  946. case state::str6: case state::str7:
  947. case state::str8:
  948. case state::sur1: case state::sur2:
  949. case state::sur3: case state::sur4:
  950. case state::sur5: case state::sur6:
  951. return parse_escaped(p, 0, stack_empty, is_key, allow_bad_utf8);
  952. }
  953. }
  954. return parse_unescaped(p, std::true_type(), is_key, allow_bad_utf8);
  955. }
  956. template<class Handler>
  957. template<
  958. bool StackEmpty_,
  959. bool IsKey_/*,
  960. bool AllowBadUTF8_*/>
  961. const char*
  962. basic_parser<Handler>::
  963. parse_unescaped(const char* p,
  964. std::integral_constant<bool, StackEmpty_> stack_empty,
  965. std::integral_constant<bool, IsKey_> is_key,
  966. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8)
  967. {
  968. detail::const_stream_wrapper cs(p, end_);
  969. std::size_t total;
  970. if(stack_empty || st_.empty())
  971. {
  972. BOOST_ASSERT(*cs == '\x22'); // '"'
  973. ++cs;
  974. total = 0;
  975. }
  976. else
  977. {
  978. state st;
  979. st_.pop(st);
  980. st_.pop(total);
  981. }
  982. char const* start = cs.begin();
  983. cs = allow_bad_utf8?
  984. detail::count_valid<true>(cs.begin(), cs.end()):
  985. detail::count_valid<false>(cs.begin(), cs.end());
  986. std::size_t size = cs.used(start);
  987. if(is_key)
  988. {
  989. BOOST_ASSERT(total <= Handler::max_key_size);
  990. if(BOOST_JSON_UNLIKELY(size >
  991. Handler::max_key_size - total))
  992. {
  993. BOOST_STATIC_CONSTEXPR source_location loc
  994. = BOOST_CURRENT_LOCATION;
  995. return fail(cs.begin(), error::key_too_large, &loc);
  996. }
  997. }
  998. else
  999. {
  1000. BOOST_ASSERT(total <= Handler::max_string_size);
  1001. if(BOOST_JSON_UNLIKELY(size >
  1002. Handler::max_string_size - total))
  1003. {
  1004. BOOST_STATIC_CONSTEXPR source_location loc
  1005. = BOOST_CURRENT_LOCATION;
  1006. return fail(cs.begin(), error::string_too_large, &loc);
  1007. }
  1008. }
  1009. total += size;
  1010. if(BOOST_JSON_UNLIKELY(! cs))
  1011. {
  1012. // call handler if the string isn't empty
  1013. if(BOOST_JSON_LIKELY(size))
  1014. {
  1015. {
  1016. bool r = is_key?
  1017. h_.on_key_part( {start, size}, total, ec_ ):
  1018. h_.on_string_part( {start, size}, total, ec_ );
  1019. if(BOOST_JSON_UNLIKELY(!r))
  1020. {
  1021. return fail(cs.begin());
  1022. }
  1023. }
  1024. }
  1025. return maybe_suspend(cs.begin(), state::str1, total);
  1026. }
  1027. // at this point all valid characters have been skipped, so any remaining
  1028. // if there are any more characters, they are either escaped, or incomplete
  1029. // utf8, or invalid utf8
  1030. if(BOOST_JSON_UNLIKELY(*cs != '\x22')) // '"'
  1031. {
  1032. // sequence is invalid or incomplete
  1033. if((*cs & 0x80) && !allow_bad_utf8)
  1034. {
  1035. seq_.save(cs.begin(), cs.remain());
  1036. if(BOOST_JSON_UNLIKELY(seq_.complete()))
  1037. {
  1038. BOOST_STATIC_CONSTEXPR source_location loc
  1039. = BOOST_CURRENT_LOCATION;
  1040. return fail(cs.begin(), error::syntax, &loc);
  1041. }
  1042. if(BOOST_JSON_LIKELY(size))
  1043. {
  1044. {
  1045. bool r = is_key?
  1046. h_.on_key_part( {start, size}, total, ec_ ):
  1047. h_.on_string_part( {start, size}, total, ec_ );
  1048. if(BOOST_JSON_UNLIKELY(!r))
  1049. {
  1050. return fail(cs.begin());
  1051. }
  1052. }
  1053. }
  1054. return maybe_suspend(cs.end(), state::str8, total);
  1055. }
  1056. else if(BOOST_JSON_LIKELY(*cs == '\\'))
  1057. {
  1058. // flush unescaped run from input
  1059. if(BOOST_JSON_LIKELY(size))
  1060. {
  1061. {
  1062. bool r = is_key?
  1063. h_.on_key_part( {start, size}, total, ec_ ):
  1064. h_.on_string_part( {start, size}, total, ec_ );
  1065. if(BOOST_JSON_UNLIKELY(!r))
  1066. {
  1067. return fail(cs.begin());
  1068. }
  1069. }
  1070. }
  1071. return parse_escaped(cs.begin(), total, stack_empty, is_key, allow_bad_utf8);
  1072. }
  1073. // illegal control
  1074. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1075. return fail(cs.begin(), error::syntax, &loc);
  1076. }
  1077. {
  1078. bool r = is_key?
  1079. h_.on_key( {start, size}, total, ec_ ):
  1080. h_.on_string( {start, size}, total, ec_ );
  1081. if(BOOST_JSON_UNLIKELY(!r))
  1082. {
  1083. return fail(cs.begin());
  1084. }
  1085. }
  1086. ++cs;
  1087. return cs.begin();
  1088. }
  1089. template<class Handler>
  1090. template<
  1091. bool StackEmpty_/*,
  1092. bool IsKey_,
  1093. bool AllowBadUTF8_*/>
  1094. const char*
  1095. basic_parser<Handler>::
  1096. parse_escaped(
  1097. const char* p,
  1098. std::size_t total,
  1099. std::integral_constant<bool, StackEmpty_> stack_empty,
  1100. /*std::integral_constant<bool, IsKey_>*/ bool is_key,
  1101. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8)
  1102. {
  1103. //---------------------------------------------------------------
  1104. //
  1105. // To handle escapes, a local temporary buffer accumulates
  1106. // the unescaped result. The algorithm attempts to fill the
  1107. // buffer to capacity before invoking the handler.
  1108. // In some cases the temporary buffer needs to be flushed
  1109. // before it is full:
  1110. // * When the closing double quote is seen
  1111. // * When there in no more input (and more is expected later)
  1112. // A goal of the algorithm is to call the handler as few times
  1113. // as possible. Thus, when the first escape is encountered,
  1114. // the algorithm attempts to fill the temporary buffer first.
  1115. //
  1116. auto const ev_too_large = is_key?
  1117. error::key_too_large : error::string_too_large;
  1118. auto const max_size = is_key?
  1119. Handler::max_key_size : Handler::max_string_size;
  1120. detail::clipped_const_stream cs(p, end_);
  1121. detail::buffer<BOOST_JSON_STACK_BUFFER_SIZE> temp;
  1122. int digit;
  1123. char c;
  1124. cs.clip(temp.max_size());
  1125. if(! stack_empty && ! st_.empty())
  1126. {
  1127. state st;
  1128. st_.pop(st);
  1129. st_.pop(total);
  1130. switch(st)
  1131. {
  1132. default: BOOST_JSON_UNREACHABLE();
  1133. case state::str2: goto do_str2;
  1134. case state::str3: goto do_str3;
  1135. case state::str4: goto do_str4;
  1136. case state::str5: goto do_str5;
  1137. case state::str6: goto do_str6;
  1138. case state::str7: goto do_str7;
  1139. case state::str8: goto do_str8;
  1140. case state::sur1: goto do_sur1;
  1141. case state::sur2: goto do_sur2;
  1142. case state::sur3: goto do_sur3;
  1143. case state::sur4: goto do_sur4;
  1144. case state::sur5: goto do_sur5;
  1145. case state::sur6: goto do_sur6;
  1146. }
  1147. }
  1148. // Unescaped JSON is never larger than its escaped version.
  1149. // To efficiently process only what will fit in the temporary buffer,
  1150. // the size of the input stream is temporarily "clipped" to the size
  1151. // of the temporary buffer.
  1152. // handle escaped character
  1153. BOOST_ASSERT(*cs == '\\');
  1154. ++cs;
  1155. do_str3:
  1156. if(BOOST_JSON_UNLIKELY(! cs))
  1157. {
  1158. if(BOOST_JSON_LIKELY(! temp.empty()))
  1159. {
  1160. BOOST_ASSERT(total <= max_size);
  1161. if(BOOST_JSON_UNLIKELY(
  1162. temp.size() > max_size - total))
  1163. {
  1164. BOOST_STATIC_CONSTEXPR source_location loc
  1165. = BOOST_CURRENT_LOCATION;
  1166. return fail(cs.begin(), ev_too_large, &loc);
  1167. }
  1168. total += temp.size();
  1169. {
  1170. bool r = is_key
  1171. ? h_.on_key_part(temp.get(), total, ec_)
  1172. : h_.on_string_part(temp.get(), total, ec_);
  1173. if(BOOST_JSON_UNLIKELY(!r))
  1174. {
  1175. return fail(cs.begin());
  1176. }
  1177. }
  1178. temp.clear();
  1179. }
  1180. cs.clip(temp.max_size());
  1181. if(BOOST_JSON_UNLIKELY(! cs))
  1182. return maybe_suspend(cs.begin(), state::str3, total);
  1183. }
  1184. switch(*cs)
  1185. {
  1186. default:
  1187. {
  1188. BOOST_STATIC_CONSTEXPR source_location loc
  1189. = BOOST_CURRENT_LOCATION;
  1190. return fail(cs.begin(), error::syntax, &loc);
  1191. }
  1192. case '\x22': // '"'
  1193. temp.push_back('\x22');
  1194. ++cs;
  1195. break;
  1196. case '\\':
  1197. temp.push_back('\\');
  1198. ++cs;
  1199. break;
  1200. case '/':
  1201. temp.push_back('/');
  1202. ++cs;
  1203. break;
  1204. case 'b':
  1205. temp.push_back('\x08');
  1206. ++cs;
  1207. break;
  1208. case 'f':
  1209. temp.push_back('\x0c');
  1210. ++cs;
  1211. break;
  1212. case 'n':
  1213. temp.push_back('\x0a');
  1214. ++cs;
  1215. break;
  1216. case 'r':
  1217. temp.push_back('\x0d');
  1218. ++cs;
  1219. break;
  1220. case 't':
  1221. temp.push_back('\x09');
  1222. ++cs;
  1223. break;
  1224. case 'u':
  1225. // utf16 escape
  1226. //
  1227. // fast path only when the buffer
  1228. // is large enough for 2 surrogates
  1229. if(BOOST_JSON_LIKELY(cs.remain() > 10))
  1230. {
  1231. // KRYSTIAN TODO: this could be done
  1232. // with fewer instructions
  1233. digit = detail::load_little_endian<4>(
  1234. cs.begin() + 1);
  1235. int d4 = detail::hex_digit(static_cast<
  1236. unsigned char>(digit >> 24));
  1237. int d3 = detail::hex_digit(static_cast<
  1238. unsigned char>(digit >> 16));
  1239. int d2 = detail::hex_digit(static_cast<
  1240. unsigned char>(digit >> 8));
  1241. int d1 = detail::hex_digit(static_cast<
  1242. unsigned char>(digit));
  1243. if(BOOST_JSON_UNLIKELY(
  1244. (d1 | d2 | d3 | d4) == -1))
  1245. {
  1246. if(d1 != -1)
  1247. ++cs;
  1248. if(d2 != -1)
  1249. ++cs;
  1250. if(d3 != -1)
  1251. ++cs;
  1252. BOOST_STATIC_CONSTEXPR source_location loc
  1253. = BOOST_CURRENT_LOCATION;
  1254. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1255. }
  1256. // 32 bit unicode scalar value
  1257. unsigned const u1 =
  1258. (d1 << 12) + (d2 << 8) +
  1259. (d3 << 4) + d4;
  1260. // valid unicode scalar values are
  1261. // [0, D7FF] and [E000, 10FFFF]
  1262. // values within this range are valid utf-8
  1263. // code points and invalid leading surrogates.
  1264. if(BOOST_JSON_LIKELY(
  1265. u1 < 0xd800 || u1 > 0xdfff))
  1266. {
  1267. cs += 5;
  1268. temp.append_utf8(u1);
  1269. break;
  1270. }
  1271. if(BOOST_JSON_UNLIKELY(u1 > 0xdbff))
  1272. {
  1273. BOOST_STATIC_CONSTEXPR source_location loc
  1274. = BOOST_CURRENT_LOCATION;
  1275. return fail(cs.begin(), error::illegal_leading_surrogate,
  1276. &loc);
  1277. }
  1278. cs += 5;
  1279. // KRYSTIAN TODO: this can be a two byte load
  1280. // and a single comparison. We lose error information,
  1281. // but it's faster.
  1282. if(BOOST_JSON_UNLIKELY(*cs != '\\'))
  1283. {
  1284. BOOST_STATIC_CONSTEXPR source_location loc
  1285. = BOOST_CURRENT_LOCATION;
  1286. return fail(cs.begin(), error::syntax, &loc);
  1287. }
  1288. ++cs;
  1289. if(BOOST_JSON_UNLIKELY(*cs != 'u'))
  1290. {
  1291. BOOST_STATIC_CONSTEXPR source_location loc
  1292. = BOOST_CURRENT_LOCATION;
  1293. return fail(cs.begin(), error::syntax, &loc);
  1294. }
  1295. ++cs;
  1296. digit = detail::load_little_endian<4>(cs.begin());
  1297. d4 = detail::hex_digit(static_cast<
  1298. unsigned char>(digit >> 24));
  1299. d3 = detail::hex_digit(static_cast<
  1300. unsigned char>(digit >> 16));
  1301. d2 = detail::hex_digit(static_cast<
  1302. unsigned char>(digit >> 8));
  1303. d1 = detail::hex_digit(static_cast<
  1304. unsigned char>(digit));
  1305. if(BOOST_JSON_UNLIKELY(
  1306. (d1 | d2 | d3 | d4) == -1))
  1307. {
  1308. if(d1 != -1)
  1309. ++cs;
  1310. if(d2 != -1)
  1311. ++cs;
  1312. if(d3 != -1)
  1313. ++cs;
  1314. BOOST_STATIC_CONSTEXPR source_location loc
  1315. = BOOST_CURRENT_LOCATION;
  1316. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1317. }
  1318. unsigned const u2 =
  1319. (d1 << 12) + (d2 << 8) +
  1320. (d3 << 4) + d4;
  1321. // valid trailing surrogates are [DC00, DFFF]
  1322. if(BOOST_JSON_UNLIKELY(
  1323. u2 < 0xdc00 || u2 > 0xdfff))
  1324. {
  1325. BOOST_STATIC_CONSTEXPR source_location loc
  1326. = BOOST_CURRENT_LOCATION;
  1327. return fail(cs.begin(), error::illegal_trailing_surrogate,
  1328. &loc);
  1329. }
  1330. cs += 4;
  1331. unsigned cp =
  1332. ((u1 - 0xd800) << 10) +
  1333. ((u2 - 0xdc00)) +
  1334. 0x10000;
  1335. // utf-16 surrogate pair
  1336. temp.append_utf8(cp);
  1337. break;
  1338. }
  1339. // flush
  1340. if(BOOST_JSON_LIKELY(! temp.empty()))
  1341. {
  1342. BOOST_ASSERT(total <= max_size);
  1343. if(BOOST_JSON_UNLIKELY(
  1344. temp.size() > max_size - total))
  1345. {
  1346. BOOST_STATIC_CONSTEXPR source_location loc
  1347. = BOOST_CURRENT_LOCATION;
  1348. return fail(cs.begin(), ev_too_large, &loc);
  1349. }
  1350. total += temp.size();
  1351. {
  1352. bool r = is_key
  1353. ? h_.on_key_part(temp.get(), total, ec_)
  1354. : h_.on_string_part(temp.get(), total, ec_);
  1355. if(BOOST_JSON_UNLIKELY(!r))
  1356. {
  1357. return fail(cs.begin());
  1358. }
  1359. }
  1360. temp.clear();
  1361. cs.clip(temp.max_size());
  1362. }
  1363. ++cs;
  1364. // utf-16 escape
  1365. do_str4:
  1366. if(BOOST_JSON_UNLIKELY(! cs))
  1367. return maybe_suspend(cs.begin(), state::str4, total);
  1368. digit = detail::hex_digit(*cs);
  1369. if(BOOST_JSON_UNLIKELY(digit == -1))
  1370. {
  1371. BOOST_STATIC_CONSTEXPR source_location loc
  1372. = BOOST_CURRENT_LOCATION;
  1373. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1374. }
  1375. ++cs;
  1376. u1_ = digit << 12;
  1377. do_str5:
  1378. if(BOOST_JSON_UNLIKELY(! cs))
  1379. return maybe_suspend(cs.begin(), state::str5, total);
  1380. digit = detail::hex_digit(*cs);
  1381. if(BOOST_JSON_UNLIKELY(digit == -1))
  1382. {
  1383. BOOST_STATIC_CONSTEXPR source_location loc
  1384. = BOOST_CURRENT_LOCATION;
  1385. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1386. }
  1387. ++cs;
  1388. u1_ += digit << 8;
  1389. do_str6:
  1390. if(BOOST_JSON_UNLIKELY(! cs))
  1391. return maybe_suspend(cs.begin(), state::str6, total);
  1392. digit = detail::hex_digit(*cs);
  1393. if(BOOST_JSON_UNLIKELY(digit == -1))
  1394. {
  1395. BOOST_STATIC_CONSTEXPR source_location loc
  1396. = BOOST_CURRENT_LOCATION;
  1397. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1398. }
  1399. ++cs;
  1400. u1_ += digit << 4;
  1401. do_str7:
  1402. if(BOOST_JSON_UNLIKELY(! cs))
  1403. return maybe_suspend(cs.begin(), state::str7, total);
  1404. digit = detail::hex_digit(*cs);
  1405. if(BOOST_JSON_UNLIKELY(digit == -1))
  1406. {
  1407. BOOST_STATIC_CONSTEXPR source_location loc
  1408. = BOOST_CURRENT_LOCATION;
  1409. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1410. }
  1411. ++cs;
  1412. u1_ += digit;
  1413. if(BOOST_JSON_LIKELY(
  1414. u1_ < 0xd800 || u1_ > 0xdfff))
  1415. {
  1416. BOOST_ASSERT(temp.empty());
  1417. // utf-8 codepoint
  1418. temp.append_utf8(u1_);
  1419. break;
  1420. }
  1421. if(BOOST_JSON_UNLIKELY(u1_ > 0xdbff))
  1422. {
  1423. BOOST_STATIC_CONSTEXPR source_location loc
  1424. = BOOST_CURRENT_LOCATION;
  1425. return fail(cs.begin(), error::illegal_trailing_surrogate, &loc);
  1426. }
  1427. do_sur1:
  1428. if(BOOST_JSON_UNLIKELY(! cs))
  1429. return maybe_suspend(cs.begin(), state::sur1, total);
  1430. if(BOOST_JSON_UNLIKELY(*cs != '\\'))
  1431. {
  1432. BOOST_STATIC_CONSTEXPR source_location loc
  1433. = BOOST_CURRENT_LOCATION;
  1434. return fail(cs.begin(), error::syntax, &loc);
  1435. }
  1436. ++cs;
  1437. do_sur2:
  1438. if(BOOST_JSON_UNLIKELY(! cs))
  1439. return maybe_suspend(cs.begin(), state::sur2, total);
  1440. if(BOOST_JSON_UNLIKELY(*cs != 'u'))
  1441. {
  1442. BOOST_STATIC_CONSTEXPR source_location loc
  1443. = BOOST_CURRENT_LOCATION;
  1444. return fail(cs.begin(), error::syntax, &loc);
  1445. }
  1446. ++cs;
  1447. do_sur3:
  1448. if(BOOST_JSON_UNLIKELY(! cs))
  1449. return maybe_suspend(cs.begin(), state::sur3, total);
  1450. digit = detail::hex_digit(*cs);
  1451. if(BOOST_JSON_UNLIKELY(digit == -1))
  1452. {
  1453. BOOST_STATIC_CONSTEXPR source_location loc
  1454. = BOOST_CURRENT_LOCATION;
  1455. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1456. }
  1457. ++cs;
  1458. u2_ = digit << 12;
  1459. do_sur4:
  1460. if(BOOST_JSON_UNLIKELY(! cs))
  1461. return maybe_suspend(cs.begin(), state::sur4, total);
  1462. digit = detail::hex_digit(*cs);
  1463. if(BOOST_JSON_UNLIKELY(digit == -1))
  1464. {
  1465. BOOST_STATIC_CONSTEXPR source_location loc
  1466. = BOOST_CURRENT_LOCATION;
  1467. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1468. }
  1469. ++cs;
  1470. u2_ += digit << 8;
  1471. do_sur5:
  1472. if(BOOST_JSON_UNLIKELY(! cs))
  1473. return maybe_suspend(cs.begin(), state::sur5, total);
  1474. digit = detail::hex_digit(*cs);
  1475. if(BOOST_JSON_UNLIKELY(digit == -1))
  1476. {
  1477. BOOST_STATIC_CONSTEXPR source_location loc
  1478. = BOOST_CURRENT_LOCATION;
  1479. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1480. }
  1481. ++cs;
  1482. u2_ += digit << 4;
  1483. do_sur6:
  1484. if(BOOST_JSON_UNLIKELY(! cs))
  1485. return maybe_suspend(cs.begin(), state::sur6, total);
  1486. digit = detail::hex_digit(*cs);
  1487. if(BOOST_JSON_UNLIKELY(digit == -1))
  1488. {
  1489. BOOST_STATIC_CONSTEXPR source_location loc
  1490. = BOOST_CURRENT_LOCATION;
  1491. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1492. }
  1493. ++cs;
  1494. u2_ += digit;
  1495. if(BOOST_JSON_UNLIKELY(
  1496. u2_ < 0xdc00 || u2_ > 0xdfff))
  1497. {
  1498. BOOST_STATIC_CONSTEXPR source_location loc
  1499. = BOOST_CURRENT_LOCATION;
  1500. return fail(cs.begin(), error::expected_hex_digit, &loc);
  1501. }
  1502. unsigned cp =
  1503. ((u1_ - 0xd800) << 10) +
  1504. ((u2_ - 0xdc00)) +
  1505. 0x10000;
  1506. BOOST_ASSERT(temp.empty());
  1507. // utf-16 surrogate pair
  1508. temp.append_utf8(cp);
  1509. }
  1510. do_str2:
  1511. // KRYSTIAN TODO: we can append the characters
  1512. // all at once instead of one at a time
  1513. for(;;)
  1514. {
  1515. if(BOOST_JSON_UNLIKELY(! cs || temp.capacity() == 0 ))
  1516. {
  1517. // flush
  1518. if(BOOST_JSON_LIKELY(! temp.empty()))
  1519. {
  1520. BOOST_ASSERT(total <= max_size);
  1521. if(BOOST_JSON_UNLIKELY(
  1522. temp.size() > max_size - total))
  1523. {
  1524. BOOST_STATIC_CONSTEXPR source_location loc
  1525. = BOOST_CURRENT_LOCATION;
  1526. return fail(cs.begin(), ev_too_large, &loc);
  1527. }
  1528. total += temp.size();
  1529. {
  1530. bool r = is_key
  1531. ? h_.on_key_part(temp.get(), total, ec_)
  1532. : h_.on_string_part(temp.get(), total, ec_);
  1533. if(BOOST_JSON_UNLIKELY(!r))
  1534. {
  1535. return fail(cs.begin());
  1536. }
  1537. }
  1538. temp.clear();
  1539. }
  1540. cs.clip(temp.max_size());
  1541. if(BOOST_JSON_UNLIKELY(! cs))
  1542. return maybe_suspend(cs.begin(), state::str2, total);
  1543. }
  1544. c = *cs;
  1545. if(BOOST_JSON_LIKELY(c == '\x22')) // '"'
  1546. {
  1547. BOOST_ASSERT(total <= max_size);
  1548. if(BOOST_JSON_UNLIKELY(
  1549. temp.size() > max_size - total))
  1550. {
  1551. BOOST_STATIC_CONSTEXPR source_location loc
  1552. = BOOST_CURRENT_LOCATION;
  1553. return fail(cs.begin(), ev_too_large, &loc);
  1554. }
  1555. total += temp.size();
  1556. {
  1557. bool r = is_key
  1558. ? h_.on_key(temp.get(), total, ec_)
  1559. : h_.on_string(temp.get(), total, ec_);
  1560. if(BOOST_JSON_UNLIKELY(!r))
  1561. {
  1562. return fail(cs.begin());
  1563. }
  1564. }
  1565. ++cs;
  1566. return cs.begin();
  1567. }
  1568. else if((c & 0x80) && !allow_bad_utf8)
  1569. {
  1570. seq_.save(cs.begin(), cs.remain());
  1571. if(BOOST_JSON_UNLIKELY(! seq_.complete()))
  1572. {
  1573. if(BOOST_JSON_LIKELY(! temp.empty()))
  1574. {
  1575. BOOST_ASSERT(total <= max_size);
  1576. if(BOOST_JSON_UNLIKELY(
  1577. temp.size() > max_size - total))
  1578. {
  1579. BOOST_STATIC_CONSTEXPR source_location loc
  1580. = BOOST_CURRENT_LOCATION;
  1581. return fail(cs.begin(), ev_too_large, &loc);
  1582. }
  1583. total += temp.size();
  1584. {
  1585. bool r = is_key
  1586. ? h_.on_key_part(temp.get(), total, ec_)
  1587. : h_.on_string_part(temp.get(), total, ec_);
  1588. if(BOOST_JSON_UNLIKELY(!r))
  1589. {
  1590. return fail(cs.begin());
  1591. }
  1592. }
  1593. temp.clear();
  1594. }
  1595. cs = cs.end();
  1596. // ensure there is room for the saved byte sequence
  1597. cs.clip(temp.max_size() - seq_.length());
  1598. goto do_str8;
  1599. }
  1600. if(BOOST_JSON_UNLIKELY(! seq_.valid()))
  1601. {
  1602. BOOST_STATIC_CONSTEXPR source_location loc
  1603. = BOOST_CURRENT_LOCATION;
  1604. return fail(cs.begin(), error::syntax, &loc);
  1605. }
  1606. temp.append(seq_.data(), seq_.length());
  1607. cs += seq_.length();
  1608. continue;
  1609. }
  1610. else if(BOOST_JSON_LIKELY(c == '\\'))
  1611. {
  1612. ++cs;
  1613. goto do_str3;
  1614. }
  1615. else if(BOOST_JSON_UNLIKELY(
  1616. detail::is_control(c)))
  1617. {
  1618. BOOST_STATIC_CONSTEXPR source_location loc
  1619. = BOOST_CURRENT_LOCATION;
  1620. return fail(cs.begin(), error::syntax, &loc);
  1621. }
  1622. temp.push_back(c);
  1623. ++cs;
  1624. }
  1625. do_str8:
  1626. uint8_t needed = seq_.needed();
  1627. if(BOOST_JSON_UNLIKELY(
  1628. ! seq_.append(cs.begin(), cs.remain())))
  1629. return maybe_suspend(cs.end(), state::str8, total);
  1630. if(BOOST_JSON_UNLIKELY(! seq_.valid()))
  1631. {
  1632. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1633. return fail(cs.begin(), error::syntax, &loc);
  1634. }
  1635. temp.append(seq_.data(), seq_.length());
  1636. cs += needed;
  1637. goto do_str2;
  1638. }
  1639. //----------------------------------------------------------
  // Parses one JSON object starting at `p`, reporting it to the handler `h_`.
  //
  // Fresh entry expects `*p == '{'` (asserted). When `stack_empty` is false
  // and the state stack `st_` is non-empty, the call is a resumption: the
  // saved state and member count are popped and control jumps to the label
  // matching that state.
  //
  // Parameters:
  //   p               - current input position; the end of input is `end_`.
  //   stack_empty     - compile-time flag; when true, resumption is never attempted.
  //   allow_comments  - compile-time flag; when true a '/' where a token is
  //                     expected is handed to parse_comment.
  //   allow_trailing  - when true, '}' is accepted directly after a ','.
  //   allow_bad_utf8  - forwarded to parse_string and parse_value.
  //
  // Returns the position just past the closing '}' on success. Every other
  // exit returns whatever fail(), maybe_suspend() or suspend_or_fail()
  // return; those helpers are defined outside this block.
  1640. template<class Handler>
  1641. template<
  1642. bool StackEmpty_,
  1643. bool AllowComments_/*,
  1644. bool AllowTrailing_,
  1645. bool AllowBadUTF8_*/>
  1646. const char*
  1647. basic_parser<Handler>::
  1648. parse_object(const char* p,
  1649. std::integral_constant<bool, StackEmpty_> stack_empty,
  1650. std::integral_constant<bool, AllowComments_> allow_comments,
  1651. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  1652. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8)
  1653. {
  1654. detail::const_stream_wrapper cs(p, end_);
  // number of members seen so far; saved and restored across suspensions
  1655. std::size_t size;
  1656. if(! stack_empty && ! st_.empty())
  1657. {
  1658. // resume
  1659. state st;
  1660. st_.pop(st);
  1661. st_.pop(size);
  1662. switch(st)
  1663. {
  1664. default: BOOST_JSON_UNREACHABLE();
  1665. case state::obj1: goto do_obj1;
  1666. case state::obj2: goto do_obj2;
  1667. case state::obj3: goto do_obj3;
  1668. case state::obj4: goto do_obj4;
  1669. case state::obj5: goto do_obj5;
  1670. case state::obj6: goto do_obj6;
  1671. case state::obj7: goto do_obj7;
  1672. case state::obj8: goto do_obj8;
  1673. case state::obj9: goto do_obj9;
  1674. case state::obj10: goto do_obj10;
  1675. case state::obj11: goto do_obj11;
  1676. }
  1677. }
  1678. BOOST_ASSERT(*cs == '{');
  1679. size = 0;
  // depth_ counts the nesting levels still available; zero means the
  // limit has been reached
  1680. if(BOOST_JSON_UNLIKELY(! depth_))
  1681. {
  1682. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1683. return fail(cs.begin(), error::too_deep, &loc);
  1684. }
  1685. --depth_;
  1686. if(BOOST_JSON_UNLIKELY(
  1687. ! h_.on_object_begin(ec_)))
  1688. return fail(cs.begin());
  1689. ++cs;
  1690. // object:
  1691. // '{' *ws '}'
  1692. // '{' *ws string *ws ':' *ws value *ws *[ ',' *ws string *ws ':' *ws value *ws ] '}'
  // obj1: whitespace after '{'; expects '}' or the first key
  1693. do_obj1:
  1694. cs = detail::count_whitespace(cs.begin(), cs.end());
  1695. if(BOOST_JSON_UNLIKELY(! cs))
  1696. return maybe_suspend(cs.begin(), state::obj1, size);
  1697. if(BOOST_JSON_LIKELY(*cs != '}'))
  1698. {
  // '\x22' is the double quote that opens a key
  1699. if(BOOST_JSON_UNLIKELY(*cs != '\x22'))
  1700. {
  1701. if(allow_comments && *cs == '/')
  1702. {
  // obj2: comment before the first key; afterwards re-scan from obj1
  1703. do_obj2:
  1704. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1705. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1706. return suspend_or_fail(state::obj2, size);
  1707. goto do_obj1;
  1708. }
  1709. BOOST_STATIC_CONSTEXPR source_location loc
  1710. = BOOST_CURRENT_LOCATION;
  1711. return fail(cs.begin(), error::syntax, &loc);
  1712. }
  // one iteration per member; the count is bumped before the key is parsed
  1713. loop:
  1714. if(BOOST_JSON_UNLIKELY(++size >
  1715. Handler::max_object_size))
  1716. {
  1717. BOOST_STATIC_CONSTEXPR source_location loc
  1718. = BOOST_CURRENT_LOCATION;
  1719. return fail(cs.begin(), error::object_too_large, &loc);
  1720. }
  // obj3: the key string (std::true_type() presumably selects key
  // handling in parse_string -- its signature is outside this view)
  1721. do_obj3:
  1722. cs = parse_string(cs.begin(), stack_empty, std::true_type(), allow_bad_utf8);
  1723. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1724. return suspend_or_fail(state::obj3, size);
  // obj4: whitespace after the key; expects ':'
  1725. do_obj4:
  1726. cs = detail::count_whitespace(cs.begin(), cs.end());
  1727. if(BOOST_JSON_UNLIKELY(! cs))
  1728. return maybe_suspend(cs.begin(), state::obj4, size);
  1729. if(BOOST_JSON_UNLIKELY(*cs != ':'))
  1730. {
  1731. if(allow_comments && *cs == '/')
  1732. {
  // obj5: comment between key and ':'; afterwards re-scan from obj4
  1733. do_obj5:
  1734. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1735. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1736. return suspend_or_fail(state::obj5, size);
  1737. goto do_obj4;
  1738. }
  1739. BOOST_STATIC_CONSTEXPR source_location loc
  1740. = BOOST_CURRENT_LOCATION;
  1741. return fail(cs.begin(), error::syntax, &loc);
  1742. }
  1743. ++cs;
  // obj6: whitespace after ':'
  1744. do_obj6:
  1745. cs = detail::count_whitespace(cs.begin(), cs.end());
  1746. if(BOOST_JSON_UNLIKELY(! cs))
  1747. return maybe_suspend(cs.begin(), state::obj6, size);
  // obj7: the member value
  1748. do_obj7:
  1749. cs = parse_value(cs.begin(), stack_empty, allow_comments, allow_trailing, allow_bad_utf8);
  1750. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1751. return suspend_or_fail(state::obj7, size);
  // obj8: whitespace after the value; expects ',' or '}'
  1752. do_obj8:
  1753. cs = detail::count_whitespace(cs.begin(), cs.end());
  1754. if(BOOST_JSON_UNLIKELY(! cs))
  1755. return maybe_suspend(cs.begin(), state::obj8, size);
  1756. if(BOOST_JSON_LIKELY(*cs == ','))
  1757. {
  1758. ++cs;
  // obj9: whitespace after ','; expects the next key, or '}' when
  // trailing commas are allowed
  1759. do_obj9:
  1760. cs = detail::count_whitespace(cs.begin(), cs.end());
  1761. if(BOOST_JSON_UNLIKELY(! cs))
  1762. return maybe_suspend(cs.begin(), state::obj9, size);
  1763. // loop for next element
  1764. if(BOOST_JSON_LIKELY(*cs == '\x22'))
  1765. goto loop;
  // anything other than an allowed trailing '}' must be a comment,
  // otherwise it is a syntax error
  1766. if(! allow_trailing || *cs != '}')
  1767. {
  1768. if(allow_comments && *cs == '/')
  1769. {
  // obj10: comment after ','; afterwards re-scan from obj9
  1770. do_obj10:
  1771. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1772. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1773. return suspend_or_fail(state::obj10, size);
  1774. goto do_obj9;
  1775. }
  1776. BOOST_STATIC_CONSTEXPR source_location loc
  1777. = BOOST_CURRENT_LOCATION;
  1778. return fail(cs.begin(), error::syntax, &loc);
  1779. }
  1780. }
  1781. else if(BOOST_JSON_UNLIKELY(*cs != '}'))
  1782. {
  1783. if(allow_comments && *cs == '/')
  1784. {
  // obj11: comment after a value; afterwards re-scan from obj8
  1785. do_obj11:
  1786. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1787. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1788. return suspend_or_fail(state::obj11, size);
  1789. goto do_obj8;
  1790. }
  1791. BOOST_STATIC_CONSTEXPR source_location loc
  1792. = BOOST_CURRENT_LOCATION;
  1793. return fail(cs.begin(), error::syntax, &loc);
  1794. }
  1795. // got closing brace, fall through
  1796. }
  // cs is on the closing '}' here: report the member count, give back
  // the nesting level taken on entry, and step past the brace
  1797. if(BOOST_JSON_UNLIKELY(
  1798. ! h_.on_object_end(size, ec_)))
  1799. return fail(cs.begin());
  1800. ++depth_;
  1801. ++cs;
  1802. return cs.begin();
  1803. }
  1804. //----------------------------------------------------------
  // Parses one JSON array starting at `p`, reporting it to the handler `h_`.
  //
  // Fresh entry expects `*p == '['` (asserted). When `stack_empty` is false
  // and the state stack `st_` is non-empty, the call is a resumption: the
  // saved state and element count are popped and control jumps to the label
  // matching that state.
  //
  // Parameters:
  //   p               - current input position; the end of input is `end_`.
  //   stack_empty     - compile-time flag; when true, resumption is never attempted.
  //   allow_comments  - compile-time flag; when true a '/' where a token is
  //                     expected is handed to parse_comment.
  //   allow_trailing  - when true, ']' is accepted directly after a ','.
  //   allow_bad_utf8  - forwarded to parse_value.
  //
  // Returns the position just past the closing ']' on success. Every other
  // exit returns whatever fail(), maybe_suspend() or suspend_or_fail()
  // return; those helpers are defined outside this block.
  1805. template<class Handler>
  1806. template<
  1807. bool StackEmpty_,
  1808. bool AllowComments_/*,
  1809. bool AllowTrailing_,
  1810. bool AllowBadUTF8_*/>
  1811. const char*
  1812. basic_parser<Handler>::
  1813. parse_array(const char* p,
  1814. std::integral_constant<bool, StackEmpty_> stack_empty,
  1815. std::integral_constant<bool, AllowComments_> allow_comments,
  1816. /*std::integral_constant<bool, AllowTrailing_>*/ bool allow_trailing,
  1817. /*std::integral_constant<bool, AllowBadUTF8_>*/ bool allow_bad_utf8)
  1818. {
  1819. detail::const_stream_wrapper cs(p, end_);
  // number of elements seen so far; saved and restored across suspensions
  1820. std::size_t size;
  1821. if(! stack_empty && ! st_.empty())
  1822. {
  1823. // resume
  1824. state st;
  1825. st_.pop(st);
  1826. st_.pop(size);
  1827. switch(st)
  1828. {
  1829. default: BOOST_JSON_UNREACHABLE();
  1830. case state::arr1: goto do_arr1;
  1831. case state::arr2: goto do_arr2;
  1832. case state::arr3: goto do_arr3;
  1833. case state::arr4: goto do_arr4;
  1834. case state::arr5: goto do_arr5;
  1835. case state::arr6: goto do_arr6;
  1836. }
  1837. }
  1838. BOOST_ASSERT(*cs == '[');
  1839. size = 0;
  // depth_ counts the nesting levels still available; zero means the
  // limit has been reached
  1840. if(BOOST_JSON_UNLIKELY(! depth_))
  1841. {
  1842. BOOST_STATIC_CONSTEXPR source_location loc = BOOST_CURRENT_LOCATION;
  1843. return fail(cs.begin(), error::too_deep, &loc);
  1844. }
  1845. --depth_;
  1846. if(BOOST_JSON_UNLIKELY(
  1847. ! h_.on_array_begin(ec_)))
  1848. return fail(cs.begin());
  1849. ++cs;
  1850. // array:
  1851. // '[' *ws ']'
  1852. // '[' *ws value *ws *[ ',' *ws value *ws ] ']'
  // arr1: whitespace before an element; a ']' here closes the array
  1853. do_arr1:
  1854. cs = detail::count_whitespace(cs.begin(), cs.end());
  1855. if(BOOST_JSON_UNLIKELY(! cs))
  1856. return maybe_suspend(cs.begin(), state::arr1, size);
  1857. if(BOOST_JSON_LIKELY(*cs != ']'))
  1858. {
  // one iteration per element
  1859. loop:
  1860. if(allow_comments && *cs == '/')
  1861. {
  // arr2: comment before an element; afterwards re-scan from arr1.
  // NOTE(review): this path is also taken for a comment that follows
  // a ',', and arr1 accepts ']' without consulting allow_trailing, so
  // input such as `[1, /*c*/ ]` appears to be accepted even when
  // allow_trailing is false -- confirm whether that is intended.
  1862. do_arr2:
  1863. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1864. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1865. return suspend_or_fail(state::arr2, size);
  1866. goto do_arr1;
  1867. }
  // the count is bumped before the element is parsed
  1868. if(BOOST_JSON_UNLIKELY(++size >
  1869. Handler::max_array_size))
  1870. {
  1871. BOOST_STATIC_CONSTEXPR source_location loc
  1872. = BOOST_CURRENT_LOCATION;
  1873. return fail(cs.begin(), error::array_too_large, &loc);
  1874. }
  1875. do_arr3:
  1876. // array is not empty, value required
  1877. cs = parse_value(cs.begin(), stack_empty, allow_comments, allow_trailing, allow_bad_utf8);
  1878. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1879. return suspend_or_fail(state::arr3, size);
  // arr4: whitespace after an element; expects ',' or ']'
  1880. do_arr4:
  1881. cs = detail::count_whitespace(cs.begin(), cs.end());
  1882. if(BOOST_JSON_UNLIKELY(! cs))
  1883. return maybe_suspend(cs.begin(), state::arr4, size);
  1884. if(BOOST_JSON_LIKELY(*cs == ','))
  1885. {
  1886. ++cs;
  // arr5: whitespace after ','
  1887. do_arr5:
  1888. cs = detail::count_whitespace(cs.begin(), cs.end());
  1889. if(BOOST_JSON_UNLIKELY(! cs))
  1890. return maybe_suspend(cs.begin(), state::arr5, size);
  1891. // loop for next element
  // only an allowed trailing ']' skips the jump and falls through
  // to the closing-bracket handling
  1892. if(! allow_trailing || *cs != ']')
  1893. goto loop;
  1894. }
  1895. else if(BOOST_JSON_UNLIKELY(*cs != ']'))
  1896. {
  1897. if(allow_comments && *cs == '/')
  1898. {
  // arr6: comment after an element; afterwards re-scan from arr4
  1899. do_arr6:
  1900. cs = parse_comment(cs.begin(), stack_empty, std::false_type());
  1901. if(BOOST_JSON_UNLIKELY(incomplete(cs)))
  1902. return suspend_or_fail(state::arr6, size);
  1903. goto do_arr4;
  1904. }
  1905. BOOST_STATIC_CONSTEXPR source_location loc
  1906. = BOOST_CURRENT_LOCATION;
  1907. return fail(cs.begin(), error::syntax, &loc);
  1908. }
  1909. // got closing bracket; fall through
  1910. }
  // cs is on the closing ']' here: report the element count, give back
  // the nesting level taken on entry, and step past the bracket
  1911. if(BOOST_JSON_UNLIKELY(
  1912. ! h_.on_array_end(size, ec_)))
  1913. return fail(cs.begin());
  1914. ++depth_;
  1915. ++cs;
  1916. return cs.begin();
  1917. }
  1918. //----------------------------------------------------------
  // Parses one JSON number starting at `p` and reports it to the handler
  // through on_int64, on_uint64 or on_double.
  //
  // Template/tag parameters:
  //   stack_empty - compile-time flag; when true, resumption is never attempted.
  //   first       - tag for the character that introduced the number:
  //                 '-' means a leading minus sign, '0' a leading zero, and
  //                 '+' is the tag this function uses for "leading nonzero
  //                 digit". Any other value leaves all three flags false.
  //   mode        - number_precision::precise converts doubles with
  //                 detail::charconv::from_chars over the number's text;
  //                 number_precision::none skips accumulating the value and
  //                 reports doubles as 0; any other mode converts with
  //                 detail::dec_to_float.
  //
  // The body is a resumable state machine. When input runs out while more
  // may follow, the characters seen so far are passed to
  // h_.on_number_part (and appended to num_buf_ in precise mode) and the
  // current state is saved together with the partially built `num`.
  //
  // Returns the position just past the number on success; other exits
  // return whatever fail(), suspend() or maybe_suspend() return (defined
  // outside this block).
  1919. template<class Handler>
  1920. template<bool StackEmpty_, char First_, number_precision Numbers_>
  1921. const char*
  1922. basic_parser<Handler>::
  1923. parse_number(const char* p,
  1924. std::integral_constant<bool, StackEmpty_> stack_empty,
  1925. std::integral_constant<char, First_> first,
  1926. std::integral_constant<number_precision, Numbers_> mode)
  1927. {
  1928. constexpr bool precise_parsing = mode == number_precision::precise;
  1929. constexpr bool no_parsing = mode == number_precision::none;
  1930. // only one of these will be true if we are not resuming
  1931. // if negative then !zero_first && !nonzero_first
  1932. // if zero_first then !nonzero_first && !negative
  1933. // if nonzero_first then !zero_first && !negative
  1934. bool const negative = first == '-';
  1935. bool const zero_first = first == '0';
  1936. bool const nonzero_first = first == '+';
  1937. detail::const_stream_wrapper cs(p, end_);
  1938. number num;
  // start of the number's text within the current buffer; every handler
  // callback below receives the span {begin, cs.used(begin)}
  1939. const char* begin = cs.begin();
  1940. if(stack_empty || st_.empty())
  1941. {
  1942. num.bias = 0;
  1943. num.exp = 0;
  1944. num.frac = false;
  1945. num_buf_.clear();
  1946. //----------------------------------
  1947. //
  1948. // '-'
  1949. // leading minus sign
  1950. //
  1951. BOOST_ASSERT(cs);
  1952. if(negative)
  1953. ++cs;
  1954. num.neg = negative;
  1955. num.frac = false;
  1956. num.exp = 0;
  1957. num.bias = 0;
  1958. // fast path
  // taken only when at least 33 characters remain, so the reads below
  // are done without per-character end-of-input checks
  1959. if( cs.remain() >= 16 + 1 + 16 ) // digits . digits
  1960. {
  // n1: count of integer-part digits consumed by the fast path
  1961. int n1;
  1962. if( nonzero_first ||
  1963. (negative && *cs != '0') )
  1964. {
  1965. n1 = detail::count_digits( cs.begin() );
  1966. BOOST_ASSERT(n1 >= 0 && n1 <= 16);
  // '-' followed by a non-digit: hand over to the literal parser for
  // the negative-infinity literal when that extension is enabled
  1967. if( negative && n1 == 0 && opt_.allow_infinity_and_nan )
  1968. {
  1969. return parse_literal(
  1970. p - 1, mp11::mp_int<detail::neg_infinity_literal>());
  1971. }
  1972. if( ! nonzero_first && n1 == 0 )
  1973. {
  1974. // digit required
  1975. BOOST_STATIC_CONSTEXPR source_location loc
  1976. = BOOST_CURRENT_LOCATION;
  1977. return fail(cs.begin(), error::syntax, &loc);
  1978. }
  1979. BOOST_IF_CONSTEXPR( !no_parsing )
  1980. num.mant = detail::parse_unsigned( 0, cs.begin(), n1 );
  1981. else
  1982. num.mant = 0;
  1983. cs += n1;
  1984. // integer or floating-point with
  1985. // >= 16 leading digits
  1986. if( n1 == 16 )
  1987. {
  1988. goto do_num2;
  1989. }
  1990. }
  1991. else
  1992. {
  1993. // 0. floating-point or 0e integer
  1994. num.mant = 0;
  1995. n1 = 0;
  1996. ++cs;
  1997. }
  1998. {
  1999. const char c = *cs;
  2000. if(c != '.')
  2001. {
  2002. if((c | 32) == 'e')
  2003. {
  2004. ++cs;
  2005. goto do_exp1;
  2006. }
  // plain integer: negate in two's complement form here, because
  // finish_signed reports num.mant as-is
  2007. BOOST_IF_CONSTEXPR( negative && !no_parsing )
  2008. num.mant = ~num.mant + 1;
  2009. goto finish_signed;
  2010. }
  2011. }
  2012. // floating-point number
  2013. ++cs;
  // n2: count of fraction digits consumed by the fast path
  2014. int n2 = detail::count_digits( cs.begin() );
  2015. BOOST_ASSERT(n2 >= 0 && n2 <= 16);
  2016. if( n2 == 0 )
  2017. {
  2018. // digit required
  2019. BOOST_STATIC_CONSTEXPR source_location loc
  2020. = BOOST_CURRENT_LOCATION;
  2021. return fail(cs.begin(), error::syntax, &loc);
  2022. }
  2023. // floating-point mantissa overflow
  // cs has not been advanced past the fraction digits yet, so the
  // slow path at do_num7 re-reads them one at a time
  2024. if( n1 + n2 >= 19 )
  2025. {
  2026. goto do_num7;
  2027. }
  2028. BOOST_IF_CONSTEXPR( !no_parsing )
  2029. num.mant = detail::parse_unsigned( num.mant, cs.begin(), n2 );
  2030. BOOST_ASSERT(num.bias == 0);
  // each fraction digit folded into the mantissa lowers the decimal
  // exponent bias by one
  2031. num.bias -= n2;
  2032. cs += n2;
  2033. char ch = *cs;
  2034. if( (ch | 32) == 'e' )
  2035. {
  2036. ++cs;
  2037. goto do_exp1;
  2038. }
  2039. else if( ch >= '0' && ch <= '9' )
  2040. {
  2041. goto do_num8;
  2042. }
  2043. goto finish_dub;
  2044. }
  2045. }
  2046. else
  2047. {
  // resuming: restore the partially parsed number and jump to the
  // saved state
  2048. num = num_;
  2049. state st;
  2050. st_.pop(st);
  2051. switch(st)
  2052. {
  2053. default: BOOST_JSON_UNREACHABLE();
  2054. case state::num1: goto do_num1;
  2055. case state::num2: goto do_num2;
  2056. case state::num3: goto do_num3;
  2057. case state::num4: goto do_num4;
  2058. case state::num5: goto do_num5;
  2059. case state::num6: goto do_num6;
  2060. case state::num7: goto do_num7;
  2061. case state::num8: goto do_num8;
  2062. case state::exp1: goto do_exp1;
  2063. case state::exp2: goto do_exp2;
  2064. case state::exp3: goto do_exp3;
  2065. }
  2066. }
  2067. //----------------------------------
  2068. //
  2069. // DIGIT
  2070. // first digit
  2071. //
  2072. do_num1:
  // when the first character is already known to be a digit the
  // end-of-input test is skipped
  2073. if(zero_first || nonzero_first ||
  2074. BOOST_JSON_LIKELY(cs))
  2075. {
  2076. char const c = *cs;
  2077. if(zero_first)
  2078. {
  2079. ++cs;
  2080. num.mant = 0;
  2081. goto do_num6;
  2082. }
  2083. else if(nonzero_first || BOOST_JSON_LIKELY(
  2084. c >= '1' && c <= '9'))
  2085. {
  2086. ++cs;
  2087. num.mant = c - '0';
  2088. }
  2089. else if(BOOST_JSON_UNLIKELY(
  2090. c == '0'))
  2091. {
  2092. ++cs;
  2093. num.mant = 0;
  2094. goto do_num6;
  2095. }
  // '-' followed by a non-digit: continue as the negative-infinity
  // literal, marking one character of it as already matched
  2096. else if( (negative || num.neg) && opt_.allow_infinity_and_nan )
  2097. {
  2098. st_.push(state::lit1);
  2099. cur_lit_ = detail::neg_infinity_literal;
  2100. lit_offset_ = 1;
  2101. return parse_literal(
  2102. cs.begin(), mp11::mp_int<detail::resume_literal>() );
  2103. }
  2104. else
  2105. {
  2106. BOOST_STATIC_CONSTEXPR source_location loc
  2107. = BOOST_CURRENT_LOCATION;
  2108. return fail(cs.begin(), error::syntax, &loc);
  2109. }
  2110. }
  2111. else
  2112. {
  // input ended right after the sign: hand over what was seen and
  // let maybe_suspend decide how to proceed
  2113. if(BOOST_JSON_UNLIKELY(
  2114. ! h_.on_number_part(
  2115. {begin, cs.used(begin)}, ec_)))
  2116. return fail(cs.begin());
  2117. BOOST_IF_CONSTEXPR( precise_parsing )
  2118. num_buf_.append( begin, cs.used(begin) );
  2119. return maybe_suspend(
  2120. cs.begin(), state::num1, num);
  2121. }
  2122. //----------------------------------
  2123. //
  2124. // 1*DIGIT
  2125. // significant digits left of decimal
  2126. //
  // Two copies of the same loop that differ only in the overflow bound:
  // the magnitude of INT64_MIN for negative numbers, UINT64_MAX otherwise.
  // Leaving a loop via `break` means the digit just consumed did not fit
  // into num.mant.
  2127. do_num2:
  2128. if(negative || (!stack_empty && num.neg))
  2129. {
  2130. for(;;)
  2131. {
  2132. if(BOOST_JSON_UNLIKELY(! cs))
  2133. {
  2134. if(BOOST_JSON_UNLIKELY(more_))
  2135. {
  2136. if(BOOST_JSON_UNLIKELY(
  2137. ! h_.on_number_part(
  2138. {begin, cs.used(begin)}, ec_)))
  2139. return fail(cs.begin());
  2140. BOOST_IF_CONSTEXPR( precise_parsing )
  2141. num_buf_.append( begin, cs.used(begin) );
  2142. return suspend(cs.begin(), state::num2, num);
  2143. }
  2144. goto finish_int;
  2145. }
  2146. char const c = *cs;
  2147. if(BOOST_JSON_LIKELY(
  2148. c >= '0' && c <= '9'))
  2149. {
  2150. ++cs;
  2151. // 9223372036854775808 INT64_MIN
  2152. if( num.mant > 922337203685477580 || (
  2153. num.mant == 922337203685477580 && c > '8'))
  2154. break;
  2155. BOOST_IF_CONSTEXPR( !no_parsing )
  2156. num.mant = 10 * num.mant + ( c - '0' );
  2157. continue;
  2158. }
  2159. goto do_num6; // [.eE]
  2160. }
  2161. }
  2162. else
  2163. {
  2164. for(;;)
  2165. {
  2166. if(BOOST_JSON_UNLIKELY(! cs))
  2167. {
  2168. if(BOOST_JSON_UNLIKELY(more_))
  2169. {
  2170. if(BOOST_JSON_UNLIKELY(
  2171. ! h_.on_number_part(
  2172. {begin, cs.used(begin)}, ec_)))
  2173. return fail(cs.begin());
  2174. BOOST_IF_CONSTEXPR( precise_parsing )
  2175. num_buf_.append( begin, cs.used(begin) );
  2176. return suspend(cs.begin(), state::num2, num);
  2177. }
  2178. goto finish_int;
  2179. }
  2180. char const c = *cs;
  2181. if(BOOST_JSON_LIKELY(
  2182. c >= '0' && c <= '9'))
  2183. {
  2184. ++cs;
  2185. // 18446744073709551615 UINT64_MAX
  2186. if( num.mant > 1844674407370955161 || (
  2187. num.mant == 1844674407370955161 && c > '5'))
  2188. break;
  2189. BOOST_IF_CONSTEXPR( !no_parsing )
  2190. num.mant = 10 * num.mant + ( c - '0' );
  2191. }
  2192. else
  2193. {
  2194. goto do_num6; // [.eE]
  2195. }
  2196. }
  2197. }
  // reached only via `break` above: the consumed digit was dropped from
  // the mantissa, so account for it in the decimal exponent bias
  2198. ++num.bias;
  2199. //----------------------------------
  2200. //
  2201. // 1*DIGIT
  2202. // non-significant digits left of decimal
  2203. //
  // each further integer digit only raises the bias; the result from
  // here on is always reported as a double
  2204. do_num3:
  2205. for(;;)
  2206. {
  2207. if(BOOST_JSON_UNLIKELY(! cs))
  2208. {
  2209. if(BOOST_JSON_UNLIKELY(more_))
  2210. {
  2211. if(BOOST_JSON_UNLIKELY(
  2212. ! h_.on_number_part(
  2213. {begin, cs.used(begin)}, ec_)))
  2214. return fail(cs.begin());
  2215. BOOST_IF_CONSTEXPR( precise_parsing )
  2216. num_buf_.append( begin, cs.used(begin) );
  2217. return suspend(cs.begin(), state::num3, num);
  2218. }
  2219. goto finish_dub;
  2220. }
  2221. char const c = *cs;
  2222. if(BOOST_JSON_UNLIKELY(
  2223. c >= '0' && c <= '9'))
  2224. {
  // refuse to let the bias reach INT_MAX
  2225. if(BOOST_JSON_UNLIKELY( num.bias + 1 == INT_MAX ))
  2226. {
  2227. BOOST_STATIC_CONSTEXPR source_location loc
  2228. = BOOST_CURRENT_LOCATION;
  2229. return fail(cs.begin(), error::exponent_overflow, &loc);
  2230. }
  2231. ++cs;
  2232. ++num.bias;
  2233. }
  2234. else if(BOOST_JSON_LIKELY(
  2235. c == '.'))
  2236. {
  2237. ++cs;
  2238. break;
  2239. }
  2240. else if((c | 32) == 'e')
  2241. {
  2242. ++cs;
  2243. goto do_exp1;
  2244. }
  2245. else
  2246. {
  2247. goto finish_dub;
  2248. }
  2249. }
  2250. //----------------------------------
  2251. //
  2252. // DIGIT
  2253. // first non-significant digit
  2254. // to the right of decimal
  2255. //
  2256. do_num4:
  2257. {
  2258. if(BOOST_JSON_UNLIKELY(! cs))
  2259. {
  2260. if(BOOST_JSON_UNLIKELY(
  2261. ! h_.on_number_part(
  2262. {begin, cs.used(begin)}, ec_)))
  2263. return fail(cs.begin());
  2264. BOOST_IF_CONSTEXPR( precise_parsing )
  2265. num_buf_.append( begin, cs.used(begin) );
  2266. return maybe_suspend(
  2267. cs.begin(), state::num4, num);
  2268. }
  2269. char const c = *cs;
  2270. if(BOOST_JSON_LIKELY(
  2271. //static_cast<unsigned char>(c - '0') < 10))
  2272. c >= '0' && c <= '9'))
  2273. {
  2274. ++cs;
  2275. }
  2276. else
  2277. {
  2278. // digit required
  2279. BOOST_STATIC_CONSTEXPR source_location loc
  2280. = BOOST_CURRENT_LOCATION;
  2281. return fail(cs.begin(), error::syntax, &loc);
  2282. }
  2283. }
  2284. //----------------------------------
  2285. //
  2286. // 1*DIGIT
  2287. // non-significant digits
  2288. // to the right of decimal
  2289. //
  // these digits are consumed without touching num.mant or num.bias
  2290. do_num5:
  2291. for(;;)
  2292. {
  2293. if(BOOST_JSON_UNLIKELY(! cs))
  2294. {
  2295. if(BOOST_JSON_UNLIKELY(more_))
  2296. {
  2297. if(BOOST_JSON_UNLIKELY(
  2298. ! h_.on_number_part(
  2299. {begin, cs.used(begin)}, ec_)))
  2300. return fail(cs.begin());
  2301. BOOST_IF_CONSTEXPR( precise_parsing )
  2302. num_buf_.append( begin, cs.used(begin) );
  2303. return suspend(cs.begin(), state::num5, num);
  2304. }
  2305. goto finish_dub;
  2306. }
  2307. char const c = *cs;
  2308. if(BOOST_JSON_LIKELY(
  2309. c >= '0' && c <= '9'))
  2310. {
  2311. ++cs;
  2312. }
  2313. else if((c | 32) == 'e')
  2314. {
  2315. ++cs;
  2316. goto do_exp1;
  2317. }
  2318. else
  2319. {
  2320. goto finish_dub;
  2321. }
  2322. }
  2323. //----------------------------------
  2324. //
  2325. // [.eE]
  2326. //
  // the integer part fit into num.mant; anything other than '.' or an
  // exponent marker ends the number as an integer
  2327. do_num6:
  2328. {
  2329. if(BOOST_JSON_UNLIKELY(! cs))
  2330. {
  2331. if(BOOST_JSON_UNLIKELY(more_))
  2332. {
  2333. if(BOOST_JSON_UNLIKELY(
  2334. ! h_.on_number_part(
  2335. {begin, cs.used(begin)}, ec_)))
  2336. return fail(cs.begin());
  2337. BOOST_IF_CONSTEXPR( precise_parsing )
  2338. num_buf_.append( begin, cs.used(begin) );
  2339. return suspend(cs.begin(), state::num6, num);
  2340. }
  2341. goto finish_int;
  2342. }
  2343. char const c = *cs;
  2344. if(BOOST_JSON_LIKELY(
  2345. c == '.'))
  2346. {
  2347. ++cs;
  2348. }
  2349. else if((c | 32) == 'e')
  2350. {
  2351. ++cs;
  2352. goto do_exp1;
  2353. }
  2354. else
  2355. {
  2356. goto finish_int;
  2357. }
  2358. }
  2359. //----------------------------------
  2360. //
  2361. // DIGIT
  2362. // first significant digit
  2363. // to the right of decimal
  2364. //
  // only verifies that a digit follows the '.'; the digit itself is
  // consumed by the loop at do_num8
  2365. do_num7:
  2366. {
  2367. if(BOOST_JSON_UNLIKELY(! cs))
  2368. {
  2369. if(BOOST_JSON_UNLIKELY(more_))
  2370. {
  2371. if(BOOST_JSON_UNLIKELY(
  2372. ! h_.on_number_part(
  2373. {begin, cs.used(begin)}, ec_)))
  2374. return fail(cs.begin());
  2375. BOOST_IF_CONSTEXPR( precise_parsing )
  2376. num_buf_.append( begin, cs.used(begin) );
  2377. return suspend(cs.begin(), state::num7, num);
  2378. }
  2379. // digit required
  2380. BOOST_STATIC_CONSTEXPR source_location loc
  2381. = BOOST_CURRENT_LOCATION;
  2382. return fail(cs.begin(), error::syntax, &loc);
  2383. }
  2384. char const c = *cs;
  2385. if(BOOST_JSON_UNLIKELY(
  2386. c < '0' || c > '9'))
  2387. {
  2388. // digit required
  2389. BOOST_STATIC_CONSTEXPR source_location loc
  2390. = BOOST_CURRENT_LOCATION;
  2391. return fail(cs.begin(), error::syntax, &loc);
  2392. }
  2393. }
  2394. //----------------------------------
  2395. //
  2396. // 1*DIGIT
  2397. // significant digits
  2398. // to the right of decimal
  2399. //
  2400. do_num8:
  2401. for(;;)
  2402. {
  2403. if(BOOST_JSON_UNLIKELY(! cs))
  2404. {
  2405. if(BOOST_JSON_UNLIKELY(more_))
  2406. {
  2407. if(BOOST_JSON_UNLIKELY(
  2408. ! h_.on_number_part(
  2409. {begin, cs.used(begin)}, ec_)))
  2410. return fail(cs.begin());
  2411. BOOST_IF_CONSTEXPR( precise_parsing )
  2412. num_buf_.append( begin, cs.used(begin) );
  2413. return suspend(cs.begin(), state::num8, num);
  2414. }
  2415. goto finish_dub;
  2416. }
  2417. char const c = *cs;
  2418. if(BOOST_JSON_LIKELY(
  2419. c >= '0' && c <= '9'))
  2420. {
  2421. ++cs;
  // fold the digit in only while the mantissa is at most 2^53-1;
  // beyond that (or when values are not being parsed at all) the
  // remaining fraction digits are skipped via do_num5
  2422. if(!no_parsing && BOOST_JSON_LIKELY(
  2423. num.mant <= 9007199254740991)) // 2^53-1
  2424. {
  // refuse to let the bias reach INT_MIN
  2425. if(BOOST_JSON_UNLIKELY( num.bias - 1 == INT_MIN ))
  2426. {
  2427. BOOST_STATIC_CONSTEXPR source_location loc
  2428. = BOOST_CURRENT_LOCATION;
  2429. return fail(cs.begin(), error::exponent_overflow, &loc);
  2430. }
  2431. --num.bias;
  2432. num.mant = 10 * num.mant + ( c - '0' );
  2433. }
  2434. else
  2435. {
  2436. goto do_num5;
  2437. }
  2438. }
  2439. else if((c | 32) == 'e')
  2440. {
  2441. ++cs;
  2442. goto do_exp1;
  2443. }
  2444. else
  2445. {
  2446. goto finish_dub;
  2447. }
  2448. }
  2449. //----------------------------------
  2450. //
  2451. // *[+-]
  2452. //
  2453. do_exp1:
  2454. if(BOOST_JSON_UNLIKELY(! cs))
  2455. {
  2456. if(BOOST_JSON_UNLIKELY(
  2457. ! h_.on_number_part(
  2458. {begin, cs.used(begin)}, ec_)))
  2459. return fail(cs.begin());
  2460. BOOST_IF_CONSTEXPR( precise_parsing )
  2461. num_buf_.append( begin, cs.used(begin) );
  2462. return maybe_suspend(
  2463. cs.begin(), state::exp1, num);
  2464. }
  2465. if(*cs == '+')
  2466. {
  2467. ++cs;
  2468. }
  2469. else if(*cs == '-')
  2470. {
  2471. ++cs;
  // num.frac doubles as the "exponent is negative" flag; it selects
  // the sign applied to num.exp at finish_dub
  2472. num.frac = true;
  2473. }
  2474. //----------------------------------
  2475. //
  2476. // DIGIT
  2477. // first digit of the exponent
  2478. //
  2479. do_exp2:
  2480. {
  2481. if(BOOST_JSON_UNLIKELY(! cs))
  2482. {
  2483. if(BOOST_JSON_UNLIKELY(more_))
  2484. {
  2485. if(BOOST_JSON_UNLIKELY(
  2486. ! h_.on_number_part(
  2487. {begin, cs.used(begin)}, ec_)))
  2488. return fail(cs.begin());
  2489. BOOST_IF_CONSTEXPR( precise_parsing )
  2490. num_buf_.append( begin, cs.used(begin) );
  2491. return suspend(cs.begin(), state::exp2, num);
  2492. }
  2493. // digit required
  2494. BOOST_STATIC_CONSTEXPR source_location loc
  2495. = BOOST_CURRENT_LOCATION;
  2496. return fail(cs.begin(), error::syntax, &loc);
  2497. }
  2498. char const c = *cs;
  2499. if(BOOST_JSON_UNLIKELY(
  2500. c < '0' || c > '9'))
  2501. {
  2502. // digit required
  2503. BOOST_STATIC_CONSTEXPR source_location loc
  2504. = BOOST_CURRENT_LOCATION;
  2505. return fail(cs.begin(), error::syntax, &loc);
  2506. }
  2507. ++cs;
  2508. num.exp = c - '0';
  2509. }
  2510. //----------------------------------
  2511. //
  2512. // 1*DIGIT
  2513. // subsequent digits in the exponent
  2514. //
  // The loop body runs to its end (no `continue`) once a non-digit is
  // seen or the input ends with nothing more to come; the tail of the
  // body then range-checks bias and exponent and jumps to finish_dub.
  2515. do_exp3:
  2516. for(;;)
  2517. {
  2518. if(BOOST_JSON_UNLIKELY(! cs))
  2519. {
  2520. if(BOOST_JSON_UNLIKELY(more_))
  2521. {
  2522. if(BOOST_JSON_UNLIKELY(
  2523. ! h_.on_number_part(
  2524. {begin, cs.used(begin)}, ec_)))
  2525. return fail(cs.begin());
  2526. BOOST_IF_CONSTEXPR( precise_parsing )
  2527. num_buf_.append( begin, cs.used(begin) );
  2528. return suspend(cs.begin(), state::exp3, num);
  2529. }
  2530. }
  2531. else
  2532. {
  2533. char const c = *cs;
  2534. if(BOOST_JSON_LIKELY( c >= '0' && c <= '9' ))
  2535. {
  // saturate at INT_MAX instead of overflowing the exponent
  2536. if(BOOST_JSON_UNLIKELY(
  2537. // 2147483647 INT_MAX
  2538. num.exp > 214748364 ||
  2539. (num.exp == 214748364 && c > '7')
  2540. ))
  2541. num.exp = INT_MAX;
  2542. else BOOST_IF_CONSTEXPR( !no_parsing )
  2543. num.exp = 10 * num.exp + ( c - '0' );
  2544. ++cs;
  2545. continue;
  2546. }
  2547. }
  2548. BOOST_ASSERT(num.exp >= 0);
  // negative exponent: check that bias - exp stays above INT_MIN
  2549. if ( num.frac )
  2550. {
  2551. if(BOOST_JSON_UNLIKELY( num.bias < (INT_MIN + num.exp) ))
  2552. {
  2553. // if exponent overflowed, bias is a very large negative
  2554. // number, and mantissa isn't zero, then we cannot parse the
  2555. // number correctly
  2556. if(BOOST_JSON_UNLIKELY(
  2557. (num.exp == INT_MAX) &&
  2558. (num.bias < 0) &&
  2559. (num.exp + num.bias < 308) &&
  2560. num.mant ))
  2561. {
  2562. BOOST_STATIC_CONSTEXPR source_location loc
  2563. = BOOST_CURRENT_LOCATION;
  2564. return fail(cs.begin(), error::exponent_overflow, &loc);
  2565. }
  2566. num.bias = 0;
  2567. num.exp = INT_MAX;
  2568. }
  2569. }
  // positive exponent: check that bias + exp stays below INT_MAX
  2570. else if (BOOST_JSON_UNLIKELY( num.bias > (INT_MAX - num.exp) ))
  2571. {
  2572. // if exponent overflowed, bias is a very large positive number,
  2573. // and mantissa isn't zero, then we cannot parse the
  2574. // number correctly
  2575. if(BOOST_JSON_UNLIKELY(
  2576. (num.exp == INT_MAX) &&
  2577. (num.bias > 0) &&
  2578. (num.exp - num.bias < 308) &&
  2579. num.mant ))
  2580. {
  2581. BOOST_STATIC_CONSTEXPR source_location loc
  2582. = BOOST_CURRENT_LOCATION;
  2583. return fail(cs.begin(), error::exponent_overflow, &loc);
  2584. }
  2585. num.bias = 0;
  2586. num.exp = INT_MAX;
  2587. }
  2588. goto finish_dub;
  2589. }
  // Integer result. Negative numbers hold their magnitude in num.mant
  // and are negated in two's complement form for on_int64; non-negative
  // values go to on_int64 when they fit in INT64_MAX, else to on_uint64.
  2590. finish_int:
  2591. if(negative || (!stack_empty && num.neg))
  2592. {
  2593. if(BOOST_JSON_UNLIKELY(
  2594. ! h_.on_int64(static_cast<
  2595. int64_t>(~num.mant + 1), {begin, cs.used(begin)}, ec_)))
  2596. return fail(cs.begin());
  2597. return cs.begin();
  2598. }
  2599. if(num.mant <= INT64_MAX)
  2600. {
  // also the fast path's entry point, which arrives with num.mant
  // already carrying the final bit pattern
  2601. finish_signed:
  2602. if(BOOST_JSON_UNLIKELY(
  2603. ! h_.on_int64(static_cast<
  2604. int64_t>(num.mant), {begin, cs.used(begin)}, ec_)))
  2605. return fail(cs.begin());
  2606. return cs.begin();
  2607. }
  2608. if(BOOST_JSON_UNLIKELY(
  2609. ! h_.on_uint64(num.mant, {begin, cs.used(begin)}, ec_)))
  2610. return fail(cs.begin());
  2611. return cs.begin();
  // Floating-point result; how `d` is computed depends on the
  // precision mode.
  2612. finish_dub:
  2613. double d;
  2614. std::size_t const size = cs.used(begin);
  2615. BOOST_ASSERT( !num_buf_.size() || precise_parsing );
  2616. BOOST_IF_CONSTEXPR( precise_parsing )
  2617. {
  2618. char const* data = begin;
  2619. std::size_t full_size = size;
  2620. // if we previously suspended or if the current input ends with the
  2621. // number, we need to copy the current part of the number to the
  2622. // temporary buffer
  2623. if(BOOST_JSON_UNLIKELY( num_buf_.size() ))
  2624. {
  2625. data = num_buf_.append( begin, size );
  2626. full_size = num_buf_.size();
  2627. }
  2628. auto const err = detail::charconv::from_chars(
  2629. data, data + full_size, d );
  2630. BOOST_ASSERT( err.ec != std::errc::invalid_argument );
  2631. BOOST_ASSERT( err.ptr == data + full_size );
  2632. (void)err;
  2633. }
  2634. else BOOST_IF_CONSTEXPR( no_parsing )
  2635. d = 0;
  2636. else
  // decimal exponent = bias plus the explicit exponent, with its sign
  // taken from num.frac
  2637. d = detail::dec_to_float(
  2638. num.mant,
  2639. num.bias + (num.frac ?
  2640. -num.exp : num.exp),
  2641. num.neg);
  // the handler receives only the part of the text that lies in the
  // current buffer
  2642. if(BOOST_JSON_UNLIKELY(
  2643. ! h_.on_double(d, {begin, size}, ec_)))
  2644. return fail(cs.begin());
  2645. return cs.begin();
  2646. }
  2647. //----------------------------------------------------------
  // Constructs the parser: `args` are perfectly forwarded to the
  // constructor of the handler member `h_`, and `opt` is copied into `opt_`.
  2648. template<class Handler>
  2649. template<class... Args>
  2650. basic_parser<Handler>::
  2651. basic_parser(
  2652. parse_options const& opt,
  2653. Args&&... args)
  2654. : h_(std::forward<Args>(args)...)
  2655. , opt_(opt)
  2656. {
  2657. }
  2658. //----------------------------------------------------------
  // Returns the parser to its initial state so a new document can be
  // parsed: clears the stored error, the saved-state stack and the number
  // buffer, and restores the more_/done_/clean_ flags to their starting
  // values. The handler `h_` and the options `opt_` are left untouched.
  2659. template<class Handler>
  2660. void
  2661. basic_parser<Handler>::
  2662. reset() noexcept
  2663. {
  2664. ec_ = {};
  2665. st_.clear();
  2666. more_ = true;
  2667. done_ = false;
  2668. clean_ = true;
  2669. num_buf_.clear();
  2670. }
  // Puts the parser into a failed state with the error `ec`.
  // If `ec` does not hold an error, error::incomplete is stored instead so
  // that the parser always ends up with a non-empty error code. In both
  // cases `done_` is cleared.
  2671. template<class Handler>
  2672. void
  2673. basic_parser<Handler>::
  2674. fail(system::error_code ec) noexcept
  2675. {
  2676. if(! ec)
  2677. {
  2678. // assign an arbitrary
  2679. // error code to prevent UB
  2680. BOOST_JSON_FAIL(ec_, error::incomplete);
  2681. }
  2682. else
  2683. {
  2684. ec_ = ec;
  2685. }
  2686. done_ = false;
  2687. }
  2688. //----------------------------------------------------------
  // Feeds one buffer of input to the parser.
  //
  // Parameters:
  //   more - stored in `more_`; true when further input may follow this buffer.
  //   data - start of the buffer.
  //   size - number of characters in the buffer.
  //   ec   - receives the parser's error state (`ec_`) on return.
  //
  // Returns the number of characters consumed: the offset of the stop
  // position reported by parse_document, or `size` when parse_document
  // returned sentinel(). Returns 0 without parsing when an error is already
  // stored (including the error synthesized after a previous call exited by
  // exception) or when the handler's on_document_begin fails.
  2689. template<class Handler>
  2690. std::size_t
  2691. basic_parser<Handler>::
  2692. write_some(
  2693. bool more,
  2694. char const* data,
  2695. std::size_t size,
  2696. system::error_code& ec)
  2697. {
  2698. // see if we exited via exception
  2699. // on the last call to write_some
  // clean_ is cleared below before parsing and set again only on the
  // normal exit path at the end of this function
  2700. if(! clean_)
  2701. {
  2702. // prevent UB
  2703. if(! ec_)
  2704. {
  2705. BOOST_JSON_FAIL(ec_, error::exception);
  2706. }
  2707. }
  2708. if(ec_)
  2709. {
  2710. // error is sticky
  2711. ec = ec_;
  2712. return 0;
  2713. }
  2714. clean_ = false;
  2715. more_ = more;
  2716. end_ = data + size;
  2717. const char* p;
  // an empty state stack means no suspended parse is pending
  2718. if(BOOST_JSON_LIKELY(st_.empty()))
  2719. {
  2720. // first time
  2721. depth_ = opt_.max_depth;
  2722. if(BOOST_JSON_UNLIKELY(
  2723. ! h_.on_document_begin(ec_)))
  2724. {
  2725. ec = ec_;
  2726. return 0;
  2727. }
  2728. p = parse_document(data, std::true_type());
  2729. }
  2730. else
  2731. {
  // resume the suspended parse using the saved state stack
  2732. p = parse_document(data, std::false_type());
  2733. }
  // a non-sentinel result is the position where parsing stopped
  // without error; notify the handler once that the document ended
  2734. if(BOOST_JSON_LIKELY(p != sentinel()))
  2735. {
  2736. BOOST_ASSERT(! ec_);
  2737. if(! done_)
  2738. {
  2739. done_ = true;
  2740. h_.on_document_end(ec_);
  2741. }
  2742. }
  2743. else
  2744. {
  // sentinel without a stored error: the input ran out mid-parse
  2745. if(! ec_)
  2746. {
  2747. if(! more_)
  2748. {
  2749. BOOST_JSON_FAIL(ec_, error::incomplete);
  2750. }
  2751. else if(! st_.empty())
  2752. {
  2753. // consume as much trailing whitespace in
  2754. // the JSON document as possible, but still
  2755. // consider the parse complete
  2756. state st;
  2757. st_.peek(st);
  2758. if( st == state::doc3 &&
  2759. ! done_)
  2760. {
  2761. done_ = true;
  2762. h_.on_document_end(ec_);
  2763. }
  2764. }
  2765. }
  // report the whole buffer as consumed
  2766. p = end_;
  2767. }
  2768. ec = ec_;
  2769. clean_ = true;
  2770. return p - data;
  2771. }
  // Overload of write_some that reports errors through a std::error_code.
  // It calls the system::error_code overload with a local error object,
  // assigns that object to `ec`, and returns the same consumed count.
  2772. template<class Handler>
  2773. std::size_t
  2774. basic_parser<Handler>::
  2775. write_some(
  2776. bool more,
  2777. char const* data,
  2778. std::size_t size,
  2779. std::error_code& ec)
  2780. {
  2781. system::error_code jec;
  2782. std::size_t const result = write_some(more, data, size, jec);
  2783. ec = jec;
  2784. return result;
  2785. }
  2786. #endif
  2787. } // namespace json
  2788. } // namespace boost
  2789. #ifdef _MSC_VER
  2790. #pragma warning(pop)
  2791. #endif
  2792. #endif