llmessagetemplateparser.cpp 16 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734
  1. /**
  2. * @file llmessagetemplateparser.cpp
  3. * @brief LLMessageTemplateParser implementation
  4. *
  5. * $LicenseInfo:firstyear=2007&license=viewergpl$
  6. *
  7. * Copyright (c) 2007-2009, Linden Research, Inc.
  8. *
  9. * Second Life Viewer Source Code
  10. * The source code in this file ("Source Code") is provided by Linden Lab
  11. * to you under the terms of the GNU General Public License, version 2.0
  12. * ("GPL"), unless you have obtained a separate licensing agreement
  13. * ("Other License"), formally executed by you and Linden Lab. Terms of
  14. * the GPL can be found in doc/GPL-license.txt in this distribution, or
  15. * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
  16. *
  17. * There are special exceptions to the terms and conditions of the GPL as
  18. * it is applied to this Source Code. View the full text of the exception
  19. * in the file doc/FLOSS-exception.txt in this software distribution, or
  20. * online at
  21. * http://secondlifegrid.net/programs/open_source/licensing/flossexception
  22. *
  23. * By copying, modifying or distributing this software, you acknowledge
  24. * that you have read and understood your obligations described above,
  25. * and agree to abide by those obligations.
  26. *
  27. * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
  28. * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
  29. * COMPLETENESS OR PERFORMANCE.
  30. * $/LicenseInfo$
  31. */
  32. #include "linden_common.h"
  33. #include "boost/tokenizer.hpp"
  34. #include "llmessagetemplateparser.h"
  35. // What follows is a bunch of C functions to do validation.
  36. // Let's support a small subset of regular expressions here
  37. // Syntax is a string made up of:
  38. // a - checks against alphanumeric ([A-Za-z0-9])
  39. // c - checks against character ([A-Za-z])
  40. // f - checks against first variable character ([A-Za-z_])
  41. // v - checks against variable ([A-Za-z0-9_])
  42. // s - checks against sign of integer ([-0-9])
  43. // d - checks against integer digit ([0-9])
  44. // * - repeat last check
  // Checker for pattern letter 'a': accepts ASCII letters and decimal digits
  // ([A-Za-z0-9]).
  bool b_return_alphanumeric_ok(char c)
  {
      const bool is_upper = (c >= 'A' && c <= 'Z');
      const bool is_lower = (c >= 'a' && c <= 'z');
      const bool is_digit = (c >= '0' && c <= '9');
      return is_upper || is_lower || is_digit;
  }
  // Checker for pattern letter 'c': accepts ASCII letters only ([A-Za-z]).
  bool b_return_character_ok(char c)
  {
      if (c >= 'A' && c <= 'Z')
      {
          return true;
      }
      if (c >= 'a' && c <= 'z')
      {
          return true;
      }
      return false;
  }
  // Checker for pattern letter 'f': accepts a character that may begin a
  // variable name, i.e. an ASCII letter or an underscore ([A-Za-z_]).
  bool b_return_first_variable_ok(char c)
  {
      if (c == '_')
      {
          return true;
      }
      const bool is_upper = (c >= 'A' && c <= 'Z');
      const bool is_lower = (c >= 'a' && c <= 'z');
      return is_upper || is_lower;
  }
  // Checker for pattern letter 'v': accepts any character allowed inside a
  // variable name, i.e. an ASCII letter, a decimal digit or an underscore
  // ([A-Za-z0-9_]).
  bool b_return_variable_ok(char c)
  {
      if (c >= '0' && c <= '9')
      {
          return true;
      }
      if (c >= 'A' && c <= 'Z')
      {
          return true;
      }
      if (c >= 'a' && c <= 'z')
      {
          return true;
      }
      return c == '_';
  }
  // Checker for pattern letter 's': accepts a minus sign or a decimal digit
  // ([-0-9]), i.e. what may appear as the first character of a signed integer.
  bool b_return_signed_integer_ok(char c)
  {
      if (c == '-')
      {
          return true;
      }
      return !(c < '0' || c > '9');
  }
  // Checker for pattern letter 'd': accepts a single decimal digit ([0-9]).
  bool b_return_integer_ok(char c)
  {
      // Equivalent to '0' <= c <= '9', expressed as "not outside the range".
      return !(c < '0' || c > '9');
  }
  77. bool (*gParseCheckCharacters[])(char c) =
  78. {
  79. b_return_alphanumeric_ok,
  80. b_return_character_ok,
  81. b_return_first_variable_ok,
  82. b_return_variable_ok,
  83. b_return_signed_integer_ok,
  84. b_return_integer_ok
  85. };
  86. S32 get_checker_number(char checker)
  87. {
  88. switch (checker)
  89. {
  90. case 'a':
  91. return 0;
  92. case 'c':
  93. return 1;
  94. case 'f':
  95. return 2;
  96. case 'v':
  97. return 3;
  98. case 's':
  99. return 4;
  100. case 'd':
  101. return 5;
  102. case '*':
  103. return 9999;
  104. default:
  105. return -1;
  106. }
  107. }
  108. // Checks token based on passed simplified regular expression
  109. bool b_check_token(const char* token, const char* regexp)
  110. {
  111. S32 tptr, rptr = 0;
  112. S32 current_checker, next_checker = 0;
  113. current_checker = get_checker_number(regexp[rptr++]);
  114. if (current_checker == -1)
  115. {
  116. llerrs << "Invalid regular expression value!" << llendl;
  117. return false;
  118. }
  119. if (current_checker == 9999)
  120. {
  121. llerrs << "Regular expression can't start with *!" << llendl;
  122. return false;
  123. }
  124. for (tptr = 0; token[tptr]; ++tptr)
  125. {
  126. if (current_checker == -1)
  127. {
  128. llerrs << "Input exceeds regular expression!\nDid you forget a *?"
  129. << llendl;
  130. return false;
  131. }
  132. if (!gParseCheckCharacters[current_checker](token[tptr]))
  133. {
  134. return false;
  135. }
  136. if (next_checker != 9999)
  137. {
  138. next_checker = get_checker_number(regexp[rptr++]);
  139. if (next_checker != 9999)
  140. {
  141. current_checker = next_checker;
  142. }
  143. }
  144. }
  145. return true;
  146. }
  147. // C variable can be made up of upper or lower case letters, underscores, or
  148. // numbers, but can't start with a number
  149. bool b_variable_ok(const char* token)
  150. {
  151. if (b_check_token(token, "fv*"))
  152. {
  153. return true;
  154. }
  155. llwarns << "Token '" << token << "' is not a variable !" << llendl;
  156. return false;
  157. }
  158. // An integer is made up of the digits 0-9 and may be preceded by a '-'
  159. bool b_integer_ok(const char* token)
  160. {
  161. if (b_check_token(token, "sd*"))
  162. {
  163. return true;
  164. }
  165. llwarns << "Token is not an integer !" << llendl;
  166. return false;
  167. }
  168. // An integer is made up of the digits 0-9
  169. bool b_positive_integer_ok(const char* token)
  170. {
  171. if (b_check_token(token, "d*"))
  172. {
  173. return true;
  174. }
  175. llwarns << "Token is not an integer !" << llendl;
  176. return false;
  177. }
  178. // Done with C functions, here's the tokenizer.
  179. typedef boost::tokenizer<boost::char_separator<char> > tokenizer;
  180. LLTemplateTokenizer::LLTemplateTokenizer(const std::string& contents)
  181. : mStarted(false),
  182. mTokens()
  183. {
  184. boost::char_separator<char> newline("\r\n", "", boost::keep_empty_tokens);
  185. boost::char_separator<char> spaces(" \t");
  186. U32 line_counter = 1;
  187. tokenizer line_tokens(contents, newline);
  188. for (tokenizer::iterator line_iter = line_tokens.begin();
  189. line_iter != line_tokens.end(); ++line_iter, ++line_counter)
  190. {
  191. tokenizer word_tokens(*line_iter, spaces);
  192. for (tokenizer::iterator word_iter = word_tokens.begin();
  193. word_iter != word_tokens.end(); ++word_iter)
  194. {
  195. if ((*word_iter)[0] == '/')
  196. {
  197. break; // skip to end of line on comments
  198. }
  199. positioned_token pt;// = new positioned_token();
  200. pt.str = std::string(*word_iter);
  201. pt.line = line_counter;
  202. mTokens.push_back(pt);
  203. }
  204. }
  205. mCurrent = mTokens.begin();
  206. }
  207. void LLTemplateTokenizer::inc()
  208. {
  209. if (atEOF())
  210. {
  211. error("trying to increment token of EOF");
  212. }
  213. else if (mStarted)
  214. {
  215. ++mCurrent;
  216. }
  217. else
  218. {
  219. mStarted = true;
  220. mCurrent = mTokens.begin();
  221. }
  222. }
  223. void LLTemplateTokenizer::dec()
  224. {
  225. if (mCurrent == mTokens.begin())
  226. {
  227. if (mStarted)
  228. {
  229. mStarted = false;
  230. }
  231. else
  232. {
  233. error("trying to decrement past beginning of file");
  234. }
  235. }
  236. else
  237. {
  238. --mCurrent;
  239. }
  240. }
  241. std::string LLTemplateTokenizer::get() const
  242. {
  243. if (atEOF())
  244. {
  245. error("trying to get EOF");
  246. }
  247. return mCurrent->str;
  248. }
  249. U32 LLTemplateTokenizer::line() const
  250. {
  251. return atEOF() ? 0 : mCurrent->line;
  252. }
  253. bool LLTemplateTokenizer::atEOF() const
  254. {
  255. return mCurrent == mTokens.end();
  256. }
  257. std::string LLTemplateTokenizer::next()
  258. {
  259. inc();
  260. return get();
  261. }
  262. bool LLTemplateTokenizer::want(const std::string& token)
  263. {
  264. if (atEOF())
  265. {
  266. return false;
  267. }
  268. inc();
  269. if (atEOF())
  270. {
  271. return false;
  272. }
  273. if (get() != token)
  274. {
  275. dec(); // back up a step
  276. return false;
  277. }
  278. return true;
  279. }
  280. bool LLTemplateTokenizer::wantEOF()
  281. {
  282. // See if the next token is EOF
  283. if (atEOF())
  284. {
  285. return true;
  286. }
  287. inc();
  288. if (!atEOF())
  289. {
  290. dec(); // Back up a step
  291. return false;
  292. }
  293. return true;
  294. }
  295. void LLTemplateTokenizer::error(std::string message) const
  296. {
  297. if (atEOF())
  298. {
  299. llerrs << "Unexpected end of file: " << message << llendl;
  300. }
  301. else
  302. {
  303. llerrs << "Problem parsing message template at line " << line()
  304. << ", with token '" << get() << "' : " << message << llendl;
  305. }
  306. }
  307. // Done with tokenizer, next is the parser.
  308. LLTemplateParser::LLTemplateParser(LLTemplateTokenizer& tokens)
  309. : mVersion(0.f),
  310. mMessages()
  311. {
  312. // The version number should be the first thing in the file
  313. if (tokens.want("version"))
  314. {
  315. // version number
  316. std::string vers_string = tokens.next();
  317. mVersion = (F32)atof(vers_string.c_str());
  318. llinfos << "### Message template version " << mVersion << " ###"
  319. << llendl;
  320. }
  321. else
  322. {
  323. llerrs << "Version must be first in the message template, found "
  324. << tokens.next() << llendl;
  325. }
  326. while (LLMessageTemplate* templatep = parseMessage(tokens))
  327. {
  328. if (templatep->getDeprecation() != MD_DEPRECATED)
  329. {
  330. mMessages.push_back(templatep);
  331. }
  332. else
  333. {
  334. delete templatep;
  335. }
  336. }
  337. if (!tokens.wantEOF())
  338. {
  339. llerrs << "Expected end of template or a message, instead found: "
  340. << tokens.next() << " at " << tokens.line() << llendl;
  341. }
  342. }
  343. F32 LLTemplateParser::getVersion() const
  344. {
  345. return mVersion;
  346. }
  347. LLTemplateParser::message_iterator LLTemplateParser::getMessagesBegin() const
  348. {
  349. return mMessages.begin();
  350. }
  351. LLTemplateParser::message_iterator LLTemplateParser::getMessagesEnd() const
  352. {
  353. return mMessages.end();
  354. }
  355. // static
  356. LLMessageTemplate* LLTemplateParser::parseMessage(LLTemplateTokenizer& tokens)
  357. {
  358. if (!tokens.want("{"))
  359. {
  360. return NULL;
  361. }
  362. // Name first
  363. std::string template_name = tokens.next();
  364. // Is name a legit C variable name ?
  365. if (!b_variable_ok(template_name.c_str()))
  366. {
  367. llerrs << "Not legit variable name: " << template_name << " at "
  368. << tokens.line() << llendl;
  369. }
  370. LLMessageTemplate* templatep = NULL;
  371. // OK, now get Frequency ("High", "Medium", or "Low")
  372. EMsgFrequency frequency = MFT_LOW;
  373. std::string freq_string = tokens.next();
  374. if (freq_string == "High")
  375. {
  376. frequency = MFT_HIGH;
  377. }
  378. else if (freq_string == "Medium")
  379. {
  380. frequency = MFT_MEDIUM;
  381. }
  382. else if (freq_string == "Low" || freq_string == "Fixed")
  383. {
  384. frequency = MFT_LOW;
  385. }
  386. else
  387. {
  388. llerrs << "Expected frequency, got " << freq_string << " at "
  389. << tokens.line() << llendl;
  390. }
  391. // *TODO: more explicit checking here please
  392. U32 message_number = strtoul(tokens.next().c_str(), NULL, 0);
  393. switch (frequency)
  394. {
  395. case MFT_HIGH:
  396. break;
  397. case MFT_MEDIUM:
  398. message_number = (255 << 8) | message_number;
  399. break;
  400. case MFT_LOW:
  401. message_number = (255 << 24) | (255 << 16) | message_number;
  402. break;
  403. default:
  404. llerrs << "Unknown frequency enum: " << frequency << llendl;
  405. }
  406. templatep = new LLMessageTemplate(template_name.c_str(), message_number,
  407. frequency);
  408. // Now get trust ("Trusted", "NotTrusted")
  409. std::string trust = tokens.next();
  410. if (trust == "Trusted")
  411. {
  412. templatep->setTrust(MT_TRUST);
  413. }
  414. else if (trust == "NotTrusted")
  415. {
  416. templatep->setTrust(MT_NOTRUST);
  417. }
  418. else
  419. {
  420. llerrs << "Bad trust " << trust << " at " << tokens.line() << llendl;
  421. }
  422. // Get encoding
  423. std::string encoding = tokens.next();
  424. if (encoding == "Unencoded")
  425. {
  426. templatep->setEncoding(ME_UNENCODED);
  427. }
  428. else if (encoding == "Zerocoded")
  429. {
  430. templatep->setEncoding(ME_ZEROCODED);
  431. }
  432. else
  433. {
  434. llerrs << "Bad encoding " << encoding << " at " << tokens.line()
  435. << llendl;
  436. }
  437. // Get deprecation
  438. if (tokens.want("Deprecated"))
  439. {
  440. templatep->setDeprecation(MD_DEPRECATED);
  441. }
  442. else if (tokens.want("UDPDeprecated"))
  443. {
  444. templatep->setDeprecation(MD_UDPDEPRECATED);
  445. }
  446. else if (tokens.want("UDPBlackListed"))
  447. {
  448. templatep->setDeprecation(MD_UDPBLACKLISTED);
  449. }
  450. else if (tokens.want("NotDeprecated"))
  451. {
  452. // This is the default value, but it can't hurt to set it twice
  453. templatep->setDeprecation(MD_NOTDEPRECATED);
  454. }
  455. // else ... it is probably a brace, let's just start block processing
  456. while (LLMessageBlock * blockp = parseBlock(tokens))
  457. {
  458. templatep->addBlock(blockp);
  459. }
  460. if (!tokens.want("}"))
  461. {
  462. llerrs << "Expecting closing } for message " << template_name << " at "
  463. << tokens.line() << llendl;
  464. }
  465. return templatep;
  466. }
  467. // static
  468. LLMessageBlock* LLTemplateParser::parseBlock(LLTemplateTokenizer& tokens)
  469. {
  470. if (!tokens.want("{"))
  471. {
  472. return NULL;
  473. }
  474. // Name first
  475. std::string block_name = tokens.next();
  476. // Is name a legit C variable name ?
  477. if (!b_variable_ok(block_name.c_str()))
  478. {
  479. llerrs << "not a legal block name: " << block_name << " at "
  480. << tokens.line() << llendl;
  481. }
  482. LLMessageBlock* blockp = NULL;
  483. // now, block type ("Single", "Multiple", or "Variable")
  484. std::string block_type = tokens.next();
  485. // which one is it?
  486. if (block_type == "Single")
  487. {
  488. // OK, we can create a block
  489. blockp = new LLMessageBlock(block_name.c_str(), MBT_SINGLE);
  490. }
  491. else if (block_type == "Multiple")
  492. {
  493. // Need to get the number of repeats
  494. std::string repeats = tokens.next();
  495. // Is it a legal integer ?
  496. if (!b_positive_integer_ok(repeats.c_str()))
  497. {
  498. llerrs << "not a legal integer for block multiple count: "
  499. << repeats << " at " << tokens.line() << llendl;
  500. }
  501. // OK, we can create a block
  502. blockp = new LLMessageBlock(block_name.c_str(), MBT_MULTIPLE,
  503. atoi(repeats.c_str()));
  504. }
  505. else if (block_type == "Variable")
  506. {
  507. // OK, we can create a block
  508. blockp = new LLMessageBlock(block_name.c_str(), MBT_VARIABLE);
  509. }
  510. else
  511. {
  512. llerrs << "bad block type: " << block_type
  513. << " at " << tokens.line() << llendl;
  514. }
  515. while (LLMessageVariable* varp = parseVariable(tokens))
  516. {
  517. blockp->addVariable(varp->getName(), varp->getType(), varp->getSize());
  518. delete varp;
  519. }
  520. if (!tokens.want("}"))
  521. {
  522. llerrs << "Expecting closing } for block " << block_name << " at "
  523. << tokens.line() << llendl;
  524. }
  525. return blockp;
  526. }
  527. // static
  528. LLMessageVariable* LLTemplateParser::parseVariable(LLTemplateTokenizer& tokens)
  529. {
  530. if (!tokens.want("{"))
  531. {
  532. return NULL;
  533. }
  534. std::string var_name = tokens.next();
  535. if (!b_variable_ok(var_name.c_str()))
  536. {
  537. llerrs << "Not a legit variable name: " << var_name << " at "
  538. << tokens.line() << llendl;
  539. }
  540. LLMessageVariable* varp = NULL;
  541. std::string var_type = tokens.next();
  542. if (var_type == "U8")
  543. {
  544. varp = new LLMessageVariable(var_name.c_str(), MVT_U8, 1);
  545. }
  546. else if (var_type == "U16")
  547. {
  548. varp = new LLMessageVariable(var_name.c_str(), MVT_U16, 2);
  549. }
  550. else if (var_type == "U32")
  551. {
  552. varp = new LLMessageVariable(var_name.c_str(), MVT_U32, 4);
  553. }
  554. else if (var_type == "U64")
  555. {
  556. varp = new LLMessageVariable(var_name.c_str(), MVT_U64, 8);
  557. }
  558. else if (var_type == "S8")
  559. {
  560. varp = new LLMessageVariable(var_name.c_str(), MVT_S8, 1);
  561. }
  562. else if (var_type == "S16")
  563. {
  564. varp = new LLMessageVariable(var_name.c_str(), MVT_S16, 2);
  565. }
  566. else if (var_type == "S32")
  567. {
  568. varp = new LLMessageVariable(var_name.c_str(), MVT_S32, 4);
  569. }
  570. else if (var_type == "S64")
  571. {
  572. varp = new LLMessageVariable(var_name.c_str(), MVT_S64, 8);
  573. }
  574. else if (var_type == "F32")
  575. {
  576. varp = new LLMessageVariable(var_name.c_str(), MVT_F32, 4);
  577. }
  578. else if (var_type == "F64")
  579. {
  580. varp = new LLMessageVariable(var_name.c_str(), MVT_F64, 8);
  581. }
  582. else if (var_type == "LLVector3")
  583. {
  584. varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector3, 12);
  585. }
  586. else if (var_type == "LLVector3d")
  587. {
  588. varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector3d, 24);
  589. }
  590. else if (var_type == "LLVector4")
  591. {
  592. varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector4, 16);
  593. }
  594. else if (var_type == "LLQuaternion")
  595. {
  596. varp = new LLMessageVariable(var_name.c_str(), MVT_LLQuaternion, 12);
  597. }
  598. else if (var_type == "LLUUID")
  599. {
  600. varp = new LLMessageVariable(var_name.c_str(), MVT_LLUUID, 16);
  601. }
  602. else if (var_type == "BOOL")
  603. {
  604. varp = new LLMessageVariable(var_name.c_str(), MVT_BOOL, 1);
  605. }
  606. else if (var_type == "IPADDR")
  607. {
  608. varp = new LLMessageVariable(var_name.c_str(), MVT_IP_ADDR, 4);
  609. }
  610. else if (var_type == "IPPORT")
  611. {
  612. varp = new LLMessageVariable(var_name.c_str(), MVT_IP_PORT, 2);
  613. }
  614. else if (var_type == "Fixed" || var_type == "Variable")
  615. {
  616. std::string variable_size = tokens.next();
  617. if (!b_positive_integer_ok(variable_size.c_str()))
  618. {
  619. llerrs << "not a legal integer variable size: " << variable_size
  620. << " at " << tokens.line() << llendl;
  621. }
  622. EMsgVariableType type_enum;
  623. if (var_type == "Variable")
  624. {
  625. type_enum = MVT_VARIABLE;
  626. }
  627. else if (var_type == "Fixed")
  628. {
  629. type_enum = MVT_FIXED;
  630. }
  631. else
  632. {
  633. type_enum = MVT_FIXED; // removes a warning
  634. llerrs << "bad variable type: " << var_type << " at "
  635. << tokens.line() << llendl;
  636. }
  637. varp = new LLMessageVariable(var_name.c_str(), type_enum,
  638. atoi(variable_size.c_str()));
  639. }
  640. else
  641. {
  642. llerrs << "bad variable type:" << var_type << " at " << tokens.line()
  643. << llendl;
  644. }
  645. if (!tokens.want("}"))
  646. {
  647. llerrs << "Expecting closing } for variable " << var_name << " at "
  648. << tokens.line() << llendl;
  649. }
  650. return varp;
  651. }