/** * @file llmessagetemplateparser.cpp * @brief LLMessageTemplateParser implementation * * $LicenseInfo:firstyear=2007&license=viewergpl$ * * Copyright (c) 2007-2009, Linden Research, Inc. * * Second Life Viewer Source Code * The source code in this file ("Source Code") is provided by Linden Lab * to you under the terms of the GNU General Public License, version 2.0 * ("GPL"), unless you have obtained a separate licensing agreement * ("Other License"), formally executed by you and Linden Lab. Terms of * the GPL can be found in doc/GPL-license.txt in this distribution, or * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2 * * There are special exceptions to the terms and conditions of the GPL as * it is applied to this Source Code. View the full text of the exception * in the file doc/FLOSS-exception.txt in this software distribution, or * online at * http://secondlifegrid.net/programs/open_source/licensing/flossexception * * By copying, modifying or distributing this software, you acknowledge * that you have read and understood your obligations described above, * and agree to abide by those obligations. * * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY, * COMPLETENESS OR PERFORMANCE. * $/LicenseInfo$ */ #include "linden_common.h" #include "boost/tokenizer.hpp" #include "llmessagetemplateparser.h" // What follows is a bunch of C functions to do validation. // Lets support a small subset of regular expressions here // Syntax is a string made up of: // a - checks against alphanumeric ([A-Za-z0-9]) // c - checks against character ([A-Za-z]) // f - checks against first variable character ([A-Za-z_]) // v - checks against variable ([A-Za-z0-9_]) // s - checks against sign of integer ([-0-9]) // d - checks against integer digit ([0-9]) // * - repeat last check // Checks 'a' bool b_return_alphanumeric_ok(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9'); } // Checks 'c' bool b_return_character_ok(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z'); } // Checks 'f' bool b_return_first_variable_ok(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'; } // Checks 'v' bool b_return_variable_ok(char c) { return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '_'; } // Checks 's' bool b_return_signed_integer_ok(char c) { return (c >= '0' && c <= '9') || c == '-'; } // Checks 'd' bool b_return_integer_ok(char c) { return c >= '0' && c <= '9'; } bool (*gParseCheckCharacters[])(char c) = { b_return_alphanumeric_ok, b_return_character_ok, b_return_first_variable_ok, b_return_variable_ok, b_return_signed_integer_ok, b_return_integer_ok }; S32 get_checker_number(char checker) { switch (checker) { case 'a': return 0; case 'c': return 1; case 'f': return 2; case 'v': return 3; case 's': return 4; case 'd': return 5; case '*': return 9999; default: return -1; } } // Checks token based on passed simplified regular expression bool b_check_token(const char* token, const char* regexp) { S32 tptr, rptr = 0; S32 current_checker, next_checker = 0; current_checker = get_checker_number(regexp[rptr++]); if (current_checker == -1) { llerrs << "Invalid regular expression value!" << llendl; return false; } if (current_checker == 9999) { llerrs << "Regular expression can't start with *!" << llendl; return false; } for (tptr = 0; token[tptr]; ++tptr) { if (current_checker == -1) { llerrs << "Input exceeds regular expression!\nDid you forget a *?" << llendl; return false; } if (!gParseCheckCharacters[current_checker](token[tptr])) { return false; } if (next_checker != 9999) { next_checker = get_checker_number(regexp[rptr++]); if (next_checker != 9999) { current_checker = next_checker; } } } return true; } // C variable can be made up of upper or lower case letters, underscores, or // numbers, but can't start with a number bool b_variable_ok(const char* token) { if (b_check_token(token, "fv*")) { return true; } llwarns << "Token '" << token << "' is not a variable !" << llendl; return false; } // An integer is made up of the digits 0-9 and may be preceded by a '-' bool b_integer_ok(const char* token) { if (b_check_token(token, "sd*")) { return true; } llwarns << "Token is not an integer !" << llendl; return false; } // An integer is made up of the digits 0-9 bool b_positive_integer_ok(const char* token) { if (b_check_token(token, "d*")) { return true; } llwarns << "Token is not an integer !" << llendl; return false; } // Done with C functions, here's the tokenizer. typedef boost::tokenizer > tokenizer; LLTemplateTokenizer::LLTemplateTokenizer(const std::string& contents) : mStarted(false), mTokens() { boost::char_separator newline("\r\n", "", boost::keep_empty_tokens); boost::char_separator spaces(" \t"); U32 line_counter = 1; tokenizer line_tokens(contents, newline); for (tokenizer::iterator line_iter = line_tokens.begin(); line_iter != line_tokens.end(); ++line_iter, ++line_counter) { tokenizer word_tokens(*line_iter, spaces); for (tokenizer::iterator word_iter = word_tokens.begin(); word_iter != word_tokens.end(); ++word_iter) { if ((*word_iter)[0] == '/') { break; // skip to end of line on comments } positioned_token pt;// = new positioned_token(); pt.str = std::string(*word_iter); pt.line = line_counter; mTokens.push_back(pt); } } mCurrent = mTokens.begin(); } void LLTemplateTokenizer::inc() { if (atEOF()) { error("trying to increment token of EOF"); } else if (mStarted) { ++mCurrent; } else { mStarted = true; mCurrent = mTokens.begin(); } } void LLTemplateTokenizer::dec() { if (mCurrent == mTokens.begin()) { if (mStarted) { mStarted = false; } else { error("trying to decrement past beginning of file"); } } else { --mCurrent; } } std::string LLTemplateTokenizer::get() const { if (atEOF()) { error("trying to get EOF"); } return mCurrent->str; } U32 LLTemplateTokenizer::line() const { return atEOF() ? 0 : mCurrent->line; } bool LLTemplateTokenizer::atEOF() const { return mCurrent == mTokens.end(); } std::string LLTemplateTokenizer::next() { inc(); return get(); } bool LLTemplateTokenizer::want(const std::string& token) { if (atEOF()) { return false; } inc(); if (atEOF()) { return false; } if (get() != token) { dec(); // back up a step return false; } return true; } bool LLTemplateTokenizer::wantEOF() { // See if the next token is EOF if (atEOF()) { return true; } inc(); if (!atEOF()) { dec(); // Back up a step return false; } return true; } void LLTemplateTokenizer::error(std::string message) const { if (atEOF()) { llerrs << "Unexpected end of file: " << message << llendl; } else { llerrs << "Problem parsing message template at line " << line() << ", with token '" << get() << "' : " << message << llendl; } } // Done with tokenizer, next is the parser. LLTemplateParser::LLTemplateParser(LLTemplateTokenizer& tokens) : mVersion(0.f), mMessages() { // The version number should be the first thing in the file if (tokens.want("version")) { // version number std::string vers_string = tokens.next(); mVersion = (F32)atof(vers_string.c_str()); llinfos << "### Message template version " << mVersion << " ###" << llendl; } else { llerrs << "Version must be first in the message template, found " << tokens.next() << llendl; } while (LLMessageTemplate* templatep = parseMessage(tokens)) { if (templatep->getDeprecation() != MD_DEPRECATED) { mMessages.push_back(templatep); } else { delete templatep; } } if (!tokens.wantEOF()) { llerrs << "Expected end of template or a message, instead found: " << tokens.next() << " at " << tokens.line() << llendl; } } F32 LLTemplateParser::getVersion() const { return mVersion; } LLTemplateParser::message_iterator LLTemplateParser::getMessagesBegin() const { return mMessages.begin(); } LLTemplateParser::message_iterator LLTemplateParser::getMessagesEnd() const { return mMessages.end(); } // static LLMessageTemplate* LLTemplateParser::parseMessage(LLTemplateTokenizer& tokens) { if (!tokens.want("{")) { return NULL; } // Name first std::string template_name = tokens.next(); // Is name a legit C variable name ? if (!b_variable_ok(template_name.c_str())) { llerrs << "Not legit variable name: " << template_name << " at " << tokens.line() << llendl; } LLMessageTemplate* templatep = NULL; // OK, now get Frequency ("High", "Medium", or "Low") EMsgFrequency frequency = MFT_LOW; std::string freq_string = tokens.next(); if (freq_string == "High") { frequency = MFT_HIGH; } else if (freq_string == "Medium") { frequency = MFT_MEDIUM; } else if (freq_string == "Low" || freq_string == "Fixed") { frequency = MFT_LOW; } else { llerrs << "Expected frequency, got " << freq_string << " at " << tokens.line() << llendl; } // *TODO: more explicit checking here please U32 message_number = strtoul(tokens.next().c_str(), NULL, 0); switch (frequency) { case MFT_HIGH: break; case MFT_MEDIUM: message_number = (255 << 8) | message_number; break; case MFT_LOW: message_number = (255 << 24) | (255 << 16) | message_number; break; default: llerrs << "Unknown frequency enum: " << frequency << llendl; } templatep = new LLMessageTemplate(template_name.c_str(), message_number, frequency); // Now get trust ("Trusted", "NotTrusted") std::string trust = tokens.next(); if (trust == "Trusted") { templatep->setTrust(MT_TRUST); } else if (trust == "NotTrusted") { templatep->setTrust(MT_NOTRUST); } else { llerrs << "Bad trust " << trust << " at " << tokens.line() << llendl; } // Get encoding std::string encoding = tokens.next(); if (encoding == "Unencoded") { templatep->setEncoding(ME_UNENCODED); } else if (encoding == "Zerocoded") { templatep->setEncoding(ME_ZEROCODED); } else { llerrs << "Bad encoding " << encoding << " at " << tokens.line() << llendl; } // Get deprecation if (tokens.want("Deprecated")) { templatep->setDeprecation(MD_DEPRECATED); } else if (tokens.want("UDPDeprecated")) { templatep->setDeprecation(MD_UDPDEPRECATED); } else if (tokens.want("UDPBlackListed")) { templatep->setDeprecation(MD_UDPBLACKLISTED); } else if (tokens.want("NotDeprecated")) { // This is the default value, but it can't hurt to set it twice templatep->setDeprecation(MD_NOTDEPRECATED); } // else ... it is probably a brace, let's just start block processing while (LLMessageBlock * blockp = parseBlock(tokens)) { templatep->addBlock(blockp); } if (!tokens.want("}")) { llerrs << "Expecting closing } for message " << template_name << " at " << tokens.line() << llendl; } return templatep; } // static LLMessageBlock* LLTemplateParser::parseBlock(LLTemplateTokenizer& tokens) { if (!tokens.want("{")) { return NULL; } // Name first std::string block_name = tokens.next(); // Is name a legit C variable name ? if (!b_variable_ok(block_name.c_str())) { llerrs << "not a legal block name: " << block_name << " at " << tokens.line() << llendl; } LLMessageBlock* blockp = NULL; // now, block type ("Single", "Multiple", or "Variable") std::string block_type = tokens.next(); // which one is it? if (block_type == "Single") { // OK, we can create a block blockp = new LLMessageBlock(block_name.c_str(), MBT_SINGLE); } else if (block_type == "Multiple") { // Need to get the number of repeats std::string repeats = tokens.next(); // Is it a legal integer ? if (!b_positive_integer_ok(repeats.c_str())) { llerrs << "not a legal integer for block multiple count: " << repeats << " at " << tokens.line() << llendl; } // OK, we can create a block blockp = new LLMessageBlock(block_name.c_str(), MBT_MULTIPLE, atoi(repeats.c_str())); } else if (block_type == "Variable") { // OK, we can create a block blockp = new LLMessageBlock(block_name.c_str(), MBT_VARIABLE); } else { llerrs << "bad block type: " << block_type << " at " << tokens.line() << llendl; } while (LLMessageVariable* varp = parseVariable(tokens)) { blockp->addVariable(varp->getName(), varp->getType(), varp->getSize()); delete varp; } if (!tokens.want("}")) { llerrs << "Expecting closing } for block " << block_name << " at " << tokens.line() << llendl; } return blockp; } // static LLMessageVariable* LLTemplateParser::parseVariable(LLTemplateTokenizer& tokens) { if (!tokens.want("{")) { return NULL; } std::string var_name = tokens.next(); if (!b_variable_ok(var_name.c_str())) { llerrs << "Not a legit variable name: " << var_name << " at " << tokens.line() << llendl; } LLMessageVariable* varp = NULL; std::string var_type = tokens.next(); if (var_type == "U8") { varp = new LLMessageVariable(var_name.c_str(), MVT_U8, 1); } else if (var_type == "U16") { varp = new LLMessageVariable(var_name.c_str(), MVT_U16, 2); } else if (var_type == "U32") { varp = new LLMessageVariable(var_name.c_str(), MVT_U32, 4); } else if (var_type == "U64") { varp = new LLMessageVariable(var_name.c_str(), MVT_U64, 8); } else if (var_type == "S8") { varp = new LLMessageVariable(var_name.c_str(), MVT_S8, 1); } else if (var_type == "S16") { varp = new LLMessageVariable(var_name.c_str(), MVT_S16, 2); } else if (var_type == "S32") { varp = new LLMessageVariable(var_name.c_str(), MVT_S32, 4); } else if (var_type == "S64") { varp = new LLMessageVariable(var_name.c_str(), MVT_S64, 8); } else if (var_type == "F32") { varp = new LLMessageVariable(var_name.c_str(), MVT_F32, 4); } else if (var_type == "F64") { varp = new LLMessageVariable(var_name.c_str(), MVT_F64, 8); } else if (var_type == "LLVector3") { varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector3, 12); } else if (var_type == "LLVector3d") { varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector3d, 24); } else if (var_type == "LLVector4") { varp = new LLMessageVariable(var_name.c_str(), MVT_LLVector4, 16); } else if (var_type == "LLQuaternion") { varp = new LLMessageVariable(var_name.c_str(), MVT_LLQuaternion, 12); } else if (var_type == "LLUUID") { varp = new LLMessageVariable(var_name.c_str(), MVT_LLUUID, 16); } else if (var_type == "BOOL") { varp = new LLMessageVariable(var_name.c_str(), MVT_BOOL, 1); } else if (var_type == "IPADDR") { varp = new LLMessageVariable(var_name.c_str(), MVT_IP_ADDR, 4); } else if (var_type == "IPPORT") { varp = new LLMessageVariable(var_name.c_str(), MVT_IP_PORT, 2); } else if (var_type == "Fixed" || var_type == "Variable") { std::string variable_size = tokens.next(); if (!b_positive_integer_ok(variable_size.c_str())) { llerrs << "not a legal integer variable size: " << variable_size << " at " << tokens.line() << llendl; } EMsgVariableType type_enum; if (var_type == "Variable") { type_enum = MVT_VARIABLE; } else if (var_type == "Fixed") { type_enum = MVT_FIXED; } else { type_enum = MVT_FIXED; // removes a warning llerrs << "bad variable type: " << var_type << " at " << tokens.line() << llendl; } varp = new LLMessageVariable(var_name.c_str(), type_enum, atoi(variable_size.c_str())); } else { llerrs << "bad variable type:" << var_type << " at " << tokens.line() << llendl; } if (!tokens.want("}")) { llerrs << "Expecting closing } for variable " << var_name << " at " << tokens.line() << llendl; } return varp; }