12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516 |
- /**
- * @file llstring.cpp
- * @brief String utility functions and the std::string class.
- *
- * $LicenseInfo:firstyear=2001&license=viewergpl$
- *
- * Copyright (c) 2001-2009, Linden Research, Inc.
- *
- * Second Life Viewer Source Code
- * The source code in this file ("Source Code") is provided by Linden Lab
- * to you under the terms of the GNU General Public License, version 2.0
- * ("GPL"), unless you have obtained a separate licensing agreement
- * ("Other License"), formally executed by you and Linden Lab. Terms of
- * the GPL can be found in doc/GPL-license.txt in this distribution, or
- * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
- *
- * There are special exceptions to the terms and conditions of the GPL as
- * it is applied to this Source Code. View the full text of the exception
- * in the file doc/FLOSS-exception.txt in this software distribution, or
- * online at
- * http://secondlifegrid.net/programs/open_source/licensing/flossexception
- *
- * By copying, modifying or distributing this software, you acknowledge
- * that you have read and understood your obligations described above,
- * and agree to abide by those obligations.
- *
- * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
- * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
- * COMPLETENESS OR PERFORMANCE.
- * $/LicenseInfo$
- */
- #include "linden_common.h"
- #include <cstdarg>
- #if LL_WINDOWS
- # include <winnls.h> // For WideCharToMultiByte
- # include <vector>
- #endif
- #include "llsd.h"
- #include "llstring.h"
- U8 hex_as_nybble(char hex)
- {
- if (hex >= '0' && hex <= '9')
- {
- return (U8)(hex - '0');
- }
- else if (hex >= 'a' && hex <='f')
- {
- return (U8)(10 + hex - 'a');
- }
- else if (hex >= 'A' && hex <='F')
- {
- return (U8)(10 + hex - 'A');
- }
- return 0; // uh - oh, not hex any more...
- }
- // See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
- // for the Unicode implementation - this doesn't match because it was written
- // before finding it.
- std::ostream& operator<<(std::ostream& s, const LLWString& wstr)
- {
- std::string utf8_str = wstring_to_utf8str(wstr);
- s << utf8_str;
- return s;
- }
- std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars)
- {
- U32 cur_char = (U32)in_char;
- char* base = outchars;
- if (cur_char < 0x80)
- {
- *outchars++ = (U8)cur_char;
- }
- else if (cur_char < 0x800)
- {
- *outchars++ = 0xC0 | (cur_char >> 6);
- *outchars++ = 0x80 | (cur_char & 0x3F);
- }
- else if (cur_char < 0x10000)
- {
- *outchars++ = 0xE0 | (cur_char >> 12);
- *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
- *outchars++ = 0x80 | (cur_char & 0x3F);
- }
- else if (cur_char < 0x200000)
- {
- *outchars++ = 0xF0 | (cur_char >> 18);
- *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
- *outchars++ = 0x80 | (cur_char & 0x3F);
- }
- else if (cur_char < 0x4000000)
- {
- *outchars++ = 0xF8 | (cur_char >> 24);
- *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
- *outchars++ = 0x80 | (cur_char & 0x3F);
- }
- else if (cur_char < 0x80000000)
- {
- *outchars++ = 0xFC | (cur_char >> 30);
- *outchars++ = 0x80 | ((cur_char >> 24) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 18) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 12) & 0x3F);
- *outchars++ = 0x80 | ((cur_char >> 6) & 0x3F);
- *outchars++ = 0x80 | (cur_char & 0x3F);
- }
- else
- {
- llwarns << "Invalid Unicode character " << cur_char << "!" << llendl;
- *outchars++ = LL_UNKNOWN_CHAR;
- }
- return outchars - base;
- }
- static std::ptrdiff_t utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
- {
- const U16* base = inchars;
- U16 cur_char = *inchars++;
- llwchar char32 = cur_char;
- if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
- {
- // Surrogates
- char32 = ((llwchar)(cur_char - 0xD800)) << 10;
- cur_char = *inchars++;
- char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
- }
- else
- {
- char32 = (llwchar)cur_char;
- }
- *outchar = char32;
- return std::ptrdiff_t(inchars - base);
- }
- llutf16string wstring_to_utf16str(const LLWString& utf32str, S32 len)
- {
- llutf16string out;
- S32 i = 0;
- while (i < len)
- {
- U32 cur_char = utf32str[i++];
- if (cur_char > 0xFFFF)
- {
- out += (0xD7C0 + (cur_char >> 10));
- out += (0xDC00 | (cur_char & 0x3FF));
- }
- else
- {
- out += cur_char;
- }
- }
- return out;
- }
- LLWString utf16str_to_wstring(const llutf16string& utf16str, S32 len)
- {
- LLWString wout;
- if (len <= 0 || utf16str.empty()) return wout;
- S32 i = 0;
- // craziness to make gcc happy (llutf16string.c_str() is tweaked on linux):
- const U16* chars16 = &(*(utf16str.begin()));
- while (i < len)
- {
- llwchar cur_char;
- i += utf16chars_to_wchar(chars16 + i, &cur_char);
- wout += cur_char;
- }
- return wout;
- }
- // Length in llwchar (UTF-32) of the first len units (16 bits) of the given
- // UTF-16 string.
- S32 utf16str_wstring_length(const llutf16string& utf16str, S32 utf16_len)
- {
- S32 surrogate_pairs = 0;
- // ... craziness to make gcc happy (llutf16string.c_str() is tweaked on
- // linux):
- const U16* const utf16_chars = &(*(utf16str.begin()));
- S32 i = 0;
- while (i < utf16_len)
- {
- const U16 c = utf16_chars[i++];
- if (c >= 0xD800 && c <= 0xDBFF) // See http://en.wikipedia.org/wiki/UTF-16
- {
- // Have first byte of a surrogate pair
- if (i >= utf16_len)
- {
- break;
- }
- const U16 d = utf16_chars[i];
- if (d >= 0xDC00 && d <= 0xDFFF)
- { // Have valid second byte of a surrogate pair
- ++surrogate_pairs;
- ++i;
- }
- }
- }
- return utf16_len - surrogate_pairs;
- }
- // Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
- S32 wstring_utf16_length(const LLWString& wstr, S32 woffset, S32 wlen)
- {
- const S32 end = llmin((S32)wstr.length(), woffset + wlen);
- if (end < woffset)
- {
- return 0;
- }
- else
- {
- S32 length = end - woffset;
- for (S32 i = woffset; i < end; ++i)
- {
- if (wstr[i] >= 0x10000)
- {
- ++length;
- }
- }
- return length;
- }
- }
- // Given a wstring and an offset in it, returns the length as wstring (i.e.,
- // number of llwchars) of the longest substring that starts at the offset
- // and whose equivalent utf-16 string does not exceeds the given utf16_length.
- S32 wstring_length_from_utf16_length(const LLWString& wstr, S32 woffset,
- S32 utf16_length, bool* unaligned)
- {
- const S32 end = wstr.length();
- bool u = false;
- S32 n = woffset + utf16_length;
- S32 i = woffset;
- while (i < end)
- {
- if (wstr[i] >= 0x10000)
- {
- --n;
- }
- if (i >= n)
- {
- u = (i > n);
- break;
- }
- ++i;
- }
- if (unaligned)
- {
- *unaligned = u;
- }
- return i - woffset;
- }
- S32 wchar_utf8_length(const llwchar wc)
- {
- if (wc < 0x80)
- {
- // This case will also catch negative values which are
- // technically invalid.
- return 1;
- }
- else if (wc < 0x800)
- {
- return 2;
- }
- else if (wc < 0x10000)
- {
- return 3;
- }
- else if (wc < 0x200000)
- {
- return 4;
- }
- else if (wc < 0x4000000)
- {
- return 5;
- }
- else
- {
- return 6;
- }
- }
- S32 wstring_utf8_length(const LLWString& wstr)
- {
- S32 len = 0;
- for (S32 i = 0, count = wstr.length(); i < count; ++i)
- {
- len += wchar_utf8_length(wstr[i]);
- }
- return len;
- }
- LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
- {
- LLWString wout;
- S32 max_len = utf8str.length();
- S32 i = 0;
- while (i < len)
- {
- llwchar unichar;
- U8 cur_char = utf8str[i];
- if (cur_char < 0x80)
- {
- // Ascii character, just add it
- unichar = cur_char;
- }
- else
- {
- S32 cont_bytes = 0;
- if ((cur_char >> 5) == 0x6) // Two byte UTF8 -> 1 UTF32
- {
- unichar = 0x1F & cur_char;
- cont_bytes = 1;
- }
- else if ((cur_char >> 4) == 0xe) // Three byte UTF8 -> 1 UTF32
- {
- unichar = 0x0F & cur_char;
- cont_bytes = 2;
- }
- else if ((cur_char >> 3) == 0x1e) // Four byte UTF8 -> 1 UTF32
- {
- unichar = 0x07 & cur_char;
- cont_bytes = 3;
- }
- else if ((cur_char >> 2) == 0x3e) // Five byte UTF8 -> 1 UTF32
- {
- unichar = 0x03 & cur_char;
- cont_bytes = 4;
- }
- else if ((cur_char >> 1) == 0x7e) // Six byte UTF8 -> 1 UTF32
- {
- unichar = 0x01 & cur_char;
- cont_bytes = 5;
- }
- else
- {
- wout += LL_UNKNOWN_CHAR;
- ++i;
- continue;
- }
- // Check that this character doesn't go past the end of the string
- S32 end = len < i + cont_bytes ? len : i + cont_bytes;
- do
- {
- if (++i >= max_len)
- {
- // Malformed sequence - roll back to look at this as a new
- // char
- unichar = LL_UNKNOWN_CHAR;
- --i;
- break;
- }
- cur_char = utf8str[i];
- if ((cur_char >> 6) == 0x2)
- {
- unichar <<= 6;
- unichar += 0x3F & cur_char;
- }
- else
- {
- // Malformed sequence - roll back to look at this as a new
- // char
- unichar = LL_UNKNOWN_CHAR;
- --i;
- break;
- }
- }
- while (i < end);
- // Handle overlong characters and NULL characters
- if ((cont_bytes == 1 && unichar < 0x80) ||
- (cont_bytes == 2 && unichar < 0x800) ||
- (cont_bytes == 3 && unichar < 0x10000) ||
- (cont_bytes == 4 && unichar < 0x200000) ||
- (cont_bytes == 5 && unichar < 0x4000000))
- {
- unichar = LL_UNKNOWN_CHAR;
- }
- }
- wout += unichar;
- ++i;
- }
- return wout;
- }
- std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
- {
- std::string out;
- S32 i = 0;
- while (i < len)
- {
- char tchars[8];
- S32 n = wchar_to_utf8chars(utf32str[i++], tchars);
- tchars[n] = 0;
- out += tchars;
- }
- return out;
- }
- std::string utf8str_trim(const std::string& utf8str)
- {
- LLWString wstr = utf8str_to_wstring(utf8str);
- LLWStringUtil::trim(wstr);
- return wstring_to_utf8str(wstr);
- }
- std::string utf8str_tolower(const std::string& utf8str)
- {
- LLWString out_str = utf8str_to_wstring(utf8str);
- LLWStringUtil::toLower(out_str);
- return wstring_to_utf8str(out_str);
- }
- S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
- {
- LLWString wlhs = utf8str_to_wstring(lhs);
- LLWString wrhs = utf8str_to_wstring(rhs);
- return LLWStringUtil::compareInsensitive(wlhs, wrhs);
- }
- std::string utf8str_truncate(const std::string& utf8str, S32 max_len)
- {
- if (!max_len)
- {
- return std::string();
- }
- if ((S32)utf8str.length() <= max_len)
- {
- return utf8str;
- }
- else
- {
- S32 cur_char = max_len;
- // If we are ASCII, we do not need to do anything
- if ((U8)utf8str[cur_char] > 0x7f)
- {
- // If first two bits are (10), it is the tail end of a multibyte
- // char. We need to shift back to the first character
- while ((0xc0 & utf8str[cur_char]) == 0x80)
- {
- // Keep moving forward until we hit the first char;
- if (--cur_char == 0)
- {
- // Make sure we do not trash memory if we've got a bogus
- // string.
- break;
- }
- }
- }
- // The byte index we are on is one we want to get rid of, so we only
- // want to copy up to (cur_char-1) chars
- return utf8str.substr(0, cur_char);
- }
- }
- std::string utf8str_substChar(const std::string& utf8str,
- const llwchar target_char,
- const llwchar replace_char)
- {
- LLWString wstr = utf8str_to_wstring(utf8str);
- LLWStringUtil::replaceChar(wstr, target_char, replace_char);
- //wstr = wstring_substChar(wstr, target_char, replace_char);
- return wstring_to_utf8str(wstr);
- }
- std::string utf8str_makeASCII(const std::string& utf8str)
- {
- LLWString wstr = utf8str_to_wstring(utf8str);
- LLWStringUtil::_makeASCII(wstr);
- return wstring_to_utf8str(wstr);
- }
- std::string mbcsstring_makeASCII(const std::string& wstr)
- {
- // Replace non-ASCII chars with replace_char
- std::string out_str = wstr;
- for (S32 i = 0, len = out_str.length(); i < len; ++i)
- {
- if ((U8)out_str[i] > 0x7f)
- {
- out_str[i] = LL_UNKNOWN_CHAR;
- }
- }
- return out_str;
- }
// Returns a copy of the string with every carriage return (byte 13) removed.
// Line feeds are kept, so that CRLF line endings become plain LF.
std::string utf8str_removeCRLF(const std::string& utf8str)
{
	std::string out;
	if (utf8str.empty())
	{
		return out;
	}

	out.reserve(utf8str.length());
	for (const char c : utf8str)
	{
		if (c != '\r')
		{
			out += c;
		}
	}
	return out;
}
// Converts an ISO-8859-1 (Latin-1) string to UTF-8: bytes below 0x80 are
// copied, the others become a two-byte sequence.
std::string iso8859_to_utf8(const std::string& iso8859str)
{
	std::string out;
	if (iso8859str.empty())
	{
		return out;
	}

	// Worst case: every input byte expands to two output bytes.
	out.reserve(2 * iso8859str.length());
	for (const char ch : iso8859str)
	{
		const unsigned char byte = (unsigned char)ch;
		if (byte < 0x80)
		{
			out += ch;
			continue;
		}
		// 0x80-0xbf get lead byte 0xc2 and 0xc0-0xff get lead byte 0xc3; the
		// low 6 bits go into the continuation byte.
		out += (char)(byte > 0xbf ? 0xc3 : 0xc2);
		out += (char)(0x80 | (byte & 0x3f));
	}
	return out;
}
// Converts a UTF-8 string to ISO-8859-1 (Latin-1).
//
// ASCII bytes are copied and well-formed two-byte sequences are decoded to a
// single byte when the resulting code point fits in 8 bits. Anything that
// cannot be represented (code point above 0xff, sequence of three bytes or
// more, stray continuation byte, truncated sequence) produces a single '?'
// and the continuation bytes belonging to it are skipped, so that the decoder
// never swallows the character that follows.
std::string utf8_to_iso8859(const std::string& utf8str)
{
	std::string out;
	const size_t len = utf8str.length();
	if (!len)
	{
		return out;
	}

	out.reserve(len);
	for (size_t i = 0; i < len; )
	{
		const unsigned char lead = utf8str[i++];
		if (lead < 0x80)
		{
			out.push_back(lead);
			continue;
		}

		const bool has_trail = i < len && (utf8str[i] & 0xc0) == 0x80;
		if ((lead & 0xe0) == 0xc0 && has_trail)
		{
			// Two-byte sequence: 5 bits from the lead, 6 from the trail.
			const unsigned int code = ((lead & 0x1fu) << 6) |
									  (utf8str[i++] & 0x3f);
			out.push_back(code <= 0xff ? (char)code : '?');
			continue;
		}

		// Not representable: emit one placeholder and resynchronize on the
		// next byte that is not a continuation byte.
		out.push_back('?');
		while (i < len && (utf8str[i] & 0xc0) == 0x80)
		{
			++i;
		}
	}
	return out;
}
- #if LL_WINDOWS
- std::string ll_convert_wide_to_string(const wchar_t* in,
- unsigned int code_page)
- {
- std::string out;
- if (in)
- {
- int len_in = wcslen(in);
- int len_out = WideCharToMultiByte(code_page, 0, in, len_in, NULL, 0, 0,
- 0);
- // We will need two more bytes for the double NULL ending created in
- // WideCharToMultiByte().
- char* pout = new char[len_out + 2];
- memset(pout, 0, len_out + 2);
- if (pout)
- {
- WideCharToMultiByte(code_page, 0, in, len_in, pout, len_out, 0, 0);
- out.assign(pout);
- delete[] pout;
- }
- }
- return out;
- }
- std::string ll_convert_wide_to_string(const wchar_t* in)
- {
- return ll_convert_wide_to_string(in, CP_UTF8);
- }
- LLWString ll_convert_wide_to_wstring(const std::wstring& in)
- {
- // This function, like its converse, is a placeholder, encapsulating a
- // guilty little hack: the only "official" way Nat has found to convert
- // between std::wstring (16 bits on Windows) and LLWString (UTF-32) is
- // by using iconv, which we have avoided so far. It sorts of works to just
- // copy individual characters...
- // The point is that if/when we DO introduce some more official way to
- // perform such conversions, we should only have to call it here.
- return { in.begin(), in.end() };
- }
- std::wstring ll_convert_wstring_to_wide(const LLWString& in)
- {
- // See comments in ll_convert_wide_to_wstring()
- return { in.begin(), in.end() };
- }
- std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in)
- {
- return ll_convert_string_to_wide(in, CP_UTF8);
- }
- std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in,
- unsigned int code_page)
- {
- // From review:
- // We can preallocate a wide char buffer that is the same length (in
- // wchar_t elements) as the utf8 input, plus one for a nul terminator, and
- // be guaranteed to not overflow.
- // Normally, I would call that sort of thing premature optimization, but we
- // *are* seeing string operations taking a bunch of time, especially when
- // constructing widgets.
- //int output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(),
- // in.length(), NULL, 0);
- // Reserve an output buffer that will be destroyed on exit, with a place to
- // put a NUL terminator.
- std::vector<wchar_t> w_out(in.length() + 1);
- size_t len = w_out.size();
- memset(&w_out[0], 0, len);
- int real_output_str_len = MultiByteToWideChar(code_page, 0, in.c_str(),
- in.length(), &w_out[0], len);
- // Looks like MultiByteToWideChar didn't add null terminator to converted
- // string, see EXT-4858.
- w_out[real_output_str_len] = 0;
- // Construct string<wchar_t> from our temporary output buffer
- return {&w_out[0]};
- }
- std::string ll_convert_string_to_utf8_string(const std::string& in)
- {
- auto w_mesg = ll_convert_string_to_wide(in, CP_ACP);
- std::string out_utf8(ll_convert_wide_to_string(w_mesg.c_str(), CP_UTF8));
- return out_utf8;
- }
- #endif // LL_WINDOWS
- ///////////////////////////////////////////////////////////////////////////////
- // Formerly in u64.cpp - Utilities for conversions between U64 and string
- ///////////////////////////////////////////////////////////////////////////////
- U64 str_to_U64(const std::string& str)
- {
- U64 result = 0;
- const char* aptr = strpbrk(str.c_str(), "0123456789");
- if (!aptr)
- {
- llwarns << "Bad string to U64 conversion attempt: format" << llendl;
- }
- else
- {
- while (*aptr >= '0' && *aptr <= '9')
- {
- result = result * 10 + (*aptr++ - '0');
- }
- }
- return result;
- }
- std::string U64_to_str(U64 value)
- {
- std::string res;
- U32 part1, part2, part3;
- part3 = (U32)(value % (U64)10000000);
- value /= 10000000;
- part2 = (U32)(value % (U64)10000000);
- value /= 10000000;
- part1 = (U32)(value % (U64)10000000);
- // Three cases to avoid leading zeroes unless necessary
- if (part1)
- {
- res = llformat("%u%07u%07u", part1, part2, part3);
- }
- else if (part2)
- {
- res = llformat("%u%07u", part2, part3);
- }
- else
- {
- res = llformat("%u", part3);
- }
- return res;
- }
- char* U64_to_str(U64 value, char* result, S32 result_size)
- {
- std::string res = U64_to_str(value);
- LLStringUtil::copy(result, res.c_str(), result_size);
- return result;
- }
- U64 llstrtou64(const char* str, char** end, S32 base)
- {
- #ifdef LL_WINDOWS
- return _strtoui64(str, end, base);
- #else
- return strtoull(str, end, base);
- #endif
- }
- ///////////////////////////////////////////////////////////////////////////////
- // LLStringOps class
- ///////////////////////////////////////////////////////////////////////////////
- long LLStringOps::sPacificTimeOffset = 0;
- long LLStringOps::sLocalTimeOffset = 0;
- bool LLStringOps::sPacificDaylightTime = 0;
- std::map<std::string, std::string> LLStringOps::datetimeToCodes;
- std::vector<std::string> LLStringOps::sWeekDayList;
- std::vector<std::string> LLStringOps::sWeekDayShortList;
- std::vector<std::string> LLStringOps::sMonthList;
- std::vector<std::string> LLStringOps::sMonthShortList;
- std::string LLStringOps::sDayFormat;
- std::string LLStringOps::sAM;
- std::string LLStringOps::sPM;
- //static
- bool LLStringOps::isHexString(const std::string& str)
- {
- const char* buf = str.c_str();
- int len = str.size();
- while (--len >= 0)
- {
- if (!isxdigit(buf[len])) return false;
- }
- return true;
- }
- //static
- bool LLStringOps::isEmoji(llwchar a)
- {
- #if 0 // Do not consider special characters that might have a corresponding
- // glyph in the monochorme fallback fonts as a "genuine" emoji. HB
- return a == 0xa9 || a == 0xae || (a >= 0x2000 && a < 0x3300) ||
- (a >= 0x1f000 && a < 0x20000);
- #else
- // These are indeed "genuine" emojis. HB
- return a >= 0x1f000 && a < 0x20000;
- #endif
- }
- //static
- S32 LLStringOps::collate(const llwchar* a, const llwchar* b)
- {
- #if LL_WINDOWS
- // Under Windows, wide string functions operator on 16-bit strings, not the
- // proper 32 bit wide string.
- return strcmp(wstring_to_utf8str(LLWString(a)).c_str(),
- wstring_to_utf8str(LLWString(b)).c_str());
- #else
- return wcscoll(a, b);
- #endif
- }
- //static
- void LLStringOps::setupDatetimeInfo(bool daylight)
- {
- time_t nowT, localT, gmtT;
- struct tm * tmpT;
- nowT = time (NULL);
- tmpT = gmtime(&nowT);
- gmtT = mktime (tmpT);
- tmpT = localtime (&nowT);
- localT = mktime (tmpT);
- sLocalTimeOffset = (long) (gmtT - localT);
- if (tmpT->tm_isdst)
- {
- sLocalTimeOffset -= 60 * 60; // 1 hour
- }
- sPacificDaylightTime = daylight;
- sPacificTimeOffset = (sPacificDaylightTime? 7 : 8) * 60 * 60;
- datetimeToCodes["wkday"] = "%a"; // Thu
- datetimeToCodes["weekday"] = "%A"; // Thursday
- datetimeToCodes["year4"] = "%Y"; // 2009
- datetimeToCodes["year"] = "%Y"; // 2009
- datetimeToCodes["year2"] = "%y"; // 09
- datetimeToCodes["mth"] = "%b"; // Aug
- datetimeToCodes["month"] = "%B"; // August
- datetimeToCodes["mthnum"] = "%m"; // 08
- datetimeToCodes["day"] = "%d"; // 31
- datetimeToCodes["sday"] = "%-d"; // 9
- datetimeToCodes["hour24"] = "%H"; // 14
- datetimeToCodes["hour"] = "%H"; // 14
- datetimeToCodes["hour12"] = "%I"; // 02
- datetimeToCodes["min"] = "%M"; // 59
- datetimeToCodes["ampm"] = "%p"; // AM
- datetimeToCodes["second"] = "%S"; // 59
- datetimeToCodes["timezone"] = "%Z"; // PST
- }
// Splits 'data' on ':' separators and stores the pieces in 'output', replacing
// its former contents. Empty pieces are kept, so an input containing N
// separators always produces N + 1 entries (an empty input gives one empty
// entry).
static void tokenize_str_to_aray(const std::string& data,
								 std::vector<std::string>& output)
{
	output.clear();

	size_t start = 0;
	for (size_t colon = data.find(':'); colon != std::string::npos;
		 colon = data.find(':', start))
	{
		output.push_back(data.substr(start, colon - start));
		start = colon + 1;
	}
	// Whatever follows the last separator (possibly nothing)
	output.push_back(data.substr(start));
}
- //static
- void LLStringOps::setupWeekDaysNames(const std::string& data)
- {
- tokenize_str_to_aray(data, sWeekDayList);
- }
- //static
- void LLStringOps::setupWeekDaysShortNames(const std::string& data)
- {
- tokenize_str_to_aray(data, sWeekDayShortList);
- }
- //static
- void LLStringOps::setupMonthNames(const std::string& data)
- {
- tokenize_str_to_aray(data, sMonthList);
- }
- //static
- void LLStringOps::setupMonthShortNames(const std::string& data)
- {
- tokenize_str_to_aray(data, sMonthShortList);
- }
- //static
- void LLStringOps::setupDayFormat(const std::string& data)
- {
- sDayFormat = data;
- }
- //static
- std::string LLStringOps::getDatetimeCode(std::string key)
- {
- std::map<std::string, std::string>::iterator iter;
- iter = datetimeToCodes.find (key);
- if (iter != datetimeToCodes.end())
- {
- return iter->second;
- }
- return std::string("");
- }
namespace LLStringFn
{

// Replaces, in place, every character of str that is either an ASCII control
// character (below 0x20) or a non-ASCII byte (above 0x7f) with 'replacement'.
// Note: this restricts output to ASCII. The test is done on unsigned values so
// that the result does not depend on the signedness of 'char'.
void replace_nonprintable_in_ascii(std::basic_string<char>& str,
								   char replacement)
{
	constexpr unsigned char SPACE = 0x20;
	constexpr unsigned char ASCII_MAX = 0x7f;
	for (char& c : str)
	{
		const unsigned char u = (unsigned char)c;
		if (u < SPACE || u > ASCII_MAX)
		{
			c = replacement;
		}
	}
}

// Same as replace_nonprintable_in_ascii(), but also replaces the pipe ('|')
// character.
// Note: this restricts output to ASCII.
void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
											char replacement)
{
	constexpr unsigned char SPACE = 0x20;
	constexpr unsigned char ASCII_MAX = 0x7f;
	constexpr unsigned char PIPE = 0x7c;
	for (char& c : str)
	{
		const unsigned char u = (unsigned char)c;
		if (u < SPACE || u > ASCII_MAX || u == PIPE)
		{
			c = replacement;
		}
	}
}

// Replaces all control characters (c < 0x20) with replacement in string.
// Bytes above 0x7f are left untouched.
void replace_ascii_controlchars(std::basic_string<char>& str,
								char replacement)
{
	constexpr unsigned char SPACE = 0x20;
	for (char& c : str)
	{
		if ((unsigned char)c < SPACE)
		{
			c = replacement;
		}
	}
}

// https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on
// allowable code points for XML. Specifically, they are:
// 0x09, 0x0a, 0x0d, and 0x20 on up. JC
// Returns a copy of str without the bytes that are not allowed.
std::string strip_invalid_xml(const std::string& str)
{
	constexpr unsigned char SPACE = 0x20;
	constexpr unsigned char TAB = 0x09;
	constexpr unsigned char LF = 0x0a;
	constexpr unsigned char CR = 0x0d;

	std::string output;
	output.reserve(str.size());
	for (const char ch : str)
	{
		// Must compare as unsigned for >=
		// Test most likely match first
		const unsigned char c = (unsigned char)ch;
		if (c >= SPACE || c == TAB || c == LF || c == CR)
		{
			output.push_back(ch);
		}
	}
	return output;
}

typedef std::map<char, std::string> literals_map_t;

// Characters that must be escaped in XML element contents, with their
// predefined entity.
static const literals_map_t xml_elem_literals =
{
	{ '<', "&lt;" },
	{ '>', "&gt;" },
	{ '&', "&amp;" }
};

// Additional characters that must be escaped in XML attribute values.
static const literals_map_t xml_attr_literals =
{
	{ '"', "&quot;" },
	{ '\'', "&apos;" }
};

// Replaces, in text, every character listed in 'literals' with its entity.
// This is done in a single pass so that the '&' of an entity that has just
// been inserted never gets escaped again.
static void literals_encode(std::string& text,
							const literals_map_t& literals)
{
	std::string encoded;
	encoded.reserve(text.size());
	for (const char c : text)
	{
		const literals_map_t::const_iterator it = literals.find(c);
		if (it == literals.end())
		{
			encoded.push_back(c);
		}
		else
		{
			encoded.append(it->second);
		}
	}
	text.swap(encoded);
}

// Replaces all characters that are not allowed in XML 1.0 with the
// corresponding literals. The quote characters are only escaped when
// for_attribute is true.
std::string xml_encode(const std::string& input, bool for_attribute)
{
	std::string result(input);
	literals_encode(result, xml_elem_literals);
	if (for_attribute)
	{
		// Safe as a second pass: the entities inserted above contain no quote
		// character.
		literals_encode(result, xml_attr_literals);
	}
	return result;
}

// Returns the entry of 'literals' whose entity text is found in 'text' at
// position 'pos', or literals.end() when there is none.
static literals_map_t::const_iterator match_entity(const std::string& text,
													size_t pos,
													const literals_map_t& literals)
{
	for (literals_map_t::const_iterator it = literals.begin(),
										end = literals.end();
		 it != end; ++it)
	{
		if (text.compare(pos, it->second.size(), it->second) == 0)
		{
			return it;
		}
	}
	return literals.end();
}

// Replaces some of XML literals that are defined in XML 1.0 with the
// corresponding characters. The quote entities are only decoded when
// for_attribute is true.
// The input is scanned once, from left to right, and decoded text is never
// scanned again: "&amp;lt;" thus correctly decodes to "&lt;" and not to "<".
std::string xml_decode(const std::string& input, bool for_attribute)
{
	std::string result;
	result.reserve(input.size());

	const size_t len = input.size();
	size_t pos = 0;
	while (pos < len)
	{
		if (input[pos] == '&')
		{
			literals_map_t::const_iterator it =
				match_entity(input, pos, xml_elem_literals);
			bool found = it != xml_elem_literals.end();
			if (!found && for_attribute)
			{
				it = match_entity(input, pos, xml_attr_literals);
				found = it != xml_attr_literals.end();
			}
			if (found)
			{
				result.push_back(it->first);
				pos += it->second.size();
				continue;
			}
		}
		// Ordinary character, or '&' that does not start a known entity
		result.push_back(input[pos++]);
	}
	return result;
}

}
- ////////////////////////////////////////////////////////////
- // Forward specialization of LLStringUtil::format before use in
- // LLStringUtil::formatDatetime.
- template<>
- S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);
- //static
- template<>
- void LLStringUtil::getTokens(const std::string& instr,
- std::vector<std::string>& tokens,
- const std::string& delims)
- {
- std::string token;
- size_t start = instr.find_first_not_of(delims);
- while (start != std::string::npos)
- {
- size_t end = instr.find_first_of(delims, start);
- if (end == std::string::npos)
- {
- end = instr.length();
- }
- token = instr.substr(start, end - start);
- LLStringUtil::trim(token);
- tokens.push_back(token);
- start = instr.find_first_not_of(delims, end);
- }
- }
- //static
- template<>
- LLStringUtil::size_type LLStringUtil::getSubstitution(const std::string& instr,
- size_type& start,
- std::vector<std::string>& tokens)
- {
- const std::string delims (",");
- // Find the first [
- size_type pos1 = instr.find('[', start);
- if (pos1 == std::string::npos)
- return std::string::npos;
- //Find the first ] after the initial [
- size_type pos2 = instr.find(']', pos1);
- if (pos2 == std::string::npos)
- return std::string::npos;
- // Find the last [ before ] in case of nested [[]]
- pos1 = instr.find_last_of('[', pos2 - 1);
- if (pos1 == std::string::npos || pos1 < start)
- return std::string::npos;
- getTokens(std::string(instr ,pos1 + 1, pos2 - pos1 - 1), tokens, delims);
- start = pos2 + 1;
- return pos1;
- }
- //static
- template<>
- bool LLStringUtil::simpleReplacement(std::string& replacement,
- const std::string& token,
- const format_map_t& substitutions)
- {
- // See if we have a replacement for the bracketed string (without the
- // brackets) test first using has() because if we just look up with
- // operator[] we get back an empty string even if the value is missing.
- // We want to distinguish between missing replacements and deliberately
- // empty replacement strings.
- format_map_t::const_iterator iter = substitutions.find(token);
- if (iter != substitutions.end())
- {
- replacement = iter->second;
- return true;
- }
- // If not, see if there's one WITH brackets
- iter = substitutions.find(std::string("[" + token + "]"));
- if (iter != substitutions.end())
- {
- replacement = iter->second;
- return true;
- }
- return false;
- }
- //static
- template<>
- bool LLStringUtil::simpleReplacement(std::string& replacement,
- const std::string& token,
- const LLSD& substitutions)
- {
- // See if we have a replacement for the bracketed string (without the
- // brackets). Test first using has() because if we just look up with
- // operator[] we get back an empty string even if the value is missing.
- // We want to distinguish between missing replacements and deliberately
- // empty replacement strings.
- if (substitutions.has(token))
- {
- replacement = substitutions[token].asString();
- return true;
- }
- // If not, see if there's one WITH brackets
- else if (substitutions.has(std::string("[" + token + "]")))
- {
- replacement = substitutions[std::string("[" + token + "]")].asString();
- return true;
- }
- return false;
- }
- //static
- template<>
- void LLStringUtil::setLocale(std::string in_locale)
- {
- sLocale = in_locale;
- }
- //static
- template<>
- std::string LLStringUtil::getLocale()
- {
- return sLocale;
- }
- //static
- template<>
- void LLStringUtil::formatNumber(std::string& num_str, S32 decimals)
- {
- std::stringstream str_stream;
- if (!sLocale.empty())
- {
- // std::locale() throws if the locale is unknown ! (EXT-7926)
- try
- {
- str_stream.imbue(std::locale(sLocale.c_str()));
- }
- catch (const std::exception&)
- {
- llwarns_once << "Cannot set locale to " << sLocale << llendl;
- }
- }
- if (!decimals)
- {
- S32 int_str;
- if (convertToS32(num_str, int_str))
- {
- str_stream << int_str;
- num_str = str_stream.str();
- }
- }
- else
- {
- F32 float_str;
- if (convertToF32(num_str, float_str))
- {
- str_stream << std::fixed << std::showpoint
- << std::setprecision(decimals) << float_str;
- num_str = str_stream.str();
- }
- }
- }
- //static
- template<>
- bool LLStringUtil::formatDatetime(std::string& replacement,
- const std::string& token,
- const std::string& param, S32 sec_epoch)
- {
- if (param == "local") // Local time
- {
- sec_epoch -= LLStringOps::getLocalTimeOffset();
- }
- else if (param != "utc" && param != "gmt") // SL time
- {
- sec_epoch -= LLStringOps::getPacificTimeOffset();
- }
- // If never fell into those two ifs above, param must be utc
- if (sec_epoch < 0) sec_epoch = 0;
- LLDate datetime((F64)sec_epoch);
- std::string code = LLStringOps::getDatetimeCode(token);
- // Special case to handle timezone
- if (code == "%Z")
- {
- if (param == "utc" || param == "gmt")
- {
- replacement = "UTC";
- }
- else if (param == "local")
- {
- replacement.clear(); // User knows their own timezone
- }
- else
- {
- // "slt" = Second Life Time, which is deprecated.
- // If not UTC or user local time, fallback to pacific time
- replacement = LLStringOps::getPacificDaylightTime() ? "PDT"
- : "PST";
- }
- return true;
- }
- // EXT-7013: few codes are not suppotred by strtime function (example:
- // weekdays for Japanese), so use predefined ones.
- // If sWeekDayList is not empty than current locale does not support the
- // weekday name.
- time_t loc_seconds = (time_t) sec_epoch;
- if (LLStringOps::sWeekDayList.size() == 7 && code == "%A")
- {
- struct tm* gmt = gmtime(&loc_seconds);
- replacement = LLStringOps::sWeekDayList[gmt->tm_wday];
- }
- else if (LLStringOps::sWeekDayShortList.size() == 7 && code == "%a")
- {
- struct tm* gmt = gmtime(&loc_seconds);
- replacement = LLStringOps::sWeekDayShortList[gmt->tm_wday];
- }
- else if (LLStringOps::sMonthList.size() == 12 && code == "%B")
- {
- struct tm* gmt = gmtime(&loc_seconds);
- replacement = LLStringOps::sMonthList[gmt->tm_mon];
- }
- else if (!LLStringOps::sDayFormat.empty() && code == "%d")
- {
- struct tm* gmt = gmtime(&loc_seconds);
- LLStringUtil::format_map_t args;
- args["[MDAY]"] = llformat ("%d", gmt->tm_mday);
- replacement = LLStringOps::sDayFormat;
- LLStringUtil::format(replacement, args);
- }
- else if (code == "%-d")
- {
- struct tm* gmt = gmtime(&loc_seconds);
- // Day of the month without leading zero
- replacement = llformat ("%d", gmt->tm_mday);
- }
- else if (!LLStringOps::sAM.empty() && !LLStringOps::sPM.empty() &&
- code == "%p")
- {
- struct tm* gmt = gmtime(&loc_seconds);
- if (gmt->tm_hour<12)
- {
- replacement = LLStringOps::sAM;
- }
- else
- {
- replacement = LLStringOps::sPM;
- }
- }
- else
- {
- replacement = datetime.toHTTPDateString(code.c_str());
- }
- // *HACK: delete leading zero from hour string in case 'hour12' (code = %I)
- // time format to show time without leading zero, e.g. 08:16 -> 8:16
- // (EXT-2738). We could have used '%l' format instead, but it is not
- // supported by Windows.
- if (code == "%I" && token == "hour12" && replacement[0] == '0')
- {
- replacement = replacement[1];
- }
- return !code.empty();
- }
- // LLStringUtil::format recogizes the following patterns.
- // All substitutions *must* be encased in []'s in the input string. The []'s
- // are optional in the substitution map.
- // [FOO_123]
- // [FOO,number,precision]
- // [FOO,datetime,format]
- //static
- template<>
- S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
- {
- S32 res = 0;
- std::string output;
- std::vector<std::string> tokens;
- size_t start = 0;
- size_t prev_start = 0;
- size_t key_start = 0;
- while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
- {
- output += std::string(s, prev_start, key_start-prev_start);
- prev_start = start;
- bool found_replacement = false;
- std::string replacement;
- if (tokens.size() == 0)
- {
- found_replacement = false;
- }
- else if (tokens.size() == 1)
- {
- found_replacement = simpleReplacement(replacement, tokens[0],
- substitutions);
- }
- else if (tokens[1] == "number")
- {
- std::string param = "0";
- if (tokens.size() > 2)
- {
- param = tokens[2];
- }
- found_replacement = simpleReplacement(replacement, tokens[0],
- substitutions);
- if (found_replacement)
- {
- formatNumber(replacement, atoi(param.c_str()));
- }
- }
- else if (tokens[1] == "datetime")
- {
- std::string param;
- if (tokens.size() > 2)
- {
- param = tokens[2];
- }
- format_map_t::const_iterator iter = substitutions.find("datetime");
- if (iter != substitutions.end())
- {
- S32 sec_epoch = 0;
- bool r = LLStringUtil::convertToS32(iter->second,
- sec_epoch);
- if (r)
- {
- found_replacement = formatDatetime(replacement, tokens[0],
- param, sec_epoch);
- }
- }
- }
- if (found_replacement)
- {
- output += replacement;
- ++res;
- }
- else
- {
- // We had no replacement, use the string as is. E.g.
- // "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
- output += std::string(s, key_start, start - key_start);
- }
- tokens.clear();
- }
- // Send the remainder of the string (with no further matches for bracketed
- // names)
- output += std::string(s, start);
- s = output;
- return res;
- }
- //static
- template<>
- S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
- {
- S32 res = 0;
- if (!substitutions.isMap())
- {
- return res;
- }
- std::string output;
- std::vector<std::string> tokens;
- size_t start = 0;
- size_t prev_start = 0;
- size_t key_start = 0;
- while ((key_start = getSubstitution(s, start, tokens)) != std::string::npos)
- {
- output += std::string(s, prev_start, key_start - prev_start);
- prev_start = start;
- bool found_replacement = false;
- std::string replacement;
- if (tokens.size() == 0)
- {
- found_replacement = false;
- }
- else if (tokens.size() == 1)
- {
- found_replacement = simpleReplacement(replacement, tokens[0],
- substitutions);
- }
- else if (tokens[1] == "number")
- {
- std::string param = "0";
- if (tokens.size() > 2)
- {
- param = tokens[2];
- }
- found_replacement = simpleReplacement(replacement, tokens[0],
- substitutions);
- if (found_replacement)
- {
- formatNumber(replacement, atoi(param.c_str()));
- }
- }
- else if (tokens[1] == "datetime")
- {
- std::string param;
- if (tokens.size() > 2)
- {
- param = tokens[2];
- }
- S32 sec_epoch = (S32)substitutions["datetime"].asInteger();
- found_replacement = formatDatetime(replacement, tokens[0],
- param, sec_epoch);
- }
- if (found_replacement)
- {
- output += replacement;
- ++res;
- }
- else
- {
- // We had no replacement, use the string as is. E.g.
- // "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
- output += std::string(s, key_start, start-key_start);
- }
- tokens.clear();
- }
- // Send the remainder of the string (with no further matches for bracketed
- // names)
- output += std::string(s, start);
- s = output;
- return res;
- }
// This used to be in separate llformat.cpp file. Moved here for coherency. HB
// printf()-like formatting into a std::string.
//  - fmt: printf()-style format string, followed by its arguments. A NULL
//    'fmt' yields an empty string.
//  - Returns the formatted text, truncated to 1023 characters since an
//    internal buffer of 1024 bytes is used, or an empty string when the
//    formatting itself failed.
std::string llformat(const char* fmt, ...)
{
	// Avoid allocating 1024 bytes on the stack (or worst, depending on the
	// compiler: on the heap) at *each* call; instead use a static buffer in
	// the thread local storage (so that we stay thread-safe). HB
	thread_local char buffer[1024];

	if (LL_UNLIKELY(!fmt))
	{
		return std::string();
	}

	va_list va;
	va_start(va, fmt);
	// Use the standard vsnprintf() on all platforms: unlike the Windows
	// specific _vsnprintf(), it always NUL-terminates the buffer on
	// truncation, so we can never read stale characters left over by a
	// former call, or run past the end of the buffer.
	const int written = vsnprintf(buffer, sizeof(buffer), fmt, va);
	va_end(va);

	if (written < 0)
	{
		// Formatting error: the contents of the buffer cannot be trusted
		return std::string();
	}

	return std::string(buffer);
}
|