GwynethLlewelyn
/
CoolVLViewer


			
							1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007
01702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275
1276127712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516
							/**
 * @file llstring.cpp
 * @brief String utility functions and the std::string class.
 *
 * $LicenseInfo:firstyear=2001&license=viewergpl$
 *
 * Copyright (c) 2001-2009, Linden Research, Inc.
 *
 * Second Life Viewer Source Code
 * The source code in this file ("Source Code") is provided by Linden Lab
 * to you under the terms of the GNU General Public License, version 2.0
 * ("GPL"), unless you have obtained a separate licensing agreement
 * ("Other License"), formally executed by you and Linden Lab.  Terms of
 * the GPL can be found in doc/GPL-license.txt in this distribution, or
 * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
 *
 * There are special exceptions to the terms and conditions of the GPL as
 * it is applied to this Source Code. View the full text of the exception
 * in the file doc/FLOSS-exception.txt in this software distribution, or
 * online at
 * http://secondlifegrid.net/programs/open_source/licensing/flossexception
 *
 * By copying, modifying or distributing this software, you acknowledge
 * that you have read and understood your obligations described above,
 * and agree to abide by those obligations.
 *
 * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
 * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
 * COMPLETENESS OR PERFORMANCE.
 * $/LicenseInfo$
 */

#include "linden_common.h"

#include <cstdarg>

#if LL_WINDOWS
# include <winnls.h>	// For WideCharToMultiByte
# include <vector>
#endif

#include "llsd.h"
#include "llstring.h"

// Converts one hexadecimal digit character ('0'-'9', 'a'-'f' or 'A'-'F')
// into its numeric value (0 to 15). Any other character yields 0.
U8 hex_as_nybble(char hex)
{
	U8 value = 0;	// Fallback for characters that are not hex digits
	if ('0' <= hex && hex <= '9')
	{
		value = (U8)(hex - '0');
	}
	else if ('a' <= hex && hex <= 'f')
	{
		value = (U8)(10 + hex - 'a');
	}
	else if ('A' <= hex && hex <= 'F')
	{
		value = (U8)(10 + hex - 'A');
	}
	return value;
}

// See http://www.unicode.org/Public/BETA/CVTUTF-1-2/ConvertUTF.c
// for the Unicode implementation - this doesn't match because it was written
// before finding it.

// Writes a wide string to the stream after converting it to UTF-8.
std::ostream& operator<<(std::ostream& s, const LLWString& wstr)
{
	return s << wstring_to_utf8str(wstr);
}

// Encodes one wide character as a UTF-8 byte sequence written to outchars
// (no NUL terminator is appended) and returns the number of bytes written.
// Values below 0x80000000 produce 1 to 6 bytes; anything larger is logged
// and replaced with LL_UNKNOWN_CHAR.
std::ptrdiff_t wchar_to_utf8chars(llwchar in_char, char* outchars)
{
	const U32 code = (U32)in_char;
	char* cursor = outchars;

	// Single byte (ASCII) form
	if (code < 0x80)
	{
		*cursor++ = (U8)code;
		return cursor - outchars;
	}

	// Multi-byte forms: pick the lead byte marker and the number of
	// continuation bytes that follow it.
	U32 lead = 0;
	S32 extra = 0;
	if (code < 0x800)
	{
		lead = 0xC0;
		extra = 1;
	}
	else if (code < 0x10000)
	{
		lead = 0xE0;
		extra = 2;
	}
	else if (code < 0x200000)
	{
		lead = 0xF0;
		extra = 3;
	}
	else if (code < 0x4000000)
	{
		lead = 0xF8;
		extra = 4;
	}
	else if (code < 0x80000000)
	{
		lead = 0xFC;
		extra = 5;
	}
	else
	{
		llwarns << "Invalid Unicode character " << code << "!" << llendl;
		*cursor++ = LL_UNKNOWN_CHAR;
		return cursor - outchars;
	}

	// Lead byte carries the topmost bits, then each continuation byte
	// carries the next 6 bits, most significant first.
	*cursor++ = lead | (code >> (6 * extra));
	for (S32 shift = 6 * (extra - 1); shift >= 0; shift -= 6)
	{
		*cursor++ = 0x80 | ((code >> shift) & 0x3F);
	}
	return cursor - outchars;
}

static std::ptrdiff_t utf16chars_to_wchar(const U16* inchars, llwchar* outchar)
{
	const U16* base = inchars;
	U16 cur_char = *inchars++;
	llwchar char32 = cur_char;
	if ((cur_char >= 0xD800) && (cur_char <= 0xDFFF))
	{
		// Surrogates
		char32 = ((llwchar)(cur_char - 0xD800)) << 10;
		cur_char = *inchars++;
		char32 += (llwchar)(cur_char - 0xDC00) + 0x0010000UL;
	}
	else
	{
		char32 = (llwchar)cur_char;
	}
	*outchar = char32;
	return std::ptrdiff_t(inchars - base);
}

// Converts the first len characters of a wide string to UTF-16. Characters
// above 0xFFFF are emitted as a surrogate pair.
llutf16string wstring_to_utf16str(const LLWString& utf32str, S32 len)
{
	llutf16string result;
	for (S32 pos = 0; pos < len; ++pos)
	{
		const U32 code = utf32str[pos];
		if (code <= 0xFFFF)
		{
			// Fits in a single 16-bit unit
			result += code;
			continue;
		}
		// 0xD7C0 is 0xD800 - (0x10000 >> 10): high then low surrogate
		result += (0xD7C0 + (code >> 10));
		result += (0xDC00 | (code & 0x3FF));
	}
	return result;
}

// Converts the first len 16-bit units of a UTF-16 string to a wide string.
// Returns an empty string when len is not positive or the input is empty.
LLWString utf16str_to_wstring(const llutf16string& utf16str, S32 len)
{
	LLWString result;
	if (len > 0 && !utf16str.empty())
	{
		// Craziness to make gcc happy (llutf16string.c_str() is tweaked on
		// linux):
		const U16* units = &(*(utf16str.begin()));
		for (S32 pos = 0; pos < len; )
		{
			llwchar decoded;
			pos += utf16chars_to_wchar(units + pos, &decoded);
			result += decoded;
		}
	}
	return result;
}

// Returns the number of llwchar (UTF-32) characters represented by the first
// utf16_len 16-bit units of the given UTF-16 string: each valid surrogate
// pair found in that range counts for one character instead of two.
S32 utf16str_wstring_length(const llutf16string& utf16str, S32 utf16_len)
{
	// Craziness to make gcc happy (llutf16string.c_str() is tweaked on
	// linux):
	const U16* const units = &(*(utf16str.begin()));
	S32 pairs = 0;
	for (S32 pos = 0; pos < utf16_len; )
	{
		// See http://en.wikipedia.org/wiki/UTF-16
		const U16 lead = units[pos++];
		if (lead < 0xD800 || lead > 0xDBFF)
		{
			continue;	// Not the first half of a surrogate pair
		}
		if (pos >= utf16_len)
		{
			break;		// Pair truncated by the end of the range
		}
		const U16 trail = units[pos];
		if (trail >= 0xDC00 && trail <= 0xDFFF)
		{
			// Valid second half of a surrogate pair
			++pairs;
			++pos;
		}
	}
	return utf16_len - pairs;
}

// Returns the number of UTF-16 units needed to encode the wlen characters of
// wstr starting at woffset (the range is clipped to the end of the string).
// Characters at or above 0x10000 need two units, all others need one.
S32 wstring_utf16_length(const LLWString& wstr, S32 woffset, S32 wlen)
{
	const S32 stop = llmin((S32)wstr.length(), woffset + wlen);
	if (stop < woffset)
	{
		return 0;
	}

	S32 units = 0;
	for (S32 pos = woffset; pos < stop; ++pos)
	{
		units += wstr[pos] >= 0x10000 ? 2 : 1;
	}
	return units;
}

// Given a wstring and an offset in it, returns the length as wstring (i.e.
// the number of llwchars) of the longest substring that starts at the offset
// and whose equivalent UTF-16 string does not exceed the given utf16_length.
// When 'unaligned' is not NULL, it receives true if the scan stopped with the
// position strictly past the adjusted limit (presumably meaning that the
// UTF-16 budget ends in the middle of a surrogate pair), false otherwise.
S32 wstring_length_from_utf16_length(const LLWString& wstr, S32 woffset,
									 S32 utf16_length, bool* unaligned)
{
	const S32 wlen = wstr.length();
	// Index limit in wstr; pulled back by one for each character that takes
	// two UTF-16 units.
	S32 limit = woffset + utf16_length;
	bool overshoot = false;
	S32 pos = woffset;
	for ( ; pos < wlen; ++pos)
	{
		if (wstr[pos] >= 0x10000)
		{
			--limit;
		}
		if (pos >= limit)
		{
			overshoot = pos > limit;
			break;
		}
	}
	if (unaligned)
	{
		*unaligned = overshoot;
	}
	return pos - woffset;
}

// Returns the number of bytes (1 to 6) that wchar_to_utf8chars()-style
// encoding uses for the given wide character.
S32 wchar_utf8_length(const llwchar wc)
{
	// This first case will also catch negative values which are technically
	// invalid.
	if (wc < 0x80)
	{
		return 1;
	}
	if (wc < 0x800)
	{
		return 2;
	}
	if (wc < 0x10000)
	{
		return 3;
	}
	if (wc < 0x200000)
	{
		return 4;
	}
	return wc < 0x4000000 ? 5 : 6;
}

// Returns the total number of UTF-8 bytes needed to encode the whole wide
// string (sum of wchar_utf8_length() over its characters).
S32 wstring_utf8_length(const LLWString& wstr)
{
	S32 total = 0;
	for (const llwchar wc : wstr)
	{
		total += wchar_utf8_length(wc);
	}
	return total;
}

// Decodes the first len bytes of a UTF-8 string into a wide string, one
// llwchar per decoded character. Invalid lead bytes, truncated or malformed
// multi-byte sequences and overlong encodings each produce LL_UNKNOWN_CHAR.
// NOTE(review): the outer loop indexes utf8str[i] for i < len without
// comparing against the string length; this assumes callers never pass a len
// greater than utf8str.length() - confirm.
LLWString utf8str_to_wstring(const std::string& utf8str, S32 len)
{
	LLWString wout;
	S32 max_len = utf8str.length();
	S32 i = 0;
	while (i < len)
	{
		llwchar unichar;
		U8 cur_char = utf8str[i];

		if (cur_char < 0x80)
		{
			// Ascii character, just add it
			unichar = cur_char;
		}
		else
		{
			// The number of leading 1 bits in the lead byte tells how many
			// continuation bytes follow; the remaining low bits are the
			// topmost bits of the character.
			S32 cont_bytes = 0;
			if ((cur_char >> 5) == 0x6)			// Two byte UTF8 -> 1 UTF32
			{
				unichar = 0x1F & cur_char;
				cont_bytes = 1;
			}
			else if ((cur_char >> 4) == 0xe)	// Three byte UTF8 -> 1 UTF32
			{
				unichar = 0x0F & cur_char;
				cont_bytes = 2;
			}
			else if ((cur_char >> 3) == 0x1e)	// Four byte UTF8 -> 1 UTF32
			{
				unichar = 0x07 & cur_char;
				cont_bytes = 3;
			}
			else if ((cur_char >> 2) == 0x3e)	// Five byte UTF8 -> 1 UTF32
			{
				unichar = 0x03 & cur_char;
				cont_bytes = 4;
			}
			else if ((cur_char >> 1) == 0x7e)	// Six byte UTF8 -> 1 UTF32
			{
				unichar = 0x01 & cur_char;
				cont_bytes = 5;
			}
			else
			{
				// Stray continuation byte or invalid lead byte (0xFE/0xFF)
				wout += LL_UNKNOWN_CHAR;
				++i;
				continue;
			}

			// Check that this character doesn't go past the end of the string
			S32 end = len < i + cont_bytes ? len : i + cont_bytes;
			do
			{
				if (++i >= max_len)
				{
					// Malformed sequence - roll back to look at this as a new
					// char
					unichar = LL_UNKNOWN_CHAR;
					--i;
					break;
				}

				cur_char = utf8str[i];
				if ((cur_char >> 6) == 0x2)
				{
					// Valid continuation byte (10xxxxxx): append its 6 bits
					unichar <<= 6;
					unichar += 0x3F & cur_char;
				}
				else
				{
					// Malformed sequence - roll back to look at this as a new
					// char
					unichar = LL_UNKNOWN_CHAR;
					--i;
					break;
				}
			}
			while (i < end);

			// Handle overlong characters and NULL characters
			if ((cont_bytes == 1 && unichar < 0x80) ||
				(cont_bytes == 2 && unichar < 0x800) ||
				(cont_bytes == 3 && unichar < 0x10000) ||
				(cont_bytes == 4 && unichar < 0x200000) ||
				(cont_bytes == 5 && unichar < 0x4000000))
			{
				unichar = LL_UNKNOWN_CHAR;
			}
		}

		wout += unichar;
		++i;
	}
	return wout;
}

std::string wstring_to_utf8str(const LLWString& utf32str, S32 len)
{
	std::string out;
	S32 i = 0;
	while (i < len)
	{
		char tchars[8];
		S32 n = wchar_to_utf8chars(utf32str[i++], tchars);
		tchars[n] = 0;
		out += tchars;
	}
	return out;
}

// Returns a trimmed copy of a UTF-8 string: the text is converted to a wide
// string, passed to LLWStringUtil::trim() and converted back to UTF-8.
std::string utf8str_trim(const std::string& utf8str)
{
	LLWString wstr = utf8str_to_wstring(utf8str);
	LLWStringUtil::trim(wstr);
	return wstring_to_utf8str(wstr);
}

// Returns a lower-cased copy of a UTF-8 string: the text is converted to a
// wide string, passed to LLWStringUtil::toLower() and converted back.
std::string utf8str_tolower(const std::string& utf8str)
{
	LLWString out_str = utf8str_to_wstring(utf8str);
	LLWStringUtil::toLower(out_str);
	return wstring_to_utf8str(out_str);
}

// Compares two UTF-8 strings by converting both to wide strings and returning
// the result of LLWStringUtil::compareInsensitive() on them.
S32 utf8str_compare_insensitive(const std::string& lhs, const std::string& rhs)
{
	LLWString wlhs = utf8str_to_wstring(lhs);
	LLWString wrhs = utf8str_to_wstring(rhs);
	return LLWStringUtil::compareInsensitive(wlhs, wrhs);
}

std::string utf8str_truncate(const std::string& utf8str, S32 max_len)
{
	if (!max_len)
	{
		return std::string();
	}
	if ((S32)utf8str.length() <= max_len)
	{
		return utf8str;
	}
	else
	{
		S32 cur_char = max_len;

		// If we are ASCII, we do not need to do anything
		if ((U8)utf8str[cur_char] > 0x7f)
		{
			// If first two bits are (10), it is the tail end of a multibyte
			// char. We need to shift back to the first character
			while ((0xc0 & utf8str[cur_char]) == 0x80)
			{
				// Keep moving forward until we hit the first char;
				if (--cur_char == 0)
				{
					// Make sure we do not trash memory if we've got a bogus
					// string.
					break;
				}
			}
		}
		// The byte index we are on is one we want to get rid of, so we only
		// want to copy up to (cur_char-1) chars
		return utf8str.substr(0, cur_char);
	}
}

// Returns a copy of a UTF-8 string after passing its wide string form, with
// target_char and replace_char, to LLWStringUtil::replaceChar().
std::string utf8str_substChar(const std::string& utf8str,
							  const llwchar target_char,
							  const llwchar replace_char)
{
	LLWString wstr = utf8str_to_wstring(utf8str);
	LLWStringUtil::replaceChar(wstr, target_char, replace_char);
	//wstr = wstring_substChar(wstr, target_char, replace_char);
	return wstring_to_utf8str(wstr);
}

// Returns a copy of a UTF-8 string whose wide string form has been processed
// by LLWStringUtil::_makeASCII() before being converted back to UTF-8.
std::string utf8str_makeASCII(const std::string& utf8str)
{
	LLWString wstr = utf8str_to_wstring(utf8str);
	LLWStringUtil::_makeASCII(wstr);
	return wstring_to_utf8str(wstr);
}

std::string mbcsstring_makeASCII(const std::string& wstr)
{
	// Replace non-ASCII chars with replace_char
	std::string out_str = wstr;
	for (S32 i = 0, len = out_str.length(); i < len; ++i)
	{
		if ((U8)out_str[i] > 0x7f)
		{
			out_str[i] = LL_UNKNOWN_CHAR;
		}
	}
	return out_str;
}

// Returns a copy of the string with every carriage return (byte 13) removed.
// Line feeds are kept, so "\r\n" line endings become "\n".
std::string utf8str_removeCRLF(const std::string& utf8str)
{
	constexpr char CARRIAGE_RETURN = 13;

	std::string result;
	result.reserve(utf8str.length());
	for (const char ch : utf8str)
	{
		if (ch != CARRIAGE_RETURN)
		{
			result.push_back(ch);
		}
	}
	return result;
}

// Converts a single-byte-per-character string to UTF-8: bytes below 128 are
// copied as is, bytes 0x80-0xbf become 0xc2 followed by the byte itself and
// bytes 0xc0-0xff become 0xc3 followed by (0x80 + low 6 bits).
std::string iso8859_to_utf8(const std::string& iso8859str)
{
	std::string result;
	// Worst case: every input byte expands to two output bytes
	result.reserve(2 * iso8859str.length());

	for (const char ch : iso8859str)
	{
		const unsigned char byte = (unsigned char)ch;
		if (byte >= 128)
		{
			result.push_back(byte > 0xbf ? 0xc3 : 0xc2);
			result.push_back(0x80 + (byte & 0x3f));
		}
		else
		{
			result.push_back(byte);
		}
	}
	return result;
}

// Converts UTF-8 back to a single-byte-per-character string: bytes below 128
// are copied as is; any other byte is treated as the lead of a two byte
// sequence and combined with the following byte into one output byte. A lead
// byte in last position (with no following byte) is dropped.
std::string utf8_to_iso8859(const std::string& utf8str)
{
	std::string result;
	const size_t len = utf8str.length();
	result.reserve(len);

	size_t pos = 0;
	while (pos < len)
	{
		const unsigned char lead = utf8str[pos++];
		if (lead < 128)
		{
			result.push_back(lead);
			continue;
		}
		if (pos < len)	// A second byte is available
		{
			result.push_back(((lead & 0x1f) << 6) + (utf8str[pos++] & 0x3f));
		}
	}
	return result;
}

#if LL_WINDOWS

// Converts a NUL-terminated wide (UTF-16) string to a multi-byte string in
// the given Windows code page. Returns an empty string when 'in' is NULL or
// empty, or when WideCharToMultiByte() reports a failure.
std::string ll_convert_wide_to_string(const wchar_t* in,
									  unsigned int code_page)
{
	std::string out;
	if (!in)
	{
		return out;
	}

	const int len_in = (int)wcslen(in);
	if (len_in <= 0)
	{
		return out;
	}

	// First call: query the number of bytes needed for the conversion
	const int len_out = WideCharToMultiByte(code_page, 0, in, len_in, NULL, 0,
											0, 0);
	if (len_out <= 0)
	{
		return out;
	}

	// Convert straight into the string storage: no manual new[]/delete[]
	// (which leaked if the assignment threw), no separate zeroed buffer.
	out.resize(len_out);
	const int written = WideCharToMultiByte(code_page, 0, in, len_in, &out[0],
											len_out, 0, 0);
	out.resize(written > 0 ? written : 0);
	return out;
}

// Converts a wide string to a UTF-8 string: same as the two-parameter
// overload, called with the CP_UTF8 code page.
std::string ll_convert_wide_to_string(const wchar_t* in)
{
	return ll_convert_wide_to_string(in, CP_UTF8);
}

// Builds a LLWString by copying the elements of a std::wstring one by one:
// no UTF-16 decoding is performed on the input.
LLWString ll_convert_wide_to_wstring(const std::wstring& in)
{
	// This function, like its converse, is a placeholder, encapsulating a
	// guilty little hack: the only "official" way Nat has found to convert
	// between std::wstring (16 bits on Windows) and LLWString (UTF-32) is
	// by using iconv, which we have avoided so far. It sort of works to just
	// copy individual characters...
	// The point is that if/when we DO introduce some more official way to
	// perform such conversions, we should only have to call it here.
	return { in.begin(), in.end() };
}

// Builds a std::wstring by copying the elements of a LLWString one by one:
// no UTF-16 encoding is performed on the output.
std::wstring ll_convert_wstring_to_wide(const LLWString& in)
{
	// See comments in ll_convert_wide_to_wstring()
	return { in.begin(), in.end() };
}

// Converts a UTF-8 string to a wide string: same as the two-parameter
// overload, called with the CP_UTF8 code page.
std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in)
{
	return ll_convert_string_to_wide(in, CP_UTF8);
}

// Converts a multi-byte string in the given Windows code page to a wide
// (UTF-16) string. The result stops at the first NUL wide character of the
// converted text, and is empty when MultiByteToWideChar() fails.
std::basic_string<wchar_t> ll_convert_string_to_wide(const std::string& in,
													 unsigned int code_page)
{
	// From review:
	// We can preallocate a wide char buffer that is the same length (in
	// wchar_t elements) as the utf8 input, plus one for a nul terminator, and
	// be guaranteed to not overflow.

	// Normally, I would call that sort of thing premature optimization, but we
	// *are* seeing string operations taking a bunch of time, especially when
	// constructing widgets.

	// Output buffer destroyed on exit, with room for a NUL terminator. The
	// vector constructor already zero-initializes every element, so no
	// memset() is needed (the former one passed an element count where a
	// byte count was expected).
	std::vector<wchar_t> w_out(in.length() + 1);

	const int converted = MultiByteToWideChar(code_page, 0, in.c_str(),
											  (int)in.length(), &w_out[0],
											  (int)w_out.size());

	// Looks like MultiByteToWideChar didn't add null terminator to converted
	// string, see EXT-4858. Keep the terminator index inside the buffer
	// whatever the API returned.
	size_t end = converted > 0 ? (size_t)converted : 0;
	if (end >= w_out.size())
	{
		end = w_out.size() - 1;
	}
	w_out[end] = 0;

	// Construct string<wchar_t> from our temporary output buffer
	return std::basic_string<wchar_t>(&w_out[0]);
}

// Re-encodes a string from the CP_ACP code page to UTF-8, going through an
// intermediate wide string.
std::string ll_convert_string_to_utf8_string(const std::string& in)
{
	const std::basic_string<wchar_t> wide =
		ll_convert_string_to_wide(in, CP_ACP);
	return ll_convert_wide_to_string(wide.c_str(), CP_UTF8);
}

#endif // LL_WINDOWS

///////////////////////////////////////////////////////////////////////////////
// Formerly in u64.cpp - Utilities for conversions between U64 and string
///////////////////////////////////////////////////////////////////////////////

// Parses the first run of decimal digits found in str as an unsigned 64 bits
// integer; anything before the first digit is skipped. Logs a warning and
// returns 0 when str contains no digit at all. No overflow check is done.
U64 str_to_U64(const std::string& str)
{
	const char* digit = strpbrk(str.c_str(), "0123456789");
	if (!digit)
	{
		llwarns << "Bad string to U64 conversion attempt: format" << llendl;
		return 0;
	}

	U64 value = 0;
	for ( ; *digit >= '0' && *digit <= '9'; ++digit)
	{
		value = value * 10 + (*digit - '0');
	}
	return value;
}

// Formats an unsigned 64 bits integer as a decimal string. The value is
// split into three groups of (up to) 7 decimal digits, each fitting in an
// U32, and the groups are printed without leading zeroes on the first one.
std::string U64_to_str(U64 value)
{
	constexpr U64 GROUP = 10000000;	// 10^7: seven decimal digits per group

	const U32 low = (U32)(value % GROUP);
	value /= GROUP;
	const U32 mid = (U32)(value % GROUP);
	value /= GROUP;
	const U32 high = (U32)(value % GROUP);

	// Three cases to avoid leading zeroes unless necessary
	if (high)
	{
		return llformat("%u%07u%07u", high, mid, low);
	}
	if (mid)
	{
		return llformat("%u%07u", mid, low);
	}
	return llformat("%u", low);
}

// Formats value as a decimal string (see the std::string overload) and
// copies it into the caller-provided buffer via LLStringUtil::copy(), passing
// result_size as the size limit. Returns the 'result' pointer.
char* U64_to_str(U64 value, char* result, S32 result_size)
{
	std::string res = U64_to_str(value);
	LLStringUtil::copy(result, res.c_str(), result_size);
	return result;
}

// Portable wrapper around the C library string to unsigned 64 bits integer
// conversion: _strtoui64() under Windows, strtoull() elsewhere. Parameters
// and return value are passed through unchanged.
U64 llstrtou64(const char* str, char** end, S32 base)
{
	// Test the value of LL_WINDOWS like everywhere else in this file (#if,
	// not #ifdef), so that a platform defining it to 0 is not mistaken for
	// Windows.
#if LL_WINDOWS
	return _strtoui64(str, end, base);
#else
	return strtoull(str, end, base);
#endif
}

///////////////////////////////////////////////////////////////////////////////
// LLStringOps class
///////////////////////////////////////////////////////////////////////////////

// Time offsets and daylight saving flag, set by setupDatetimeInfo()
long LLStringOps::sPacificTimeOffset = 0;
long LLStringOps::sLocalTimeOffset = 0;
bool LLStringOps::sPacificDaylightTime = false;
// Date/time keyword to format code map, filled by setupDatetimeInfo()
std::map<std::string, std::string> LLStringOps::datetimeToCodes;

// Day and month names, set by the corresponding setup*Names() methods
std::vector<std::string> LLStringOps::sWeekDayList;
std::vector<std::string> LLStringOps::sWeekDayShortList;
std::vector<std::string> LLStringOps::sMonthList;
std::vector<std::string> LLStringOps::sMonthShortList;

// Day format (set by setupDayFormat()) and AM/PM strings
std::string LLStringOps::sDayFormat;
std::string LLStringOps::sAM;
std::string LLStringOps::sPM;

//static
// Returns true when every character of str is a hexadecimal digit (an empty
// string therefore returns true), false otherwise.
bool LLStringOps::isHexString(const std::string& str)
{
	for (const char ch : str)
	{
		// isxdigit() has undefined behaviour for negative values other than
		// EOF, which a plain char holds for bytes above 0x7f: go through
		// unsigned char.
		if (!isxdigit((unsigned char)ch))
		{
			return false;
		}
	}
	return true;
}

//static
// Returns true when the character lies in the 0x1f000-0x1ffff range.
bool LLStringOps::isEmoji(llwchar a)
{
#if 0	// Do not consider special characters that might have a corresponding
		// glyph in the monochrome fallback fonts as a "genuine" emoji. HB
	return a == 0xa9 || a == 0xae || (a >= 0x2000 && a < 0x3300) ||
		   (a >= 0x1f000 && a < 0x20000);
#else
	// These are indeed "genuine" emojis. HB
	return !(a < 0x1f000 || a >= 0x20000);
#endif
}

//static
// Compares two NUL-terminated wide strings: with wcscoll() where wide strings
// are 32 bits, or with strcmp() on their UTF-8 conversions under Windows.
S32 LLStringOps::collate(const llwchar* a, const llwchar* b)
{
#if LL_WINDOWS
	// Under Windows, wide string functions operate on 16-bit strings, not the
	// proper 32 bit wide string.
	const std::string utf8_a = wstring_to_utf8str(LLWString(a));
	const std::string utf8_b = wstring_to_utf8str(LLWString(b));
	return strcmp(utf8_a.c_str(), utf8_b.c_str());
#else
	return wcscoll(a, b);
#endif
}

//static
// Computes the local time offset (in seconds, from the difference between
// mktime() applied to the UTC and to the local broken-down current time, less
// one hour when local daylight saving is in effect), records the Pacific
// daylight flag and offset (7 or 8 hours) and fills the map associating the
// date/time keywords with their format codes.
void LLStringOps::setupDatetimeInfo(bool daylight)
{
	const time_t now = time(NULL);

	// Note: gmtime() and localtime() may share the same static buffer, so
	// each result is consumed by mktime() before the next call.
	struct tm* broken_down = gmtime(&now);
	const time_t gmt_time = mktime(broken_down);

	broken_down = localtime(&now);
	const time_t local_time = mktime(broken_down);

	sLocalTimeOffset = (long)(gmt_time - local_time);
	if (broken_down->tm_isdst)
	{
		sLocalTimeOffset -= 60 * 60;	// 1 hour
	}

	sPacificDaylightTime = daylight;
	sPacificTimeOffset = (sPacificDaylightTime ? 7 : 8) * 60 * 60;

	static const char* const codes[][2] =
	{
		{ "wkday",		"%a" },		// Thu
		{ "weekday",	"%A" },		// Thursday
		{ "year4",		"%Y" },		// 2009
		{ "year",		"%Y" },		// 2009
		{ "year2",		"%y" },		// 09
		{ "mth",		"%b" },		// Aug
		{ "month",		"%B" },		// August
		{ "mthnum",		"%m" },		// 08
		{ "day",		"%d" },		// 31
		{ "sday",		"%-d" },	// 9
		{ "hour24",		"%H" },		// 14
		{ "hour",		"%H" },		// 14
		{ "hour12",		"%I" },		// 02
		{ "min",		"%M" },		// 59
		{ "ampm",		"%p" },		// AM
		{ "second",		"%S" },		// 59
		{ "timezone",	"%Z" }		// PST
	};
	for (const auto& entry : codes)
	{
		datetimeToCodes[entry[0]] = entry[1];
	}
}

// Splits 'data' at each ':' character and stores the pieces, in order, in
// 'output' (cleared first). Empty pieces are kept, so the output always holds
// one more element than there are colons in 'data'.
static void tokenize_str_to_aray(const std::string& data,
								 std::vector<std::string>& output)
{
	output.clear();

	size_t start = 0;
	for (size_t sep = data.find(':'); sep != std::string::npos;
		 sep = data.find(':', start))
	{
		output.push_back(data.substr(start, sep - start));
		start = sep + 1;
	}
	// Whatever follows the last separator (possibly nothing)
	output.push_back(data.substr(start));
}

//static
// Fills sWeekDayList with the colon-separated names found in 'data'.
void LLStringOps::setupWeekDaysNames(const std::string& data)
{
	tokenize_str_to_aray(data, sWeekDayList);
}

//static
// Fills sWeekDayShortList with the colon-separated names found in 'data'.
void LLStringOps::setupWeekDaysShortNames(const std::string& data)
{
	tokenize_str_to_aray(data, sWeekDayShortList);
}

//static
// Fills sMonthList with the colon-separated names found in 'data'.
void LLStringOps::setupMonthNames(const std::string& data)
{
	tokenize_str_to_aray(data, sMonthList);
}

//static
// Fills sMonthShortList with the colon-separated names found in 'data'.
void LLStringOps::setupMonthShortNames(const std::string& data)
{
	tokenize_str_to_aray(data, sMonthShortList);
}

//static
// Stores 'data' as the day format string (sDayFormat).
void LLStringOps::setupDayFormat(const std::string& data)
{
	sDayFormat = data;
}

//static
// Returns the format code registered for the given date/time keyword in
// datetimeToCodes, or an empty string when the keyword is unknown.
std::string LLStringOps::getDatetimeCode(std::string key)
{
	const auto found = datetimeToCodes.find(key);
	return found != datetimeToCodes.end() ? found->second : std::string();
}

namespace LLStringFn
{
	// Replaces with 'replacement' every byte of 'str' that is either an ASCII
	// control character (below 0x20) or has its high bit set (0x80 and
	// above). Note: this restricts output to ASCII. The test is done on
	// unsigned values so that the result does not depend on whether plain
	// char is signed on the target platform.
	void replace_nonprintable_in_ascii(std::basic_string<char>& str,
									   char replacement)
	{
		constexpr unsigned char SPACE = 0x20;
		constexpr unsigned char FIRST_NON_ASCII = 0x80;
		for (size_t i = 0, len = str.size(); i < len; ++i)
		{
			const unsigned char c = (unsigned char)str[i];
			if (c < SPACE || c >= FIRST_NON_ASCII)
			{
				str[i] = replacement;
			}
		}
	}

	// Same as replace_nonprintable_in_ascii(), but also replaces the pipe
	// character ('|'). Note: this restricts output to ASCII.
	void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
												char replacement)
	{
		constexpr unsigned char SPACE = 0x20;
		constexpr unsigned char FIRST_NON_ASCII = 0x80;
		constexpr unsigned char PIPE = 0x7c;
		for (size_t i = 0, len = str.size(); i < len; ++i)
		{
			const unsigned char c = (unsigned char)str[i];
			if (c < SPACE || c >= FIRST_NON_ASCII || c == PIPE)
			{
				str[i] = replacement;
			}
		}
	}

	// Replaces all control characters (c < 0x20) with replacement in string.
	// Bytes with the high bit set are left untouched.
	void replace_ascii_controlchars(std::basic_string<char>& str,
									char replacement)
	{
		constexpr unsigned char SPACE = 0x20;
		for (size_t i = 0, len = str.size(); i < len; ++i)
		{
			const unsigned char c = (unsigned char)str[i];
			if (c < SPACE)
			{
				str[i] = replacement;
			}
		}
	}

	// Returns a copy of 'str' without the bytes that are not allowed in XML.
	// https://wiki.lindenlab.com/wiki/Unicode_Guidelines has details on
	// allowable code points for XML. Specifically, they are:
	// 0x09, 0x0a, 0x0d, and 0x20 on up.  JC
	std::string strip_invalid_xml(const std::string& str)
	{
		constexpr unsigned char SPACE = 0x20;
		constexpr unsigned char TAB = 0x09;
		constexpr unsigned char LF = 0x0a;
		constexpr unsigned char CR = 0x0d;
		std::string output;
		output.reserve(str.size());
		std::string::const_iterator it = str.begin();
		while (it != str.end())
		{
			// Must compare as unsigned for >=
			// Test most likely match first
			const unsigned char c = (unsigned char)*it;
			if (c >= SPACE || c == TAB || c == LF || c == CR)
			{
				output.push_back(c);
			}
			++it;
		}
		return output;
	}

	// Character to XML literal tables: the first one applies to element text
	// and attributes alike, the second one to attribute values only.
	typedef std::map<char, std::string> literals_map_t;
	static const literals_map_t xml_elem_literals =
	{
		{ '<', "&lt;" },
		{ '>', "&gt;" },
		{ '&', "&amp;" }
	};
	static const literals_map_t xml_attr_literals =
	{
		{ '"', "&quot;" },
		{ '\'', "&apos;" }
	};

	// Replaces in 'text' each character listed in 'literals' with the
	// corresponding literal, one table entry after the other. This is only
	// correct because '&' sorts before the other keys in the map and is thus
	// processed first, before any literal (which all start with '&') has
	// been inserted.
	static void literals_encode(std::string& text,
								const literals_map_t& literals)
	{
		for (literals_map_t::const_iterator it = literals.begin(),
											end = literals.end();
			 it != end; ++it)
		{
			size_t pos = 0;
			while ((pos = text.find(it->first, pos)) != std::string::npos)
			{
				text.replace(pos, 1, it->second);
				pos += it->second.size();
			}
		}
	}

	// Replaces all characters that are not allowed in XML 1.0 with the
	// corresponding literals: '<', '>' and '&' always, plus '"' and '\''
	// when 'for_attribute' is true.
	std::string xml_encode(const std::string& input, bool for_attribute)
	{
		std::string result(input);
		literals_encode(result, xml_elem_literals);
		if (for_attribute)
		{
			literals_encode(result, xml_attr_literals);
		}
		return result;
	}

	// If one of the literals listed in 'literals' starts at position 'pos'
	// of 'text', appends the corresponding character to 'out', stores the
	// length of the literal in 'consumed' and returns true. Returns false
	// (leaving 'out' and 'consumed' untouched) otherwise.
	static bool literal_decode_at(const std::string& text, size_t pos,
								  const literals_map_t& literals,
								  std::string& out, size_t& consumed)
	{
		for (literals_map_t::const_iterator it = literals.begin(),
											end = literals.end();
			 it != end; ++it)
		{
			const std::string& literal = it->second;
			if (text.compare(pos, literal.size(), literal) == 0)
			{
				out.push_back(it->first);
				consumed = literal.size();
				return true;
			}
		}
		return false;
	}

	// Replaces some of XML literals that are defined in XML 1.0 with the
	// corresponding characters: "&lt;", "&gt;" and "&amp;" always, plus
	// "&quot;" and "&apos;" when 'for_attribute' is true.
	// The input is decoded in a single left to right pass, so that the
	// characters produced by a replacement are never decoded a second time:
	// "&amp;lt;" gives "&lt;" (not "<"), which makes this function the exact
	// inverse of xml_encode().
	std::string xml_decode(const std::string& input, bool for_attribute)
	{
		std::string result;
		result.reserve(input.size());

		const size_t len = input.size();
		size_t pos = 0;
		while (pos < len)
		{
			size_t consumed = 0;
			if (input[pos] == '&' &&
				(literal_decode_at(input, pos, xml_elem_literals, result,
								   consumed) ||
				 (for_attribute &&
				  literal_decode_at(input, pos, xml_attr_literals, result,
									consumed))))
			{
				pos += consumed;
			}
			else
			{
				result.push_back(input[pos++]);
			}
		}
		return result;
	}
}

////////////////////////////////////////////////////////////

// Forward declaration of the std::string specialization of
// LLStringUtil::format(), so that it is known before its use in
// LLStringUtil::formatDatetime().
template<>
S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions);

//static
// Splits 'instr' into the pieces found between runs of characters listed in
// 'delims', passes each piece to LLStringUtil::trim() and appends it to
// 'tokens' (which is not cleared first).
template<>
void LLStringUtil::getTokens(const std::string& instr,
							 std::vector<std::string>& tokens,
							 const std::string& delims)
{
	size_t begin = instr.find_first_not_of(delims);
	while (begin != std::string::npos)
	{
		// The token runs up to the next delimiter, or to the end of string
		const size_t found = instr.find_first_of(delims, begin);
		const size_t stop = found == std::string::npos ? instr.length()
													   : found;

		std::string token(instr, begin, stop - begin);
		LLStringUtil::trim(token);
		tokens.push_back(token);

		begin = instr.find_first_not_of(delims, stop);
	}
}

//static
// Searches 'instr', from position 'start', for the next "[...]" bracketed
// expression (the innermost one in case of nested brackets). On success, the
// comma-separated tokens found between the brackets are appended to 'tokens',
// 'start' is moved just past the closing bracket and the position of the
// opening bracket is returned. Returns npos (leaving 'start' and 'tokens'
// untouched) when no such expression is found.
template<>
LLStringUtil::size_type LLStringUtil::getSubstitution(const std::string& instr,
													  size_type& start,
													  std::vector<std::string>& tokens)
{
	const std::string delims(",");

	// Find the first [
	size_type open_pos = instr.find('[', start);
	if (open_pos == std::string::npos)
	{
		return std::string::npos;
	}

	// Find the first ] after the initial [
	const size_type close_pos = instr.find(']', open_pos);
	if (close_pos == std::string::npos)
	{
		return std::string::npos;
	}

	// Find the last [ before ] in case of nested [[]]
	open_pos = instr.find_last_of('[', close_pos - 1);
	if (open_pos == std::string::npos || open_pos < start)
	{
		return std::string::npos;
	}

	getTokens(instr.substr(open_pos + 1, close_pos - open_pos - 1), tokens,
			  delims);
	start = close_pos + 1;

	return open_pos;
}

//static
// Looks up in 'substitutions' the replacement for 'token': first under the
// bare token, then under the token surrounded with brackets. On success,
// stores the replacement text in 'replacement' and returns true; returns
// false (leaving 'replacement' untouched) when neither key exists.
template<>
bool LLStringUtil::simpleReplacement(std::string& replacement,
									 const std::string& token,
									 const format_map_t& substitutions)
{
	// We use find() rather than operator[] because we want to distinguish
	// between missing replacements and deliberately empty replacement
	// strings.
	format_map_t::const_iterator found = substitutions.find(token);
	if (found == substitutions.end())
	{
		// No replacement for the bare token: see if there's one WITH brackets
		found = substitutions.find(std::string("[" + token + "]"));
		if (found == substitutions.end())
		{
			return false;
		}
	}

	replacement = found->second;
	return true;
}

//static
// Same as the map variant, with the substitutions held in a LLSD: looks up
// 'token', then the bracketed token, stores the value found (as a string) in
// 'replacement' and returns true, or returns false when neither key exists.
template<>
bool LLStringUtil::simpleReplacement(std::string& replacement,
									 const std::string& token,
									 const LLSD& substitutions)
{
	// Test first using has() because if we just look up with operator[] we
	// get back an empty string even if the value is missing. We want to
	// distinguish between missing replacements and deliberately empty
	// replacement strings.
	if (substitutions.has(token))
	{
		replacement = substitutions[token].asString();
		return true;
	}

	// If not, see if there's one WITH brackets
	const std::string bracketed("[" + token + "]");
	if (!substitutions.has(bracketed))
	{
		return false;
	}
	replacement = substitutions[bracketed].asString();
	return true;
}

//static
// Stores in_locale as the locale name kept in sLocale. The name is not
// validated here: formatNumber() later tries to build a std::locale from it
// and only logs a warning when that fails.
template<>
void LLStringUtil::setLocale(std::string in_locale)
{
	sLocale = in_locale;
}

//static
// Returns (by value) the locale name currently held in sLocale.
template<>
std::string LLStringUtil::getLocale()
{
	return sLocale;
}

//static
// Re-formats, in place, the number held as text in num_str. With a zero
// 'decimals' the text is parsed as a S32 and printed as an integer; otherwise
// it is parsed as a F32 and printed in fixed notation with 'decimals' digits
// after the decimal point. The sLocale locale is applied to the output when
// it is set and known. num_str is left untouched when the text cannot be
// converted.
template<>
void LLStringUtil::formatNumber(std::string& num_str, S32 decimals)
{
	std::stringstream out;

	if (!sLocale.empty())
	{
		// std::locale() throws if the locale is unknown ! (EXT-7926)
		// In that case, just warn and keep the stream's default locale.
		try
		{
			out.imbue(std::locale(sLocale.c_str()));
		}
		catch (const std::exception&)
		{
			llwarns_once << "Cannot set locale to " << sLocale << llendl;
		}
	}

	if (decimals)
	{
		// Floating point output with the requested number of decimals
		F32 value;
		if (!convertToF32(num_str, value))
		{
			return;
		}
		out << std::fixed << std::showpoint << std::setprecision(decimals)
			<< value;
	}
	else
	{
		// Integer output
		S32 value;
		if (!convertToS32(num_str, value))
		{
			return;
		}
		out << value;
	}

	num_str = out.str();
}

//static
// Produces in 'replacement' the text for one date/time element.
//  - token: name of the element; it is translated into a format code by
//    LLStringOps::getDatetimeCode().
//  - param: time zone selector: "local", "utc"/"gmt", anything else being
//    treated as Pacific ("SL") time.
//  - sec_epoch: the time stamp, in seconds since the epoch.
// Returns true when the time zone code ("%Z") was requested, otherwise true
// only when the token got translated into a non-empty format code.
template<>
bool LLStringUtil::formatDatetime(std::string& replacement,
								  const std::string& token,
								  const std::string& param, S32 sec_epoch)
{
	const bool is_local = param == "local";
	const bool is_utc = param == "utc" || param == "gmt";

	// Shift the time stamp according to the requested time zone; UTC/GMT
	// needs no adjustment.
	if (is_local)
	{
		sec_epoch -= LLStringOps::getLocalTimeOffset();
	}
	else if (!is_utc)	// SL time
	{
		sec_epoch -= LLStringOps::getPacificTimeOffset();
	}

	// Never go before the epoch
	if (sec_epoch < 0)
	{
		sec_epoch = 0;
	}

	LLDate datetime((F64)sec_epoch);
	std::string code = LLStringOps::getDatetimeCode(token);

	// The time zone name is a special case, handled here
	if (code == "%Z")
	{
		if (is_utc)
		{
			replacement = "UTC";
		}
		else if (is_local)
		{
			// User knows their own timezone
			replacement.clear();
		}
		else
		{
			// "slt" = Second Life Time, which is deprecated.
			// If not UTC or user local time, fallback to pacific time
			replacement = LLStringOps::getPacificDaylightTime() ? "PDT"
																: "PST";
		}
		return true;
	}

	// EXT-7013: a few codes are not supported by the standard time formatting
	// function for every language (example: weekdays for Japanese), so the
	// predefined strings held by LLStringOps are used when they are set.
	time_t loc_seconds = (time_t)sec_epoch;
	// Broken-down UTC time for the (already shifted) time stamp
	auto utc_fields = [&loc_seconds]() -> struct tm*
	{
		return gmtime(&loc_seconds);
	};

	if (code == "%A" && LLStringOps::sWeekDayList.size() == 7)
	{
		// Full weekday name
		replacement = LLStringOps::sWeekDayList[utc_fields()->tm_wday];
	}
	else if (code == "%a" && LLStringOps::sWeekDayShortList.size() == 7)
	{
		// Abbreviated weekday name
		replacement = LLStringOps::sWeekDayShortList[utc_fields()->tm_wday];
	}
	else if (code == "%B" && LLStringOps::sMonthList.size() == 12)
	{
		// Full month name
		replacement = LLStringOps::sMonthList[utc_fields()->tm_mon];
	}
	else if (code == "%d" && !LLStringOps::sDayFormat.empty())
	{
		// Day of the month, substituted for [MDAY] in the day format string
		LLStringUtil::format_map_t args;
		args["[MDAY]"] = llformat("%d", utc_fields()->tm_mday);
		replacement = LLStringOps::sDayFormat;
		LLStringUtil::format(replacement, args);
	}
	else if (code == "%-d")
	{
		// Day of the month without leading zero
		replacement = llformat("%d", utc_fields()->tm_mday);
	}
	else if (code == "%p" && !LLStringOps::sAM.empty() &&
			 !LLStringOps::sPM.empty())
	{
		// AM/PM marker
		if (utc_fields()->tm_hour >= 12)
		{
			replacement = LLStringOps::sPM;
		}
		else
		{
			replacement = LLStringOps::sAM;
		}
	}
	else
	{
		// Everything else is left to LLDate
		replacement = datetime.toHTTPDateString(code.c_str());
	}

	// *HACK: delete leading zero from hour string in case 'hour12' (code = %I)
	// time format to show time without leading zero, e.g. 08:16 -> 8:16
	// (EXT-2738). We could have used '%l' format instead, but it is not
	// supported by Windows.
	if (code == "%I" && token == "hour12" && replacement[0] == '0')
	{
		const char second_digit = replacement[1];
		replacement.assign(1, second_digit);
	}

	return !code.empty();
}

// LLStringUtil::format recognizes the following patterns.
// All substitutions *must* be encased in []'s in the input string. The []'s
// are optional in the substitution map.
// [FOO_123]
// [FOO,number,precision]
// [FOO,datetime,format]

//static
// Replaces, in place, the bracketed patterns found in 's' with the values
// taken from the substitutions map. The "number" and "datetime" forms get
// their value formatted by formatNumber() and formatDatetime() respectively,
// the time stamp for the latter being read from the "datetime" map entry.
// Patterns without a replacement are kept verbatim. Returns the number of
// replacements performed.
template<>
S32 LLStringUtil::format(std::string& s, const format_map_t& substitutions)
{
	S32 replaced = 0;

	std::string result;
	std::vector<std::string> parts;

	size_t scan_pos = 0;	// Where the next pattern search starts
	size_t copied_to = 0;	// End of the previously processed pattern
	size_t open_pos = 0;	// Offset of the current pattern's '['
	while ((open_pos = getSubstitution(s, scan_pos,
									   parts)) != std::string::npos)
	{
		// Copy the literal text sitting between the previous pattern and this
		// one.
		result.append(s, copied_to, open_pos - copied_to);
		copied_to = scan_pos;

		std::string replacement;
		bool found = false;

		const size_t count = parts.size();
		if (count == 1)
		{
			// [FOO_123]
			found = simpleReplacement(replacement, parts[0], substitutions);
		}
		else if (count > 1 && parts[1] == "number")
		{
			// [FOO,number,precision]; precision defaults to "0"
			const std::string precision = count > 2 ? parts[2]
													: std::string("0");
			found = simpleReplacement(replacement, parts[0], substitutions);
			if (found)
			{
				formatNumber(replacement, atoi(precision.c_str()));
			}
		}
		else if (count > 1 && parts[1] == "datetime")
		{
			// [FOO,datetime,format]; format defaults to an empty string
			const std::string param = count > 2 ? parts[2] : std::string();

			format_map_t::const_iterator it = substitutions.find("datetime");
			if (it != substitutions.end())
			{
				S32 sec_epoch = 0;
				if (LLStringUtil::convertToS32(it->second, sec_epoch))
				{
					found = formatDatetime(replacement, parts[0], param,
										   sec_epoch);
				}
			}
		}
		// Else: empty pattern or unknown type, i.e. nothing to replace.

		if (found)
		{
			result += replacement;
			++replaced;
		}
		else
		{
			// We had no replacement, use the string as is. E.g.
			// "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
			result.append(s, open_pos, scan_pos - open_pos);
		}

		parts.clear();
	}

	// Append the remainder of the string (no more bracketed names in it)
	result.append(s, scan_pos, std::string::npos);
	s.swap(result);

	return replaced;
}

//static
// Replaces, in place, the bracketed patterns found in 's' with the values
// taken from the LLSD substitutions, which must be a map (else 's' is left
// untouched and 0 is returned). The "number" and "datetime" forms get their
// value formatted by formatNumber() and formatDatetime() respectively, the
// time stamp for the latter being read from the "datetime" entry. Patterns
// without a replacement are kept verbatim. Returns the number of
// replacements performed.
template<>
S32 LLStringUtil::format(std::string& s, const LLSD& substitutions)
{
	S32 replaced = 0;

	if (!substitutions.isMap())
	{
		return replaced;
	}

	std::string result;
	std::vector<std::string> parts;

	size_t scan_pos = 0;	// Where the next pattern search starts
	size_t copied_to = 0;	// End of the previously processed pattern
	size_t open_pos = 0;	// Offset of the current pattern's '['
	while ((open_pos = getSubstitution(s, scan_pos,
									   parts)) != std::string::npos)
	{
		// Copy the literal text sitting between the previous pattern and this
		// one.
		result.append(s, copied_to, open_pos - copied_to);
		copied_to = scan_pos;

		std::string replacement;
		bool found = false;

		const size_t count = parts.size();
		if (count == 1)
		{
			// [FOO_123]
			found = simpleReplacement(replacement, parts[0], substitutions);
		}
		else if (count > 1 && parts[1] == "number")
		{
			// [FOO,number,precision]; precision defaults to "0"
			const std::string precision = count > 2 ? parts[2]
													: std::string("0");
			found = simpleReplacement(replacement, parts[0], substitutions);
			if (found)
			{
				formatNumber(replacement, atoi(precision.c_str()));
			}
		}
		else if (count > 1 && parts[1] == "datetime")
		{
			// [FOO,datetime,format]; format defaults to an empty string
			const std::string param = count > 2 ? parts[2] : std::string();

			// NOTE(review): unlike the format_map_t overload, the presence of
			// the "datetime" key is not checked here; presumably a missing
			// key yields 0 (the epoch) - confirm this is intended.
			S32 sec_epoch = (S32)substitutions["datetime"].asInteger();
			found = formatDatetime(replacement, parts[0], param, sec_epoch);
		}
		// Else: empty pattern or unknown type, i.e. nothing to replace.

		if (found)
		{
			result += replacement;
			++replaced;
		}
		else
		{
			// We had no replacement, use the string as is. E.g.
			// "hello [MISSING_REPLACEMENT]" or "-=[Stylized Name]=-"
			result.append(s, open_pos, scan_pos - open_pos);
		}

		parts.clear();
	}

	// Append the remainder of the string (no more bracketed names in it)
	result.append(s, scan_pos, std::string::npos);
	s.swap(result);

	return replaced;
}

// This used to be in separate llformat.cpp file. Moved here for coherency. HB
// Formats the variadic arguments according to the printf-style 'fmt' string
// and returns the result as a std::string. A NULL 'fmt' yields an empty
// string.
// Note: uses an internal buffer limited to 1024 characters, terminating NUL
// included; longer results are truncated to 1023 characters.
std::string llformat(const char* fmt, ...)
{
	// Avoid allocating 1024 bytes on the stack (or worst, depending on the
	// compiler: on the heap) at *each* call; instead use a static buffer in
	// the thread local storage (so that we stay thread-safe). HB
	thread_local char buffer[1024];
	const size_t last = sizeof(buffer) - 1;

	if (LL_UNLIKELY(!fmt))
	{
		return std::string();
	}

	// The buffer is reused from one call to the next: start from an empty
	// string so that stale text from a previous call can never be returned
	// should the formatting fail without writing anything.
	buffer[0] = '\0';

	va_list va;
	va_start(va, fmt);
#if LL_WINDOWS
	// _vsnprintf() does NOT write a terminating NUL when the formatted text
	// fills the whole count: keep the last byte for the terminator set below.
	_vsnprintf(buffer, last, fmt, va);
#else
	vsnprintf(buffer, sizeof(buffer), fmt, va);
#endif
	va_end(va);

	// Always terminate the buffer, whatever the formatting function did, so
	// that the std::string construction below never reads past its end.
	buffer[last] = '\0';

	return std::string(buffer);
}