/**
 * @file llstring.h
 * @brief String utility functions and std::string class.
 *
 * $LicenseInfo:firstyear=2001&license=viewergpl$
 *
 * Copyright (c) 2001-2009, Linden Research, Inc.
 *
 * Second Life Viewer Source Code
 * The source code in this file ("Source Code") is provided by Linden Lab
 * to you under the terms of the GNU General Public License, version 2.0
 * ("GPL"), unless you have obtained a separate licensing agreement
 * ("Other License"), formally executed by you and Linden Lab. Terms of
 * the GPL can be found in doc/GPL-license.txt in this distribution, or
 * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
 *
 * There are special exceptions to the terms and conditions of the GPL as
 * it is applied to this Source Code. View the full text of the exception
 * in the file doc/FLOSS-exception.txt in this software distribution, or
 * online at
 * http://secondlifegrid.net/programs/open_source/licensing/flossexception
 *
 * By copying, modifying or distributing this software, you acknowledge
 * that you have read and understood your obligations described above,
 * and agree to abide by those obligations.
 *
 * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
 * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
 * COMPLETENESS OR PERFORMANCE.
 * $/LicenseInfo$
 */
  #ifndef LL_LLSTRING_H
  #define LL_LLSTRING_H

  #include <algorithm>
  #include <cstdio>
  #include <iomanip>
  #include <locale>
  #include <map>
  #include <memory>
  #include <string>
  #include <vector>

  #include <string.h>

  #include "llsd.h"

  #if LL_LINUX
  # include <wctype.h>
  # include <wchar.h>
  #endif

  // Character suggested as the 'replacement' argument of the LLStringFn
  // replace_*() helpers when the caller has no better choice.
  constexpr char LL_UNKNOWN_CHAR = '?';
  47. class LLStringOps
  48. {
  49. public:
  50. LL_INLINE static char toUpper(char elem) { return toupper((unsigned char)elem); }
  51. LL_INLINE static llwchar toUpper(llwchar elem) { return towupper(elem); }
  52. LL_INLINE static char toLower(char elem) { return tolower((unsigned char)elem); }
  53. LL_INLINE static llwchar toLower(llwchar elem) { return towlower(elem); }
  54. LL_INLINE static bool isSpace(char elem) { return isspace((unsigned char)elem) != 0; }
  55. LL_INLINE static bool isSpace(llwchar elem) { return iswspace(elem) != 0; }
  56. LL_INLINE static bool isUpper(char elem) { return isupper((unsigned char)elem) != 0; }
  57. LL_INLINE static bool isUpper(llwchar elem) { return iswupper(elem) != 0; }
  58. LL_INLINE static bool isLower(char elem) { return islower((unsigned char)elem) != 0; }
  59. LL_INLINE static bool isLower(llwchar elem) { return iswlower(elem) != 0; }
  60. LL_INLINE static bool isDigit(char a) { return isdigit((unsigned char)a) != 0; }
  61. LL_INLINE static bool isDigit(llwchar a) { return iswdigit(a) != 0; }
  62. LL_INLINE static bool isPunct(char a) { return ispunct((unsigned char)a) != 0; }
  63. LL_INLINE static bool isPunct(llwchar a) { return iswpunct(a) != 0; }
  64. LL_INLINE static bool isAlpha(char a) { return isalpha((unsigned char)a) != 0; }
  65. LL_INLINE static bool isAlpha(llwchar a) { return iswalpha(a) != 0; }
  66. LL_INLINE static bool isAlnum(char a) { return isalnum((unsigned char)a) != 0; }
  67. LL_INLINE static bool isAlnum(llwchar a) { return iswalnum(a) != 0; }
  68. LL_INLINE static S32 collate(const char* a, const char* b)
  69. {
  70. return strcoll(a, b);
  71. }
  72. static S32 collate(const llwchar* a, const llwchar* b);
  73. static bool isHexString(const std::string& str);
  74. // Returns true when 'a' corresponds to a "genuine" emoji. HB
  75. static bool isEmoji(llwchar a);
  76. static void setupDatetimeInfo(bool pacific_daylight_time);
  77. static void setupWeekDaysNames(const std::string& data);
  78. static void setupWeekDaysShortNames(const std::string& data);
  79. static void setupMonthNames(const std::string& data);
  80. static void setupMonthShortNames(const std::string& data);
  81. static void setupDayFormat(const std::string& data);
  82. LL_INLINE static long getPacificTimeOffset() { return sPacificTimeOffset;}
  83. LL_INLINE static long getLocalTimeOffset() { return sLocalTimeOffset;}
  84. // Returns true when the Pacific time zone (aka server time zone) is
  85. // currently in daylight savings time.
  86. LL_INLINE static bool getPacificDaylightTime() { return sPacificDaylightTime;}
  87. static std::string getDatetimeCode(std::string key);
  88. public:
  89. static std::vector<std::string> sWeekDayList;
  90. static std::vector<std::string> sWeekDayShortList;
  91. static std::vector<std::string> sMonthList;
  92. static std::vector<std::string> sMonthShortList;
  93. static std::string sDayFormat;
  94. static std::string sAM;
  95. static std::string sPM;
  96. private:
  97. static long sPacificTimeOffset;
  98. static long sLocalTimeOffset;
  99. static bool sPacificDaylightTime;
  100. static std::map<std::string, std::string> datetimeToCodes;
  101. };
  // Returns a std::string built from the C string 'in'. A NULL pointer and an
  // empty C string both yield an empty std::string, so this never dereferences
  // a NULL pointer.
  LL_INLINE std::string ll_safe_string(const char* in)
  {
      return (in && *in) ? std::string(in) : std::string();
  }

  // Bounded variant: returns the characters of 'in' up to (not including) the
  // first NUL, reading at most 'maxlen' bytes. A NULL pointer, an empty C
  // string or a non-positive 'maxlen' yields an empty std::string.
  //
  // The result is cut at the first NUL so that a C string shorter than
  // 'maxlen' stored in a 'maxlen'-sized buffer does not bring its padding (or
  // whatever follows the terminator) into the result as embedded characters.
  // NOTE: std::find still examines bytes one by one, so when no NUL is found
  // 'in' must point to at least 'maxlen' readable bytes.
  LL_INLINE std::string ll_safe_string(const char* in, S32 maxlen)
  {
      if (!in || maxlen <= 0)
      {
          return std::string();
      }
      const char* const end = std::find(in, in + maxlen, '\0');
      return std::string(in, end);
  }
  119. // Allowing assignments from non-strings into format_map_t is apparently
  120. // *really* error-prone, so subclass std::string with just basic c'tors.
  121. class LLFormatMapString
  122. {
  123. public:
  124. LLFormatMapString()
  125. {
  126. }
  127. LLFormatMapString(const char* s)
  128. : mString(ll_safe_string(s))
  129. {
  130. }
  131. LLFormatMapString(const std::string& s)
  132. : mString(s)
  133. {
  134. }
  135. LL_INLINE operator std::string() const { return mString; }
  136. LL_INLINE bool operator<(const LLFormatMapString& rhs) const
  137. {
  138. return mString < rhs.mString;
  139. }
  140. LL_INLINE std::size_t length() const { return mString.length(); }
  141. private:
  142. std::string mString;
  143. };
  144. template <class T>
  145. class LLStringUtilBase
  146. {
  147. private:
  148. static std::string sLocale;
  149. public:
  150. typedef std::basic_string<T> string_type;
  151. typedef typename string_type::size_type size_type;
  152. public:
  153. /////////////////////////////////////////////////////////////////////////////////////////
  154. // Static Utility methods that operate on std::strings
  155. static const string_type null;
  156. typedef std::map<LLFormatMapString, LLFormatMapString> format_map_t;
  157. static void getTokens(const string_type& instr,
  158. std::vector<string_type>& tokens,
  159. const string_type& delims);
  160. // Like simple scan overload, but returns scanned vector
  161. static std::vector<string_type> getTokens(const string_type& instr,
  162. const string_type& delims);
  163. // Adds support for keep_delims and quotes (either could be empty string)
  164. static void getTokens(const string_type& instr,
  165. std::vector<string_type>& tokens,
  166. const string_type& drop_delims,
  167. const string_type& keep_delims,
  168. const string_type& quotes = string_type());
  169. // Like keep_delims-and-quotes overload, but returns scanned vector
  170. static std::vector<string_type> getTokens(const string_type& instr,
  171. const string_type& drop_delims,
  172. const string_type& keep_delims,
  173. const string_type& quotes =
  174. string_type());
  175. // Adds support for escapes (could be empty string)
  176. static void getTokens(const string_type& instr,
  177. std::vector<string_type>& tokens,
  178. const string_type& drop_delims,
  179. const string_type& keep_delims,
  180. const string_type& quotes,
  181. const string_type& escapes);
  182. // Like escapes overload, but returns scanned vector
  183. static std::vector<string_type> getTokens(const string_type& instr,
  184. const string_type& drop_delims,
  185. const string_type& keep_delims,
  186. const string_type& quotes,
  187. const string_type& escapes);
  188. static void formatNumber(string_type& num_str, S32 decimals);
  189. static bool formatDatetime(string_type& replacement,
  190. const string_type& token,
  191. const string_type& param, S32 sec_from_epoch);
  192. static S32 format(string_type& s, const format_map_t& substitutions);
  193. static S32 format(string_type& s, const LLSD& substitutions);
  194. static bool simpleReplacement(string_type& replacement,
  195. const string_type& token,
  196. const format_map_t& substitutions);
  197. static bool simpleReplacement(string_type& replacement,
  198. const string_type& token,
  199. const LLSD& substitutions);
  200. static void setLocale(std::string in_locale);
  201. static std::string getLocale();
  202. LL_INLINE static bool contains(const string_type& string, T c,
  203. size_type i = 0)
  204. {
  205. return string.find(c, i) != string_type::npos;
  206. }
  207. static void trimHead(string_type& string);
  208. static void trimTail(string_type& string);
  209. LL_INLINE static void trim(string_type& string)
  210. {
  211. trimHead(string);
  212. trimTail(string);
  213. }
  214. static void truncate(string_type& string, size_type count);
  215. static void toUpper(string_type& string);
  216. static void toLower(string_type& string);
  217. // True if this is the head of s.
  218. static bool isHead(const string_type& string, const T* s);
  219. // Returns true if string starts with substr. If either string or substr
  220. // are empty, this method returns false.
  221. static bool startsWith(const string_type& str, const string_type& substr);
  222. // Returns true if string starts with substr. If either string or substr
  223. // are empty, this method returns false.
  224. static bool endsWith(const string_type& string, const string_type& substr);
  225. static void addCRLF(string_type& string);
  226. static void removeCRLF(string_type& string);
  227. static void replaceTabsWithSpaces(string_type& string,
  228. size_type spaces_per_tab);
  229. static void replaceNonstandardASCII(string_type& string, T replacement);
  230. static void replaceChar(string_type& string, T target, T replacement);
  231. static void replaceString(string_type& string, string_type target,
  232. string_type replacement);
  233. static bool containsNonprintable(const string_type& string);
  234. static void stripNonprintable(string_type& string);
  235. // Unsafe way to make ascii characters. You should probably only call this
  236. // when interacting with the host operating system.
  237. // The 1 byte std::string does not work correctly.
  238. // The 2 and 4 byte std::string probably work, so LLWStringUtil::_makeASCII
  239. // should work.
  240. static void _makeASCII(string_type& string);
  241. // Conversion to other data types
  242. static bool convertToBool(const string_type& string, bool& value);
  243. static bool convertToU8(const string_type& string, U8& value);
  244. static bool convertToS8(const string_type& string, S8& value);
  245. static bool convertToS16(const string_type& string, S16& value);
  246. static bool convertToU16(const string_type& string, U16& value);
  247. static bool convertToU32(const string_type& string, U32& value);
  248. static bool convertToS32(const string_type& string, S32& value);
  249. static bool convertToF32(const string_type& string, F32& value);
  250. static bool convertToF64(const string_type& string, F64& value);
  251. ///////////////////////////////////////////////////////////////////////////
  252. // Utility methods for working with char*'s and strings
  253. // Like strcmp but also handles empty strings. Uses current locale.
  254. static S32 compareStrings(const T* lhs, const T* rhs);
  255. static S32 compareStrings(const string_type& lhs,
  256. const string_type& rhs);
  257. // Case-insensitive version of above. Uses current locale on Win32, and
  258. // falls back to a non-locale aware comparison on Linux.
  259. static S32 compareInsensitive(const T* lhs, const T* rhs);
  260. static S32 compareInsensitive(const string_type& lhs,
  261. const string_type& rhs);
  262. // Case-sensitive comparison with good handling of numbers. Does not use
  263. // current locale.
  264. // a.k.a. strdictcmp()
  265. static S32 compareDict(const string_type& a, const string_type& b);
  266. // Case *in*sensitive comparison with good handling of numbers. Does not
  267. // use current locale.
  268. // a.k.a. strdictcmp()
  269. static S32 compareDictInsensitive(const string_type& a,
  270. const string_type& b);
  271. // Puts compareDict() in a form appropriate for LL container classes to use
  272. // for sorting.
  273. static bool precedesDict(const string_type& a, const string_type& b);
  274. // A replacement for strncpy.
  275. // If the dst buffer is dst_size bytes long or more, ensures that dst is
  276. // null terminated and holds up to dst_size-1 characters of src.
  277. static void copy(T* dst, const T* src, size_type dst_size);
  278. // Copies src into dst at a given offset.
  279. static void copyInto(string_type& dst, const string_type& src,
  280. size_type offset);
  281. LL_INLINE static bool isPartOfWord(T c)
  282. {
  283. return (c == (T)'_') || LLStringOps::isAlnum(c);
  284. }
  285. LL_INLINE static bool isPartOfLexicalWord(T c)
  286. {
  287. return (c == (T)'\'') || LLStringOps::isAlpha(c) ||
  288. !(LLStringOps::isDigit(c) || LLStringOps::isSpace(c) ||
  289. LLStringOps::isPunct(c));
  290. }
  291. private:
  292. static size_type getSubstitution(const string_type& instr,
  293. size_type& start,
  294. std::vector<string_type >& tokens);
  295. };
  296. template<class T> const std::basic_string<T> LLStringUtilBase<T>::null;
  297. template<class T> std::string LLStringUtilBase<T>::sLocale;
  298. typedef LLStringUtilBase<char> LLStringUtil;
  299. typedef LLStringUtilBase<llwchar> LLWStringUtil;
  300. typedef std::basic_string<llwchar> LLWString;
  301. struct LLDictionaryLess
  302. {
  303. public:
  304. LL_INLINE bool operator()(const std::string& a, const std::string& b) const
  305. {
  306. return LLStringUtil::precedesDict(a, b);
  307. }
  308. };
  // Chops off the trailing characters in a string. Returns a copy of 'in'
  // minus its trailing 'count' bytes; when 'count' is equal to or larger than
  // the length of 'in', the result is an empty string.
  // NOTE: this function works on bytes rather than glyphs, so this will
  // incorrectly truncate non-single byte strings: use utf8str_truncate() for
  // UTF-8 strings.
  LL_INLINE std::string chop_tail_copy(const std::string& in,
                                       std::string::size_type count)
  {
      const std::string::size_type length = in.length();
      // Guard against the unsigned wrap-around of 'length - count', which
      // would otherwise make the substring constructor copy the whole string.
      if (count >= length)
      {
          return std::string();
      }
      return std::string(in, 0, length - count);
  }
  318. // This translates a nybble stored as a hex value from 0-f back to a nybble
  319. // in the low order bits of the return byte.
  320. U8 hex_as_nybble(char hex);
  321. // Unicode support
  322. // We should never use UTF16 except when communicating with Win32 (or the macOS
  323. // clipboard, needing utf16str_to_wstring()) !
  324. typedef std::basic_string<U16> llutf16string;
  #if LL_WINDOWS && defined(_NATIVE_WCHAR_T_DEFINED)
  // wchar_t is a distinct native type, so llutf16string is also a distinct type
  // and there IS a point to converting separately to/from llutf16string.

  // Generic conversion aliases.
  // Primary template: operator() is declared but deliberately left without a
  // generic implementation. Every supported (TO, FROM) combination gets its
  // own specialization.
  template<class TO, class FROM, class Enable = void>
  class ll_convert_impl
  {
  public:
      TO operator()(const FROM& in) const;
  };

  // Function template front-end providing the ll_convert<TO>(from_value) API.
  template<class TO, class FROM>
  TO ll_convert(const FROM& in)
  {
      ll_convert_impl<TO, FROM> converter;
      return converter(in);
  }

  // Degenerate case: converting a type to itself returns a copy of the input.
  template<class T>
  class ll_convert_impl<T, T>
  {
  public:
      LL_INLINE T operator()(const T& in) const { return in; }
  };

  // Specializes ll_convert_impl<TO, FROM> so that its operator() returns EXPR,
  // an expression which may refer to the argument by the name 'in'.
  # define LL_CONVERT_ALIAS(TO, FROM, EXPR) \
  template<> \
  class ll_convert_impl<TO, FROM> \
  { \
  public: \
      TO operator()(const FROM& in) const { return EXPR; } \
  };

  // Converting between std::wstring and llutf16string means copying chars
  // (range construction from begin() to end()).
  // NOTE(review): the original comment here also stated that LLWString is
  // identical to std::wstring, so that aliases for std::wstring would collide
  // with those for LLWString. That cannot be confirmed from this header.
  LL_CONVERT_ALIAS(llutf16string, std::wstring, llutf16string(in.begin(), in.end()));
  LL_CONVERT_ALIAS(std::wstring, llutf16string, std::wstring(in.begin(), in.end()));

  #else // LL_WINDOWS && defined(_NATIVE_WCHAR_T_DEFINED)

  // No such conversions needed under Linux, macOS, or Windows with /Zc:wchar_t-
  // MSVC compilation option: the macro expands to nothing.
  # define LL_CONVERT_ALIAS(TO, FROM, EXPR)

  #endif // LL_WINDOWS && defined(_NATIVE_WCHAR_T_DEFINED)
  368. LLWString utf16str_to_wstring(const llutf16string& utf16str, S32 len);
  369. LL_INLINE LLWString utf16str_to_wstring(const llutf16string& utf16str)
  370. {
  371. return utf16str_to_wstring(utf16str, (S32)utf16str.length());
  372. }
  373. LL_CONVERT_ALIAS(LLWString, llutf16string, utf16str_to_wstring(in));
  374. llutf16string wstring_to_utf16str(const LLWString& utf32str, S32 len);
  375. LL_INLINE llutf16string wstring_to_utf16str(const LLWString& utf32str)
  376. {
  377. return wstring_to_utf16str(utf32str, (S32)utf32str.length());
  378. }
  379. LL_CONVERT_ALIAS(llutf16string, LLWString, wstring_to_utf16str(in));
  380. LLWString utf8str_to_wstring(const std::string& utf8str, S32 len);
  381. LL_INLINE LLWString utf8str_to_wstring(const std::string& utf8str)
  382. {
  383. return utf8str_to_wstring(utf8str, (S32)utf8str.length());
  384. }
  385. LL_CONVERT_ALIAS(LLWString, std::string, utf8str_to_wstring(in));
  386. LL_INLINE llutf16string utf8str_to_utf16str(const std::string& utf8str)
  387. {
  388. return wstring_to_utf16str(utf8str_to_wstring(utf8str));
  389. }
  390. LL_CONVERT_ALIAS(llutf16string, std::string, utf8str_to_utf16str(in));
  391. std::ptrdiff_t wchar_to_utf8chars(llwchar inchar, char* outchars);
  392. std::string wstring_to_utf8str(const LLWString& utf32str, S32 len);
  393. LL_INLINE std::string wstring_to_utf8str(const LLWString& utf32str)
  394. {
  395. return wstring_to_utf8str(utf32str, (S32)utf32str.length());
  396. }
  397. LL_CONVERT_ALIAS(std::string, LLWString, wstring_to_utf8str(in));
  398. // Make the incoming string a utf8 string. Replaces any unknown glyph
  399. // with the UNKNOWN_CHARACTER. Once any unknown glyph is found, the rest
  400. // of the data may not be recovered.
  401. LL_INLINE std::string rawstr_to_utf8(const std::string& raw)
  402. {
  403. return wstring_to_utf8str(utf8str_to_wstring(raw));
  404. }
  405. LL_INLINE std::string utf16str_to_utf8str(const llutf16string& utf16str,
  406. S32 len)
  407. {
  408. return wstring_to_utf8str(utf16str_to_wstring(utf16str, len), len);
  409. }
  410. LL_INLINE std::string utf16str_to_utf8str(const llutf16string& utf16str)
  411. {
  412. return wstring_to_utf8str(utf16str_to_wstring(utf16str));
  413. }
  414. LL_CONVERT_ALIAS(std::string, llutf16string, utf16str_to_utf8str(in));
  415. // Length of this UTF32 string in bytes when transformed to UTF8
  416. S32 wstring_utf8_length(const LLWString& wstr);
  417. // Length in bytes of this wide char in a UTF8 string
  418. S32 wchar_utf8_length(const llwchar wc);
  419. std::string utf8str_tolower(const std::string& utf8str);
  420. // Length in llwchar (UTF-32) of the first len units (16 bits) of the given
  421. // UTF-16 string.
  422. S32 utf16str_wstring_length(const llutf16string& utf16str, S32 len);
  423. // Length in utf16string (UTF-16) of wlen wchars beginning at woffset.
  424. S32 wstring_utf16_length(const LLWString& wstr, S32 woffset, S32 wlen);
  425. // Length in wstring (i.e., llwchar count) of a part of a wstring specified by
  426. // utf16 length (i.e., utf16 units.)
  427. S32 wstring_length_from_utf16_length(const LLWString& wstr, S32 woffset,
  428. S32 utf16_length, bool* unaligned = NULL);
  429. // Properly truncates an UTF-8 string to a maximum byte count.
  430. // The returned string may be less than max_len if the truncation happens in
  431. // the middle of a glyph. If max_len is longer than the string passed in, the
  432. // return value == utf8str.
  433. // 'utf8str' must be a valid UTF-8 string to truncate, 'max_len' is the maximum
  434. // number of bytes in the return value. Returns a valid UTF-8 string with byte
  435. // count <= max_len.
  436. std::string utf8str_truncate(const std::string& utf8str, S32 max_len);
  437. std::string utf8str_trim(const std::string& utf8str);
  438. S32 utf8str_compare_insensitive(const std::string& lhs,
  439. const std::string& rhs);
  440. // Replaces all occurences of target_char with replace_char
  441. // 'utf8str' is the UTF-8 string to process, 'target_char' is the wchar to be
  442. // replaced and 'replace_char' is the wchar which is written on replace.
  443. std::string utf8str_substChar(const std::string& utf8str,
  444. const llwchar target_char,
  445. const llwchar replace_char);
  446. std::string utf8str_makeASCII(const std::string& utf8str);
  447. // Hack - used for evil notecards.
  448. std::string mbcsstring_makeASCII(const std::string& str);
  449. std::string utf8str_removeCRLF(const std::string& utf8str);
  450. std::string iso8859_to_utf8(const std::string& iso8859str);
  451. std::string utf8_to_iso8859(const std::string& utf8str);
  #if LL_WINDOWS
  // Windows string helpers

  // Converts a wide string to std::string, using the given code page. This
  // replaces the unsafe W2A macro from ATL.
  std::string ll_convert_wide_to_string(const wchar_t* in,
                                        unsigned int code_page);
  // Same as above; defaults to CP_UTF8.
  std::string ll_convert_wide_to_string(const wchar_t* in);

  // Converts a string to wide string, using the given code page.
  std::wstring ll_convert_string_to_wide(const std::string& in,
                                         unsigned int code_page);
  // Same as above; defaults to CP_UTF8.
  std::wstring ll_convert_string_to_wide(const std::string& in);
  LL_CONVERT_ALIAS(std::wstring, std::string, ll_convert_string_to_wide(in));

  // Converts a std::wstring into an LLWString.
  LLWString ll_convert_wide_to_wstring(const std::wstring& in);
  LL_CONVERT_ALIAS(LLWString, std::wstring, ll_convert_wide_to_wstring(in));

  // Converts incoming string into utf8 string
  std::string ll_convert_string_to_utf8_string(const std::string& in);
  #endif // LL_WINDOWS
  471. ///////////////////////////////////////////////////////////////////////////////
  472. // Formerly in u64.h - Utilities for conversions between U64 and string
  473. ///////////////////////////////////////////////////////////////////////////////
  474. // Forgivingly parses a nul terminated character array. Returns the first U64
  475. // value found in the string or 0 on failure.
  476. U64 str_to_U64(const std::string& str);
  477. // Given a U64 value, returns a printable representation. 'value' is the U64 to
  478. // turn into a printable character array. Returns the result string.
  479. std::string U64_to_str(U64 value);
  480. // Given a U64 value, returns a printable representation.
  481. // The client of this function is expected to provide an allocated buffer. The
  482. // function then snprintf() into that buffer, so providing NULL has undefined
  483. // behavior. Providing a buffer which is too small will truncate the printable
  484. // value, so usually you want to declare the buffer:
  485. // char result[U64_BUF];
  486. // std::cout << "value: " << U64_to_str(value, result, U64_BUF);
  487. //
  488. // 'value' is the U64 to turn into a printable character array.
  489. // 'result' is the buffer to use.
  490. // 'result_size' is the size of the buffer allocated. Use U64_BUF.
  491. // Returns the result pointer.
  492. char* U64_to_str(U64 value, char* result, S32 result_size);
  493. // Helper function to wrap strtoull() which is not available on windows.
  494. U64 llstrtou64(const char* str, char** end, S32 base);
  ///////////////////////////////////////////////////////////////////////////////
  // Many of the 'strip' and 'replace' methods of LLStringUtilBase need
  // specialization to work with the signed char type. Sadly, it is not possible
  // (AFAIK) to specialize a single method of a template class. That stuff
  // should go here.
  namespace LLStringFn
  {
      // Replaces all non-printable characters with replacement in str.
      // NOTE: this will zap non-ASCII characters.
      // For 'replacement', use LL_UNKNOWN_CHAR if unsure.
      void replace_nonprintable_in_ascii(std::basic_string<char>& str,
                                         char replacement);

      // Replaces all non-printable and pipe characters with replacement in
      // str. NOTE: this will zap non-ASCII characters.
      // For 'replacement', use LL_UNKNOWN_CHAR if unsure.
      void replace_nonprintable_and_pipe_in_ascii(std::basic_string<char>& str,
                                                  char replacement);

      // Replaces all control characters (0 <= c < 0x20) with replacement in
      // str. This is safe for UTF-8.
      // For 'replacement', use LL_UNKNOWN_CHAR if unsure.
      void replace_ascii_controlchars(std::basic_string<char>& str,
                                      char replacement);

      // Removes all characters that are not allowed in XML 1.0.
      // Returns a copy of the string with those characters removed.
      // Works with US ASCII and UTF-8 encoded strings. JC
      std::string strip_invalid_xml(const std::string& input);

      // Replaces all characters that are not allowed in XML 1.0 with the
      // corresponding literals.
      std::string xml_encode(const std::string& str,
                             bool for_attribute = false);

      // Replaces some of XML literals that are defined in XML 1.0 with the
      // corresponding characters.
      std::string xml_decode(const std::string& str,
                             bool for_attribute = false);
  }
  528. ///////////////////////////////////////////////////////////////////////////////
  529. // getTokens() templates
  530. ///////////////////////////////////////////////////////////////////////////////
  531. //static
  532. template <class T>
  533. std::vector<typename LLStringUtilBase<T>::string_type>
  534. LLStringUtilBase<T>::getTokens(const string_type& instr,
  535. const string_type& delims)
  536. {
  537. std::vector<string_type> tokens;
  538. getTokens(instr, tokens, delims);
  539. return tokens;
  540. }
  541. //static
  542. template <class T>
  543. std::vector<typename LLStringUtilBase<T>::string_type>
  544. LLStringUtilBase<T>::getTokens(const string_type& instr,
  545. const string_type& drop_delims,
  546. const string_type& keep_delims,
  547. const string_type& quotes)
  548. {
  549. std::vector<string_type> tokens;
  550. getTokens(instr, tokens, drop_delims, keep_delims, quotes);
  551. return tokens;
  552. }
  553. //static
  554. template <class T>
  555. std::vector<typename LLStringUtilBase<T>::string_type>
  556. LLStringUtilBase<T>::getTokens(const string_type& instr,
  557. const string_type& drop_delims,
  558. const string_type& keep_delims,
  559. const string_type& quotes,
  560. const string_type& escapes)
  561. {
  562. std::vector<string_type> tokens;
  563. getTokens(instr, tokens, drop_delims, keep_delims, quotes, escapes);
  564. return tokens;
  565. }
  566. namespace LLStringUtilBaseImpl
  567. {
  568. // Input string scanner helper for getTokens(), or really any other character
  569. // parsing routine that may have to deal with escape characters. This
  570. // implementation defines the concept (also an interface, should you choose to
  571. // implement the concept by subclassing) and provides trivial implementations
  572. // for a string @em without escape processing.
  573. template <class T>
  574. class InString
  575. {
  576. public:
  577. typedef std::basic_string<T> string_type;
  578. typedef typename string_type::const_iterator const_iterator;
  579. LL_INLINE InString(const_iterator b, const_iterator e)
  580. : mIter(b),
  581. mEnd(e)
  582. {
  583. }
  584. virtual ~InString() = default;
  585. LL_INLINE bool done() const { return mIter == mEnd; }
  586. // Is the current character (*mIter) escaped ? This implementation can
  587. // answer trivially because it does not support escapes.
  588. LL_INLINE virtual bool escaped() const { return false; }
  589. // Obtains the current character and advances mIter.
  590. LL_INLINE virtual T next() { return *mIter++; }
  591. // Does the current character match specified character ?
  592. LL_INLINE virtual bool is(T ch) const { return !done() && *mIter == ch; }
  593. // Is the current character any one of the specified characters ?
  594. LL_INLINE virtual bool oneof(const string_type& delims) const
  595. {
  596. return !done() && LLStringUtilBase<T>::contains(delims, *mIter);
  597. }
  598. // Scans forward from 'from' until either 'delim' or end. This is primarily
  599. // useful for processing quoted sub-strings.
  600. // If it sees 'delim', appends everything from 'from' until (excluding)
  601. // 'delim' to 'into', advances mIter to skip 'delim', and returns true.
  602. // If it does not see 'delim', it does not alter 'into' or mIter and
  603. // returns false.
  604. // Note: the false case described above implements normal getTokens()
  605. // treatment of an unmatched open quote: it treats the quote character as
  606. // if escaped, that is, simply collects it as part of the current token.
  607. // Other plausible behaviors directly affect the way getTokens() deals with
  608. // an unmatched quote: e.g. throwing an exception to treat it as an error,
  609. // or assuming a close quote beyond end of string (in which case it returns
  610. // true).
  611. virtual bool collect_until(string_type& into, const_iterator from, T delim)
  612. {
  613. const_iterator found = std::find(from, mEnd, delim);
  614. // If we did not find delim, change nothing, just tell caller.
  615. if (found == mEnd)
  616. {
  617. return false;
  618. }
  619. // Found delim; append everything between from and found.
  620. into.append(from, found);
  621. // Advance past delim in input
  622. mIter = found + 1;
  623. return true;
  624. }
  625. public:
  626. const_iterator mIter;
  627. const_iterator mEnd;
  628. };
  629. // InString subclass that handles escape characters
  630. template <class T>
  631. class InEscString : public InString<T>
  632. {
  633. public:
  634. typedef InString<T> super;
  635. typedef typename super::string_type string_type;
  636. typedef typename super::const_iterator const_iterator;
  637. using super::done;
  638. using super::mIter;
  639. using super::mEnd;
  640. LL_INLINE InEscString(const_iterator b, const_iterator e,
  641. const string_type& escapes)
  642. : super(b, e),
  643. mEscapes(escapes)
  644. {
  645. // Even though we have already initialized 'mIter' via our base-class
  646. // constructor, set it again to check for initial escape char.
  647. setiter(b);
  648. }
  649. // This implementation uses the answer cached by setiter().
  650. LL_INLINE bool escaped() const override { return mIsEsc; }
  651. T next() override
  652. {
  653. // If we are looking at the escape character of an escape sequence,
  654. // skip that character. This is the one time we can modify mIter
  655. // without using setiter: for this one case we DO NOT CARE if the
  656. // escaped character is itself an escape.
  657. if (mIsEsc)
  658. {
  659. ++mIter;
  660. }
  661. // If we were looking at an escape character, this is the escaped
  662. // character; otherwise it is just the next character.
  663. T result(*mIter);
  664. // Advance mIter, checking for escape sequence.
  665. setiter(mIter + 1);
  666. return result;
  667. }
  668. LL_INLINE bool is(T ch) const override
  669. {
  670. // Like base-class is(), except that an escaped character matches
  671. // nothing.
  672. return !mIsEsc && !done() && *mIter == ch;
  673. }
  674. LL_INLINE bool oneof(const string_type& delims) const override
  675. {
  676. // Like base-class is(), except that an escaped character matches
  677. // nothing.
  678. return !mIsEsc && !done() &&
  679. LLStringUtilBase<T>::contains(delims, *mIter);
  680. }
  681. bool collect_until(string_type& into, const_iterator from,
  682. T delim) override
  683. {
  684. // Deal with escapes in the characters we collect; that is, an escaped
  685. // character must become just that character without the preceding
  686. // escape. Collect characters in a separate string rather than directly
  687. // appending to 'into' in case we do not find delim, in which case we
  688. // are supposed to leave 'into' unmodified.
  689. string_type collected;
  690. // For scanning purposes, we are going to work directly with 'mIter'.
  691. // Save its current value in case we fail to see delim.
  692. const_iterator save_iter(mIter);
  693. // Okay, set 'mIter', checking for escape.
  694. setiter(from);
  695. while (!done())
  696. {
  697. // If we see an unescaped delim, stop and report success.
  698. if (!mIsEsc && *mIter == delim)
  699. {
  700. // Append collected chars to 'into'.
  701. into.append(collected);
  702. // Do not forget to advance mIter past 'delim'.
  703. setiter(mIter + 1);
  704. return true;
  705. }
  706. // We are not at end, and either we're not looking at delim or it
  707. // is escaped. Collect this character and keep going.
  708. collected.push_back(next());
  709. }
  710. // Here we hit mEnd without ever seeing delim. Restore mIter and tell
  711. // caller.
  712. setiter(save_iter);
  713. return false;
  714. }
  715. private:
  716. LL_INLINE void setiter(const_iterator i)
  717. {
  718. mIter = i;
  719. // Every time we change mIter, set mIsEsc to be able to repetitively
  720. // answer escaped() without having to rescan mEscapes. mIsEsc caches
  721. // contains(mEscapes, *mIter).
  722. // We are looking at an escaped char if we are not already at end (that
  723. // is, *mIter is even meaningful); if *mIter is in fact one of the
  724. // specified escape characters; and if there is one more character
  725. // following it. That is, if an escape character is the very last
  726. // character of the input string, it loses its special meaning.
  727. mIsEsc = !done() && LLStringUtilBase<T>::contains(mEscapes, *mIter) &&
  728. mIter + 1 != mEnd;
  729. }
  730. private:
  731. const string_type mEscapes;
  732. bool mIsEsc;
  733. };
  // getTokens() implementation based on the InString concept.
  // 'instr' is the scanner over the input; 'tokens' is cleared then filled.
  // Characters in 'drop_delims' separate tokens and are discarded; each
  // character in 'keep_delims' separates tokens and is itself returned as a
  // one-character token; 'quotes' lists the characters that open (and close) a
  // quoted sub-string.
  template <typename INSTRING, typename string_type>
  void getTokens(INSTRING& instr, std::vector<string_type>& tokens,
                 const string_type& drop_delims, const string_type& keep_delims,
                 const string_type& quotes)
  {
      // We sometimes need to match either kind of delimiter: concatenate them
      // once, up front.
      const string_type any_delims(drop_delims + keep_delims);

      tokens.clear();

      while (!instr.done())
      {
          // Skip leading drop_delims; nothing more to do if the input ends
          // while doing so.
          while (instr.oneof(drop_delims))
          {
              instr.next();
              if (instr.done())
              {
                  return;
              }
          }

          // A new token starts here: make a slot for it. The reference stays
          // valid until the next push_back(), i.e. for this whole iteration.
          tokens.push_back(string_type());
          string_type& token = tokens.back();

          if (instr.oneof(keep_delims))
          {
              // A keep_delim is a token of exactly one character.
              token.push_back(instr.next());
              continue;
          }

          // Non-delimiter token, possibly a mix of quoted and unquoted parts.
          // Bash quoting rules apply: a quoted sub-string may be embedded in
          // the midst of an unquoted token (e.g. ~/"sub dir"/myfile.txt) and
          // two quoted sub-strings may be rammed together to make a single
          // token (e.g. 'He said, "'"Don't."'"'). Unlike bash, which considers
          // an unmatched quote an error, we have no way to report errors, so
          // an unmatched quote is just embedded as an ordinary character.
          // Keep scanning until the next delimiter of either type.
          while (!instr.oneof(any_delims))
          {
              // On an open quote, search forward for the matching close quote,
              // collecting the characters in between.
              const bool took_quoted =
                  instr.oneof(quotes) &&
                  instr.collect_until(token, instr.mIter + 1, *instr.mIter);
              if (!took_quoted)
              {
                  // Not a quote, or no matching close quote: this is just an
                  // ordinary character of the current token.
                  token.push_back(instr.next());
              }
              // Done as soon as the end of the input is reached.
              if (instr.done())
              {
                  return;
              }
          }
      }
  }
  800. } // namespace LLStringUtilBaseImpl
  801. //static
  802. template <class T>
  803. void LLStringUtilBase<T>::getTokens(const string_type& string,
  804. std::vector<string_type>& tokens,
  805. const string_type& drop_delims,
  806. const string_type& keep_delims,
  807. const string_type& quotes)
  808. {
  809. // Because this overload does not support escapes, use simple InString to
  810. // manage input range.
  811. LLStringUtilBaseImpl::InString<T> instring(string.begin(), string.end());
  812. LLStringUtilBaseImpl::getTokens(instring, tokens, drop_delims, keep_delims,
  813. quotes);
  814. }
  815. //static
  816. template <class T>
  817. void LLStringUtilBase<T>::getTokens(const string_type& string,
  818. std::vector<string_type>& tokens,
  819. const string_type& drop_delims,
  820. const string_type& keep_delims,
  821. const string_type& quotes,
  822. const string_type& escapes)
  823. {
  824. // This overload must deal with escapes. Delegate that to InEscString
  825. // (unless there are no escapes).
  826. std::unique_ptr<LLStringUtilBaseImpl::InString<T> > instrp;
  827. if (escapes.empty())
  828. {
  829. instrp.reset(new LLStringUtilBaseImpl::InString<T>(string.begin(),
  830. string.end()));
  831. }
  832. else
  833. {
  834. instrp.reset(new LLStringUtilBaseImpl::InEscString<T>(string.begin(),
  835. string.end(),
  836. escapes));
  837. }
  838. LLStringUtilBaseImpl::getTokens(*instrp, tokens, drop_delims, keep_delims,
  839. quotes);
  840. }
  841. ///////////////////////////////////////////////////////////////////////////////
  842. //static
  843. template<class T>
  844. S32 LLStringUtilBase<T>::compareStrings(const T* lhs, const T* rhs)
  845. {
  846. S32 result;
  847. if (lhs == rhs)
  848. {
  849. result = 0;
  850. }
  851. else if (!lhs || !lhs[0])
  852. {
  853. result = ((!rhs || !rhs[0]) ? 0 : 1);
  854. }
  855. else if (!rhs || !rhs[0])
  856. {
  857. result = -1;
  858. }
  859. else
  860. {
  861. result = LLStringOps::collate(lhs, rhs);
  862. }
  863. return result;
  864. }
  865. //static
  866. template<class T>
  867. S32 LLStringUtilBase<T>::compareStrings(const std::basic_string<T>& lhs,
  868. const std::basic_string<T>& rhs)
  869. {
  870. return LLStringOps::collate(lhs.c_str(), rhs.c_str());
  871. }
  872. //static
  873. template<class T>
  874. S32 LLStringUtilBase<T>::compareInsensitive(const T* lhs, const T* rhs)
  875. {
  876. S32 result;
  877. if (lhs == rhs)
  878. {
  879. result = 0;
  880. }
  881. else if (!lhs || !lhs[0])
  882. {
  883. result = ((!rhs || !rhs[0]) ? 0 : 1);
  884. }
  885. else if (!rhs || !rhs[0])
  886. {
  887. result = -1;
  888. }
  889. else
  890. {
  891. std::basic_string<T> lhs_string(lhs);
  892. std::basic_string<T> rhs_string(rhs);
  893. LLStringUtilBase<T>::toUpper(lhs_string);
  894. LLStringUtilBase<T>::toUpper(rhs_string);
  895. result = LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str());
  896. }
  897. return result;
  898. }
  899. //static
  900. template<class T>
  901. S32 LLStringUtilBase<T>::compareInsensitive(const std::basic_string<T>& lhs,
  902. const std::basic_string<T>& rhs)
  903. {
  904. std::basic_string<T> lhs_string(lhs);
  905. std::basic_string<T> rhs_string(rhs);
  906. LLStringUtilBase<T>::toUpper(lhs_string);
  907. LLStringUtilBase<T>::toUpper(rhs_string);
  908. return LLStringOps::collate(lhs_string.c_str(), rhs_string.c_str());
  909. }
  910. // Case sensitive comparison with good handling of numbers. Does not use
  911. // current locale. AKA strdictcmp()
  912. //static
  913. template<class T>
  914. S32 LLStringUtilBase<T>::compareDict(const std::basic_string<T>& astr,
  915. const std::basic_string<T>& bstr)
  916. {
  917. const T* a = astr.c_str();
  918. const T* b = bstr.c_str();
  919. T ca, cb;
  920. S32 ai, bi, cnt = 0;
  921. S32 bias = 0;
  922. ca = *(a++);
  923. cb = *(b++);
  924. while (ca && cb)
  925. {
  926. if (bias == 0)
  927. {
  928. if (LLStringOps::isUpper(ca))
  929. {
  930. ca = LLStringOps::toLower(ca);
  931. --bias;
  932. }
  933. if (LLStringOps::isUpper(cb))
  934. {
  935. cb = LLStringOps::toLower(cb);
  936. ++bias;
  937. }
  938. }
  939. else
  940. {
  941. if (LLStringOps::isUpper(ca))
  942. {
  943. ca = LLStringOps::toLower(ca);
  944. }
  945. if (LLStringOps::isUpper(cb))
  946. {
  947. cb = LLStringOps::toLower(cb);
  948. }
  949. }
  950. if (LLStringOps::isDigit(ca))
  951. {
  952. if (cnt-- > 0)
  953. {
  954. if (cb != ca) break;
  955. }
  956. else
  957. {
  958. if (!LLStringOps::isDigit(cb)) break;
  959. for (ai = 0; LLStringOps::isDigit(a[ai]); ++ai);
  960. for (bi = 0; LLStringOps::isDigit(b[bi]); ++bi);
  961. if (ai < bi)
  962. {
  963. ca = 0;
  964. break;
  965. }
  966. if (bi < ai)
  967. {
  968. cb = 0;
  969. break;
  970. }
  971. if (ca != cb)
  972. {
  973. break;
  974. }
  975. cnt = ai;
  976. }
  977. }
  978. else if (ca != cb)
  979. {
  980. break;
  981. }
  982. ca = *(a++);
  983. cb = *(b++);
  984. }
  985. if (ca == cb)
  986. {
  987. ca += bias;
  988. }
  989. return ca - cb;
  990. }
  991. //static
  992. template<class T>
  993. S32 LLStringUtilBase<T>::compareDictInsensitive(const std::basic_string<T>& astr,
  994. const std::basic_string<T>& bstr)
  995. {
  996. const T* a = astr.c_str();
  997. const T* b = bstr.c_str();
  998. T ca, cb;
  999. S32 ai, bi, cnt = 0;
  1000. ca = *(a++);
  1001. cb = *(b++);
  1002. while (ca && cb)
  1003. {
  1004. if (LLStringOps::isUpper(ca))
  1005. {
  1006. ca = LLStringOps::toLower(ca);
  1007. }
  1008. if (LLStringOps::isUpper(cb))
  1009. {
  1010. cb = LLStringOps::toLower(cb);
  1011. }
  1012. if (LLStringOps::isDigit(ca))
  1013. {
  1014. if (cnt-- > 0)
  1015. {
  1016. if (cb != ca) break;
  1017. }
  1018. else
  1019. {
  1020. if (!LLStringOps::isDigit(cb))
  1021. {
  1022. break;
  1023. }
  1024. for (ai = 0; LLStringOps::isDigit(a[ai]); ++ai);
  1025. for (bi = 0; LLStringOps::isDigit(b[bi]); ++bi);
  1026. if (ai < bi)
  1027. {
  1028. ca = 0;
  1029. break;
  1030. }
  1031. if (bi < ai)
  1032. {
  1033. cb = 0;
  1034. break;
  1035. }
  1036. if (ca != cb)
  1037. {
  1038. break;
  1039. }
  1040. cnt = ai;
  1041. }
  1042. }
  1043. else if (ca!=cb)
  1044. {
  1045. break;
  1046. }
  1047. ca = *(a++);
  1048. cb = *(b++);
  1049. }
  1050. return ca - cb;
  1051. }
  1052. // Puts compareDict() in a form appropriate for LL container classes to use for
  1053. // sorting.
  1054. //static
  1055. template<class T>
  1056. bool LLStringUtilBase<T>::precedesDict(const std::basic_string<T>& a,
  1057. const std::basic_string<T>& b)
  1058. {
  1059. if (a.size() && b.size())
  1060. {
  1061. return LLStringUtilBase<T>::compareDict(a.c_str(), b.c_str()) < 0;
  1062. }
  1063. else
  1064. {
  1065. return !b.empty();
  1066. }
  1067. }
  1068. //static
  1069. template<class T>
  1070. void LLStringUtilBase<T>::toUpper(std::basic_string<T>& string)
  1071. {
  1072. if (!string.empty())
  1073. {
  1074. std::transform(string.begin(), string.end(), string.begin(),
  1075. (T(*)(T)) &LLStringOps::toUpper);
  1076. }
  1077. }
  1078. //static
  1079. template<class T>
  1080. void LLStringUtilBase<T>::toLower(std::basic_string<T>& string)
  1081. {
  1082. if (!string.empty())
  1083. {
  1084. std::transform(string.begin(), string.end(), string.begin(),
  1085. (T(*)(T)) &LLStringOps::toLower);
  1086. }
  1087. }
  1088. //static
  1089. template<class T>
  1090. void LLStringUtilBase<T>::trimHead(std::basic_string<T>& string)
  1091. {
  1092. if (!string.empty())
  1093. {
  1094. size_type i = 0;
  1095. while (i < string.length() && LLStringOps::isSpace(string[i]))
  1096. {
  1097. ++i;
  1098. }
  1099. string.erase(0, i);
  1100. }
  1101. }
  1102. //static
  1103. template<class T>
  1104. void LLStringUtilBase<T>::trimTail(std::basic_string<T>& string)
  1105. {
  1106. if (string.size())
  1107. {
  1108. size_type len = string.length();
  1109. size_type i = len;
  1110. while (i > 0 && LLStringOps::isSpace(string[i - 1]))
  1111. {
  1112. --i;
  1113. }
  1114. string.erase(i, len - i);
  1115. }
  1116. }
  1117. // Replace line feeds with carriage return-line feed pairs.
  1118. //static
  1119. template<class T>
  1120. void LLStringUtilBase<T>::addCRLF(std::basic_string<T>& string)
  1121. {
  1122. const T LF = 10;
  1123. const T CR = 13;
  1124. // Count the number of line feeds
  1125. size_type count = 0;
  1126. size_type len = string.size();
  1127. size_type i;
  1128. for (i = 0; i < len; ++i)
  1129. {
  1130. if (string[i] == LF)
  1131. {
  1132. ++count;
  1133. }
  1134. }
  1135. // Insert a carriage return before each line feed
  1136. if (count)
  1137. {
  1138. size_type size = len + count;
  1139. T* t = new T[size];
  1140. size_type j = 0;
  1141. for (i = 0; i < len; ++i)
  1142. {
  1143. if (string[i] == LF)
  1144. {
  1145. t[j++] = CR;
  1146. }
  1147. t[j++] = string[i];
  1148. }
  1149. string.assign(t, size);
  1150. delete[] t;
  1151. }
  1152. }
  1153. // Remove all carriage returns
  1154. //static
  1155. template<class T>
  1156. void LLStringUtilBase<T>::removeCRLF(std::basic_string<T>& string)
  1157. {
  1158. const T CR = 13;
  1159. size_type cr_count = 0;
  1160. size_type len = string.size();
  1161. size_type i;
  1162. for (i = 0; i < len - cr_count; ++i)
  1163. {
  1164. if (string[i + cr_count] == CR)
  1165. {
  1166. ++cr_count;
  1167. }
  1168. string[i] = string[i + cr_count];
  1169. }
  1170. string.erase(i, cr_count);
  1171. }
  1172. //static
  1173. template<class T>
  1174. void LLStringUtilBase<T>::replaceChar(std::basic_string<T>& string, T target,
  1175. T replacement)
  1176. {
  1177. size_type found_pos = 0;
  1178. while ((found_pos = string.find(target, found_pos)) != std::basic_string<T>::npos)
  1179. {
  1180. string[found_pos] = replacement;
  1181. ++found_pos; // avoid infinite defeat if target == replacement
  1182. }
  1183. }
  1184. //static
  1185. template<class T>
  1186. void LLStringUtilBase<T>::replaceString(std::basic_string<T>& string,
  1187. std::basic_string<T> target,
  1188. std::basic_string<T> replacement)
  1189. {
  1190. size_type found_pos = 0;
  1191. while ((found_pos = string.find(target, found_pos)) != std::basic_string<T>::npos)
  1192. {
  1193. string.replace(found_pos, target.length(), replacement);
  1194. // Avoid infinite defeat if replacement contains target
  1195. found_pos += replacement.length();
  1196. }
  1197. }
  1198. //static
  1199. template<class T>
  1200. void LLStringUtilBase<T>::replaceNonstandardASCII(std::basic_string<T>& string,
  1201. T replacement)
  1202. {
  1203. constexpr char LF = '\n';
  1204. constexpr S8 MIN = ' ';
  1205. size_type len = string.size();
  1206. for (size_type i = 0; i < len; ++i)
  1207. {
  1208. // No need to test MAX < mText[i] because we treat mText[i] as a
  1209. // signed char which has a max value of 127.
  1210. if (S8(string[i]) < MIN && string[i] != LF)
  1211. {
  1212. string[i] = replacement;
  1213. }
  1214. }
  1215. }
  1216. //static
  1217. template<class T>
  1218. void LLStringUtilBase<T>::replaceTabsWithSpaces(std::basic_string<T>& str,
  1219. size_type spaces_per_tab)
  1220. {
  1221. const T TAB = '\t';
  1222. const T SPACE = ' ';
  1223. std::basic_string<T> out_str;
  1224. // Replace tabs with spaces
  1225. for (size_type i = 0; i < str.length(); ++i)
  1226. {
  1227. if (str[i] == TAB)
  1228. {
  1229. for (size_type j = 0; j < spaces_per_tab; ++j)
  1230. {
  1231. out_str += SPACE;
  1232. }
  1233. }
  1234. else
  1235. {
  1236. out_str += str[i];
  1237. }
  1238. }
  1239. str = out_str;
  1240. }
  1241. //static
  1242. template<class T>
  1243. bool LLStringUtilBase<T>::containsNonprintable(const std::basic_string<T>& string)
  1244. {
  1245. const char MIN = 32;
  1246. bool rv = false;
  1247. for (size_type i = 0, count = string.size(); i < count; ++i)
  1248. {
  1249. if (string[i] < MIN)
  1250. {
  1251. rv = true;
  1252. break;
  1253. }
  1254. }
  1255. return rv;
  1256. }
  1257. //static
  1258. template<class T>
  1259. void LLStringUtilBase<T>::stripNonprintable(std::basic_string<T>& string)
  1260. {
  1261. const char MIN = 32;
  1262. size_type j = 0;
  1263. if (string.empty())
  1264. {
  1265. return;
  1266. }
  1267. size_t src_size = string.size();
  1268. char* c_string = new char[src_size + 1];
  1269. if (c_string == NULL)
  1270. {
  1271. return;
  1272. }
  1273. copy(c_string, string.c_str(), src_size + 1);
  1274. char* write_head = &c_string[0];
  1275. for (size_type i = 0; i < src_size; ++i)
  1276. {
  1277. char* read_head = &string[i];
  1278. write_head = &c_string[j];
  1279. if (!(*read_head < MIN))
  1280. {
  1281. *write_head = *read_head;
  1282. ++j;
  1283. }
  1284. }
  1285. c_string[j]= '\0';
  1286. string = c_string;
  1287. delete []c_string;
  1288. }
  1289. template<class T>
  1290. void LLStringUtilBase<T>::_makeASCII(std::basic_string<T>& string)
  1291. {
  1292. // Replace non-ASCII chars with LL_UNKNOWN_CHAR
  1293. for (size_type i = 0, count = string.length(); i < count; ++i)
  1294. {
  1295. if (string[i] > 0x7f)
  1296. {
  1297. string[i] = LL_UNKNOWN_CHAR;
  1298. }
  1299. }
  1300. }
  1301. //static
  1302. template<class T>
  1303. void LLStringUtilBase<T>::copy(T* dst, const T* src, size_type dst_size)
  1304. {
  1305. if (dst_size > 0)
  1306. {
  1307. size_type min_len = 0;
  1308. if (src)
  1309. {
  1310. min_len = llmin(dst_size - 1, strlen(src));
  1311. memcpy(dst, src, min_len * sizeof(T));
  1312. }
  1313. dst[min_len] = '\0';
  1314. }
  1315. }
  1316. //static
  1317. template<class T>
  1318. void LLStringUtilBase<T>::copyInto(std::basic_string<T>& dst,
  1319. const std::basic_string<T>& src,
  1320. size_type offset)
  1321. {
  1322. if (offset == dst.length())
  1323. {
  1324. // Special case: append to end of string and avoid expensive (when
  1325. // strings are large) string manipulations
  1326. dst += src;
  1327. }
  1328. else
  1329. {
  1330. std::basic_string<T> tail = dst.substr(offset);
  1331. dst = dst.substr(0, offset);
  1332. dst += src;
  1333. dst += tail;
  1334. };
  1335. }
  1336. // True if this is the head of s.
  1337. //static
  1338. template<class T>
  1339. bool LLStringUtilBase<T>::isHead(const std::basic_string<T>& string,
  1340. const T* s)
  1341. {
  1342. if (string.empty())
  1343. {
  1344. // Early exit
  1345. return false;
  1346. }
  1347. else
  1348. {
  1349. return strncmp(s, string.c_str(), string.size()) == 0;
  1350. }
  1351. }
  1352. //static
  1353. template<class T>
  1354. LL_INLINE bool LLStringUtilBase<T>::startsWith(const std::basic_string<T>& str,
  1355. const std::basic_string<T>& substr)
  1356. {
  1357. size_t str_len = str.length();
  1358. if (!str_len) return false;
  1359. size_t sub_len = substr.length();
  1360. if (!sub_len) return false;
  1361. return str_len >= sub_len && str.compare(0, sub_len, substr) == 0;
  1362. }
  1363. //static
  1364. template<class T>
  1365. LL_INLINE bool LLStringUtilBase<T>::endsWith(const std::basic_string<T>& str,
  1366. const std::basic_string<T>& substr)
  1367. {
  1368. size_t str_len = str.length();
  1369. if (!str_len) return false;
  1370. size_t sub_len = substr.length();
  1371. if (!sub_len) return false;
  1372. return str_len >= sub_len &&
  1373. str.compare(str_len - sub_len, sub_len, substr) == 0;
  1374. }
  1375. template<class T>
  1376. bool LLStringUtilBase<T>::convertToBool(const std::basic_string<T>& string,
  1377. bool& value)
  1378. {
  1379. if (string.empty())
  1380. {
  1381. return false;
  1382. }
  1383. std::basic_string<T> temp(string);
  1384. trim(temp);
  1385. if (temp == "1" || temp == "T" || temp == "t" || temp == "TRUE" ||
  1386. temp == "true" || temp == "True")
  1387. {
  1388. value = true;
  1389. return true;
  1390. }
  1391. else if (temp == "0" || temp == "F" || temp == "f" || temp == "FALSE" ||
  1392. temp == "false" || temp == "False")
  1393. {
  1394. value = false;
  1395. return true;
  1396. }
  1397. return false;
  1398. }
  1399. template<class T>
  1400. bool LLStringUtilBase<T>::convertToU8(const std::basic_string<T>& string,
  1401. U8& value)
  1402. {
  1403. S32 value32 = 0;
  1404. bool success = convertToS32(string, value32);
  1405. if (success && U8_MIN <= value32 && value32 <= U8_MAX)
  1406. {
  1407. value = (U8)value32;
  1408. return true;
  1409. }
  1410. return false;
  1411. }
  1412. template<class T>
  1413. bool LLStringUtilBase<T>::convertToS8(const std::basic_string<T>& string,
  1414. S8& value)
  1415. {
  1416. S32 value32 = 0;
  1417. bool success = convertToS32(string, value32);
  1418. if (success && S8_MIN <= value32 && value32 <= S8_MAX)
  1419. {
  1420. value = (S8)value32;
  1421. return true;
  1422. }
  1423. return false;
  1424. }
  1425. template<class T>
  1426. bool LLStringUtilBase<T>::convertToS16(const std::basic_string<T>& string,
  1427. S16& value)
  1428. {
  1429. S32 value32 = 0;
  1430. bool success = convertToS32(string, value32);
  1431. if (success && S16_MIN <= value32 && value32 <= S16_MAX)
  1432. {
  1433. value = (S16)value32;
  1434. return true;
  1435. }
  1436. return false;
  1437. }
  1438. template<class T>
  1439. bool LLStringUtilBase<T>::convertToU16(const std::basic_string<T>& string,
  1440. U16& value)
  1441. {
  1442. S32 value32 = 0;
  1443. bool success = convertToS32(string, value32);
  1444. if (success && U16_MIN <= value32 && value32 <= U16_MAX)
  1445. {
  1446. value = (U16)value32;
  1447. return true;
  1448. }
  1449. return false;
  1450. }
  1451. template<class T>
  1452. bool LLStringUtilBase<T>::convertToU32(const std::basic_string<T>& string,
  1453. U32& value)
  1454. {
  1455. if (string.empty())
  1456. {
  1457. return false;
  1458. }
  1459. std::basic_string<T> temp(string);
  1460. trim(temp);
  1461. U32 v;
  1462. std::basic_istringstream<T> i_stream((std::basic_string<T>)temp);
  1463. if (i_stream >> v)
  1464. {
  1465. value = v;
  1466. return true;
  1467. }
  1468. return false;
  1469. }
  1470. template<class T>
  1471. bool LLStringUtilBase<T>::convertToS32(const std::basic_string<T>& string,
  1472. S32& value)
  1473. {
  1474. if (string.empty())
  1475. {
  1476. return false;
  1477. }
  1478. std::basic_string<T> temp(string);
  1479. trim(temp);
  1480. S32 v;
  1481. std::basic_istringstream<T> i_stream((std::basic_string<T>)temp);
  1482. if (i_stream >> v)
  1483. {
  1484. #if 0 // *TODO: figure out overflow and underflow reporting here
  1485. if (LONG_MAX == v || LONG_MIN == v)
  1486. {
  1487. // Underflow or overflow
  1488. return false;
  1489. }
  1490. #endif
  1491. value = v;
  1492. return true;
  1493. }
  1494. return false;
  1495. }
  1496. template<class T>
  1497. bool LLStringUtilBase<T>::convertToF32(const std::basic_string<T>& string,
  1498. F32& value)
  1499. {
  1500. F64 value64 = 0.0;
  1501. bool success = convertToF64(string, value64);
  1502. if (success && -F32_MAX <= value64 && value64 <= F32_MAX)
  1503. {
  1504. value = (F32)value64;
  1505. return true;
  1506. }
  1507. return false;
  1508. }
  1509. template<class T>
  1510. bool LLStringUtilBase<T>::convertToF64(const std::basic_string<T>& string,
  1511. F64& value)
  1512. {
  1513. if (string.empty())
  1514. {
  1515. return false;
  1516. }
  1517. std::basic_string<T> temp(string);
  1518. trim(temp);
  1519. F64 v;
  1520. std::basic_istringstream<T> i_stream((std::basic_string<T>)temp);
  1521. if (i_stream >> v)
  1522. {
  1523. #if 0 // *TODO: figure out overflow and underflow reporting here
  1524. if (-HUGE_VAL == v || HUGE_VAL == v)
  1525. {
  1526. // Underflow or overflow
  1527. return false;
  1528. }
  1529. #endif
  1530. value = v;
  1531. return true;
  1532. }
  1533. return false;
  1534. }
  1535. template<class T>
  1536. void LLStringUtilBase<T>::truncate(std::basic_string<T>& string,
  1537. size_type count)
  1538. {
  1539. size_type cur_size = string.size();
  1540. string.resize(count < cur_size ? count : cur_size);
  1541. }
  1542. // Overload for use with boost::unordered_map and boost::unordered_set.
  1543. // Note: the hash does not need to be unique (it is only used to determine in
  1544. // which bucket the actual key will be stored), thus why we only care for a
  1545. // few characters and the length of the string: this is faster than boost's
  1546. // hash (which uses hash_combine() on each character of the string), but on the
  1547. // other hand, there will be more hash collisions if the strings are very
  1548. // similar (which is not the case for the maps this hash is used for). HB
  1549. LL_INLINE size_t hash_value(const std::string& str) noexcept
  1550. {
  1551. const char* ptr = str.data();
  1552. size_t len = str.length();
  1553. U32 hash = len + 1;
  1554. if (LL_LIKELY(len > 3))
  1555. {
  1556. // We use the four last characters of the string, which are more likely
  1557. // to differ from one string to the other in our code and data...
  1558. U32* ptr32 = (U32*)(ptr + len - 4);
  1559. // Note: ptr[2] = first letter after "LL" in "LLStuff", which is
  1560. // important, for example, with singletons names.
  1561. return (size_t)(*ptr32 * hash + ptr[2]);
  1562. }
  1563. // This path is very unlikely to be taken, given our usage of strings as
  1564. // keys in the viewer... Still faster than a loop, especially if the
  1565. // compiler optimizes properly with a jump table.
  1566. switch (len)
  1567. {
  1568. case 3:
  1569. hash <<= 8;
  1570. hash += ptr[2];
  1571. case 2:
  1572. hash <<= 8;
  1573. hash += ptr[1];
  1574. case 1:
  1575. hash <<= 8;
  1576. hash += *ptr;
  1577. default:
  1578. return (size_t)hash;
  1579. }
  1580. }
// This used to be in a separate llformat.h header file. Moved here for
// coherency. HB
//
// Use as follows:
// std::string result = llformat("Test:%d (%.2f %.2f)", idx, x, y);
//
// Note: uses an internal buffer limited to 1024 (but vsnprintf prevents any
// overrun).
  1589. std::string llformat(const char* fmt, ...);
  1590. #endif // LL_STRING_H