llspellcheck.cpp 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394
  1. /**
  2. * @file llspellcheck.cpp
  3. * @brief LLSpellCheck class implementation
  4. *
  5. * $LicenseInfo:firstyear=2012&license=viewergpl$
  6. *
  7. * Copyright (c) 2009 LordGregGreg Back, 2012 Henri Beauchamp
  8. *
  9. * Second Life Viewer Source Code
  10. * The source code in this file ("Source Code") is provided by Linden Lab
  11. * to you under the terms of the GNU General Public License, version 2.0
  12. * ("GPL"), unless you have obtained a separate licensing agreement
  13. * ("Other License"), formally executed by you and Linden Lab. Terms of
  14. * the GPL can be found in doc/GPL-license.txt in this distribution, or
  15. * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
  16. *
  17. * There are special exceptions to the terms and conditions of the GPL as
  18. * it is applied to this Source Code. View the full text of the exception
  19. * in the file doc/FLOSS-exception.txt in this software distribution, or
  20. * online at
  21. * http://secondlifegrid.net/programs/open_source/licensing/flossexception
  22. *
  23. * By copying, modifying or distributing this software, you acknowledge
  24. * that you have read and understood your obligations described above,
  25. * and agree to abide by those obligations.
  26. *
  27. * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
  28. * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
  29. * COMPLETENESS OR PERFORMANCE.
  30. * $/LicenseInfo$
  31. */
  32. #include "linden_common.h"
  33. #if LL_WINDOWS
  34. #include "hunspell/hunspelldll.h"
  35. #else
  36. #include "hunspell/hunspell.hxx"
  37. #endif
  38. #include "llspellcheck.h"
  39. #include "lldir.h"
  40. #include "lldiriterator.h"
  41. #include "llstring.h"
  42. LLSpellCheck::LLSpellCheck()
  43. : mHunspell(NULL),
  44. mSpellCheckEnable(false),
  45. mShowMisspelled(false)
  46. {
  47. }
  48. LLSpellCheck::~LLSpellCheck()
  49. {
  50. if (mHunspell)
  51. {
  52. delete mHunspell;
  53. mHunspell = NULL;
  54. mIgnoreList.clear();
  55. mSpellCheckEnable = false;
  56. }
  57. }
  58. // static
  59. void LLSpellCheck::initClass()
  60. {
  61. }
  62. std::string LLSpellCheck::getDicFullPath(const std::string& file)
  63. {
  64. // Check if it exists in user dir and if not take it from app dir
  65. std::string path = gDirUtil.getFullPath(LL_PATH_USER_SETTINGS,
  66. "dictionaries", file);
  67. if (!LLFile::exists(path))
  68. {
  69. path = gDirUtil.getFullPath(LL_PATH_APP_SETTINGS, "dictionaries",
  70. file);
  71. }
  72. return path;
  73. }
  74. // Return all *.aff files found either in the user settings or in the app
  75. // settings directories and for which there are corresponding *.dic files
  76. // Note that the names are returned without the .aff extension.
  77. std::set<std::string> LLSpellCheck::getBaseDicts()
  78. {
  79. std::set<std::string> names;
  80. std::string name;
  81. std::string path = gDirUtil.getFullPath(LL_PATH_USER_SETTINGS,
  82. "dictionaries", "");
  83. {
  84. LLDirIterator iter(path, "*.aff");
  85. while (iter.next(name))
  86. {
  87. LLStringUtil::truncate(name, name.length() - 4);
  88. if (LLFile::exists(path + name + ".dic"))
  89. {
  90. names.emplace(name);
  91. }
  92. }
  93. } // Destroys LLDirIterator iter
  94. path = gDirUtil.getFullPath(LL_PATH_APP_SETTINGS, "dictionaries", "");
  95. {
  96. LLDirIterator iter(path, "*.aff");
  97. while (iter.next(name))
  98. {
  99. LLStringUtil::truncate(name, name.length() - 4);
  100. if (LLFile::exists(path + name + ".dic"))
  101. {
  102. names.emplace(name);
  103. }
  104. }
  105. } // Destroys LLDirIterator iter
  106. return names;
  107. }
  108. // Return all *.dic files found either in the user settings or in the app
  109. // settings directories and for which there is no corresponding *.aff file
  110. std::set<std::string> LLSpellCheck::getExtraDicts()
  111. {
  112. std::set<std::string> names;
  113. std::string name;
  114. std::string root;
  115. std::string path = gDirUtil.getFullPath(LL_PATH_USER_SETTINGS,
  116. "dictionaries", "");
  117. {
  118. LLDirIterator iter(path, "*.dic");
  119. while (iter.next(name))
  120. {
  121. root = name;
  122. LLStringUtil::truncate(root, root.length() - 4);
  123. if (!LLFile::exists(path + root + ".aff"))
  124. {
  125. names.emplace(name);
  126. }
  127. }
  128. } // Destroys LLDirIterator iter
  129. path = gDirUtil.getFullPath(LL_PATH_APP_SETTINGS, "dictionaries", "");
  130. {
  131. LLDirIterator iter(path, "*.dic");
  132. while (iter.next(name))
  133. {
  134. root = name;
  135. LLStringUtil::truncate(root, root.length() - 4);
  136. if (!LLFile::exists(path + root + ".aff"))
  137. {
  138. names.emplace(name);
  139. }
  140. }
  141. } // Destroys LLDirIterator iter
  142. return names;
  143. }
  144. void LLSpellCheck::addDictionary(const std::string& dict_name)
  145. {
  146. if (mHunspell && !dict_name.empty())
  147. {
  148. std::string dict = getDicFullPath(dict_name);
  149. if (LLFile::exists(dict))
  150. {
  151. llinfos << "Adding additional dictionary: " << dict << llendl;
  152. mHunspell->add_dic(dict.c_str());
  153. }
  154. }
  155. }
  156. void LLSpellCheck::setDictionary(const std::string& dict_name)
  157. {
  158. std::string name = dict_name;
  159. LLStringUtil::toLower(name);
  160. std::string dicaffpath = getDicFullPath(name + ".aff");
  161. std::string dicdicpath = dicaffpath;
  162. LLStringUtil::truncate(dicdicpath, dicdicpath.length() - 4);
  163. dicdicpath += ".dic";
  164. if (!LLFile::exists(dicaffpath) || !LLFile::exists(dicdicpath))
  165. {
  166. llwarns << "Cannot find the dictionary files for: "
  167. << dict_name << llendl;
  168. return;
  169. }
  170. llinfos << "Setting new base dictionary to " << dicdicpath
  171. << " with associated affix file " << dicaffpath << llendl;
  172. mCurrentDictName = name;
  173. if (mHunspell)
  174. {
  175. delete mHunspell;
  176. }
  177. mHunspell = new Hunspell(dicaffpath.c_str(), dicdicpath.c_str());
  178. mDictEncoding = mHunspell->get_dic_encoding();
  179. llinfos << "Dictionary encoding is: " << mDictEncoding << llendl;
  180. LLStringUtil::toLower(mDictEncoding);
  181. std::string fname = gDirUtil.getFullPath(LL_PATH_USER_SETTINGS,
  182. "dictionaries", "custom.dic");
  183. if (!LLFile::exists(fname))
  184. {
  185. llinfos << "Creating custom.dic..." << llendl;
  186. LLFile::mkdir(gDirUtil.getFullPath(LL_PATH_USER_SETTINGS,
  187. "dictionaries"));
  188. llofstream outfile(fname.c_str());
  189. if (outfile.is_open())
  190. {
  191. outfile << 1 << std::endl;
  192. outfile << "SL" << std::endl;
  193. outfile.close();
  194. }
  195. else
  196. {
  197. llwarns << "Error creating custom.dic. Cannot open file for writing."
  198. << llendl;
  199. }
  200. }
  201. llinfos << "Adding extra *.dic dictionaries..." << llendl;
  202. std::set<std::string> to_install = getExtraDicts();
  203. for (std::set<std::string>::iterator it = to_install.begin();
  204. it != to_install.end(); ++it)
  205. {
  206. addDictionary(*it);
  207. }
  208. llinfos << "Done setting the dictionaries." << llendl;
  209. }
  210. void LLSpellCheck::addToCustomDictionary(const std::string& word)
  211. {
  212. if (!mHunspell)
  213. {
  214. return;
  215. }
  216. mHunspell->add(word.c_str());
  217. std::vector<std::string> word_list;
  218. std::string fname = gDirUtil.getFullPath(LL_PATH_USER_SETTINGS,
  219. "dictionaries", "custom.dic");
  220. // Read the dictionary, if it exists
  221. if (LLFile::exists(fname))
  222. {
  223. // Get words already there..
  224. llifstream infile(fname.c_str());
  225. if (infile.is_open())
  226. {
  227. std::string line;
  228. S32 line_num = 0;
  229. while (getline(infile, line))
  230. {
  231. if (line_num != 0) // Skip the count of lines in the list
  232. {
  233. word_list.emplace_back(line);
  234. }
  235. ++line_num;
  236. }
  237. }
  238. infile.close();
  239. }
  240. // Add the new word to the list
  241. #if 0
  242. if (mDictEncoding == "utf-8")
  243. {
  244. word_list.emplace_back(word);
  245. }
  246. else
  247. {
  248. word_list.emplace_back(utf8_to_iso8859(word));
  249. }
  250. #else
  251. word_list.emplace_back(word);
  252. #endif
  253. llofstream outfile(fname.c_str());
  254. if (outfile.is_open())
  255. {
  256. outfile << word_list.size() << std::endl;
  257. for (std::vector<std::string>::const_iterator it = word_list.begin();
  258. it != word_list.end(); ++it)
  259. {
  260. outfile << *it << std::endl;
  261. }
  262. outfile.close();
  263. }
  264. else
  265. {
  266. llwarns << "Could not add \"" << word
  267. << "\" to the custom dictionary. Error opening the file for writing."
  268. << llendl;
  269. }
  270. }
  271. void LLSpellCheck::addToIgnoreList(const std::string& word)
  272. {
  273. if (word.length() > 2)
  274. {
  275. std::string lc_word = word;
  276. LLStringUtil::toLower(lc_word);
  277. mIgnoreList.emplace(lc_word);
  278. }
  279. }
  280. void LLSpellCheck::addWordsToIgnoreList(const std::string& words)
  281. {
  282. // Add each lexical word in "words"
  283. for (size_t i = 0; i < words.length(); ++i)
  284. {
  285. std::string word;
  286. while (i < words.length() &&
  287. LLStringUtil::isPartOfLexicalWord(words[i]))
  288. {
  289. if (words[i] != '\'' ||
  290. (!word.empty() && i + 1 < words.length() &&
  291. words[i + 1] != '\'' &&
  292. LLStringUtil::isPartOfLexicalWord(words[i + 1])))
  293. {
  294. word += words[i];
  295. }
  296. ++i;
  297. }
  298. if (word.length() > 2)
  299. {
  300. addToIgnoreList(word);
  301. LL_DEBUGS("SpellCheck") << "Added \"" << word
  302. << "\" to the ignore list." << LL_ENDL;
  303. }
  304. }
  305. }
  306. bool LLSpellCheck::checkSpelling(const std::string& word)
  307. {
  308. if (mHunspell && word.length() > 2)
  309. {
  310. std::string lc_word = word;
  311. LLStringUtil::toLower(lc_word);
  312. if (mIgnoreList.count(lc_word))
  313. {
  314. return true;
  315. }
  316. #if 0
  317. if (mDictEncoding == "utf-8")
  318. {
  319. return mHunspell->spell(word.c_str());
  320. }
  321. else
  322. {
  323. return mHunspell->spell(utf8_to_iso8859(word).c_str());
  324. }
  325. #else
  326. return mHunspell->spell(word.c_str());
  327. #endif
  328. }
  329. return true;
  330. }
  331. S32 LLSpellCheck::getSuggestions(const std::string& word,
  332. std::vector<std::string>& suggestions)
  333. {
  334. suggestions.clear();
  335. if (mHunspell && word.length() > 2)
  336. {
  337. char** suggestion_list;
  338. S32 suggestion_count = mHunspell->suggest(&suggestion_list,
  339. word.c_str());
  340. if (suggestion_count > 0)
  341. {
  342. bool is_utf8 = mDictEncoding == "utf-8";
  343. std::string word;
  344. for (S32 i = 0; i < suggestion_count; ++i)
  345. {
  346. if (is_utf8)
  347. {
  348. word = suggestion_list[i];
  349. }
  350. else
  351. {
  352. word = iso8859_to_utf8(suggestion_list[i]);
  353. }
  354. suggestions.emplace_back(word);
  355. }
  356. }
  357. mHunspell->free_list(&suggestion_list, suggestion_count);
  358. }
  359. return suggestions.size();
  360. }