csutil.hxx 7.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220
  1. #ifndef __CSUTILHXX__
  2. #define __CSUTILHXX__
  3. #include "hunvisapi.h"
  4. // First some base level utility routines
  5. #include <string.h>
  6. #include "w_char.hxx"
  7. #include "htypes.hxx"
  8. #ifdef MOZILLA_CLIENT
  9. #include "nscore.h" // for mozalloc headers
  10. #endif
  11. // capitalization types of a word
  // NOTE(review): presumably these are the values returned by get_captype() and
  // get_captype_utf8() declared further down in this header; the HUH* names look
  // like mixed-case variants -- confirm against the implementation.
  12. #define NOCAP 0
  13. #define INITCAP 1
  14. #define ALLCAP 2
  15. #define HUHCAP 3
  16. #define HUHINITCAP 4
  17. // default encoding name and default keyboard string
  // (SPELL_KEYSTRING lists the three letter rows of a QWERTY keyboard, separated by '|')
  18. #define SPELL_ENCODING "ISO8859-1"
  19. #define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
  20. // default morphological field tags: a two-character code followed by ':'
  21. #define MORPH_STEM "st:"
  22. #define MORPH_ALLOMORPH "al:"
  23. #define MORPH_POS "po:"
  24. #define MORPH_DERI_PFX "dp:"
  25. #define MORPH_INFL_PFX "ip:"
  26. #define MORPH_TERM_PFX "tp:"
  27. #define MORPH_DERI_SFX "ds:"
  28. #define MORPH_INFL_SFX "is:"
  29. #define MORPH_TERM_SFX "ts:"
  30. #define MORPH_SURF_PFX "sp:"
  31. #define MORPH_FREQ "fr:"
  32. #define MORPH_PHON "ph:"
  33. #define MORPH_HYPH "hy:"
  34. #define MORPH_PART "pa:"
  35. #define MORPH_FLAG "fl:"
  36. #define MORPH_HENTRY "_H:"
  // length of a field tag; expands to a strlen() call on MORPH_STEM at every use
  // (every tag defined above is 3 characters long)
  37. #define MORPH_TAG_LEN strlen(MORPH_STEM)
  // separator characters: space, newline and vertical tab
  // NOTE(review): FLD / REC / ALT presumably stand for field, record and
  // alternative -- confirm against the code that uses them.
  38. #define MSEP_FLD ' '
  39. #define MSEP_REC '\n'
  40. #define MSEP_ALT '\v'
  41. // default flags
  // NOTE(review): DEFAULTFLAGS and FORBIDDENWORD are defined with the same
  // value (65510); confirm that this is intended.
  42. #define DEFAULTFLAGS 65510
  43. #define FORBIDDENWORD 65510
  44. #define ONLYUPCASEFLAG 65511
  45. // convert UTF-16 characters (src, srclen) to UTF-8 written into dest (capacity: size)
  // NOTE(review): the units of size/srclen and the meaning of the returned
  // pointer are not visible in this header -- confirm in the implementation.
  46. LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
  47. // convert UTF-8 characters (src) to UTF-16 written into dest (capacity: size)
  // NOTE(review): presumably returns the number of converted characters -- confirm.
  48. LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
  49. // sort 2-byte vector (range given by begin and end)
  50. LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
  51. // binary search for flag in 2-byte vector
  // NOTE(review): a binary search presumably requires flags to be sorted
  // (see flag_qsort); the exact meaning of right is not visible here.
  52. LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
  53. // remove end of line char(s) from s
  54. LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
  55. // duplicate string
  // NOTE(review): ownership of the returned buffer is not visible in this header.
  56. LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
  57. // strcat for limited length destination string (limit given by max)
  58. LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
  59. // duplicate reverse of string
  60. LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
  61. // parse into tokens with char delimiter
  62. LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
  63. // parse into tokens with char delimiter
  // NOTE(review): same description as mystrsep above; how the two functions
  // differ is not visible in this header.
  64. LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
  65. // NOTE(review): the original comment here ("parse into tokens with char
  // delimiter") looks copy-pasted from mystrsep. The name and the three string
  // arguments suggest a search-and-replace helper -- confirm against the
  // implementation and name the parameters.
  66. LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
  67. // append s to the end of every line in lines
  68. LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
  69. // tokenize text into lines at breakchar; the result is passed back through lines
  // NOTE(review): presumably returns the number of lines -- confirm.
  70. LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
  71. // tokenize into lines at breakchar and uniq in place
  72. LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
  // NOTE(review): undocumented variant of line_uniq that takes char **; judging
  // by the signature it may replace *text -- confirm.
  73. LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
  74. // change oldc to newc in text, in place
  75. LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
  76. // reverse word
  77. LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
  78. // reverse word (NOTE(review): presumably the UTF-8 aware variant, judging by the name)
  79. LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
  80. // remove duplicates from a list of n strings
  81. LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
  82. // free character array list of n entries
  83. LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
  84. // character encoding information
  // NOTE(review): the field meanings below are inferred from the field names
  // only -- confirm against the tables that use this struct.
  85. struct cs_info {
  86. unsigned char ccase; // presumably a case flag for the character
  87. unsigned char clower; // presumably the lower-case counterpart
  88. unsigned char cupper; // presumably the upper-case counterpart
  89. };
  // UTF table set-up / tear-down and per-character Unicode helpers.
  // NOTE(review): none of these declarations is documented in this header; the
  // remarks below are inferred from names and signatures only -- confirm.
  90. LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
  91. LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
  // presumably map a 16-bit character to its upper/lower-case form for language langnum
  92. LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
  93. LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
  94. LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
  // presumably returns the cs_info table for the encoding named es
  95. LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
  96. // get the language identifier (number) of a language code
  97. LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
  98. // get characters of the given 8bit encoding with lower- and uppercase forms
  // NOTE(review): ownership of the returned string is not visible in this header.
  99. LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
  100. // convert null terminated string p to all caps using encoding; result goes to d
  101. LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
  102. // convert null terminated string p to all lower case using encoding; result goes to d
  103. LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
  104. // convert null terminated string p to have initial capital using encoding; result goes to d
  105. LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
  106. // convert null terminated string to all caps, in place, using table csconv
  107. LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
  108. // convert null terminated string to all lower case, in place, using table csconv
  109. LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
  110. // convert null terminated string to have initial capital, in place, using table csconv
  111. LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
  112. // convert first nc characters of a w_char string to lower case
  // NOTE(review): the original comment said "UTF-8", but the argument is
  // w_char *, which the u16_u8/u8_u16 declarations treat as UTF-16.
  113. LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
  114. // convert first nc characters of a w_char string to capital
  115. LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
  116. // get type of capitalization of the 8-bit string q of length nl
  // NOTE(review): presumably returns one of NOCAP, INITCAP, ALLCAP, HUHCAP,
  // HUHINITCAP -- confirm.
  117. LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
  118. // get type of capitalization of the w_char string q of length nl
  // NOTE(review): the name says utf8, but the argument is w_char * -- confirm
  // what the caller is expected to pass.
  119. LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
  120. // strip all ignored characters in the string (ignored set given as ignored_len 16-bit values)
  121. LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
  122. // strip all ignored characters in the string (ignored set given as a char string)
  123. LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
  // NOTE(review): the parse_* and morphology helpers below are undocumented in
  // this header. ln is presumably a line number used for diagnostics, and
  // out / out_utf16 receive the parsed result -- confirm in the implementation.
  124. LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
  125. LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
  126. int * out_utf16_len, int utf8, int ln);
  127. LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
  128. LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
  129. LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
  130. LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
  131. // conversion function for protected memory
  // NOTE(review): presumably writes the pointer value source into the buffer dest -- confirm.
  132. LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
  133. // conversion function for protected memory
  // NOTE(review): presumably reads back a pointer written by store_pointer;
  // HENTRY_DATA in this header calls it on the bytes following the word.
  134. LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
  135. // hash entry macros
  136. LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
  137. {
  138. char *ret;
  139. if (!h->var)
  140. ret = NULL;
  141. else if (h->var & H_OPT_ALIASM)
  142. ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
  143. else
  144. ret = HENTRY_WORD(h) + h->blen + 1;
  145. return ret;
  146. }
  147. // NULL-free version for warning-free OOo build
  148. LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
  149. {
  150. const char *ret;
  151. if (!h->var)
  152. ret = "";
  153. else if (h->var & H_OPT_ALIASM)
  154. ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
  155. else
  156. ret = HENTRY_WORD(h) + h->blen + 1;
  157. return ret;
  158. }
  159. LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
  160. {
  161. return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
  162. }
  // compare two w_char values member by member (members l and h);
  // function-like macro: each argument is evaluated twice, so do not pass
  // expressions with side effects
  163. #define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
  164. #endif