affixmgr.hxx 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250
  1. #ifndef _AFFIXMGR_HXX_
  2. #define _AFFIXMGR_HXX_
  3. #include "hunvisapi.h"
  4. #include <stdio.h>
  5. #include "atypes.hxx"
  6. #include "baseaffix.hxx"
  7. #include "hashmgr.hxx"
  8. #include "phonet.hxx"
  9. #include "replist.hxx"
  10. // check flag duplication (NOTE(review): presumably these bits are stored per flag in the dupflags buffer taken by AffixMgr::parse_affix -- confirm)
  11. #define dupSFX (1 << 0) // bit 0; presumably "flag already used by a suffix (SFX) entry" -- confirm
  12. #define dupPFX (1 << 1) // bit 1; presumably "flag already used by a prefix (PFX) entry" -- confirm
  // Forward declarations: within this header PfxEntry and SfxEntry are only
  // used through pointers, so their full definitions are not required here.
  13. class PfxEntry;
  14. class SfxEntry;
  // AffixMgr -- declaration of the affix manager class. Only declarations live
  // in this header; none of the member functions are defined here.
  //
  // Layout of the declaration:
  //   * data members (private by default, since this is a `class`),
  //   * public checking / compound / accessor API,
  //   * private parsing and affix-tree helpers.
  //
  // NOTE(review): the class is exported with LIBHUNSPELL_DLL_EXPORTED, so the
  // order and types of the data members are presumably part of the binary
  // layout seen by clients -- avoid reordering or resizing them casually.
  15. class LIBHUNSPELL_DLL_EXPORTED AffixMgr
  16. {
  // Four tables of SETSIZE affix-entry pointers: two for prefixes, two for
  // suffixes. NOTE(review): presumably the *Start tables are keyed by the
  // first character of the affix and the *Flag tables by affix flag -- confirm
  // in the implementation.
  17. PfxEntry * pStart[SETSIZE];
  18. SfxEntry * sStart[SETSIZE];
  19. PfxEntry * pFlag[SETSIZE];
  20. SfxEntry * sFlag[SETSIZE];
  // Hash manager handles. alldic and maxdic have the same types as the
  // constructor's `ptr` and `md` arguments (presumably stored from them --
  // confirm); who owns them is not visible from this header.
  21. HashMgr * pHMgr;
  22. HashMgr ** alldic;
  23. int * maxdic;
  // Text-related settings; their names match the get_key_string,
  // get_try_string, get_encoding, get_utf8 and get_complexprefixes accessors.
  // NOTE(review): ownership of the char buffers is not visible here; check the
  // constructor and destructor.
  24. char * keystring;
  25. char * trystring;
  26. char * encoding;
  27. struct cs_info * csconv;
  28. int utf8;
  29. int complexprefixes;
  // Flags and int switches related to compound words (see compound_check and
  // the cpd*_check methods below). Their exact semantics are defined by the
  // parser and checker code, not by this header.
  30. FLAG compoundflag;
  31. FLAG compoundbegin;
  32. FLAG compoundmiddle;
  33. FLAG compoundend;
  34. FLAG compoundroot;
  35. FLAG compoundforbidflag;
  36. FLAG compoundpermitflag;
  37. int checkcompounddup;
  38. int checkcompoundrep;
  39. int checkcompoundcase;
  40. int checkcompoundtriple;
  41. int simplifiedtriple;
  // Word-level flags, each exposed through the get_* accessor of the same name.
  42. FLAG forbiddenword;
  43. FLAG nosuggest;
  44. FLAG nongramsuggest;
  45. FLAG needaffix;
  46. int cpdmin; // NOTE(review): presumably the minimum length of a compound part -- confirm
  // Tables, mostly paired with an element count (num* precedes its table).
  // The private parse_reptable / parse_convtable / parse_maptable /
  // parse_breaktable / parse_checkcpdtable / parse_defcpdtable /
  // parse_phonetable helpers have matching names and presumably fill them.
  47. int numrep;
  48. replentry * reptable;
  49. RepList * iconvtable;
  50. RepList * oconvtable;
  51. int nummap;
  52. mapentry * maptable;
  53. int numbreak;
  54. char ** breaktable;
  55. int numcheckcpd;
  56. patentry * checkcpdtable;
  57. int simplifiedcpd;
  58. int numdefcpd;
  59. flagentry * defcpdtable;
  60. phonetable * phone;
  // Settings exposed through get_maxngramsugs, get_maxcpdsugs, get_maxdiff,
  // get_onlymaxdiff, get_nosplitsugs and get_sugswithdots.
  61. int maxngramsugs;
  62. int maxcpdsugs;
  63. int maxdiff;
  64. int onlymaxdiff;
  65. int nosplitsugs;
  66. int sugswithdots;
  // Compound limits and syllable-related data (see get_syllable and
  // parse_cpdsyllable). cpdvowels is kept both as a char string and as a
  // w_char array with an explicit length.
  67. int cpdwordmax;
  68. int cpdmaxsyllable;
  69. char * cpdvowels;
  70. w_char * cpdvowels_utf16;
  71. int cpdvowels_utf16_len;
  72. char * cpdsyllablenum;
  // Fields flagged by the original author as breaking statelessness.
  // NOTE(review): presumably written during a check call and read back through
  // get_prefix / get_suffix / get_derived, which would make those results valid
  // only until the next call -- confirm before sharing one AffixMgr between
  // threads.
  73. const char * pfxappnd; // BUG: not stateless
  74. const char * sfxappnd; // BUG: not stateless
  75. FLAG sfxflag; // BUG: not stateless
  76. char * derived; // BUG: not stateless
  77. SfxEntry * sfx; // BUG: not stateless
  78. PfxEntry * pfx; // BUG: not stateless
  79. int checknum;
  // Word characters and ignored characters, each stored as a char string plus
  // an unsigned short array with an explicit length (see get_wordchars* and
  // get_ignore* below).
  80. char * wordchars;
  81. unsigned short * wordchars_utf16;
  82. int wordchars_utf16_len;
  83. char * ignorechars;
  84. unsigned short * ignorechars_utf16;
  85. int ignorechars_utf16_len;
  86. char * version;
  87. char * lang;
  88. int langnum;
  // Further flags and int switches; most have a same-named get_* accessor in
  // the public section (circumfix and substandard do not).
  89. FLAG lemma_present;
  90. FLAG circumfix;
  91. FLAG onlyincompound;
  92. FLAG keepcase;
  93. FLAG forceucase;
  94. FLAG warn;
  95. int forbidwarn;
  96. FLAG substandard;
  97. int checksharps;
  98. int fullstrip;
  99. int havecontclass; // boolean variable
  100. char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
  101. public:
  // Construction. `key` is optional (defaults to NULL). The private parse_file
  // takes the same affpath/key pair, so the constructor presumably forwards
  // them to it -- confirm in the implementation.
  102. AffixMgr(const char * affpath, HashMgr** ptr, int * md,
  103. const char * key = NULL);
  104. ~AffixMgr();
  // Affix checking. Each *_check method takes the word and its length and
  // returns a hentry pointer. NOTE(review): presumably NULL means "no match"
  // -- confirm.
  // affix_check declares needflag as `unsigned short` while the other methods
  // use FLAG (presumably a typedef of the same type -- confirm in atypes.hxx).
  105. struct hentry * affix_check(const char * word, int len,
  106. const unsigned short needflag = (unsigned short) 0,
  107. char in_compound = IN_CPD_NOT);
  108. struct hentry * prefix_check(const char * word, int len,
  109. char in_compound, const FLAG needflag = FLAG_NULL);
  110. inline int isSubset(const char * s1, const char * s2);
  111. struct hentry * prefix_check_twosfx(const char * word, int len,
  112. char in_compound, const FLAG needflag = FLAG_NULL);
  113. inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
  114. struct hentry * suffix_check(const char * word, int len, int sfxopts,
  115. PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
  116. const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
  117. char in_compound = IN_CPD_NOT);
  118. struct hentry * suffix_check_twosfx(const char * word, int len,
  119. int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
  // *_morph counterparts of the checks above; they return char * instead of a
  // hentry pointer. NOTE(review): ownership of the returned buffer (who frees
  // it) is not visible from this header -- check the implementation.
  120. char * affix_check_morph(const char * word, int len,
  121. const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
  122. char * prefix_check_morph(const char * word, int len,
  123. char in_compound, const FLAG needflag = FLAG_NULL);
  124. char * suffix_check_morph (const char * word, int len, int sfxopts,
  125. PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
  126. const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
  127. char * prefix_check_twosfx_morph(const char * word, int len,
  128. char in_compound, const FLAG needflag = FLAG_NULL);
  129. char * suffix_check_twosfx_morph(const char * word, int len,
  130. int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
  // Generation helpers. The last two parameters of expand_rootword are unnamed
  // in this declaration; see the definition for their meaning.
  131. char * morphgen(char * ts, int wl, const unsigned short * ap,
  132. unsigned short al, char * morph, char * targetmorph, int level);
  133. int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
  134. int wl, const unsigned short * ap, unsigned short al, char * bad,
  135. int, char *);
  // Compound-word support: syllable counting, the cpd*_check tests and the
  // compound_check / compound_check_morph entry points.
  136. short get_syllable (const char * word, int wlen);
  137. int cpdrep_check(const char * word, int len);
  138. int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
  139. const char affixed);
  140. int defcpd_check(hentry *** words, short wnum, hentry * rv,
  141. hentry ** rwords, char all);
  142. int cpdcase_check(const char * word, int len);
  143. inline int candidate_check(const char * word, int len);
  144. void setcminmax(int * cmin, int * cmax, const char * word, int len);
  145. struct hentry * compound_check(const char * word, int len, short wordnum,
  146. short numsyllable, short maxwordnum, short wnum, hentry ** words,
  147. char hu_mov_rule, char is_sug, int * info);
  148. int compound_check_morph(const char * word, int len, short wordnum,
  149. short numsyllable, short maxwordnum, short wnum, hentry ** words,
  150. char hu_mov_rule, char ** result, char * partresult);
  151. struct hentry * lookup(const char * word);
  // Accessors. All are declared const except get_encoding and get_key_string.
  // Several return non-const pointers; NOTE(review): presumably these point at
  // the members above rather than at copies -- confirm before modifying or
  // freeing anything they return.
  152. int get_numrep() const;
  153. struct replentry * get_reptable() const;
  154. RepList * get_iconvtable() const;
  155. RepList * get_oconvtable() const;
  156. struct phonetable * get_phonetable() const;
  157. int get_nummap() const;
  158. struct mapentry * get_maptable() const;
  159. int get_numbreak() const;
  160. char ** get_breaktable() const;
  161. char * get_encoding();
  162. int get_langnum() const;
  163. char * get_key_string();
  164. char * get_try_string() const;
  165. const char * get_wordchars() const;
  // The *_utf16 accessors take an int * out-parameter named len, presumably
  // set to the matching *_utf16_len member -- confirm.
  166. unsigned short * get_wordchars_utf16(int * len) const;
  167. char * get_ignore() const;
  168. unsigned short * get_ignore_utf16(int * len) const;
  169. int get_compound() const;
  170. FLAG get_compoundflag() const;
  171. FLAG get_compoundbegin() const;
  172. FLAG get_forbiddenword() const;
  173. FLAG get_nosuggest() const;
  174. FLAG get_nongramsuggest() const;
  175. FLAG get_needaffix() const;
  176. FLAG get_onlyincompound() const;
  177. FLAG get_compoundroot() const;
  178. FLAG get_lemma_present() const;
  179. int get_checknum() const;
  // NOTE(review): get_prefix / get_suffix / get_derived presumably expose the
  // "not stateless" members (pfxappnd, sfxappnd, derived) -- confirm.
  180. const char * get_prefix() const;
  181. const char * get_suffix() const;
  182. const char * get_derived() const;
  183. const char * get_version() const;
  184. int have_contclass() const;
  185. int get_utf8() const;
  186. int get_complexprefixes() const;
  187. char * get_suffixed(char ) const;
  188. int get_maxngramsugs() const;
  189. int get_maxcpdsugs() const;
  190. int get_maxdiff() const;
  191. int get_onlymaxdiff() const;
  192. int get_nosplitsugs() const;
  193. int get_sugswithdots(void) const;
  194. FLAG get_keepcase(void) const;
  195. FLAG get_forceucase(void) const;
  196. FLAG get_warn(void) const;
  197. int get_forbidwarn(void) const;
  198. int get_checksharps(void) const;
  199. char * encode_flag(unsigned short aflag) const;
  200. int get_fullstrip() const;
  201. private:
  // Parsing helpers. Apart from parse_file, each takes the current line and a
  // FileMgr (presumably the affix file being read, for multi-line entries --
  // confirm). All return int; the meaning of the return value is defined in
  // the implementation.
  202. int parse_file(const char * affpath, const char * key);
  203. int parse_flag(char * line, unsigned short * out, FileMgr * af);
  204. int parse_num(char * line, int * out, FileMgr * af);
  205. int parse_cpdsyllable(char * line, FileMgr * af);
  206. int parse_reptable(char * line, FileMgr * af);
  207. int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
  208. int parse_phonetable(char * line, FileMgr * af);
  209. int parse_maptable(char * line, FileMgr * af);
  210. int parse_breaktable(char * line, FileMgr * af);
  211. int parse_checkcpdtable(char * line, FileMgr * af);
  212. int parse_defcpdtable(char * line, FileMgr * af);
  // `at` presumably selects the affix type (prefix or suffix) and `dupflags`
  // presumably receives the dupSFX / dupPFX bits defined above this class --
  // confirm.
  213. int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
  // Condition and flag utilities used while parsing.
  214. void reverse_condition(char *);
  215. void debugflag(char * result, unsigned short flag);
  216. int condlen(char *);
  217. int encodeit(affentry &entry, char * cs);
  // Construction and ordering of the prefix/suffix entry structures; the pfx
  // and sfx variants are declared in parallel.
  218. int build_pfxtree(PfxEntry* pfxptr);
  219. int build_sfxtree(SfxEntry* sfxptr);
  220. int process_pfx_order();
  221. int process_sfx_order();
  222. PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
  223. SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
  224. int process_pfx_tree_to_list();
  225. int process_sfx_tree_to_list();
  // First and last parameters are unnamed in this declaration; see the
  // definition for their meaning.
  226. int redundant_condition(char, char * strip, int stripl,
  227. const char * cond, int);
  228. };
  229. #endif