llpatch_dct.cpp 36 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419
  1. /**
  2. * @file llpatch_dct.cpp
  3. * @brief DCT patch.
  4. *
  5. * $LicenseInfo:firstyear=2000&license=viewergpl$
  6. *
  7. * Copyright (c) 2000-2009, Linden Research, Inc.
  8. *
  9. * Second Life Viewer Source Code
  10. * The source code in this file ("Source Code") is provided by Linden Lab
  11. * to you under the terms of the GNU General Public License, version 2.0
  12. * ("GPL"), unless you have obtained a separate licensing agreement
  13. * ("Other License"), formally executed by you and Linden Lab. Terms of
  14. * the GPL can be found in doc/GPL-license.txt in this distribution, or
  15. * online at http://secondlifegrid.net/programs/open_source/licensing/gplv2
  16. *
  17. * There are special exceptions to the terms and conditions of the GPL as
  18. * it is applied to this Source Code. View the full text of the exception
  19. * in the file doc/FLOSS-exception.txt in this software distribution, or
  20. * online at
  21. * http://secondlifegrid.net/programs/open_source/licensing/flossexception
  22. *
  23. * By copying, modifying or distributing this software, you acknowledge
  24. * that you have read and understood your obligations described above,
  25. * and agree to abide by those obligations.
  26. *
  27. * ALL LINDEN LAB SOURCE CODE IS PROVIDED "AS IS." LINDEN LAB MAKES NO
  28. * WARRANTIES, EXPRESS, IMPLIED OR OTHERWISE, REGARDING ITS ACCURACY,
  29. * COMPLETENESS OR PERFORMANCE.
  30. * $/LicenseInfo$
  31. */
  32. #include "linden_common.h"
  33. #include "llmath.h"
  34. #include "llvector3.h"
  35. #include "llpatch_dct.h"
  36. typedef struct s_patch_compress_global_data
  37. {
  38. S32 patch_size;
  39. S32 patch_stride;
  40. U32 charptr;
  41. S32 layer_type;
  42. } PCGD;
  43. PCGD gPatchCompressGlobalData;
  44. void reset_patch_compressor()
  45. {
  46. PCGD* pcp = &gPatchCompressGlobalData;
  47. pcp->charptr = 0;
  48. }
  49. S32 gCurrentSize = 0;
  50. F32 gPatchQuantizeTable[LARGE_PATCH_SIZE * LARGE_PATCH_SIZE];
  51. void build_patch_quantize_table(S32 size)
  52. {
  53. for (S32 j = 0; j < size; ++j)
  54. {
  55. for (S32 i = 0; i < size; ++i)
  56. {
  57. gPatchQuantizeTable[j * size + i] = 1.f / (1.f + 2.f * (i + j));
  58. }
  59. }
  60. }
  61. F32 gPatchCosines[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE];
  62. void setup_patch_cosines(S32 size)
  63. {
  64. F32 oosob = F_PI * 0.5f / size;
  65. for (S32 u = 0; u < size; ++u)
  66. {
  67. for (S32 n = 0; n < size; ++n)
  68. {
  69. gPatchCosines[u * size + n] = cosf((2.f * n + 1.f) * u * oosob);
  70. }
  71. }
  72. }
  73. S32 gCopyMatrix[LARGE_PATCH_SIZE * LARGE_PATCH_SIZE];
  74. void build_copy_matrix(S32 size)
  75. {
  76. bool b_diag = false;
  77. bool b_right = true;
  78. S32 i = 0;
  79. S32 j = 0;
  80. S32 count = 0;
  81. while (i < size && j < size)
  82. {
  83. gCopyMatrix[j * size + i] = count++;
  84. if (!b_diag)
  85. {
  86. if (b_right)
  87. {
  88. if (i < size - 1)
  89. {
  90. ++i;
  91. }
  92. else
  93. {
  94. ++j;
  95. }
  96. b_right = false;
  97. b_diag = true;
  98. }
  99. else
  100. {
  101. if (j < size - 1)
  102. {
  103. ++j;
  104. }
  105. else
  106. {
  107. ++i;
  108. }
  109. b_right = true;
  110. b_diag = true;
  111. }
  112. }
  113. else
  114. {
  115. if (b_right)
  116. {
  117. ++i;
  118. --j;
  119. if (i == size - 1 || j == 0)
  120. {
  121. b_diag = false;
  122. }
  123. }
  124. else
  125. {
  126. --i;
  127. ++j;
  128. if (i == 0 || j == size - 1)
  129. {
  130. b_diag = false;
  131. }
  132. }
  133. }
  134. }
  135. }
  136. void init_patch_compressor(S32 patch_size, S32 patch_stride, S32 layer_type)
  137. {
  138. PCGD* pcp = &gPatchCompressGlobalData;
  139. pcp->charptr = 0;
  140. pcp->patch_size = patch_size;
  141. pcp->patch_stride = patch_stride;
  142. pcp->layer_type = layer_type;
  143. if (patch_size != gCurrentSize)
  144. {
  145. gCurrentSize = patch_size;
  146. build_patch_quantize_table(patch_size);
  147. setup_patch_cosines(patch_size);
  148. build_copy_matrix(patch_size);
  149. }
  150. }
  151. void prescan_patch(F32* patch, LLPatchHeader* php, F32& zmax, F32& zmin)
  152. {
  153. PCGD* pcp = &gPatchCompressGlobalData;
  154. S32 stride = pcp->patch_stride;
  155. S32 size = pcp->patch_size;
  156. S32 jstride;
  157. zmax = -99999999.f;
  158. zmin = 99999999.f;
  159. for (S32 j = 0; j < size; ++j)
  160. {
  161. jstride = j * stride;
  162. for (S32 i = 0; i < size; ++i)
  163. {
  164. if (*(patch + jstride + i) > zmax)
  165. {
  166. zmax = *(patch + jstride + i);
  167. }
  168. if (*(patch + jstride + i) < zmin)
  169. {
  170. zmin = *(patch + jstride + i);
  171. }
  172. }
  173. }
  174. php->dc_offset = zmin;
  175. php->range = (U16)(zmax - zmin + 1.f);
  176. }
  177. void dct_line(F32* linein, F32* lineout, S32 line)
  178. {
  179. F32 total;
  180. F32 *pcp = gPatchCosines;
  181. S32 line_size = line * NORMAL_PATCH_SIZE;
  182. #ifdef _PATCH_SIZE_16_AND_32_ONLY
  183. F32* tlinein;
  184. F32* tpcp;
  185. tlinein = linein + line_size;
  186. total = *(tlinein++);
  187. total += *(tlinein++);
  188. total += *(tlinein++);
  189. total += *(tlinein++);
  190. total += *(tlinein++);
  191. total += *(tlinein++);
  192. total += *(tlinein++);
  193. total += *(tlinein++);
  194. total += *(tlinein++);
  195. total += *(tlinein++);
  196. total += *(tlinein++);
  197. total += *(tlinein++);
  198. total += *(tlinein++);
  199. total += *(tlinein++);
  200. total += *(tlinein++);
  201. total += *(tlinein);
  202. *(lineout + line_size) = OO_SQRT2 * total;
  203. for (S32 u = 1; u < NORMAL_PATCH_SIZE; ++u)
  204. {
  205. tlinein = linein + line_size;
  206. tpcp = pcp + (u<<4);
  207. total = *(tlinein++) * (*(tpcp++));
  208. total += *(tlinein++) * (*(tpcp++));
  209. total += *(tlinein++) * (*(tpcp++));
  210. total += *(tlinein++) * (*(tpcp++));
  211. total += *(tlinein++) * (*(tpcp++));
  212. total += *(tlinein++) * (*(tpcp++));
  213. total += *(tlinein++) * (*(tpcp++));
  214. total += *(tlinein++) * (*(tpcp++));
  215. total += *(tlinein++) * (*(tpcp++));
  216. total += *(tlinein++) * (*(tpcp++));
  217. total += *(tlinein++) * (*(tpcp++));
  218. total += *(tlinein++) * (*(tpcp++));
  219. total += *(tlinein++) * (*(tpcp++));
  220. total += *(tlinein++) * (*(tpcp++));
  221. total += *(tlinein++) * (*(tpcp++));
  222. total += *(tlinein)*(*tpcp);
  223. *(lineout + line_size + u) = total;
  224. }
  225. #else
  226. S32 size = gPatchCompressGlobalData.patch_size;
  227. total = 0.f;
  228. for (S32 n = 0; n < size; ++n)
  229. {
  230. total += linein[line_size + n];
  231. }
  232. lineout[line_size] = OO_SQRT2*total;
  233. for (S32 u = 1; u < size; ++u)
  234. {
  235. total = 0.f;
  236. for (S32 n = 0; n < size; ++n)
  237. {
  238. total += linein[line_size + n] * pcp[u * size + n];
  239. }
  240. lineout[line_size + u] = total;
  241. }
  242. #endif
  243. }
  244. void dct_line_large(F32* linein, F32* lineout, S32 line)
  245. {
  246. F32 total;
  247. F32* pcp = gPatchCosines;
  248. S32 line_size = line * LARGE_PATCH_SIZE;
  249. F32* tlinein;
  250. F32* tpcp;
  251. tlinein = linein + line_size;
  252. total = *(tlinein++);
  253. total += *(tlinein++);
  254. total += *(tlinein++);
  255. total += *(tlinein++);
  256. total += *(tlinein++);
  257. total += *(tlinein++);
  258. total += *(tlinein++);
  259. total += *(tlinein++);
  260. total += *(tlinein++);
  261. total += *(tlinein++);
  262. total += *(tlinein++);
  263. total += *(tlinein++);
  264. total += *(tlinein++);
  265. total += *(tlinein++);
  266. total += *(tlinein++);
  267. total += *(tlinein++);
  268. total += *(tlinein++);
  269. total += *(tlinein++);
  270. total += *(tlinein++);
  271. total += *(tlinein++);
  272. total += *(tlinein++);
  273. total += *(tlinein++);
  274. total += *(tlinein++);
  275. total += *(tlinein++);
  276. total += *(tlinein++);
  277. total += *(tlinein++);
  278. total += *(tlinein++);
  279. total += *(tlinein++);
  280. total += *(tlinein++);
  281. total += *(tlinein++);
  282. total += *(tlinein++);
  283. total += *(tlinein);
  284. *(lineout + line_size) = OO_SQRT2 * total;
  285. for (S32 u = 1; u < LARGE_PATCH_SIZE; ++u)
  286. {
  287. tlinein = linein + line_size;
  288. tpcp = pcp + (u << 5);
  289. total = *(tlinein++) * (*(tpcp++));
  290. total += *(tlinein++) * (*(tpcp++));
  291. total += *(tlinein++) * (*(tpcp++));
  292. total += *(tlinein++) * (*(tpcp++));
  293. total += *(tlinein++) * (*(tpcp++));
  294. total += *(tlinein++) * (*(tpcp++));
  295. total += *(tlinein++) * (*(tpcp++));
  296. total += *(tlinein++) * (*(tpcp++));
  297. total += *(tlinein++) * (*(tpcp++));
  298. total += *(tlinein++) * (*(tpcp++));
  299. total += *(tlinein++) * (*(tpcp++));
  300. total += *(tlinein++) * (*(tpcp++));
  301. total += *(tlinein++) * (*(tpcp++));
  302. total += *(tlinein++) * (*(tpcp++));
  303. total += *(tlinein++) * (*(tpcp++));
  304. total += *(tlinein++) * (*(tpcp++));
  305. total += *(tlinein++) * (*(tpcp++));
  306. total += *(tlinein++) * (*(tpcp++));
  307. total += *(tlinein++) * (*(tpcp++));
  308. total += *(tlinein++) * (*(tpcp++));
  309. total += *(tlinein++) * (*(tpcp++));
  310. total += *(tlinein++) * (*(tpcp++));
  311. total += *(tlinein++) * (*(tpcp++));
  312. total += *(tlinein++) * (*(tpcp++));
  313. total += *(tlinein++) * (*(tpcp++));
  314. total += *(tlinein++) * (*(tpcp++));
  315. total += *(tlinein++) * (*(tpcp++));
  316. total += *(tlinein++) * (*(tpcp++));
  317. total += *(tlinein++) * (*(tpcp++));
  318. total += *(tlinein++) * (*(tpcp++));
  319. total += *(tlinein++) * (*(tpcp++));
  320. total += *(tlinein) * (*tpcp);
  321. *(lineout + line_size + u) = total;
  322. }
  323. }
  324. LL_INLINE void dct_column(F32* linein, S32* lineout, S32 column)
  325. {
  326. F32 total;
  327. F32 oosob = 2.f / 16.f;
  328. F32* pcp = gPatchCosines;
  329. S32* copy_matrix = gCopyMatrix;
  330. F32* qt = gPatchQuantizeTable;
  331. #ifdef _PATCH_SIZE_16_AND_32_ONLY
  332. F32* tlinein;
  333. F32* tpcp;
  334. S32 sizeu;
  335. tlinein = linein + column;
  336. total = *(tlinein);
  337. total += *(tlinein += NORMAL_PATCH_SIZE);
  338. total += *(tlinein += NORMAL_PATCH_SIZE);
  339. total += *(tlinein += NORMAL_PATCH_SIZE);
  340. total += *(tlinein += NORMAL_PATCH_SIZE);
  341. total += *(tlinein += NORMAL_PATCH_SIZE);
  342. total += *(tlinein += NORMAL_PATCH_SIZE);
  343. total += *(tlinein += NORMAL_PATCH_SIZE);
  344. total += *(tlinein += NORMAL_PATCH_SIZE);
  345. total += *(tlinein += NORMAL_PATCH_SIZE);
  346. total += *(tlinein += NORMAL_PATCH_SIZE);
  347. total += *(tlinein += NORMAL_PATCH_SIZE);
  348. total += *(tlinein += NORMAL_PATCH_SIZE);
  349. total += *(tlinein += NORMAL_PATCH_SIZE);
  350. total += *(tlinein += NORMAL_PATCH_SIZE);
  351. total += *(tlinein += NORMAL_PATCH_SIZE);
  352. *(lineout + *(copy_matrix + column)) = (S32)(OO_SQRT2 * total * oosob*(*(qt + column)));
  353. for (S32 u = 1; u < NORMAL_PATCH_SIZE; ++u)
  354. {
  355. tlinein = linein + column;
  356. tpcp = pcp + (u << 4);
  357. total = *(tlinein)*(*(tpcp++));
  358. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  359. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  360. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  361. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  362. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  363. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  364. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  365. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  366. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  367. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  368. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  369. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  370. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  371. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp++));
  372. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp));
  373. sizeu = NORMAL_PATCH_SIZE * u + column;
  374. *(lineout + *(copy_matrix + sizeu)) = (S32)(total * oosob * (*(qt + sizeu)));
  375. }
  376. #else
  377. S32 size = gPatchCompressGlobalData.patch_size;
  378. F32 oosob = 2.f / size;
  379. total = 0.f;
  380. for (S32 n = 0; n < size; ++n)
  381. {
  382. total += linein[size * n + column];
  383. }
  384. lineout[copy_matrix[column]] = OO_SQRT2 * total * oosob * qt[column];
  385. for (S32 u = 1; u < size; ++u)
  386. {
  387. total = 0.f;
  388. for (S32 n = 0; n < size; ++n)
  389. {
  390. total += linein[size * n + column] * pcp[u * size + n];
  391. }
  392. lineout[copy_matrix[size * u + column]] = total * oosob * qt[size * u + column];
  393. }
  394. #endif
  395. }
  396. LL_INLINE void dct_column_large(F32* linein, S32* lineout, S32 column)
  397. {
  398. F32 total;
  399. F32 oosob = 2.f / 32.f;
  400. F32* pcp = gPatchCosines;
  401. S32* copy_matrix = gCopyMatrix;
  402. F32* qt = gPatchQuantizeTable;
  403. F32* tlinein;
  404. F32* tpcp;
  405. S32 sizeu;
  406. tlinein = linein + column;
  407. total = *(tlinein);
  408. total += *(tlinein += LARGE_PATCH_SIZE);
  409. total += *(tlinein += LARGE_PATCH_SIZE);
  410. total += *(tlinein += LARGE_PATCH_SIZE);
  411. total += *(tlinein += LARGE_PATCH_SIZE);
  412. total += *(tlinein += LARGE_PATCH_SIZE);
  413. total += *(tlinein += LARGE_PATCH_SIZE);
  414. total += *(tlinein += LARGE_PATCH_SIZE);
  415. total += *(tlinein += LARGE_PATCH_SIZE);
  416. total += *(tlinein += LARGE_PATCH_SIZE);
  417. total += *(tlinein += LARGE_PATCH_SIZE);
  418. total += *(tlinein += LARGE_PATCH_SIZE);
  419. total += *(tlinein += LARGE_PATCH_SIZE);
  420. total += *(tlinein += LARGE_PATCH_SIZE);
  421. total += *(tlinein += LARGE_PATCH_SIZE);
  422. total += *(tlinein += LARGE_PATCH_SIZE);
  423. total += *(tlinein += LARGE_PATCH_SIZE);
  424. total += *(tlinein += LARGE_PATCH_SIZE);
  425. total += *(tlinein += LARGE_PATCH_SIZE);
  426. total += *(tlinein += LARGE_PATCH_SIZE);
  427. total += *(tlinein += LARGE_PATCH_SIZE);
  428. total += *(tlinein += LARGE_PATCH_SIZE);
  429. total += *(tlinein += LARGE_PATCH_SIZE);
  430. total += *(tlinein += LARGE_PATCH_SIZE);
  431. total += *(tlinein += LARGE_PATCH_SIZE);
  432. total += *(tlinein += LARGE_PATCH_SIZE);
  433. total += *(tlinein += LARGE_PATCH_SIZE);
  434. total += *(tlinein += LARGE_PATCH_SIZE);
  435. total += *(tlinein += LARGE_PATCH_SIZE);
  436. total += *(tlinein += LARGE_PATCH_SIZE);
  437. total += *(tlinein += LARGE_PATCH_SIZE);
  438. total += *(tlinein += LARGE_PATCH_SIZE);
  439. *(lineout + *(copy_matrix + column)) = (S32)(OO_SQRT2*total*oosob*(*(qt + column)));
  440. for (S32 u = 1; u < LARGE_PATCH_SIZE; ++u)
  441. {
  442. tlinein = linein + column;
  443. tpcp = pcp + (u << 5);
  444. total = *(tlinein)*(*(tpcp++));
  445. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  446. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  447. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  448. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  449. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  450. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  451. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  452. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  453. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  454. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  455. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  456. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  457. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  458. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  459. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  460. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  461. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  462. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  463. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  464. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  465. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  466. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  467. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  468. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  469. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  470. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  471. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  472. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  473. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  474. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp++));
  475. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp));
  476. sizeu = LARGE_PATCH_SIZE * u + column;
  477. *(lineout + *(copy_matrix + sizeu)) = (S32)(total * oosob * (*(qt + sizeu)));
  478. }
  479. }
  480. LL_INLINE void dct_patch(F32* block, S32* cpatch)
  481. {
  482. F32 temp[NORMAL_PATCH_SIZE * NORMAL_PATCH_SIZE];
  483. #ifdef _PATCH_SIZE_16_AND_32_ONLY
  484. dct_line(block, temp, 0);
  485. dct_line(block, temp, 1);
  486. dct_line(block, temp, 2);
  487. dct_line(block, temp, 3);
  488. dct_line(block, temp, 4);
  489. dct_line(block, temp, 5);
  490. dct_line(block, temp, 6);
  491. dct_line(block, temp, 7);
  492. dct_line(block, temp, 8);
  493. dct_line(block, temp, 9);
  494. dct_line(block, temp, 10);
  495. dct_line(block, temp, 11);
  496. dct_line(block, temp, 12);
  497. dct_line(block, temp, 13);
  498. dct_line(block, temp, 14);
  499. dct_line(block, temp, 15);
  500. dct_column(temp, cpatch, 0);
  501. dct_column(temp, cpatch, 1);
  502. dct_column(temp, cpatch, 2);
  503. dct_column(temp, cpatch, 3);
  504. dct_column(temp, cpatch, 4);
  505. dct_column(temp, cpatch, 5);
  506. dct_column(temp, cpatch, 6);
  507. dct_column(temp, cpatch, 7);
  508. dct_column(temp, cpatch, 8);
  509. dct_column(temp, cpatch, 9);
  510. dct_column(temp, cpatch, 10);
  511. dct_column(temp, cpatch, 11);
  512. dct_column(temp, cpatch, 12);
  513. dct_column(temp, cpatch, 13);
  514. dct_column(temp, cpatch, 14);
  515. dct_column(temp, cpatch, 15);
  516. #else
  517. S32 size = gPatchCompressGlobalData.patch_size;
  518. for (S32 i = 0; i < size; ++i)
  519. {
  520. dct_line(block, temp, i);
  521. }
  522. for (S32 i = 0; i < size; ++i)
  523. {
  524. dct_column(temp, cpatch, i);
  525. }
  526. #endif
  527. }
  528. LL_INLINE void dct_patch_large(F32* block, S32* cpatch)
  529. {
  530. F32 temp[LARGE_PATCH_SIZE * LARGE_PATCH_SIZE];
  531. dct_line_large(block, temp, 0);
  532. dct_line_large(block, temp, 1);
  533. dct_line_large(block, temp, 2);
  534. dct_line_large(block, temp, 3);
  535. dct_line_large(block, temp, 4);
  536. dct_line_large(block, temp, 5);
  537. dct_line_large(block, temp, 6);
  538. dct_line_large(block, temp, 7);
  539. dct_line_large(block, temp, 8);
  540. dct_line_large(block, temp, 9);
  541. dct_line_large(block, temp, 10);
  542. dct_line_large(block, temp, 11);
  543. dct_line_large(block, temp, 12);
  544. dct_line_large(block, temp, 13);
  545. dct_line_large(block, temp, 14);
  546. dct_line_large(block, temp, 15);
  547. dct_line_large(block, temp, 16);
  548. dct_line_large(block, temp, 17);
  549. dct_line_large(block, temp, 18);
  550. dct_line_large(block, temp, 19);
  551. dct_line_large(block, temp, 20);
  552. dct_line_large(block, temp, 21);
  553. dct_line_large(block, temp, 22);
  554. dct_line_large(block, temp, 23);
  555. dct_line_large(block, temp, 24);
  556. dct_line_large(block, temp, 25);
  557. dct_line_large(block, temp, 26);
  558. dct_line_large(block, temp, 27);
  559. dct_line_large(block, temp, 28);
  560. dct_line_large(block, temp, 29);
  561. dct_line_large(block, temp, 30);
  562. dct_line_large(block, temp, 31);
  563. dct_column_large(temp, cpatch, 0);
  564. dct_column_large(temp, cpatch, 1);
  565. dct_column_large(temp, cpatch, 2);
  566. dct_column_large(temp, cpatch, 3);
  567. dct_column_large(temp, cpatch, 4);
  568. dct_column_large(temp, cpatch, 5);
  569. dct_column_large(temp, cpatch, 6);
  570. dct_column_large(temp, cpatch, 7);
  571. dct_column_large(temp, cpatch, 8);
  572. dct_column_large(temp, cpatch, 9);
  573. dct_column_large(temp, cpatch, 10);
  574. dct_column_large(temp, cpatch, 11);
  575. dct_column_large(temp, cpatch, 12);
  576. dct_column_large(temp, cpatch, 13);
  577. dct_column_large(temp, cpatch, 14);
  578. dct_column_large(temp, cpatch, 15);
  579. dct_column_large(temp, cpatch, 16);
  580. dct_column_large(temp, cpatch, 17);
  581. dct_column_large(temp, cpatch, 18);
  582. dct_column_large(temp, cpatch, 19);
  583. dct_column_large(temp, cpatch, 20);
  584. dct_column_large(temp, cpatch, 21);
  585. dct_column_large(temp, cpatch, 22);
  586. dct_column_large(temp, cpatch, 23);
  587. dct_column_large(temp, cpatch, 24);
  588. dct_column_large(temp, cpatch, 25);
  589. dct_column_large(temp, cpatch, 26);
  590. dct_column_large(temp, cpatch, 27);
  591. dct_column_large(temp, cpatch, 28);
  592. dct_column_large(temp, cpatch, 29);
  593. dct_column_large(temp, cpatch, 30);
  594. dct_column_large(temp, cpatch, 31);
  595. }
  596. void compress_patch(F32* patch, S32* cpatch, LLPatchHeader* php, S32 prequant)
  597. {
  598. PCGD* pcp = &gPatchCompressGlobalData;
  599. S32 stride = pcp->patch_stride;
  600. S32 size = pcp->patch_size;
  601. F32 block[LARGE_PATCH_SIZE*LARGE_PATCH_SIZE], *tblock;
  602. F32* tpatch;
  603. S32 wordsize = prequant;
  604. F32 oozrange = 1.f / php->range;
  605. F32 dc = php->dc_offset;
  606. S32 range = 1 << prequant;
  607. F32 premult = oozrange*range;
  608. F32 sub = (F32)(1 << (prequant - 1)) + dc * premult;
  609. php->quant_wbits = wordsize - 2;
  610. php->quant_wbits |= (prequant - 2) << 4;
  611. for (S32 j = 0; j < size; ++j)
  612. {
  613. tblock = block + j * size;
  614. tpatch = patch + j * stride;
  615. for (S32 i = 0; i < size; ++i)
  616. {
  617. *(tblock++) = *(tpatch++)*premult - sub;
  618. }
  619. }
  620. if (size == 16)
  621. {
  622. dct_patch(block, cpatch);
  623. }
  624. else
  625. {
  626. dct_patch_large(block, cpatch);
  627. }
  628. }
  629. void get_patch_group_header(LLGroupHeader *gopp)
  630. {
  631. PCGD* pcp = &gPatchCompressGlobalData;
  632. gopp->stride = pcp->patch_stride;
  633. gopp->patch_size = pcp->patch_size;
  634. gopp->layer_type = pcp->layer_type;
  635. }
  636. ///////////////////////////////////////////////////////////////////////////////
  637. // Formerly in patch_idct.cpp
  638. ///////////////////////////////////////////////////////////////////////////////
  639. LLGroupHeader* gGOPP;
  640. void set_group_of_patch_header(LLGroupHeader* gopp)
  641. {
  642. gGOPP = gopp;
  643. }
  644. F32 gPatchDequantizeTable[LARGE_PATCH_SIZE * LARGE_PATCH_SIZE];
  645. void build_patch_dequantize_table(S32 size)
  646. {
  647. for (S32 j = 0; j < size; ++j)
  648. {
  649. for (S32 i = 0; i < size; ++i)
  650. {
  651. gPatchDequantizeTable[j * size + i] = (1.f + 2.f * (i + j));
  652. }
  653. }
  654. }
  655. S32 gCurrentDeSize = 0;
  656. F32 gPatchICosines[LARGE_PATCH_SIZE * LARGE_PATCH_SIZE];
  657. void setup_patch_icosines(S32 size)
  658. {
  659. F32 oosob = F_PI * 0.5f / size;
  660. for (S32 u = 0; u < size; ++u)
  661. {
  662. for (S32 n = 0; n < size; ++n)
  663. {
  664. gPatchICosines[u * size + n] = cosf((2.f * n + 1.f) * u * oosob);
  665. }
  666. }
  667. }
  668. S32 gDeCopyMatrix[LARGE_PATCH_SIZE * LARGE_PATCH_SIZE];
  669. void build_decopy_matrix(S32 size)
  670. {
  671. bool b_diag = false;
  672. bool b_right = true;
  673. S32 i = 0;
  674. S32 j = 0;
  675. S32 count = 0;
  676. while (i < size && j < size)
  677. {
  678. gDeCopyMatrix[j * size + i] = count++;
  679. if (!b_diag)
  680. {
  681. if (b_right)
  682. {
  683. if (i < size - 1)
  684. {
  685. ++i;
  686. }
  687. else
  688. {
  689. ++j;
  690. }
  691. b_right = false;
  692. b_diag = true;
  693. }
  694. else
  695. {
  696. if (j < size - 1)
  697. {
  698. ++j;
  699. }
  700. else
  701. {
  702. ++i;
  703. }
  704. b_right = true;
  705. b_diag = true;
  706. }
  707. }
  708. else
  709. {
  710. if (b_right)
  711. {
  712. ++i;
  713. --j;
  714. if (i == size - 1 || j == 0)
  715. {
  716. b_diag = false;
  717. }
  718. }
  719. else
  720. {
  721. --i;
  722. ++j;
  723. if (i == 0 || j == size - 1)
  724. {
  725. b_diag = false;
  726. }
  727. }
  728. }
  729. }
  730. }
  731. void init_patch_decompressor(S32 size)
  732. {
  733. if (size != gCurrentDeSize)
  734. {
  735. gCurrentDeSize = size;
  736. build_patch_dequantize_table(size);
  737. setup_patch_icosines(size);
  738. build_decopy_matrix(size);
  739. }
  740. }
  741. LL_INLINE void idct_line(F32* linein, F32* lineout, S32 line)
  742. {
  743. F32 total;
  744. F32* pcp = gPatchICosines;
  745. #ifdef _PATCH_SIZE_16_AND_32_ONLY
  746. F32 oosob = 2.f / 16.f;
  747. S32 line_size = line * NORMAL_PATCH_SIZE;
  748. F32* tlinein;
  749. F32* tpcp;
  750. for (S32 n = 0; n < NORMAL_PATCH_SIZE; ++n)
  751. {
  752. tpcp = pcp + n;
  753. tlinein = linein + line_size;
  754. total = OO_SQRT2 * (*(tlinein++));
  755. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  756. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  757. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  758. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  759. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  760. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  761. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  762. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  763. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  764. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  765. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  766. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  767. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  768. total += *(tlinein++) * (*(tpcp += NORMAL_PATCH_SIZE));
  769. total += *(tlinein) * (*(tpcp += NORMAL_PATCH_SIZE));
  770. *(lineout + line_size + n) = total * oosob;
  771. }
  772. #else
  773. F32 oosob = 2.f / size;
  774. S32 size = gGOPP->patch_size;
  775. S32 line_size = line * size;
  776. for (S32 n = 0; n < size; ++n)
  777. {
  778. total = OO_SQRT2*linein[line_size];
  779. for (S32 u = 1; u < size; ++u)
  780. {
  781. total += linein[line_size + u] * pcp[u * size + n];
  782. }
  783. lineout[line_size + n] = total * oosob;
  784. }
  785. #endif
  786. }
  787. LL_INLINE void idct_line_large_slow(F32* linein, F32* lineout, S32 line)
  788. {
  789. F32 total;
  790. F32* pcp = gPatchICosines;
  791. F32 oosob = 2.f / 32.f;
  792. S32 line_size = line * LARGE_PATCH_SIZE;
  793. F32* tlinein;
  794. F32* tpcp;
  795. for (S32 n = 0; n < LARGE_PATCH_SIZE; ++n)
  796. {
  797. tpcp = pcp + n;
  798. tlinein = linein + line_size;
  799. total = OO_SQRT2 * (*(tlinein++));
  800. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  801. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  802. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  803. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  804. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  805. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  806. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  807. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  808. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  809. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  810. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  811. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  812. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  813. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  814. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  815. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  816. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  817. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  818. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  819. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  820. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  821. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  822. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  823. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  824. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  825. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  826. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  827. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  828. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  829. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  830. total += *(tlinein)*(*(tpcp += LARGE_PATCH_SIZE));
  831. *(lineout + line_size + n) = total * oosob;
  832. }
  833. }
  834. // Nota Bene: assumes that coefficients beyond 128 are 0 !
  835. void idct_line_large(F32* linein, F32* lineout, S32 line)
  836. {
  837. F32 total;
  838. F32 *pcp = gPatchICosines;
  839. F32 oosob = 2.f / 32.f;
  840. S32 line_size = line*LARGE_PATCH_SIZE;
  841. F32* tlinein;
  842. F32* tpcp;
  843. F32* baselinein = linein + line_size;
  844. F32* baselineout = lineout + line_size;
  845. for (S32 n = 0; n < LARGE_PATCH_SIZE; ++n)
  846. {
  847. tpcp = pcp++;
  848. tlinein = baselinein;
  849. total = OO_SQRT2 * (*(tlinein++));
  850. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  851. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  852. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  853. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  854. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  855. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  856. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  857. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  858. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  859. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  860. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  861. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  862. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  863. total += *(tlinein++) * (*(tpcp += LARGE_PATCH_SIZE));
  864. total += *(tlinein) * (*(tpcp));
  865. *baselineout++ = total * oosob;
  866. }
  867. }
  868. LL_INLINE void idct_column(F32* linein, F32* lineout, S32 column)
  869. {
  870. F32 total;
  871. F32 *pcp = gPatchICosines;
  872. #ifdef _PATCH_SIZE_16_AND_32_ONLY
  873. F32* tlinein;
  874. F32* tpcp;
  875. for (S32 n = 0; n < NORMAL_PATCH_SIZE; ++n)
  876. {
  877. tpcp = pcp + n;
  878. tlinein = linein + column;
  879. total = OO_SQRT2*(*tlinein);
  880. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  881. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  882. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  883. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  884. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  885. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  886. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  887. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  888. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  889. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  890. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  891. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  892. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  893. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  894. total += *(tlinein += NORMAL_PATCH_SIZE) * (*(tpcp += NORMAL_PATCH_SIZE));
  895. *(lineout + (n<<4) + column) = total;
  896. }
  897. #else
  898. S32 size = gGOPP->patch_size;
  899. S32 u;
  900. S32 u_size;
  901. for (n = 0; n < size; n++)
  902. {
  903. total = OO_SQRT2*linein[column];
  904. for (u = 1; u < size; u++)
  905. {
  906. u_size = u*size;
  907. total += linein[u_size + column]*pcp[u_size+n];
  908. }
  909. lineout[size*n + column] = total;
  910. }
  911. #endif
  912. }
  913. LL_INLINE void idct_column_large_slow(F32* linein, F32* lineout, S32 column)
  914. {
  915. F32 total;
  916. F32* pcp = gPatchICosines;
  917. F32* tlinein;
  918. F32* tpcp;
  919. for (S32 n = 0; n < LARGE_PATCH_SIZE; ++n)
  920. {
  921. tpcp = pcp + n;
  922. tlinein = linein + column;
  923. total = OO_SQRT2 * (*tlinein);
  924. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  925. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  926. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  927. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  928. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  929. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  930. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  931. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  932. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  933. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  934. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  935. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  936. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  937. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  938. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  939. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  940. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  941. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  942. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  943. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  944. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  945. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  946. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  947. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  948. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  949. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  950. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  951. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  952. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  953. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  954. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  955. *(lineout + (n << 5) + column) = total;
  956. }
  957. }
  958. // Nota Bene: assumes that coefficients beyond 128 are 0!
  959. void idct_column_large(F32* linein, F32* lineout, S32 column)
  960. {
  961. F32 total;
  962. F32* pcp = gPatchICosines;
  963. F32* tlinein;
  964. F32* tpcp;
  965. F32* baselinein = linein + column;
  966. F32* baselineout = lineout + column;
  967. for (S32 n = 0; n < LARGE_PATCH_SIZE; ++n)
  968. {
  969. tpcp = pcp++;
  970. tlinein = baselinein;
  971. total = OO_SQRT2 * (*tlinein);
  972. for (S32 m = 1; m < NORMAL_PATCH_SIZE; ++m)
  973. {
  974. total += *(tlinein += LARGE_PATCH_SIZE) * (*(tpcp += LARGE_PATCH_SIZE));
  975. }
  976. *(baselineout + (n << 5)) = total;
  977. }
  978. }
  979. LL_INLINE void idct_patch(F32* block)
  980. {
  981. F32 temp[LARGE_PATCH_SIZE * LARGE_PATCH_SIZE];
  982. #ifdef _PATCH_SIZE_16_AND_32_ONLY
  983. idct_column(block, temp, 0);
  984. idct_column(block, temp, 1);
  985. idct_column(block, temp, 2);
  986. idct_column(block, temp, 3);
  987. idct_column(block, temp, 4);
  988. idct_column(block, temp, 5);
  989. idct_column(block, temp, 6);
  990. idct_column(block, temp, 7);
  991. idct_column(block, temp, 8);
  992. idct_column(block, temp, 9);
  993. idct_column(block, temp, 10);
  994. idct_column(block, temp, 11);
  995. idct_column(block, temp, 12);
  996. idct_column(block, temp, 13);
  997. idct_column(block, temp, 14);
  998. idct_column(block, temp, 15);
  999. idct_line(temp, block, 0);
  1000. idct_line(temp, block, 1);
  1001. idct_line(temp, block, 2);
  1002. idct_line(temp, block, 3);
  1003. idct_line(temp, block, 4);
  1004. idct_line(temp, block, 5);
  1005. idct_line(temp, block, 6);
  1006. idct_line(temp, block, 7);
  1007. idct_line(temp, block, 8);
  1008. idct_line(temp, block, 9);
  1009. idct_line(temp, block, 10);
  1010. idct_line(temp, block, 11);
  1011. idct_line(temp, block, 12);
  1012. idct_line(temp, block, 13);
  1013. idct_line(temp, block, 14);
  1014. idct_line(temp, block, 15);
  1015. #else
  1016. S32 size = gGOPP->patch_size;
  1017. for (S32 i = 0; i < size; ++i)
  1018. {
  1019. idct_column(block, temp, i);
  1020. }
  1021. for (S32 i = 0; i < size; ++i)
  1022. {
  1023. idct_line(temp, block, i);
  1024. }
  1025. #endif
  1026. }
  1027. LL_INLINE void idct_patch_large(F32* block)
  1028. {
  1029. F32 temp[LARGE_PATCH_SIZE * LARGE_PATCH_SIZE];
  1030. idct_column_large_slow(block, temp, 0);
  1031. idct_column_large_slow(block, temp, 1);
  1032. idct_column_large_slow(block, temp, 2);
  1033. idct_column_large_slow(block, temp, 3);
  1034. idct_column_large_slow(block, temp, 4);
  1035. idct_column_large_slow(block, temp, 5);
  1036. idct_column_large_slow(block, temp, 6);
  1037. idct_column_large_slow(block, temp, 7);
  1038. idct_column_large_slow(block, temp, 8);
  1039. idct_column_large_slow(block, temp, 9);
  1040. idct_column_large_slow(block, temp, 10);
  1041. idct_column_large_slow(block, temp, 11);
  1042. idct_column_large_slow(block, temp, 12);
  1043. idct_column_large_slow(block, temp, 13);
  1044. idct_column_large_slow(block, temp, 14);
  1045. idct_column_large_slow(block, temp, 15);
  1046. idct_column_large_slow(block, temp, 16);
  1047. idct_column_large_slow(block, temp, 17);
  1048. idct_column_large_slow(block, temp, 18);
  1049. idct_column_large_slow(block, temp, 19);
  1050. idct_column_large_slow(block, temp, 20);
  1051. idct_column_large_slow(block, temp, 21);
  1052. idct_column_large_slow(block, temp, 22);
  1053. idct_column_large_slow(block, temp, 23);
  1054. idct_column_large_slow(block, temp, 24);
  1055. idct_column_large_slow(block, temp, 25);
  1056. idct_column_large_slow(block, temp, 26);
  1057. idct_column_large_slow(block, temp, 27);
  1058. idct_column_large_slow(block, temp, 28);
  1059. idct_column_large_slow(block, temp, 29);
  1060. idct_column_large_slow(block, temp, 30);
  1061. idct_column_large_slow(block, temp, 31);
  1062. idct_line_large_slow(temp, block, 0);
  1063. idct_line_large_slow(temp, block, 1);
  1064. idct_line_large_slow(temp, block, 2);
  1065. idct_line_large_slow(temp, block, 3);
  1066. idct_line_large_slow(temp, block, 4);
  1067. idct_line_large_slow(temp, block, 5);
  1068. idct_line_large_slow(temp, block, 6);
  1069. idct_line_large_slow(temp, block, 7);
  1070. idct_line_large_slow(temp, block, 8);
  1071. idct_line_large_slow(temp, block, 9);
  1072. idct_line_large_slow(temp, block, 10);
  1073. idct_line_large_slow(temp, block, 11);
  1074. idct_line_large_slow(temp, block, 12);
  1075. idct_line_large_slow(temp, block, 13);
  1076. idct_line_large_slow(temp, block, 14);
  1077. idct_line_large_slow(temp, block, 15);
  1078. idct_line_large_slow(temp, block, 16);
  1079. idct_line_large_slow(temp, block, 17);
  1080. idct_line_large_slow(temp, block, 18);
  1081. idct_line_large_slow(temp, block, 19);
  1082. idct_line_large_slow(temp, block, 20);
  1083. idct_line_large_slow(temp, block, 21);
  1084. idct_line_large_slow(temp, block, 22);
  1085. idct_line_large_slow(temp, block, 23);
  1086. idct_line_large_slow(temp, block, 24);
  1087. idct_line_large_slow(temp, block, 25);
  1088. idct_line_large_slow(temp, block, 26);
  1089. idct_line_large_slow(temp, block, 27);
  1090. idct_line_large_slow(temp, block, 28);
  1091. idct_line_large_slow(temp, block, 29);
  1092. idct_line_large_slow(temp, block, 30);
  1093. idct_line_large_slow(temp, block, 31);
  1094. }
// Global dither noise value, initialized to 128.
// NOTE(review): nothing in this part of the file reads it; presumably it is
// consumed elsewhere - confirm before changing or removing it.
S32 gDitherNoise = 128;
  1096. void decompress_patch(F32* patch, S32* cpatch, LLPatchHeader* ph)
  1097. {
  1098. F32 block[LARGE_PATCH_SIZE * LARGE_PATCH_SIZE];
  1099. F32* tblock = block;
  1100. F32* tpatch;
  1101. LLGroupHeader* gopp = gGOPP;
  1102. S32 size = gopp->patch_size;
  1103. F32 range = ph->range;
  1104. S32 prequant = (ph->quant_wbits >> 4) + 2;
  1105. S32 quantize = 1<<prequant;
  1106. F32 hmin = ph->dc_offset;
  1107. S32 stride = gopp->stride;
  1108. F32 ooq = 1.f / (F32)quantize;
  1109. F32* dq = gPatchDequantizeTable;
  1110. S32* decopy_matrix = gDeCopyMatrix;
  1111. F32 mult = ooq * range;
  1112. F32 addval = mult * (F32)(1 << (prequant - 1)) + hmin;
  1113. for (S32 i = 0; i < size*size; ++i)
  1114. {
  1115. *(tblock++) = *(cpatch + *(decopy_matrix++)) * (*dq++);
  1116. }
  1117. if (size == 16)
  1118. {
  1119. idct_patch(block);
  1120. }
  1121. else
  1122. {
  1123. idct_patch_large(block);
  1124. }
  1125. for (S32 j = 0; j < size; ++j)
  1126. {
  1127. tpatch = patch + j * stride;
  1128. tblock = block + j * size;
  1129. for (S32 i = 0; i < size; ++i)
  1130. {
  1131. *(tpatch++) = *(tblock++) * mult + addval;
  1132. }
  1133. }
  1134. }
  1135. void decompress_patchv(LLVector3* v, S32* cpatch, LLPatchHeader* ph)
  1136. {
  1137. F32 block[LARGE_PATCH_SIZE * LARGE_PATCH_SIZE];
  1138. F32* tblock = block;
  1139. LLVector3* tvec;
  1140. LLGroupHeader* gopp = gGOPP;
  1141. S32 size = gopp->patch_size;
  1142. F32 range = ph->range;
  1143. S32 prequant = (ph->quant_wbits >> 4) + 2;
  1144. S32 quantize = 1 << prequant;
  1145. F32 hmin = ph->dc_offset;
  1146. S32 stride = gopp->stride;
  1147. F32 ooq = 1.f / (F32)quantize;
  1148. F32* dq = gPatchDequantizeTable;
  1149. S32* decopy_matrix = gDeCopyMatrix;
  1150. F32 mult = ooq * range;
  1151. F32 addval = mult*(F32)(1 << (prequant - 1)) + hmin;
  1152. for (S32 i = 0; i < size * size; ++i)
  1153. {
  1154. *(tblock++) = *(cpatch + *(decopy_matrix++)) * (*dq++);
  1155. }
  1156. if (size == 16)
  1157. {
  1158. idct_patch(block);
  1159. }
  1160. else
  1161. {
  1162. idct_patch_large(block);
  1163. }
  1164. for (S32 j = 0; j < size; ++j)
  1165. {
  1166. tvec = v + j * stride;
  1167. tblock = block + j*size;
  1168. for (S32 i = 0; i < size; ++i)
  1169. {
  1170. (*tvec++).mV[VZ] = *(tblock++) * mult + addval;
  1171. }
  1172. }
  1173. }