llskinningutil.cpp 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. /**
  2. * @file llskinningutil.cpp
  3. * @brief Functions for mesh object skinning
  4. * @author [email protected]
  5. *
  6. * $LicenseInfo:firstyear=2015&license=viewerlgpl$
  7. * Second Life Viewer Source Code
  8. * Copyright (C) 2015, Linden Research, Inc.
  9. *
  10. * This library is free software; you can redistribute it and/or
  11. * modify it under the terms of the GNU Lesser General Public
  12. * License as published by the Free Software Foundation;
  13. * version 2.1 of the License only.
  14. *
  15. * This library is distributed in the hope that it will be useful,
  16. * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17. * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  18. * Lesser General Public License for more details.
  19. *
  20. * You should have received a copy of the GNU Lesser General Public
  21. * License along with this library; if not, write to the Free Software
  22. * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23. *
  24. * Linden Research, Inc., 945 Battery Street, San Francisco, CA 94111 USA
  25. * $/LicenseInfo$
  26. */
  27. #include "llviewerprecompiledheaders.h"
  28. #include "llskinningutil.h"
  29. #include "llthread.h"
  30. #include "llmeshrepository.h"
  31. #include "llvoavatar.h"
  32. #define OPTIMIZED 1
  33. //static
  34. void LLSkinningUtil::scrubInvalidJoints(LLVOAvatar* avatar,
  35. LLMeshSkinInfo* skin)
  36. {
  37. // Skip if already done.
  38. if (!skin || !avatar || skin->mInvalidJointsScrubbed)
  39. {
  40. return;
  41. }
  42. // NOTE: do NOT use OpenMP here. Even with the is_main_thread() check,
  43. // you would get a crash in avatar->getJoint(skin->mJointKeys[j]). HB
  44. for (U32 j = 0, count = skin->mJointKeys.size(); j < count; ++j)
  45. {
  46. // Fix invalid to pelvis joint. Currently meshes with invalid names
  47. // will be blocked on upload, so this is just needed for handling of
  48. // any legacy bad data.
  49. LLJoint* joint = avatar->getJoint(skin->mJointKeys[j]);
  50. // Check against joint num is needed to catch some special joints
  51. // like mRoot.
  52. if (!joint || joint->getJointNum() < 0)
  53. {
  54. LL_DEBUGS("Avatar") << "Mesh rigged to invalid joint"
  55. << skin->mJointNames[j] << LL_ENDL;
  56. skin->mJointKeys[j] = LL_JOINT_KEY_PELVIS;
  57. skin->mJointNames[j] = "mPelvis";
  58. }
  59. }
  60. skin->mInvalidJointsScrubbed = true;
  61. }
  62. //static
  63. U32 LLSkinningUtil::initSkinningMatrixPalette(LLMatrix4a* mat,
  64. const LLMeshSkinInfo* skin,
  65. LLVOAvatar* avatar)
  66. {
  67. if (!mat || !skin || !avatar) return 0;
  68. U32 count = llmin(LL_MAX_JOINTS_PER_MESH_OBJECT,
  69. (U32)skin->mJointKeys.size());
  70. LLMatrix4a bind, world;
  71. #if LL_OPENMP
  72. // NOTE: we cannot use OpenMP when called from the mesh repository which is
  73. // itself a (p)thread (pthread and OpenMP threads are incompatible)... HB
  74. if (is_main_thread())
  75. {
  76. # pragma omp parallel for private(bind, world)
  77. // NOTE: VS2017 OpenMP requires a signed integer loop index... HB
  78. for (S32 j = 0; j < (S32)count; ++j)
  79. {
  80. LLJoint* joint = avatar->getJoint(skin->mJointKeys[j]);
  81. if (joint)
  82. {
  83. bind.loadu(skin->mInvBindMatrix[j]);
  84. world.loadu(joint->getWorldMatrix());
  85. mat[j].matMul(bind, world);
  86. }
  87. else
  88. {
  89. // This should not happen; in mesh upload, skinned rendering
  90. // should be disabled unless all joints are valid. In other
  91. // cases of skinned rendering, invalid joints should already
  92. // have been removed during remap.
  93. llwarns_once << "Rigged to invalid joint name: "
  94. << skin->mJointNames[j] << llendl;
  95. mat[j].loadu(skin->mInvBindMatrix[j]);
  96. }
  97. }
  98. }
  99. else
  100. #endif
  101. {
  102. for (U32 j = 0; j < count; ++j)
  103. {
  104. LLJoint* joint = avatar->getJoint(skin->mJointKeys[j]);
  105. if (joint)
  106. {
  107. bind.loadu(skin->mInvBindMatrix[j]);
  108. world.loadu(joint->getWorldMatrix());
  109. mat[j].matMul(bind, world);
  110. }
  111. else
  112. {
  113. // This should not happen; in mesh upload, skinned rendering
  114. // should be disabled unless all joints are valid. In other
  115. // cases of skinned rendering, invalid joints should already
  116. // have been removed during remap.
  117. llwarns_once << "Rigged to invalid joint name: "
  118. << skin->mJointNames[j] << llendl;
  119. mat[j].loadu(skin->mInvBindMatrix[j]);
  120. }
  121. }
  122. }
  123. return count;
  124. }
  125. //static
  126. void LLSkinningUtil::checkSkinWeights(const LLVector4a* weights,
  127. U32 num_vertices,
  128. const LLMeshSkinInfo* skin)
  129. {
  130. #ifdef LL_DEBUG
  131. const S32 max_joints = skin->mJointKeys.size();
  132. for (U32 j = 0; j < num_vertices; ++j)
  133. {
  134. const F32* w = weights[j].getF32ptr();
  135. F32 wsum = 0.f;
  136. for (U32 k = 0; k < 4; ++k)
  137. {
  138. S32 i = llfloor(w[k]);
  139. llassert(i >= 0 && i < max_joints);
  140. wsum += w[k] - i;
  141. }
  142. llassert(wsum > 0.f);
  143. }
  144. #endif
  145. }
  146. //static
  147. void LLSkinningUtil::scrubSkinWeights(LLVector4a* weights, U32 num_vertices,
  148. const LLMeshSkinInfo* skin)
  149. {
  150. const S32 max_joints = skin->mJointNames.size() - 1;
  151. #if LL_OPENMP
  152. // NOTE: we cannot use OpenMP when called from the mesh repository which is
  153. // itself a (p)thread (pthread and OpenMP threads are incompatible)... HB
  154. if (is_main_thread())
  155. {
  156. # pragma omp parallel for
  157. // NOTE: VS2017 OpenMP requires a signed integer loop index... HB
  158. for (S32 j = 0; j < (S32)num_vertices; ++j)
  159. {
  160. F32* w = weights[j].getF32ptr();
  161. // Unrolled loop on w[k]
  162. S32 i = llfloor(w[0]);
  163. F32 f = w[0] - i;
  164. i = llclamp(i, 0, max_joints);
  165. w[0] = i + f;
  166. i = llfloor(w[1]);
  167. f = w[1] - i;
  168. i = llclamp(i, 0, max_joints);
  169. w[1] = i + f;
  170. i = llfloor(w[2]);
  171. f = w[2] - i;
  172. i = llclamp(i, 0, max_joints);
  173. w[2] = i + f;
  174. i = llfloor(w[3]);
  175. f = w[3] - i;
  176. i = llclamp(i, 0, max_joints);
  177. w[3] = i + f;
  178. }
  179. }
  180. else
  181. #endif
  182. {
  183. for (U32 j = 0; j < num_vertices; ++j)
  184. {
  185. F32* w = weights[j].getF32ptr();
  186. // Unrolled loop on w[k]
  187. S32 i = llfloor(w[0]);
  188. F32 f = w[0] - i;
  189. i = llclamp(i, 0, max_joints);
  190. w[0] = i + f;
  191. i = llfloor(w[1]);
  192. f = w[1] - i;
  193. i = llclamp(i, 0, max_joints);
  194. w[1] = i + f;
  195. i = llfloor(w[2]);
  196. f = w[2] - i;
  197. i = llclamp(i, 0, max_joints);
  198. w[2] = i + f;
  199. i = llfloor(w[3]);
  200. f = w[3] - i;
  201. i = llclamp(i, 0, max_joints);
  202. w[3] = i + f;
  203. }
  204. }
  205. checkSkinWeights(weights, num_vertices, skin);
  206. }
  207. //static
  208. void LLSkinningUtil::getPerVertexSkinMatrix(const LLVector4a& weights,
  209. const LLMatrix4a* mat,
  210. LLMatrix4a& final_mat,
  211. bool handle_bad_scale)
  212. {
  213. bool valid_weights = true;
  214. #if OPTIMIZED
  215. static const LLQuad m_zero = _mm_set_ps1(0.f);
  216. constexpr S16 LAST_JOINT = (S16)LL_MAX_JOINTS_PER_MESH_OBJECT - 1;
  217. static const __m128i max_idx = _mm_set_epi16(LAST_JOINT, LAST_JOINT,
  218. LAST_JOINT, LAST_JOINT,
  219. LAST_JOINT, LAST_JOINT,
  220. LAST_JOINT, LAST_JOINT);
  221. __m128i m_idx = _mm_cvttps_epi32((LLQuad)weights);
  222. LLVector4a wght = _mm_sub_ps((LLQuad)weights, _mm_cvtepi32_ps(m_idx));
  223. alignas(16) S32 idx[4];
  224. _mm_store_si128((__m128i*)idx, _mm_min_epi16(m_idx, max_idx));
  225. LLQuad m_scale = _mm_add_ps(wght, _mm_movehl_ps(wght, wght));
  226. m_scale = _mm_add_ss(m_scale, _mm_shuffle_ps(m_scale, m_scale, 1));
  227. m_scale = _mm_shuffle_ps(m_scale, m_scale, 0);
  228. if (handle_bad_scale && _mm_comigt_ss(m_scale, m_zero) != 1)
  229. {
  230. wght = LLVector4a(1.f, 0.f, 0.f, 0.f);
  231. valid_weights = false;
  232. }
  233. else
  234. {
  235. wght = _mm_div_ps(wght, m_scale);
  236. }
  237. #else
  238. const F32* fwghts = weights.getF32ptr();
  239. constexpr S32 LAST_JOINT = (S32)LL_MAX_JOINTS_PER_MESH_OBJECT - 1;
  240. LLVector4 wght;
  241. S32 idx[4];
  242. F32 scale = 0.f;
  243. for (U32 k = 0; k < 4; ++k)
  244. {
  245. F32 w = fwghts[k];
  246. F32 temp = floorf(w);
  247. idx[k] = llclamp((S32)temp, 0, LAST_JOINT);
  248. temp = w - temp;
  249. wght[k] = temp;
  250. scale += temp;
  251. }
  252. if (handle_bad_scale && scale <= 0.f)
  253. {
  254. wght = LLVector4(1.f, 0.f, 0.f, 0.f);
  255. valid_weights = false;
  256. }
  257. else
  258. {
  259. wght /= scale;
  260. }
  261. #endif
  262. final_mat.clear();
  263. LLMatrix4a src;
  264. for (U32 k = 0; k < 4; ++k)
  265. {
  266. src.setMul(mat[idx[k]], wght[k]);
  267. final_mat.add(src);
  268. }
  269. // SL-366 - with weight validation/cleanup code, it should no longer be
  270. // possible to hit the bad scale case.
  271. if (!valid_weights)
  272. {
  273. llwarns << "Invalid weights !" << llendl;
  274. llassert(false);
  275. }
  276. }
  277. void LLSkinningUtil::updateRiggingInfo(const LLMeshSkinInfo* skin,
  278. LLVOAvatar* avatar,
  279. LLVolumeFace& vol_face)
  280. {
  281. S32 num_verts = vol_face.mNumVertices;
  282. U32 max_count = (U32)skin->mJointKeys.size();
  283. if (num_verts <= 0 || !vol_face.mWeights || !max_count)
  284. {
  285. return;
  286. }
  287. LLJointRiggingInfoTab& riginfotab = vol_face.mJointRiggingInfoTab;
  288. if (riginfotab.size() || !riginfotab.needsUpdate())
  289. {
  290. return;
  291. }
  292. riginfotab.resize(LL_CHARACTER_MAX_ANIMATED_JOINTS);
  293. #if !LL_OPENMP
  294. static LLMatrix4a inv_bind;
  295. static LLVector4a pos_joint_space;
  296. static LLVector4 wght;
  297. static S32 idx[4];
  298. #else
  299. LLMatrix4a inv_bind;
  300. LLVector4a pos_joint_space;
  301. LLVector4 wght;
  302. S32 idx[4];
  303. assert_main_thread();
  304. # pragma omp parallel for private(inv_bind, pos_joint_space, wght, idx)
  305. #endif
  306. for (S32 i = 0; i < num_verts; ++i)
  307. {
  308. LLVector4a& pos = vol_face.mPositions[i];
  309. F32* weights = vol_face.mWeights[i].getF32ptr();
  310. F32 scale = 0.f;
  311. // *TODO: unpacking of weights should be optimized if possible.
  312. for (U32 k = 0; k < 4; ++k)
  313. {
  314. F32 w = weights[k];
  315. idx[k] = llclamp((S32)floorf(w), 0,
  316. (S32)LL_CHARACTER_MAX_ANIMATED_JOINTS - 1);
  317. wght[k] = w - idx[k];
  318. scale += wght[k];
  319. }
  320. if (scale > 0.f)
  321. {
  322. F32 scale_inv = 1.f / scale;
  323. wght[0] *= scale_inv;
  324. wght[1] *= scale_inv;
  325. wght[2] *= scale_inv;
  326. wght[3] *= scale_inv;
  327. }
  328. for (U32 k = 0; k < 4; ++k)
  329. {
  330. U32 joint_index = idx[k];
  331. if (joint_index >= max_count || wght[k] <= 0.f)
  332. {
  333. continue;
  334. }
  335. // Note: joint key 0 = "unnamed", 1 = "mScreen" (so we skip them)
  336. S32 i = (S32)skin->mJointKeys[joint_index] - 2;
  337. if (i >= 0 && i < (S32)LL_CHARACTER_MAX_ANIMATED_JOINTS)
  338. {
  339. riginfotab[i].setIsRiggedTo();
  340. inv_bind.loadu(skin->mInvBindShapeMatrix[joint_index]);
  341. inv_bind.affineTransform(pos, pos_joint_space);
  342. pos_joint_space.mul(wght[k]);
  343. LLVector4a* extents = riginfotab[i].getRiggedExtents();
  344. update_min_max(extents[0], extents[1], pos_joint_space);
  345. }
  346. }
  347. }
  348. riginfotab.setNeedsUpdate(false);
  349. }
  350. // This is used for extracting rotation from a bind shape matrix that already
  351. // has scales baked in
  352. LLQuaternion LLSkinningUtil::getUnscaledQuaternion(const LLMatrix4& mat4)
  353. {
  354. LLMatrix3 bind_mat = mat4.getMat3();
  355. #if LL_GNUC && GCC_VERSION >= 80000
  356. # pragma GCC unroll 3
  357. #elif LL_CLANG
  358. # pragma clang loop unroll(full)
  359. #endif
  360. for (U32 i = 0; i < 3; ++i)
  361. {
  362. auto& coords = bind_mat.mMatrix[i];
  363. F32 len = coords[0] * coords[0];
  364. len += coords[1] * coords[1];
  365. len += coords[2] * coords[2];
  366. if (len > 0.f)
  367. {
  368. F32 inv_len = 1.f / sqrtf(len);
  369. coords[0] *= inv_len;
  370. coords[1] *= inv_len;
  371. coords[2] *= inv_len;
  372. }
  373. }
  374. bind_mat.invert();
  375. LLQuaternion bind_rot = bind_mat.quaternion();
  376. bind_rot.normalize();
  377. return bind_rot;
  378. }