├── .gitignore ├── README.md ├── bindCorSkinCluster.mel ├── corSkinCluster.cpp ├── corSkinCluster.sdf ├── corSkinCluster.sln ├── corSkinCluster.suo ├── corSkinCluster.v11.suo ├── corSkinCluster.vcxproj └── corSkinDef.cl /.gitignore: -------------------------------------------------------------------------------- 1 | x64/* -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # CS6260-proj 2 | Implementation of Le and Hodgins' Realtime Skinning with Optimized Centers of Rotation in Maya 2017 3 | 4 | This project consists of a Maya Deformer plugin of Le and Hodgins' 5 | algorithm for Realtime Skinning using Optimized Centers of Rotation 6 | for Maya 2017. The deformer is in what I would term an alpha state 7 | and is essentially a prototype for further development. In its current 8 | implementation it consists of a serial CPU and a GPU implementation 9 | of the deformation algorithm, as well as a serial implementation of the 10 | precomputation algorithm, in its naive form. 11 | 12 | Reference: 13 | https://www.disneyresearch.com/publication/skinning-with-optimized-cors/ 14 | 15 | Le, B. H., & Hodgins, J. K. (2016). Real-time skeletal skinning with 16 | optimized centers of rotation. ACM Transactions on Graphics (TOG), 35(4), 37. 17 | 18 | TODO: 19 | Implementation of precomputation using optimizations specified in 20 | Le and Hodgins' paper. 21 | 22 | Implementation of a Qt UI to integrate the deformer into the Maya interface. 23 | 24 | Usage: 25 | The plugin has been tested and compiles under Microsoft Visual C++ 2012, 26 | update 4, on Microsoft Windows operating systems. No testing has been 27 | performed on Linux or OSX; however, no libraries external to OpenMaya have 28 | been used, so given proper configurations for projects, the plugin should 29 | compile on those platforms without issue. 
30 | 31 | To apply the deformer, select the bones, then the mesh and run the bind script. 32 | After weighting your mesh, uncheck valid precomputation and then play back to 33 | force precomputation evaluation. 34 | 35 | Disclaimer: 36 | This plugin is not production ready at this time. It is a prototype. 37 | No warranty is expressed or implied; use at your own risk. 38 | 39 | Licensing: 40 | This code is licensed under the Creative Commons Attribution-NonCommercial- 41 | ShareAlike 4.0 license, details of which can be found here: 42 | 43 | https://creativecommons.org/licenses/by-nc-sa/4.0/legalcode 44 | 45 | Basically, don't sell it, tweak it to your heart's content and acknowledge 46 | the use of what you found here. Thanks for playing fair. 47 | -------------------------------------------------------------------------------- /bindCorSkinCluster.mel: -------------------------------------------------------------------------------- 1 | loadPlugin corSkinCluster; 2 | 3 | proc connectJointCluster( string $j, int $i, string $c ) 4 | { 5 | if ( !objExists( $j+".lockInfluenceWeights" ) ) 6 | { 7 | select -r $j; 8 | addAttr -sn "liw" -ln "lockInfluenceWeights" -at "bool"; 9 | } 10 | connectAttr ($j+".liw") ($c+".lockWeights["+$i+"]"); 11 | connectAttr ($j+".worldMatrix[0]") ($c+".matrix["+$i+"]"); 12 | connectAttr ($j+".objectColorRGB") ($c+".influenceColor["+$i+"]"); 13 | float $m[] = `getAttr ($j+".wim")`; 14 | setAttr ($c+".bindPreMatrix["+$i+"]") -type "matrix" $m[0] $m[1] $m[2] $m[3] $m[4] $m[5] $m[6] $m[7] $m[8] $m[9] $m[10] $m[11] $m[12] $m[13] $m[14] $m[15]; 15 | } 16 | 17 | string $selected[] = `ls -sl`; 18 | 19 | int $len = `size $selected`; 20 | 21 | string $geo = $selected[$len-1]; 22 | 23 | select $geo; 24 | 25 | string $skin_cluster[] = `deformer -type "corSkinCluster"`; 26 | 27 | string $attr = $skin_cluster[0] + ".useComponentsMatrix"; 28 | 29 | setAttr $attr 1; 30 | 31 | for ($i = 0; $i < $len-1; $i++){ 32 | // print $selected[$i]; 33 | 
connectJointCluster($selected[$i], $i, $skin_cluster[0]); 34 | } 35 | 36 | int $temp = $len - 1; 37 | 38 | skinCluster -e -maximumInfluences $temp $skin_cluster; -------------------------------------------------------------------------------- /corSkinCluster.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | #include 17 | #include 18 | #include 19 | 20 | #include 21 | #include 22 | #include 23 | #include 24 | #include 25 | #include 26 | #include 27 | #include 28 | #include 29 | #include 30 | 31 | #include 32 | 33 | //my additions 34 | #define NAME "corSkinCluster" 35 | #define DEFAULT_OMEGA 0.1 36 | #include 37 | 38 | class corSkinCluster : public MPxSkinCluster 39 | { 40 | public: 41 | static void* creator(); 42 | 43 | static MStatus initialize(); 44 | 45 | virtual MStatus deform(MDataBlock &block, 46 | MItGeometry &iter, 47 | const MMatrix &mat, 48 | unsigned int multiIndex); 49 | 50 | virtual MStatus precomp(MDataBlock); 51 | 52 | static const MTypeId id; 53 | static const int _profileCategory; 54 | static MObject cor_valid; 55 | static MObject cor_ar; 56 | 57 | private: 58 | static MStatus handle_to_doublearray(MArrayDataHandle&, MDoubleArray&); 59 | 60 | static MStatus similarity(MDoubleArray&, MDoubleArray&, int, double&); 61 | 62 | static MStatus qlerp(MQuaternion&, MQuaternion&, MQuaternion&); 63 | 64 | static const double omega; 65 | }; 66 | 67 | //class parallel_corSkinCluster : public corSkinCluster 68 | //{ 69 | //public: 70 | // virtual MStatus deform(MDataBlock &block, 71 | // MItGeometry &iter, 72 | // const MMatrix &mat, 73 | // unsigned int multiIndex); 74 | //}; 75 | 76 | // const MTypeId parallel_corSkinCluster::id (0x0122573); 77 | 78 | const MTypeId corSkinCluster::id( 0x22573 ); 79 | 80 | const double 
corSkinCluster::omega(DEFAULT_OMEGA); 81 | 82 | const int corSkinCluster::_profileCategory(MProfiler::addCategory(NAME)); 83 | 84 | MObject corSkinCluster::cor_valid; 85 | 86 | MObject corSkinCluster::cor_ar; 87 | 88 | void* corSkinCluster::creator() 89 | { 90 | void *node = new corSkinCluster(); 91 | return node; 92 | } 93 | 94 | MStatus corSkinCluster::initialize() 95 | { 96 | MGlobal::startErrorLogging("C:\\\\Users\\iam\\Desktop\\corSkinCluster_init_log"); 97 | 98 | MStatus status = MStatus::kSuccess; 99 | 100 | MFnNumericAttribute numeric_fn; 101 | cor_valid = numeric_fn.create("Valid_Precomputation", "valid", MFnNumericData::kBoolean, 0.0, &status); 102 | if (status != MS::kSuccess){ 103 | MGlobal::doErrorLogEntry("corSkinCluster: error setting up valid attr.\n"); 104 | return status; 105 | } 106 | numeric_fn.setStorable(true); 107 | addAttribute(cor_valid); 108 | 109 | MPointArray temp_ar; 110 | MFnPointArrayData fn; 111 | MObject default_ar_obj = fn.create(temp_ar); 112 | 113 | MFnTypedAttribute typed_fn; 114 | cor_ar = typed_fn.create("Centers_of_Rotation", "cor", MFnData::Type::kPointArray, default_ar_obj, &status); 115 | if (status != MS::kSuccess){ 116 | MGlobal::doErrorLogEntry("corSkinCluster: error setting up CoR point array attr.\n"); 117 | return status; 118 | } 119 | numeric_fn.setStorable(true); 120 | addAttribute(cor_ar); 121 | 122 | MGlobal::closeErrorLog(); 123 | 124 | return MStatus::kSuccess; 125 | } 126 | 127 | MStatus corSkinCluster::handle_to_doublearray(MArrayDataHandle &handle, MDoubleArray &vec){ 128 | int count = 0; 129 | int max = handle.elementCount(); 130 | double val = 0.0; 131 | for (count = 0; count < max; count++){ 132 | if (handle.jumpToElement(count) != MStatus::kSuccess){ 133 | vec[count] = 0.0; 134 | }else{ 135 | vec[count] = handle.inputValue().asDouble(); 136 | } 137 | } 138 | return MStatus::kSuccess; 139 | } 140 | 141 | MStatus corSkinCluster::similarity(MDoubleArray &weight_p, 142 | MDoubleArray &weight_v, 143 | int 
number_of_transforms, 144 | double &result) 145 | { 146 | int j = 0; 147 | int k = 0; 148 | result = 0; 149 | double temp = 0; 150 | 151 | 152 | for (j = 0; j < number_of_transforms; j++){ 153 | for (k = 0; k < number_of_transforms; k++){ 154 | if (j != k){ 155 | auto wpj = weight_p[j]; 156 | auto wpk = weight_p[k]; 157 | auto wvj = weight_v[j]; 158 | auto wvk = weight_v[k]; 159 | temp = wpj*wpk*wvj*wvk; 160 | temp *= exp(-(pow(wpj*wvk-wpk*wvj,2.0)/pow(omega,2.0))); 161 | result += temp; 162 | } 163 | } // end k loop 164 | } // end j loop 165 | 166 | return MStatus::kSuccess; 167 | } 168 | 169 | MStatus corSkinCluster::qlerp(MQuaternion& q_a, MQuaternion& q_b, MQuaternion& result){ 170 | MStatus stat; 171 | double dot_product; 172 | double q_a_comp[4]; 173 | double q_b_comp[4]; 174 | stat = q_a.get(q_a_comp); 175 | if (stat != MStatus::kSuccess){ 176 | std::cerr << "corSkinCluster::qlerp, unable to extract q_a" << std::endl; 177 | return MStatus::kFailure; 178 | } 179 | stat = q_b.get(q_b_comp); 180 | if (stat != MStatus::kSuccess){ 181 | std::cerr << "corSkinCluster::qlerp, unable to extract q_b" << std::endl; 182 | return MStatus::kFailure; 183 | } 184 | dot_product = 0.0; 185 | for (int i = 0; i < 4; i++){ 186 | dot_product += q_a_comp[i]*q_b_comp[i]; 187 | } 188 | if (dot_product >= 0){ 189 | result = q_a + q_b; 190 | }else{ 191 | result = q_a - q_b; 192 | } 193 | return MStatus::kSuccess; 194 | } 195 | 196 | 197 | MStatus corSkinCluster::precomp(MDataBlock block) 198 | { 199 | // MGlobal::startErrorLogging("C:\\\\Users\\iam\\Desktop\\corSkinCluster_precomp_log"); 200 | MStatus stat; 201 | 202 | // load current cor_ar and clear it 203 | MDataHandle cor_arHandle = block.inputValue(cor_ar); 204 | MFnData::Type test = cor_arHandle.type(); 205 | MObject cor_arData = cor_arHandle.data(); 206 | if (cor_arData.hasFn(MFn::Type::kPointArrayData)){ 207 | }else{ 208 | return MS::kFailure; 209 | } 210 | MFnPointArrayData cor_arFn(cor_arData, &stat); 211 | if 
(stat.error()){ 212 | stat.perror("corSkinCluster::precomp, unable to attached MFnPtAr to cor\n"); 213 | return stat; 214 | } 215 | MPointArray cor_PA = cor_arFn.array(); 216 | cor_PA.clear(); 217 | 218 | // get mesh iterator 219 | MArrayDataHandle inputHandle = block.inputArrayValue(input); 220 | stat = inputHandle.jumpToArrayElement(0); 221 | if (stat != MS::kSuccess){ 222 | return stat; 223 | } 224 | MDataHandle cor_IOGeoHandle = inputHandle.inputValue().child(inputGeom); 225 | MObject cor_IOGeoObj = cor_IOGeoHandle.asMesh(); 226 | MItMeshPolygon T(cor_IOGeoObj, &stat); 227 | if (stat.error()){ 228 | stat.perror("corSkinCluster::precomp, unable to get mesh iterator\n"); 229 | return stat; 230 | } 231 | 232 | // get vertex iterator 233 | MItGeometry v_i(cor_IOGeoHandle, false, &stat); 234 | if (stat.error()){ 235 | stat.perror("corSkinCluster::precomp, unable to get vertex iterator\n"); 236 | return stat; 237 | } 238 | 239 | // weights 240 | MArrayDataHandle w_i = block.inputArrayValue(weightList); 241 | if ( w_i.elementCount() == 0 ) { 242 | // no weights - nothing to do 243 | return MStatus::kFailure; 244 | } 245 | 246 | // bone transforms 247 | MArrayDataHandle transformHandle = block.inputArrayValue(matrix); 248 | int num_transforms = transformHandle.elementCount(); 249 | 250 | //calculate per triange area 251 | MDoubleArray tri_area; 252 | //calculate average vertex position 253 | MPointArray tri_avg_pos; 254 | //calculate average vertex weights 255 | std::vector tri_avg_weights; 256 | int num_tris; 257 | int idx; 258 | MPoint alpha,beta,gamma; 259 | MPointArray tri_verts; 260 | MIntArray tri_idx; 261 | MVector beta_alpha; 262 | MVector gamma_alpha; 263 | MDoubleArray tri_avg_weight; 264 | 265 | // pre calc all the areas, average weights and positions 266 | while(!(T.isDone())){ 267 | // each hit on the iterator returns a face 268 | // that face is made of multiple triangles 269 | // how many? 
270 | stat = T.numTriangles(num_tris); 271 | if (stat == MStatus::kSuccess){ 272 | // for each triangle 273 | for (idx = 0; idx < num_tris; idx++){ 274 | // get the verts 275 | stat = T.getTriangle(idx, tri_verts, tri_idx, MSpace::kObject); // switched this to world, from kObject 276 | if (stat.error()){ 277 | stat.perror("corSkinCluster::precomp, unable to get triangle from iterator\n"); 278 | return stat; 279 | } 280 | alpha = tri_verts[0]; 281 | beta = tri_verts[1]; 282 | gamma = tri_verts[2]; 283 | // calc and store area of triangle 284 | beta_alpha = MVector(beta-alpha); 285 | gamma_alpha = MVector(gamma-alpha); 286 | stat = tri_area.append(((beta_alpha ^ gamma_alpha).length())*0.5); 287 | if (stat.error()){ 288 | stat.perror("corskinCluster::precomp, unable to append area\n"); 289 | return stat; 290 | } 291 | 292 | // calc and store average vertex position 293 | stat = tri_avg_pos.append((alpha+beta+gamma)/3); 294 | if (stat.error()){ 295 | stat.perror("corskinCluster::precomp, unable to apped average position\n"); 296 | return stat; 297 | } 298 | 299 | // calc and store avg weights 300 | 301 | // get alpha weights 302 | stat = w_i.jumpToElement(tri_idx[0]); 303 | if (stat.error()){ 304 | stat.perror("corSkinCluster::precomp, unable to get weights for alpha.\n"); 305 | return stat; 306 | } 307 | MArrayDataHandle alpha_weightsHandle = w_i.inputValue().child(weights); 308 | 309 | // get beta weights 310 | stat = w_i.jumpToElement(tri_idx[1]); 311 | if (stat.error()){ 312 | stat.perror("corSkinCluster::precomp, unable to get weights for beta.\n"); 313 | return stat; 314 | } 315 | MArrayDataHandle beta_weightsHandle = w_i.inputValue().child(weights); 316 | 317 | // get gamma weights 318 | stat = w_i.jumpToElement(tri_idx[2]); 319 | if (stat.error()){ 320 | stat.perror("corSkinCluster::precomp, unable to get weights for gamma.\n"); 321 | return stat; 322 | } 323 | MArrayDataHandle gamma_weightsHandle = w_i.inputValue().child(weights); 324 | 325 | double a, b, c; 
326 | tri_avg_weight.clear(); 327 | for (int i = 0; i < num_transforms; i++){ 328 | // average and store weights 329 | // get ith weight for alpha 330 | stat = alpha_weightsHandle.jumpToElement(i); 331 | if (stat == MStatus::kSuccess){ 332 | a = alpha_weightsHandle.inputValue().asDouble(); 333 | }else{ 334 | a = 0.0; 335 | } 336 | // get ith weight for beta 337 | stat = beta_weightsHandle.jumpToElement(i); 338 | if (stat == MStatus::kSuccess){ 339 | b = beta_weightsHandle.inputValue().asDouble(); 340 | }else{ 341 | b = 0.0; 342 | } 343 | // get ith weight for gamma 344 | stat = gamma_weightsHandle.jumpToElement(i); 345 | if (stat == MStatus::kSuccess){ 346 | c = gamma_weightsHandle.inputValue().asDouble(); 347 | }else{ 348 | c = 0.0; 349 | } 350 | stat = tri_avg_weight.append((a + b + c)/3.0); 351 | if (stat.error()){ 352 | stat.perror("corSkinCluster::precomp, unable to add average weight to array.\n"); 353 | return stat; 354 | } 355 | } // end for 356 | tri_avg_weights.push_back(tri_avg_weight); 357 | } // end for 358 | }else{ // if num triangles fail 359 | stat.perror("corSkinCluster::precomp, face has no triangles?\n"); 360 | return stat; 361 | } //end else 362 | T.next(); // next face 363 | } // end while, weights averaged, vertex positions averaged 364 | 365 | w_i.jumpToElement(0); 366 | v_i.reset(); 367 | 368 | MPoint cor; 369 | double s, lower; 370 | MPoint upper; 371 | num_tris = tri_area.length(); 372 | MDoubleArray vertex_weights; 373 | 374 | // for each point in the iterator 375 | while(!(v_i.isDone())){ 376 | // calculate the COR 377 | 378 | // get the vertex weights in a double array 379 | vertex_weights.clear(); 380 | MArrayDataHandle vertex_weights_handle = w_i.inputValue().child(weights); 381 | for (int i = 0; i < num_transforms; i++){ 382 | stat = vertex_weights_handle.jumpToElement(i); 383 | if (stat.error()){ 384 | vertex_weights.append(0.0); 385 | }else{ 386 | vertex_weights.append(vertex_weights_handle.inputValue().asDouble()); 387 | } 388 | } 
389 | 390 | s = 0.0; 391 | upper = MPoint(0.0,0.0,0.0); 392 | lower = 0.0; 393 | // for each triangle 394 | for (int i = 0; i < num_tris; i++){ 395 | stat = similarity(vertex_weights, tri_avg_weights[i], num_transforms, s); 396 | upper += tri_avg_pos[i]*s*tri_area[i]; 397 | lower += s*tri_area[i]; 398 | } 399 | if (lower > 0){ 400 | cor = upper/lower; 401 | }else{ 402 | cor = MPoint(0.0,0.0,0.0); 403 | } 404 | cor_PA.append(cor); 405 | 406 | // iterate the loop 407 | v_i.next(); 408 | w_i.next(); 409 | } // end while 410 | 411 | // put the computed point array back on the attribute 412 | cor_arFn.set(cor_PA); 413 | 414 | MGlobal::closeErrorLog(); 415 | 416 | return MStatus::kSuccess; 417 | } 418 | 419 | MStatus corSkinCluster::deform( MDataBlock& block, 420 | MItGeometry& iter, 421 | const MMatrix& /*m*/, 422 | unsigned int multiIndex) 423 | // 424 | // Method: deform 425 | // 426 | // Description: Deforms the point with a simple smooth skinning algorithm 427 | // 428 | // Arguments: 429 | // block : the datablock of the node 430 | // iter : an iterator for the geometry to be deformed 431 | // m : matrix to transform the point into world space 432 | // multiIndex : the index of the geometry that we are deforming 433 | // 434 | // 435 | { 436 | MGlobal::startErrorLogging("C:\\\\Users\\iam\\Desktop\\corSkinCluster_deform_log"); 437 | 438 | MStatus returnStatus; 439 | 440 | // get the influence transforms 441 | // 442 | MArrayDataHandle transformsHandle = block.inputArrayValue( matrix ); 443 | int numTransforms = transformsHandle.elementCount(); 444 | if ( numTransforms == 0 ) { 445 | return MS::kSuccess; 446 | } 447 | 448 | int precomp_event = MProfiler::eventBegin(corSkinCluster::_profileCategory, MProfiler::kColorG_L1, "corSkinCluster: precomp"); 449 | 450 | // insert precomp test here 451 | MDataHandle validHandle = block.inputValue(cor_valid, &returnStatus); 452 | if (returnStatus != MS::kSuccess){ 453 | MGlobal::doErrorLogEntry("corSkinCluster::deform, unable to 
get valid handle off datablock\n"); 454 | return returnStatus; 455 | } 456 | if (!validHandle.asBool()){ 457 | returnStatus = precomp(block); 458 | if (returnStatus != MS::kSuccess){ 459 | MGlobal::doErrorLogEntry("corSkinCluster::deform, precomp returned error"); 460 | return returnStatus; 461 | } 462 | validHandle.setBool(true); 463 | } 464 | 465 | // get the CORs 466 | MDataHandle cor_arHandle = block.inputValue(cor_ar, &returnStatus); 467 | if (returnStatus != MS::kSuccess){ 468 | MGlobal::doErrorLogEntry("corSkinCluster::deform, unable to get cor_ar handle off datablock\n"); 469 | return returnStatus; 470 | } 471 | MObject cor_arData = cor_arHandle.data(); 472 | MFnPointArrayData cor_arFn(cor_arData, &returnStatus); 473 | if (returnStatus != MS::kSuccess){ 474 | MGlobal::doErrorLogEntry("corSkinCluster::deform, unable to attach function set for cor ar attr obj\n"); 475 | return returnStatus; 476 | } 477 | MPointArray cor_PA = cor_arFn.array(); 478 | 479 | MProfiler::eventEnd(precomp_event); 480 | 481 | MMatrixArray transforms; 482 | for ( int i=0; i 0 ) { 489 | for ( int i=0; i q_j; 497 | for (int j=0; j 0) localWorkSize = workGroupSize; 764 | size_t globalWorkSize = (localWorkSize - fNumElements % localWorkSize) + fNumElements; // global work size must be a multiple of localWorkSize 765 | // set up our input events. The input event could be NULL, in that case we need to pass 766 | // slightly different parameters into clEnqueueNDRangeKernel 767 | unsigned int numInputEvents = 0; 768 | if (inputEvent.get()) 769 | { 770 | numInputEvents = 1; 771 | } 772 | // run the kernel 773 | err = clEnqueueNDRangeKernel( 774 | MOpenCLInfo::getMayaDefaultOpenCLCommandQueue(), 775 | fKernel.get(), 776 | 1, 777 | NULL, 778 | &globalWorkSize, 779 | &localWorkSize, 780 | numInputEvents, 781 | numInputEvents ? 
inputEvent.getReadOnlyRef() : 0, 782 | outputEvent.getReferenceForAssignment() ); 783 | MOpenCLInfo::checkCLErrorStatus(err); 784 | return MPxGPUDeformer::kDeformerSuccess; 785 | } 786 | 787 | void corSkinGPUDeformer::terminate() 788 | { 789 | // MHWRender::MRenderer::theRenderer()->releaseGPUMemory(fNumElements*sizeof(float)); 790 | fCLWeights.reset(); 791 | fCoR.reset(); 792 | fTM.reset(); 793 | fQ.reset(); 794 | MOpenCLInfo::releaseOpenCLKernel(fKernel); 795 | fKernel.reset(); 796 | } 797 | 798 | void corSkinGPUDeformer::extractWeightArray(MDataBlock& block, const MEvaluationNode& evaluationNode, const MPlug& plug) 799 | { 800 | // if we've already got a weight array and it is not changing then don't bother copying it 801 | // to the GPU again 802 | MStatus status; 803 | // Note that right now dirtyPlugExists takes an attribute, so if any element in the multi is changing we think it is dirty... 804 | // To avoid false dirty issues here you'd need to only use one element of the MPxDeformerNode::input multi attribute for each 805 | // corSkinCluster node. 
806 | if ((!fCLWeights.isNull() && !evaluationNode.dirtyPlugExists(corSkinCluster::weightList, &status)) || !status) 807 | { 808 | return; 809 | } 810 | 811 | // what do we need to do 812 | // get the weight list 813 | // for each element of the weight list, push each of the weights 814 | 815 | std::vector temp; 816 | 817 | MArrayDataHandle transformsHandle = block.inputArrayValue( corSkinCluster::matrix ); 818 | int numTransforms = transformsHandle.elementCount(); 819 | if ( numTransforms == 0 ) return; 820 | 821 | MStatus stat; 822 | MArrayDataHandle weightListHandle = block.inputArrayValue(corSkinCluster::weightList, &stat); 823 | if (stat.error()) return; 824 | for (unsigned int i = 0; i < fNumElements; i++){ 825 | MArrayDataHandle weightsHandle = weightListHandle.inputValue().child(corSkinCluster::weights); 826 | for(int j = 0; j < numTransforms; j ++){ 827 | stat = weightsHandle.jumpToElement(j); 828 | if (stat.error()){ 829 | temp.push_back(0.0f); 830 | }else{ 831 | double asDbl = weightsHandle.inputValue().asDouble(); 832 | float asFlt = weightsHandle.inputValue().asFloat(); 833 | temp.push_back((float)asDbl); 834 | } 835 | } 836 | weightListHandle.next(); 837 | } 838 | 839 | // weights all in temp 840 | 841 | // Two possibilities, we could be updating an existing OpenCL buffer or allocating a new one. 842 | cl_int err = CL_SUCCESS; 843 | if (!fCLWeights.get()) 844 | { 845 | // MHWRender::MRenderer::theRenderer()->holdGPUMemory(fNumElements*numTransforms*sizeof(float)); 846 | fCLWeights.attach(clCreateBuffer(MOpenCLInfo::getOpenCLContext(), CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, fNumElements * numTransforms * sizeof(float), (void*)&temp[0], &err)); 847 | } 848 | else 849 | { 850 | // I use a blocking write here, non-blocking could be faster... need to manage the lifetime of temp, and have the kernel wait until the write finishes before running 851 | // I'm also assuming that the weight buffer is not growing. 
852 | err = clEnqueueWriteBuffer(MOpenCLInfo::getMayaDefaultOpenCLCommandQueue(), fCLWeights.get(), CL_TRUE, 0, fNumElements * numTransforms * sizeof(float), (void*)&temp[0], 0, NULL, NULL); 853 | } 854 | } 855 | 856 | MStatus corSkinGPUDeformer::extractCoR(MDataBlock& block, const MEvaluationNode& evaluationNode, const MPlug& plug) 857 | { 858 | // get CoRs 859 | MStatus stat; 860 | 861 | // check for existing CoR in buffer 862 | // did the weight list change? if so, we need to regrab the CoRs 863 | if ((fCoR.get() && !evaluationNode.dirtyPlugExists(corSkinCluster::weightList, &stat)) || !stat){ 864 | return stat; 865 | } 866 | 867 | // if they're not, pull them off 868 | MDataHandle cor_arHandle = block.inputValue(corSkinCluster::cor_ar, &stat); 869 | if (stat!= MS::kSuccess){ 870 | return stat; 871 | } 872 | MObject cor_arData = cor_arHandle.data(); 873 | MFnPointArrayData cor_arFn(cor_arData, &stat); 874 | if (stat!= MS::kSuccess){ 875 | return stat; 876 | } 877 | MPointArray cor_PA = cor_arFn.array(); 878 | 879 | std::vectortemp; 880 | 881 | for (unsigned int i = 0; i < cor_PA.length(); i++){ 882 | temp.push_back((float)cor_PA[i].x); 883 | temp.push_back((float)cor_PA[i].y); 884 | temp.push_back((float)cor_PA[i].z); 885 | temp.push_back((float)cor_PA[i].w); 886 | } 887 | 888 | // all CoR in temp 889 | 890 | cl_int err = CL_SUCCESS; 891 | if (!fCoR.get()) 892 | { 893 | // MHWRender::MRenderer::theRenderer()->holdGPUMemory(fNumElements * 4 * sizeof(float)); 894 | fCoR.attach(clCreateBuffer(MOpenCLInfo::getOpenCLContext(), CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, fNumElements * 4 * sizeof(float), (void*)&temp[0], &err)); 895 | } 896 | else 897 | { 898 | // I use a blocking write here, non-blocking could be faster... need to manage the lifetime of temp, and have the kernel wait until the write finishes before running 899 | // I'm also assuming that the weight buffer is not growing. 
900 | err = clEnqueueWriteBuffer(MOpenCLInfo::getMayaDefaultOpenCLCommandQueue(), fCoR.get(), CL_TRUE, 0, fNumElements * 4 * sizeof(float), (void*)&temp[0], 0, NULL, NULL); 901 | } 902 | 903 | return MStatus::kSuccess; 904 | } 905 | 906 | void corSkinGPUDeformer::extractTMnQ(MDataBlock& block, const MEvaluationNode& evaluationNode, const MPlug& plug) 907 | { 908 | // I pass the offset matrix to OpenCL using a buffer as well. I also send down the inverse matrix to avoid calculating it many times on the GPU 909 | MStatus status; 910 | if ((fTM.get() && !evaluationNode.dirtyPlugExists(corSkinCluster::matrix , &status)) || !status) 911 | { 912 | return; 913 | } 914 | 915 | MArrayDataHandle transformsHandle = block.inputArrayValue( corSkinCluster::matrix ); 916 | int numTransforms = transformsHandle.elementCount(); 917 | if ( numTransforms == 0 ) { 918 | return; 919 | } 920 | 921 | MMatrixArray transforms; 922 | for ( int i=0; i 0 ) { 929 | for ( int i=0; i conv_transforms; 958 | for ( int i = 0; i < numTransforms; i++){ 959 | for ( int r = 0; r < 4; r++){ 960 | for ( int c = 0; c < 4; c++){ 961 | conv_transforms.push_back((float)transforms_transpose[i](r,c)); 962 | } 963 | } 964 | } 965 | 966 | // now we prep that group for openCL 967 | cl_int err = CL_SUCCESS; 968 | if (!fTM.get()) 969 | { 970 | fTM.attach(clCreateBuffer(MOpenCLInfo::getOpenCLContext(), CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, conv_transforms.size() * sizeof(float), (void*)&conv_transforms[0], &err)); 971 | } 972 | else 973 | { 974 | // I use a blocking write here, non-blocking could be faster... 
need to manage the lifetime of temp, and have the kernel wait until the write finishes before running 975 | err = clEnqueueWriteBuffer(MOpenCLInfo::getMayaDefaultOpenCLCommandQueue(), fTM.get(), CL_TRUE, 0, conv_transforms.size() * sizeof(float), (void*)&conv_transforms[0], 0, NULL, NULL); 976 | } 977 | 978 | // now, as that we already have the transforms here, 979 | // let's get the quaternions while we're at it 980 | 981 | // get the unit quaternions for the rotations of the matricies 982 | std::vector q_j; 983 | for (int j=0; jconv_q; 993 | 994 | for (int j = 0; j < numTransforms; ++j){ 995 | conv_q.push_back((float)q_j[j].x); 996 | conv_q.push_back((float)q_j[j].y); 997 | conv_q.push_back((float)q_j[j].z); 998 | conv_q.push_back((float)q_j[j].w); 999 | } 1000 | 1001 | // pipe them to the device 1002 | err = CL_SUCCESS; 1003 | if (!fQ.get()) 1004 | { 1005 | fQ.attach(clCreateBuffer(MOpenCLInfo::getOpenCLContext(), CL_MEM_COPY_HOST_PTR | CL_MEM_READ_ONLY, conv_q.size() * sizeof(float), (void*)&conv_q[0], &err)); 1006 | } 1007 | else 1008 | { 1009 | // I use a blocking write here, non-blocking could be faster... 
need to manage the lifetime of temp, and have the kernel wait until the write finishes before running 1010 | err = clEnqueueWriteBuffer(MOpenCLInfo::getMayaDefaultOpenCLCommandQueue(), fQ.get(), CL_TRUE, 0, conv_q.size() * sizeof(float), (void*)&conv_q[0], 0, NULL, NULL); 1011 | } 1012 | 1013 | fNumTransforms = (unsigned int)numTransforms; 1014 | } 1015 | 1016 | 1017 | // standard initialization procedures 1018 | // 1019 | 1020 | MStatus initializePlugin( MObject obj ) 1021 | { 1022 | MStatus result; 1023 | 1024 | MFnPlugin plugin( obj, "Benjamin Slack", "0.1", "Any"); 1025 | result = plugin.registerNode( 1026 | "corSkinCluster" , 1027 | corSkinCluster::id , 1028 | &corSkinCluster::creator , 1029 | &corSkinCluster::initialize , 1030 | MPxNode::kSkinCluster 1031 | ); 1032 | 1033 | MString nodeClassName("corSkinCluster"); 1034 | MString registrantId("corSkinGPUoverride"); 1035 | MStatus stat; 1036 | stat = MGPUDeformerRegistry::registerGPUDeformerCreator( 1037 | nodeClassName, 1038 | registrantId, 1039 | corSkinGPUDeformer::getGPUDeformerInfo()); 1040 | 1041 | stat = MGPUDeformerRegistry::addConditionalAttribute( 1042 | nodeClassName, 1043 | registrantId, 1044 | corSkinCluster::cor_valid); 1045 | 1046 | corSkinGPUDeformer::plugin_path = plugin.loadPath(); 1047 | 1048 | return result; 1049 | } 1050 | 1051 | MStatus uninitializePlugin( MObject obj ) 1052 | { 1053 | MStatus result; 1054 | 1055 | MFnPlugin plugin( obj ); 1056 | result = plugin.deregisterNode( corSkinCluster::id ); 1057 | 1058 | MString nodeClassName("corSkinCluster"); 1059 | MString registrantId("corSkinGPUoverride"); 1060 | MGPUDeformerRegistry::deregisterGPUDeformerCreator( 1061 | nodeClassName, 1062 | registrantId); 1063 | 1064 | return result; 1065 | } 1066 | 1067 | // 1068 | //MStatus uninitializePlugin( MObject obj) 1069 | //{ 1070 | // MStatus result; 1071 | // MFnPlugin plugin( obj ); 1072 | // result = plugin.deregisterNode( offset::id ); 1073 | // 1074 | // MString nodeClassName("offset"); 1075 
| // MString registrantId("mayaPluginExample"); 1076 | // MGPUDeformerRegistry::deregisterGPUDeformerCreator( 1077 | // nodeClassName, 1078 | // registrantId); 1079 | // 1080 | // return result; 1081 | //} -------------------------------------------------------------------------------- /corSkinCluster.sdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/baslack/CS6260-proj/c8d58476fef616d949841d57c583fc229ea09b49/corSkinCluster.sdf -------------------------------------------------------------------------------- /corSkinCluster.sln: -------------------------------------------------------------------------------- 1 | Microsoft Visual Studio Solution File, Format Version 12.00 2 | # Visual Studio 2012 3 | Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "corSkinCluster", "corSkinCluster.vcxproj", "{681412B0-F197-4A2F-9263-DEA2E8690146}" 4 | EndProject 5 | Global 6 | GlobalSection(SolutionConfigurationPlatforms) = preSolution 7 | Debug|x64 = Debug|x64 8 | Release|x64 = Release|x64 9 | ReleaseDebug|x64 = ReleaseDebug|x64 10 | EndGlobalSection 11 | GlobalSection(ProjectConfigurationPlatforms) = postSolution 12 | {681412B0-F197-4A2F-9263-DEA2E8690146}.Debug|x64.ActiveCfg = Debug|x64 13 | {681412B0-F197-4A2F-9263-DEA2E8690146}.Debug|x64.Build.0 = Debug|x64 14 | {681412B0-F197-4A2F-9263-DEA2E8690146}.Release|x64.ActiveCfg = Release|x64 15 | {681412B0-F197-4A2F-9263-DEA2E8690146}.Release|x64.Build.0 = Release|x64 16 | {681412B0-F197-4A2F-9263-DEA2E8690146}.ReleaseDebug|x64.ActiveCfg = ReleaseDebug|x64 17 | {681412B0-F197-4A2F-9263-DEA2E8690146}.ReleaseDebug|x64.Build.0 = ReleaseDebug|x64 18 | EndGlobalSection 19 | GlobalSection(SolutionProperties) = preSolution 20 | HideSolutionNode = FALSE 21 | EndGlobalSection 22 | EndGlobal 23 | -------------------------------------------------------------------------------- /corSkinCluster.suo: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/baslack/CS6260-proj/c8d58476fef616d949841d57c583fc229ea09b49/corSkinCluster.suo -------------------------------------------------------------------------------- /corSkinCluster.v11.suo: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/baslack/CS6260-proj/c8d58476fef616d949841d57c583fc229ea09b49/corSkinCluster.v11.suo -------------------------------------------------------------------------------- /corSkinCluster.vcxproj: -------------------------------------------------------------------------------- 1 |  2 | 3 | 4 | 5 | Debug 6 | x64 7 | 8 | 9 | ReleaseDebug 10 | x64 11 | 12 | 13 | Release 14 | x64 15 | 16 | 17 | 18 | Win32Proj 19 | {681412B0-F197-4A2F-9263-DEA2E8690146} 20 | 21 | 22 | 23 | DynamicLibrary 24 | true 25 | v110 26 | 27 | 28 | DynamicLibrary 29 | false 30 | v110 31 | 32 | 33 | DynamicLibrary 34 | false 35 | v110 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | true 55 | $(Platform)\$(Configuration)\ 56 | .mll 57 | AllRules.ruleset 58 | 59 | 60 | 61 | _DEBUG;OSWin_;WIN32;_WINDOWS;_USRDLL;NT_PLUGIN;_HAS_ITERATOR_DEBUGGING=0;_SECURE_SCL=0;_SECURE_SCL_THROWS=0;_SECURE_SCL_DEPRECATE=0;_CRT_SECURE_NO_DEPRECATE;TBB_USE_DEBUG=0;__TBB_LIB_NAME=tbb.lib;Bits64_;%(PreprocessorDefinitions) 62 | .;..\..\..\include;..\..\..\..\include;C:\Program Files\Autodesk\Maya2017\include;$(CUDA_INC_PATH);%(AdditionalIncludeDirectories) 63 | MultiThreadedDebugDLL 64 | Level3 65 | ProgramDatabase 66 | true 67 | $(IntDir)$(ProjectName).pdb 68 | Disabled 69 | 70 | 71 | true 72 | Windows 73 | C:\Program Files\Autodesk\Maya2017\lib;..\..\..\lib;..\..\..\..\lib;$(CUDA_LINK_PATH)\x64;%(AdditionalLibraryDirectories) 74 | OpenMaya.lib;OpenMayaAnim.lib;OpenMayaRender.lib;Foundation.lib;OpenCL.lib;clew.lib;%(AdditionalDependencies) 75 | /export:initializePlugin 
/export:uninitializePlugin %(AdditionalOptions) 76 | false 77 | $(OutDir)$(TargetName).lib 78 | NotSet 79 | 80 | 81 | 82 | false 83 | $(Platform)\$(Configuration)\ 84 | .mll 85 | AllRules.ruleset 86 | 87 | 88 | 89 | NDEBUG;OSWin_;WIN32;_WINDOWS;_USRDLL;NT_PLUGIN;_HAS_ITERATOR_DEBUGGING=0;_SECURE_SCL=0;_SECURE_SCL_THROWS=0;_SECURE_SCL_DEPRECATE=0;_CRT_SECURE_NO_DEPRECATE;TBB_USE_DEBUG=0;__TBB_LIB_NAME=tbb.lib;Bits64_;%(PreprocessorDefinitions) 90 | .;..\..\..\include;..\..\..\..\include;%(AdditionalIncludeDirectories) 91 | MultiThreadedDLL 92 | Level3 93 | ProgramDatabase 94 | true 95 | $(IntDir)$(ProjectName).pdb 96 | OnlyExplicitInline 97 | true 98 | true 99 | 100 | 101 | true 102 | Windows 103 | ..\..\..\lib;..\..\..\..\lib;%(AdditionalLibraryDirectories) 104 | OpenMaya.lib;OpenMayaAnim.lib;Foundation.lib;%(AdditionalDependencies) 105 | /export:initializePlugin /export:uninitializePlugin %(AdditionalOptions) 106 | false 107 | $(OutDir)$(TargetName).lib 108 | true 109 | true 110 | 111 | 112 | 113 | false 114 | $(Platform)\$(Configuration)\ 115 | .mll 116 | AllRules.ruleset 117 | 118 | 119 | 120 | NDEBUG;OSWin_;WIN32;_WINDOWS;_USRDLL;NT_PLUGIN;_HAS_ITERATOR_DEBUGGING=0;_SECURE_SCL=0;_SECURE_SCL_THROWS=0;_SECURE_SCL_DEPRECATE=0;_CRT_SECURE_NO_DEPRECATE;TBB_USE_DEBUG=0;__TBB_LIB_NAME=tbb.lib;Bits64_;%(PreprocessorDefinitions) 121 | .;..\..\..\include;..\..\..\..\include;C:\Program Files\Autodesk\Maya2017\include;$(CUDA_INC_PATH);%(AdditionalIncludeDirectories) 122 | MultiThreadedDLL 123 | Level3 124 | ProgramDatabase 125 | true 126 | $(IntDir)$(ProjectName).pdb 127 | OnlyExplicitInline 128 | true 129 | true 130 | 131 | 132 | false 133 | Windows 134 | C:\Program Files\Autodesk\Maya2017\lib;..\..\..\lib;..\..\..\..\lib;$(CUDA_LINK_PATH)\x64;%(AdditionalLibraryDirectories) 135 | OpenMaya.lib;OpenMayaAnim.lib;OpenMayaRender.lib;Foundation.lib;OpenCL.lib;clew.lib;%(AdditionalDependencies) 136 | /export:initializePlugin /export:uninitializePlugin %(AdditionalOptions) 137 | 
/*
    corSkinDef.cl -- corSkinDef kernel

    One work item deforms one vertex using a blended-quaternion rotation
    about a per-vertex center of rotation (CoR):

        R  = rotation built from the normalized, weighted quaternion sum
        M' = weighted sum of the transform matrices (linear part R', translation t')
        t  = R' * CoR + t' - R * CoR
        finalPos = R * initialPos + t

    Buffer layout, as indexed by this kernel:
        finalPos, initialPos : 3 consecutive floats per vertex
        weights              : weights[vertex * numTransforms + transform]
        matrices             : 4 float4 rows per transform, matrices[transform * 4 + row];
                               each row is dotted with (x, y, z, 1), so row.w acts as translation
        quaternions          : one float4 per transform, used as (x, y, z, w) with w the scalar part
        CoRs                 : one float4 per vertex; only x, y, z influence the result because
                               every row it is dotted with has w == 0

    For reference, plain linear blend skinning would be
    finalPos.{x,y,z} = dot(M'[row], (x, y, z, 1)).
*/
__kernel void corSkinDef(
    __global float* finalPos,                   // float3
    __global const float* initialPos,           // float3
    __global const float* weights,
    __global const float4* matrices,
    __global const float4* quaternions,
    __global const float4* CoRs,
    const uint positionCount,
    const uint numTransforms)
{
    // More work items may be launched than there are vertices; the extras exit here.
    const unsigned int vertexId = get_global_id(0);
    if (vertexId >= positionCount) {
        return;
    }

    // Positions are packed as three floats per vertex.
    const unsigned int posBase = vertexId * 3;
    const float4 restPos = (float4)(initialPos[posBase],
                                    initialPos[posBase + 1],
                                    initialPos[posBase + 2],
                                    1.0f);

    // First weight belonging to this vertex.
    const unsigned int weightBase = vertexId * numTransforms;

    // Accumulators: blended quaternion and blended matrix rows.
    float4 q = (float4)(0.0f);
    float4 blended[4];
    for (unsigned int row = 0; row < 4; ++row) {
        blended[row] = (float4)(0.0f);
    }

    for (unsigned int j = 0; j < numTransforms; ++j) {
        const float influence = weights[weightBase + j];
        const float4 qj = quaternions[j];

        // Add the weighted quaternion when it points the same way as the running
        // sum (dot >= 0), subtract it otherwise. Expressed as a signed weight so
        // no divergent branch is needed.
        const float sign = (dot(q, qj) >= 0.0f) ? 1.0f : -1.0f;
        q = q + (sign * influence) * qj;

        // Weighted sum of the transform matrices, row by row.
        for (unsigned int row = 0; row < 4; ++row) {
            blended[row] += influence * matrices[j * 4 + row];
        }
    }

    q = normalize(q);

    // Rotation matrix rows for the unit quaternion (x, y, z, w); each row is
    // dotted with a position, so w is kept at 0 to ignore the homogeneous lane.
    const float qx = q.x;
    const float qy = q.y;
    const float qz = q.z;
    const float qw = q.w;

    const float4 rot0 = (float4)(1.0f - 2.0f*qy*qy - 2.0f*qz*qz,
                                 2.0f*qx*qy - 2.0f*qz*qw,
                                 2.0f*qx*qz + 2.0f*qy*qw,
                                 0.0f);
    const float4 rot1 = (float4)(2.0f*qx*qy + 2.0f*qz*qw,
                                 1.0f - 2.0f*qx*qx - 2.0f*qz*qz,
                                 2.0f*qy*qz - 2.0f*qx*qw,
                                 0.0f);
    const float4 rot2 = (float4)(2.0f*qx*qz - 2.0f*qy*qw,
                                 2.0f*qy*qz + 2.0f*qx*qw,
                                 1.0f - 2.0f*qx*qx - 2.0f*qy*qy,
                                 0.0f);

    // Linear (rotation/scale) part of the blended matrix: same rows with the
    // translation lane cleared. The translation t' is the w lane of each row.
    const float4 lin0 = (float4)(blended[0].xyz, 0.0f);
    const float4 lin1 = (float4)(blended[1].xyz, 0.0f);
    const float4 lin2 = (float4)(blended[2].xyz, 0.0f);

    // t = R' * CoR + t' - R * CoR
    const float4 cor = CoRs[vertexId];
    const float tx = dot(cor, lin0) + blended[0].w - dot(cor, rot0);
    const float ty = dot(cor, lin1) + blended[1].w - dot(cor, rot1);
    const float tz = dot(cor, lin2) + blended[2].w - dot(cor, rot2);

    // finalPos = R * initialPos + t, written to the output buffer.
    finalPos[posBase]     = dot(restPos, rot0) + tx;
    finalPos[posBase + 1] = dot(restPos, rot1) + ty;
    finalPos[posBase + 2] = dot(restPos, rot2) + tz;
}