dual quat -> matrix optimizations
[xonotic/darkplaces.git] / mod_skeletal_animatevertices_generic.c
1 #include "mod_skeletal_animatevertices_generic.h"
2
3 typedef struct
4 {
5         float f[12];
6 }
7 float12_t;
8
9 void Mod_Skeletal_AnimateVertices_Generic(const dp_model_t * RESTRICT model, const frameblend_t * RESTRICT frameblend, const skeleton_t *skeleton, float * RESTRICT vertex3f, float * RESTRICT normal3f, float * RESTRICT svector3f, float * RESTRICT tvector3f)
10 {
11         // vertex weighted skeletal
12         int i, k;
13         int blends;
14         float12_t *bonepose;
15         float12_t *boneposerelative;
16         float m[12];
17         const blendweights_t * RESTRICT weights;
18
19         //unsigned long long ts = rdtsc();
20         bonepose = (float12_t *) Mod_Skeletal_AnimateVertices_AllocBuffers(sizeof(float12_t) * (model->num_bones*2 + model->surfmesh.num_blends));
21         boneposerelative = bonepose + model->num_bones;
22
23         if (skeleton && !skeleton->relativetransforms)
24                 skeleton = NULL;
25
26         // interpolate matrices
27         if (skeleton)
28         {
29                 for (i = 0;i < model->num_bones;i++)
30                 {
31                         Matrix4x4_ToArray12FloatD3D(&skeleton->relativetransforms[i], m);
32                         if (model->data_bones[i].parent >= 0)
33                                 R_ConcatTransforms(bonepose[model->data_bones[i].parent].f, m, bonepose[i].f);
34                         else
35                                 memcpy(bonepose[i].f, m, sizeof(m));
36
37                         // create a relative deformation matrix to describe displacement
38                         // from the base mesh, which is used by the actual weighting
39                         R_ConcatTransforms(bonepose[i].f, model->data_baseboneposeinverse + i * 12, boneposerelative[i].f);
40                 }
41         }
42         else
43         {
44                 for (i = 0;i < model->num_bones;i++)
45                 {
46                     const short * RESTRICT pose7s = model->data_poses7s + 7 * (frameblend[0].subframe * model->num_bones + i);
47                         float lerp = frameblend[0].lerp,
48                                 tx = pose7s[0], ty = pose7s[1], tz = pose7s[2],
49                                 rx = pose7s[3] * lerp,
50                                 ry = pose7s[4] * lerp,
51                                 rz = pose7s[5] * lerp,
52                                 rw = pose7s[6] * lerp,
53                                 dx = tx*rw + ty*rz - tz*ry,
54                                 dy = -tx*rz + ty*rw + tz*rx,
55                                 dz = tx*ry - ty*rx + tz*rw,
56                                 dw = -tx*rx - ty*ry - tz*rz,
57                                 scale, sx, sy, sz, sw;
58                         for (blends = 1;blends < MAX_FRAMEBLENDS && frameblend[blends].lerp > 0;blends++)
59                         {
60                                 const short * RESTRICT pose7s = model->data_poses7s + 7 * (frameblend[blends].subframe * model->num_bones + i);
61                                 float lerp = frameblend[blends].lerp,
62                                         tx = pose7s[0], ty = pose7s[1], tz = pose7s[2],
63                                         qx = pose7s[3], qy = pose7s[4], qz = pose7s[5], qw = pose7s[6];
64                                 if(rx*qx + ry*qy + rz*qz + rw*qw < 0) lerp = -lerp;
65                                 qx *= lerp;
66                                 qy *= lerp;
67                                 qz *= lerp;
68                                 qw *= lerp;
69                                 rx += qx;
70                                 ry += qy;
71                                 rz += qz;
72                                 rw += qw;
73                                 dx += tx*qw + ty*qz - tz*qy;
74                                 dy += -tx*qz + ty*qw + tz*qx;
75                                 dz += tx*qy - ty*qx + tz*qw;
76                                 dw += -tx*qx - ty*qy - tz*qz;
77                         }
78                         scale = 1.0f / (rx*rx + ry*ry + rz*rz + rw*rw);
79                         sx = rx * scale;
80                         sy = ry * scale;
81                         sz = rz * scale;
82                         sw = rw * scale;
83                         m[0] = sw*rw + sx*rx - sy*ry - sz*rz;
84                         m[1] = 2*(sx*ry - sw*rz);
85                         m[2] = 2*(sx*rz + sw*ry);
86                         m[3] = model->num_posescale*(dx*sw - dy*sz + dz*sy - dw*sx);
87                         m[4] = 2*(sx*ry + sw*rz);
88                         m[5] = sw*rw + sy*ry - sx*rx - sz*rz;
89                         m[6] = 2*(sy*rz - sw*rx);
90                         m[7] = model->num_posescale*(dx*sz + dy*sw - dz*sx - dw*sy);
91                         m[8] = 2*(sx*rz - sw*ry);
92                         m[9] = 2*(sy*rz + sw*rx);
93                         m[10] = sw*rw + sz*rz - sx*rx - sy*ry;
94                         m[11] = model->num_posescale*(dy*sx + dz*sw - dx*sy - dw*sz);
95                         if (i == r_skeletal_debugbone.integer)
96                                 m[r_skeletal_debugbonecomponent.integer % 12] += r_skeletal_debugbonevalue.value;
97                         m[3] *= r_skeletal_debugtranslatex.value;
98                         m[7] *= r_skeletal_debugtranslatey.value;
99                         m[11] *= r_skeletal_debugtranslatez.value;
100                         if (model->data_bones[i].parent >= 0)
101                                 R_ConcatTransforms(bonepose[model->data_bones[i].parent].f, m, bonepose[i].f);
102                         else
103                                 memcpy(bonepose[i].f, m, sizeof(m));
104                         // create a relative deformation matrix to describe displacement
105                         // from the base mesh, which is used by the actual weighting
106                         R_ConcatTransforms(bonepose[i].f, model->data_baseboneposeinverse + i * 12, boneposerelative[i].f);
107                 }
108         }
109
110         // generate matrices for all blend combinations
111         weights = model->surfmesh.data_blendweights;
112         for (i = 0;i < model->surfmesh.num_blends;i++, weights++)
113         {
114                 float * RESTRICT b = boneposerelative[model->num_bones + i].f;
115                 const float * RESTRICT m = boneposerelative[weights->index[0]].f;
116                 float f = weights->influence[0] * (1.0f / 255.0f);
117                 b[ 0] = f*m[ 0]; b[ 1] = f*m[ 1]; b[ 2] = f*m[ 2]; b[ 3] = f*m[ 3];
118                 b[ 4] = f*m[ 4]; b[ 5] = f*m[ 5]; b[ 6] = f*m[ 6]; b[ 7] = f*m[ 7];
119                 b[ 8] = f*m[ 8]; b[ 9] = f*m[ 9]; b[10] = f*m[10]; b[11] = f*m[11];
120                 for (k = 1;k < 4 && weights->influence[k];k++)
121                 {
122                         m = boneposerelative[weights->index[k]].f;
123                         f = weights->influence[k] * (1.0f / 255.0f);
124                         b[ 0] += f*m[ 0]; b[ 1] += f*m[ 1]; b[ 2] += f*m[ 2]; b[ 3] += f*m[ 3];
125                         b[ 4] += f*m[ 4]; b[ 5] += f*m[ 5]; b[ 6] += f*m[ 6]; b[ 7] += f*m[ 7];
126                         b[ 8] += f*m[ 8]; b[ 9] += f*m[ 9]; b[10] += f*m[10]; b[11] += f*m[11];
127                 }
128         }
129
130 #define LOAD_MATRIX_SCALAR() const float * RESTRICT m = boneposerelative[*b].f
131
132 #define LOAD_MATRIX3() \
133         LOAD_MATRIX_SCALAR()
134 #define LOAD_MATRIX4() \
135         LOAD_MATRIX_SCALAR()
136
137 #define TRANSFORM_POSITION_SCALAR(in, out) \
138         (out)[0] = ((in)[0] * m[0] + (in)[1] * m[1] + (in)[2] * m[ 2] + m[3]); \
139         (out)[1] = ((in)[0] * m[4] + (in)[1] * m[5] + (in)[2] * m[ 6] + m[7]); \
140         (out)[2] = ((in)[0] * m[8] + (in)[1] * m[9] + (in)[2] * m[10] + m[11]);
141 #define TRANSFORM_VECTOR_SCALAR(in, out) \
142         (out)[0] = ((in)[0] * m[0] + (in)[1] * m[1] + (in)[2] * m[ 2]); \
143         (out)[1] = ((in)[0] * m[4] + (in)[1] * m[5] + (in)[2] * m[ 6]); \
144         (out)[2] = ((in)[0] * m[8] + (in)[1] * m[9] + (in)[2] * m[10]);
145
146 #define TRANSFORM_POSITION(in, out) \
147         TRANSFORM_POSITION_SCALAR(in, out)
148 #define TRANSFORM_VECTOR(in, out) \
149         TRANSFORM_VECTOR_SCALAR(in, out)
150
151         // transform vertex attributes by blended matrices
152         if (vertex3f)
153         {
154                 const float * RESTRICT v = model->surfmesh.data_vertex3f;
155                 const unsigned short * RESTRICT b = model->surfmesh.blends;
156                 // special case common combinations of attributes to avoid repeated loading of matrices
157                 if (normal3f)
158                 {
159                         const float * RESTRICT n = model->surfmesh.data_normal3f;
160                         if (svector3f && tvector3f)
161                         {
162                                 const float * RESTRICT sv = model->surfmesh.data_svector3f;
163                                 const float * RESTRICT tv = model->surfmesh.data_tvector3f;
164
165                                 // Note that for SSE each iteration stores one element past end, so we break one vertex short
166                                 // and handle that with scalars in that case
167                                 for (i = 0; i < model->surfmesh.num_vertices; i++, v += 3, n += 3, sv += 3, tv += 3, b++,
168                                                 vertex3f += 3, normal3f += 3, svector3f += 3, tvector3f += 3)
169                                 {
170                                         LOAD_MATRIX4();
171                                         TRANSFORM_POSITION(v, vertex3f);
172                                         TRANSFORM_VECTOR(n, normal3f);
173                                         TRANSFORM_VECTOR(sv, svector3f);
174                                         TRANSFORM_VECTOR(tv, tvector3f);
175                                 }
176
177                                 return;
178                         }
179
180                         for (i = 0;i < model->surfmesh.num_vertices; i++, v += 3, n += 3, b++, vertex3f += 3, normal3f += 3)
181                         {
182                                 LOAD_MATRIX4();
183                                 TRANSFORM_POSITION(v, vertex3f);
184                                 TRANSFORM_VECTOR(n, normal3f);
185                         }
186                 }
187                 else
188                 {
189                         for (i = 0;i < model->surfmesh.num_vertices; i++, v += 3, b++, vertex3f += 3)
190                         {
191                                 LOAD_MATRIX4();
192                                 TRANSFORM_POSITION(v, vertex3f);
193                         }
194                 }
195         }
196
197         else if (normal3f)
198         {
199                 const float * RESTRICT n = model->surfmesh.data_normal3f;
200                 const unsigned short * RESTRICT b = model->surfmesh.blends;
201                 for (i = 0; i < model->surfmesh.num_vertices; i++, n += 3, b++, normal3f += 3)
202                 {
203                         LOAD_MATRIX3();
204                         TRANSFORM_VECTOR(n, normal3f);
205                 }
206         }
207
208         if (svector3f)
209         {
210                 const float * RESTRICT sv = model->surfmesh.data_svector3f;
211                 const unsigned short * RESTRICT b = model->surfmesh.blends;
212                 for (i = 0; i < model->surfmesh.num_vertices; i++, sv += 3, b++, svector3f += 3)
213                 {
214                         LOAD_MATRIX3();
215                         TRANSFORM_VECTOR(sv, svector3f);
216                 }
217         }
218
219         if (tvector3f)
220         {
221                 const float * RESTRICT tv = model->surfmesh.data_tvector3f;
222                 const unsigned short * RESTRICT b = model->surfmesh.blends;
223                 for (i = 0; i < model->surfmesh.num_vertices; i++, tv += 3, b++, tvector3f += 3)
224                 {
225                         LOAD_MATRIX3();
226                         TRANSFORM_VECTOR(tv, tvector3f);
227                 }
228         }
229 }