1/*
2---------------------------------------------------------------------------
3Open Asset Import Library (assimp)
4---------------------------------------------------------------------------
5
6Copyright (c) 2006-2017, assimp team
7
8
9All rights reserved.
10
11Redistribution and use of this software in source and binary forms,
12with or without modification, are permitted provided that the following
13conditions are met:
14
15* Redistributions of source code must retain the above
16 copyright notice, this list of conditions and the
17 following disclaimer.
18
19* Redistributions in binary form must reproduce the above
20 copyright notice, this list of conditions and the
21 following disclaimer in the documentation and/or other
22 materials provided with the distribution.
23
24* Neither the name of the assimp team, nor the names of its
25 contributors may be used to endorse or promote products
26 derived from this software without specific prior
27 written permission of the assimp team.
28
29THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40---------------------------------------------------------------------------
41*/
42
43/** @file Implementation of the post processing step to improve the cache locality of a mesh.
44 * <br>
45 * The algorithm is roughly basing on this paper:
46 * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf
47 * .. although overdraw rduction isn't implemented yet ...
48 */
49
50
51
52// internal headers
53#include "ImproveCacheLocality.h"
54#include "VertexTriangleAdjacency.h"
55#include "StringUtils.h"
56#include <assimp/postprocess.h>
57#include <assimp/scene.h>
58#include <assimp/DefaultLogger.hpp>
59#include <stdio.h>
60#include <stack>
61
62using namespace Assimp;
63
64// ------------------------------------------------------------------------------------------------
65// Constructor to be privately used by Importer
66ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() {
67 configCacheDepth = PP_ICL_PTCACHE_SIZE;
68}
69
70// ------------------------------------------------------------------------------------------------
71// Destructor, private as well
72ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess()
73{
74 // nothing to do here
75}
76
77// ------------------------------------------------------------------------------------------------
78// Returns whether the processing step is present in the given flag field.
79bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const
80{
81 return (pFlags & aiProcess_ImproveCacheLocality) != 0;
82}
83
84// ------------------------------------------------------------------------------------------------
85// Setup configuration
86void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp)
87{
88 // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer
89 configCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE);
90}
91
92// ------------------------------------------------------------------------------------------------
93// Executes the post processing step on the given imported data.
94void ImproveCacheLocalityProcess::Execute( aiScene* pScene)
95{
96 if (!pScene->mNumMeshes) {
97 DefaultLogger::get()->debug("ImproveCacheLocalityProcess skipped; there are no meshes");
98 return;
99 }
100
101 DefaultLogger::get()->debug("ImproveCacheLocalityProcess begin");
102
103 float out = 0.f;
104 unsigned int numf = 0, numm = 0;
105 for( unsigned int a = 0; a < pScene->mNumMeshes; a++){
106 const float res = ProcessMesh( pScene->mMeshes[a],a);
107 if (res) {
108 numf += pScene->mMeshes[a]->mNumFaces;
109 out += res;
110 ++numm;
111 }
112 }
113 if (!DefaultLogger::isNullLogger()) {
114 char szBuff[128]; // should be sufficiently large in every case
115 ai_snprintf(szBuff,128,"Cache relevant are %u meshes (%u faces). Average output ACMR is %f",
116 numm,numf,out/numf);
117
118 DefaultLogger::get()->info(szBuff);
119 DefaultLogger::get()->debug("ImproveCacheLocalityProcess finished. ");
120 }
121}
122
123// ------------------------------------------------------------------------------------------------
124// Improves the cache coherency of a specific mesh
125float ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum)
126{
127 // TODO: rewrite this to use std::vector or boost::shared_array
128 ai_assert(NULL != pMesh);
129
130 // Check whether the input data is valid
131 // - there must be vertices and faces
132 // - all faces must be triangulated or we can't operate on them
133 if (!pMesh->HasFaces() || !pMesh->HasPositions())
134 return 0.f;
135
136 if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) {
137 DefaultLogger::get()->error("This algorithm works on triangle meshes only");
138 return 0.f;
139 }
140
141 if(pMesh->mNumVertices <= configCacheDepth) {
142 return 0.f;
143 }
144
145 float fACMR = 3.f;
146 const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces;
147
148 // Input ACMR is for logging purposes only
149 if (!DefaultLogger::isNullLogger()) {
150
151 unsigned int* piFIFOStack = new unsigned int[configCacheDepth];
152 memset(piFIFOStack,0xff,configCacheDepth*sizeof(unsigned int));
153 unsigned int* piCur = piFIFOStack;
154 const unsigned int* const piCurEnd = piFIFOStack + configCacheDepth;
155
156 // count the number of cache misses
157 unsigned int iCacheMisses = 0;
158 for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) {
159
160 for (unsigned int qq = 0; qq < 3;++qq) {
161 bool bInCache = false;
162
163 for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) {
164 if (*pp == pcFace->mIndices[qq]) {
165 // the vertex is in cache
166 bInCache = true;
167 break;
168 }
169 }
170 if (!bInCache) {
171 ++iCacheMisses;
172 if (piCurEnd == piCur) {
173 piCur = piFIFOStack;
174 }
175 *piCur++ = pcFace->mIndices[qq];
176 }
177 }
178 }
179 delete[] piFIFOStack;
180 fACMR = (float)iCacheMisses / pMesh->mNumFaces;
181 if (3.0 == fACMR) {
182 char szBuff[128]; // should be sufficiently large in every case
183
184 // the JoinIdenticalVertices process has not been executed on this
185 // mesh, otherwise this value would normally be at least minimally
186 // smaller than 3.0 ...
187 ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization",meshNum);
188 DefaultLogger::get()->warn(szBuff);
189 return 0.f;
190 }
191 }
192
193 // first we need to build a vertex-triangle adjacency list
194 VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true);
195
196 // build a list to store per-vertex caching time stamps
197 unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices];
198 memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int));
199
200 // allocate an empty output index buffer. We store the output indices in one large array.
201 // Since the number of triangles won't change the input faces can be reused. This is how
202 // we save thousands of redundant mini allocations for aiFace::mIndices
203 const unsigned int iIdxCnt = pMesh->mNumFaces*3;
204 unsigned int* const piIBOutput = new unsigned int[iIdxCnt];
205 unsigned int* piCSIter = piIBOutput;
206
207 // allocate the flag array to hold the information
208 // whether a face has already been emitted or not
209 std::vector<bool> abEmitted(pMesh->mNumFaces,false);
210
211 // dead-end vertex index stack
212 std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack;
213
214 // create a copy of the piNumTriPtr buffer
215 unsigned int* const piNumTriPtr = adj.mLiveTriangles;
216 const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices);
217
218 // get the largest number of referenced triangles and allocate the "candidate buffer"
219 unsigned int iMaxRefTris = 0; {
220 const unsigned int* piCur = adj.mLiveTriangles;
221 const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices;
222 for (;piCur != piCurEnd;++piCur) {
223 iMaxRefTris = std::max(iMaxRefTris,*piCur);
224 }
225 }
226 ai_assert(iMaxRefTris > 0);
227 unsigned int* piCandidates = new unsigned int[iMaxRefTris*3];
228 unsigned int iCacheMisses = 0;
229
230 // ...................................................................................
231 /** PSEUDOCODE for the algorithm
232
233 A = Build-Adjacency(I) Vertex-triangle adjacency
234 L = Get-Triangle-Counts(A) Per-vertex live triangle counts
235 C = Zero(Vertex-Count(I)) Per-vertex caching time stamps
236 D = Empty-Stack() Dead-end vertex stack
237 E = False(Triangle-Count(I)) Per triangle emitted flag
238 O = Empty-Index-Buffer() Empty output buffer
239 f = 0 Arbitrary starting vertex
240 s = k+1, i = 1 Time stamp and cursor
241 while f >= 0 For all valid fanning vertices
242 N = Empty-Set() 1-ring of next candidates
243 for each Triangle t in Neighbors(A, f)
244 if !Emitted(E,t)
245 for each Vertex v in t
246 Append(O,v) Output vertex
247 Push(D,v) Add to dead-end stack
248 Insert(N,v) Register as candidate
249 L[v] = L[v]-1 Decrease live triangle count
250 if s-C[v] > k If not in cache
251 C[v] = s Set time stamp
252 s = s+1 Increment time stamp
253 E[t] = true Flag triangle as emitted
254 Select next fanning vertex
255 f = Get-Next-Vertex(I,i,k,N,C,s,L,D)
256 return O
257 */
258 // ...................................................................................
259
260 int ivdx = 0;
261 int ics = 1;
262 int iStampCnt = configCacheDepth+1;
263 while (ivdx >= 0) {
264
265 unsigned int icnt = piNumTriPtrNoModify[ivdx];
266 unsigned int* piList = adj.GetAdjacentTriangles(ivdx);
267 unsigned int* piCurCandidate = piCandidates;
268
269 // get all triangles in the neighborhood
270 for (unsigned int tri = 0; tri < icnt;++tri) {
271
272 // if they have not yet been emitted, add them to the output IB
273 const unsigned int fidx = *piList++;
274 if (!abEmitted[fidx]) {
275
276 // so iterate through all vertices of the current triangle
277 const aiFace* pcFace = &pMesh->mFaces[ fidx ];
278 for (unsigned int* p = pcFace->mIndices, *p2 = pcFace->mIndices+3;p != p2;++p) {
279 const unsigned int dp = *p;
280
281 // the current vertex won't have any free triangles after this step
282 if (ivdx != (int)dp) {
283 // append the vertex to the dead-end stack
284 sDeadEndVStack.push(dp);
285
286 // register as candidate for the next step
287 *piCurCandidate++ = dp;
288
289 // decrease the per-vertex triangle counts
290 piNumTriPtr[dp]--;
291 }
292
293 // append the vertex to the output index buffer
294 *piCSIter++ = dp;
295
296 // if the vertex is not yet in cache, set its cache count
297 if (iStampCnt-piCachingStamps[dp] > configCacheDepth) {
298 piCachingStamps[dp] = iStampCnt++;
299 ++iCacheMisses;
300 }
301 }
302 // flag triangle as emitted
303 abEmitted[fidx] = true;
304 }
305 }
306
307 // the vertex has now no living adjacent triangles anymore
308 piNumTriPtr[ivdx] = 0;
309
310 // get next fanning vertex
311 ivdx = -1;
312 int max_priority = -1;
313 for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) {
314 const unsigned int dp = *piCur;
315
316 // must have live triangles
317 if (piNumTriPtr[dp] > 0) {
318 int priority = 0;
319
320 // will the vertex be in cache, even after fanning occurs?
321 unsigned int tmp;
322 if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= configCacheDepth) {
323 priority = tmp;
324 }
325
326 // keep best candidate
327 if (priority > max_priority) {
328 max_priority = priority;
329 ivdx = dp;
330 }
331 }
332 }
333 // did we reach a dead end?
334 if (-1 == ivdx) {
335 // need to get a non-local vertex for which we have a good chance that it is still
336 // in the cache ...
337 while (!sDeadEndVStack.empty()) {
338 unsigned int iCachedIdx = sDeadEndVStack.top();
339 sDeadEndVStack.pop();
340 if (piNumTriPtr[ iCachedIdx ] > 0) {
341 ivdx = iCachedIdx;
342 break;
343 }
344 }
345
346 if (-1 == ivdx) {
347 // well, there isn't such a vertex. Simply get the next vertex in input order and
348 // hope it is not too bad ...
349 while (ics < (int)pMesh->mNumVertices) {
350 ++ics;
351 if (piNumTriPtr[ics] > 0) {
352 ivdx = ics;
353 break;
354 }
355 }
356 }
357 }
358 }
359 float fACMR2 = 0.0f;
360 if (!DefaultLogger::isNullLogger()) {
361 fACMR2 = (float)iCacheMisses / pMesh->mNumFaces;
362
363 // very intense verbose logging ... prepare for much text if there are many meshes
364 if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) {
365 char szBuff[128]; // should be sufficiently large in every case
366
367 ai_snprintf(szBuff,128,"Mesh %u | ACMR in: %f out: %f | ~%.1f%%",meshNum,fACMR,fACMR2,
368 ((fACMR - fACMR2) / fACMR) * 100.f);
369 DefaultLogger::get()->debug(szBuff);
370 }
371
372 fACMR2 *= pMesh->mNumFaces;
373 }
374 // sort the output index buffer back to the input array
375 piCSIter = piIBOutput;
376 for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) {
377 pcFace->mIndices[0] = *piCSIter++;
378 pcFace->mIndices[1] = *piCSIter++;
379 pcFace->mIndices[2] = *piCSIter++;
380 }
381
382 // delete temporary storage
383 delete[] piCachingStamps;
384 delete[] piIBOutput;
385 delete[] piCandidates;
386
387 return fACMR2;
388}
389