1 | /* |
2 | --------------------------------------------------------------------------- |
3 | Open Asset Import Library (assimp) |
4 | --------------------------------------------------------------------------- |
5 | |
6 | Copyright (c) 2006-2017, assimp team |
7 | |
8 | |
9 | All rights reserved. |
10 | |
11 | Redistribution and use of this software in source and binary forms, |
12 | with or without modification, are permitted provided that the following |
13 | conditions are met: |
14 | |
15 | * Redistributions of source code must retain the above |
16 | copyright notice, this list of conditions and the |
17 | following disclaimer. |
18 | |
19 | * Redistributions in binary form must reproduce the above |
20 | copyright notice, this list of conditions and the |
21 | following disclaimer in the documentation and/or other |
22 | materials provided with the distribution. |
23 | |
24 | * Neither the name of the assimp team, nor the names of its |
25 | contributors may be used to endorse or promote products |
26 | derived from this software without specific prior |
27 | written permission of the assimp team. |
28 | |
29 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
30 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
31 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
32 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT |
33 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
34 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
35 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
36 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
37 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
38 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
39 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
40 | --------------------------------------------------------------------------- |
41 | */ |
42 | |
43 | /** @file Implementation of the post processing step to improve the cache locality of a mesh. |
44 | * <br> |
45 | * The algorithm is roughly basing on this paper: |
46 | * http://www.cs.princeton.edu/gfx/pubs/Sander_2007_%3ETR/tipsy.pdf |
47 | * .. although overdraw rduction isn't implemented yet ... |
48 | */ |
49 | |
50 | |
51 | |
52 | // internal headers |
53 | #include "ImproveCacheLocality.h" |
54 | #include "VertexTriangleAdjacency.h" |
55 | #include "StringUtils.h" |
56 | #include <assimp/postprocess.h> |
57 | #include <assimp/scene.h> |
58 | #include <assimp/DefaultLogger.hpp> |
59 | #include <stdio.h> |
60 | #include <stack> |
61 | |
62 | using namespace Assimp; |
63 | |
64 | // ------------------------------------------------------------------------------------------------ |
65 | // Constructor to be privately used by Importer |
66 | ImproveCacheLocalityProcess::ImproveCacheLocalityProcess() { |
67 | configCacheDepth = PP_ICL_PTCACHE_SIZE; |
68 | } |
69 | |
70 | // ------------------------------------------------------------------------------------------------ |
71 | // Destructor, private as well |
72 | ImproveCacheLocalityProcess::~ImproveCacheLocalityProcess() |
73 | { |
74 | // nothing to do here |
75 | } |
76 | |
77 | // ------------------------------------------------------------------------------------------------ |
78 | // Returns whether the processing step is present in the given flag field. |
79 | bool ImproveCacheLocalityProcess::IsActive( unsigned int pFlags) const |
80 | { |
81 | return (pFlags & aiProcess_ImproveCacheLocality) != 0; |
82 | } |
83 | |
84 | // ------------------------------------------------------------------------------------------------ |
85 | // Setup configuration |
86 | void ImproveCacheLocalityProcess::SetupProperties(const Importer* pImp) |
87 | { |
88 | // AI_CONFIG_PP_ICL_PTCACHE_SIZE controls the target cache size for the optimizer |
89 | configCacheDepth = pImp->GetPropertyInteger(AI_CONFIG_PP_ICL_PTCACHE_SIZE,PP_ICL_PTCACHE_SIZE); |
90 | } |
91 | |
92 | // ------------------------------------------------------------------------------------------------ |
93 | // Executes the post processing step on the given imported data. |
94 | void ImproveCacheLocalityProcess::Execute( aiScene* pScene) |
95 | { |
96 | if (!pScene->mNumMeshes) { |
97 | DefaultLogger::get()->debug("ImproveCacheLocalityProcess skipped; there are no meshes" ); |
98 | return; |
99 | } |
100 | |
101 | DefaultLogger::get()->debug("ImproveCacheLocalityProcess begin" ); |
102 | |
103 | float out = 0.f; |
104 | unsigned int numf = 0, numm = 0; |
105 | for( unsigned int a = 0; a < pScene->mNumMeshes; a++){ |
106 | const float res = ProcessMesh( pScene->mMeshes[a],a); |
107 | if (res) { |
108 | numf += pScene->mMeshes[a]->mNumFaces; |
109 | out += res; |
110 | ++numm; |
111 | } |
112 | } |
113 | if (!DefaultLogger::isNullLogger()) { |
114 | char szBuff[128]; // should be sufficiently large in every case |
115 | ai_snprintf(szBuff,128,"Cache relevant are %u meshes (%u faces). Average output ACMR is %f" , |
116 | numm,numf,out/numf); |
117 | |
118 | DefaultLogger::get()->info(szBuff); |
119 | DefaultLogger::get()->debug("ImproveCacheLocalityProcess finished. " ); |
120 | } |
121 | } |
122 | |
123 | // ------------------------------------------------------------------------------------------------ |
124 | // Improves the cache coherency of a specific mesh |
125 | float ImproveCacheLocalityProcess::ProcessMesh( aiMesh* pMesh, unsigned int meshNum) |
126 | { |
127 | // TODO: rewrite this to use std::vector or boost::shared_array |
128 | ai_assert(NULL != pMesh); |
129 | |
130 | // Check whether the input data is valid |
131 | // - there must be vertices and faces |
132 | // - all faces must be triangulated or we can't operate on them |
133 | if (!pMesh->HasFaces() || !pMesh->HasPositions()) |
134 | return 0.f; |
135 | |
136 | if (pMesh->mPrimitiveTypes != aiPrimitiveType_TRIANGLE) { |
137 | DefaultLogger::get()->error("This algorithm works on triangle meshes only" ); |
138 | return 0.f; |
139 | } |
140 | |
141 | if(pMesh->mNumVertices <= configCacheDepth) { |
142 | return 0.f; |
143 | } |
144 | |
145 | float fACMR = 3.f; |
146 | const aiFace* const pcEnd = pMesh->mFaces+pMesh->mNumFaces; |
147 | |
148 | // Input ACMR is for logging purposes only |
149 | if (!DefaultLogger::isNullLogger()) { |
150 | |
151 | unsigned int* piFIFOStack = new unsigned int[configCacheDepth]; |
152 | memset(piFIFOStack,0xff,configCacheDepth*sizeof(unsigned int)); |
153 | unsigned int* piCur = piFIFOStack; |
154 | const unsigned int* const piCurEnd = piFIFOStack + configCacheDepth; |
155 | |
156 | // count the number of cache misses |
157 | unsigned int iCacheMisses = 0; |
158 | for (const aiFace* pcFace = pMesh->mFaces;pcFace != pcEnd;++pcFace) { |
159 | |
160 | for (unsigned int qq = 0; qq < 3;++qq) { |
161 | bool bInCache = false; |
162 | |
163 | for (unsigned int* pp = piFIFOStack;pp < piCurEnd;++pp) { |
164 | if (*pp == pcFace->mIndices[qq]) { |
165 | // the vertex is in cache |
166 | bInCache = true; |
167 | break; |
168 | } |
169 | } |
170 | if (!bInCache) { |
171 | ++iCacheMisses; |
172 | if (piCurEnd == piCur) { |
173 | piCur = piFIFOStack; |
174 | } |
175 | *piCur++ = pcFace->mIndices[qq]; |
176 | } |
177 | } |
178 | } |
179 | delete[] piFIFOStack; |
180 | fACMR = (float)iCacheMisses / pMesh->mNumFaces; |
181 | if (3.0 == fACMR) { |
182 | char szBuff[128]; // should be sufficiently large in every case |
183 | |
184 | // the JoinIdenticalVertices process has not been executed on this |
185 | // mesh, otherwise this value would normally be at least minimally |
186 | // smaller than 3.0 ... |
187 | ai_snprintf(szBuff,128,"Mesh %u: Not suitable for vcache optimization" ,meshNum); |
188 | DefaultLogger::get()->warn(szBuff); |
189 | return 0.f; |
190 | } |
191 | } |
192 | |
193 | // first we need to build a vertex-triangle adjacency list |
194 | VertexTriangleAdjacency adj(pMesh->mFaces,pMesh->mNumFaces, pMesh->mNumVertices,true); |
195 | |
196 | // build a list to store per-vertex caching time stamps |
197 | unsigned int* const piCachingStamps = new unsigned int[pMesh->mNumVertices]; |
198 | memset(piCachingStamps,0x0,pMesh->mNumVertices*sizeof(unsigned int)); |
199 | |
200 | // allocate an empty output index buffer. We store the output indices in one large array. |
201 | // Since the number of triangles won't change the input faces can be reused. This is how |
202 | // we save thousands of redundant mini allocations for aiFace::mIndices |
203 | const unsigned int iIdxCnt = pMesh->mNumFaces*3; |
204 | unsigned int* const piIBOutput = new unsigned int[iIdxCnt]; |
205 | unsigned int* piCSIter = piIBOutput; |
206 | |
207 | // allocate the flag array to hold the information |
208 | // whether a face has already been emitted or not |
209 | std::vector<bool> abEmitted(pMesh->mNumFaces,false); |
210 | |
211 | // dead-end vertex index stack |
212 | std::stack<unsigned int, std::vector<unsigned int> > sDeadEndVStack; |
213 | |
214 | // create a copy of the piNumTriPtr buffer |
215 | unsigned int* const piNumTriPtr = adj.mLiveTriangles; |
216 | const std::vector<unsigned int> piNumTriPtrNoModify(piNumTriPtr, piNumTriPtr + pMesh->mNumVertices); |
217 | |
218 | // get the largest number of referenced triangles and allocate the "candidate buffer" |
219 | unsigned int iMaxRefTris = 0; { |
220 | const unsigned int* piCur = adj.mLiveTriangles; |
221 | const unsigned int* const piCurEnd = adj.mLiveTriangles+pMesh->mNumVertices; |
222 | for (;piCur != piCurEnd;++piCur) { |
223 | iMaxRefTris = std::max(iMaxRefTris,*piCur); |
224 | } |
225 | } |
226 | ai_assert(iMaxRefTris > 0); |
227 | unsigned int* piCandidates = new unsigned int[iMaxRefTris*3]; |
228 | unsigned int iCacheMisses = 0; |
229 | |
230 | // ................................................................................... |
231 | /** PSEUDOCODE for the algorithm |
232 | |
233 | A = Build-Adjacency(I) Vertex-triangle adjacency |
234 | L = Get-Triangle-Counts(A) Per-vertex live triangle counts |
235 | C = Zero(Vertex-Count(I)) Per-vertex caching time stamps |
236 | D = Empty-Stack() Dead-end vertex stack |
237 | E = False(Triangle-Count(I)) Per triangle emitted flag |
238 | O = Empty-Index-Buffer() Empty output buffer |
239 | f = 0 Arbitrary starting vertex |
240 | s = k+1, i = 1 Time stamp and cursor |
241 | while f >= 0 For all valid fanning vertices |
242 | N = Empty-Set() 1-ring of next candidates |
243 | for each Triangle t in Neighbors(A, f) |
244 | if !Emitted(E,t) |
245 | for each Vertex v in t |
246 | Append(O,v) Output vertex |
247 | Push(D,v) Add to dead-end stack |
248 | Insert(N,v) Register as candidate |
249 | L[v] = L[v]-1 Decrease live triangle count |
250 | if s-C[v] > k If not in cache |
251 | C[v] = s Set time stamp |
252 | s = s+1 Increment time stamp |
253 | E[t] = true Flag triangle as emitted |
254 | Select next fanning vertex |
255 | f = Get-Next-Vertex(I,i,k,N,C,s,L,D) |
256 | return O |
257 | */ |
258 | // ................................................................................... |
259 | |
260 | int ivdx = 0; |
261 | int ics = 1; |
262 | int iStampCnt = configCacheDepth+1; |
263 | while (ivdx >= 0) { |
264 | |
265 | unsigned int icnt = piNumTriPtrNoModify[ivdx]; |
266 | unsigned int* piList = adj.GetAdjacentTriangles(ivdx); |
267 | unsigned int* piCurCandidate = piCandidates; |
268 | |
269 | // get all triangles in the neighborhood |
270 | for (unsigned int tri = 0; tri < icnt;++tri) { |
271 | |
272 | // if they have not yet been emitted, add them to the output IB |
273 | const unsigned int fidx = *piList++; |
274 | if (!abEmitted[fidx]) { |
275 | |
276 | // so iterate through all vertices of the current triangle |
277 | const aiFace* pcFace = &pMesh->mFaces[ fidx ]; |
278 | for (unsigned int* p = pcFace->mIndices, *p2 = pcFace->mIndices+3;p != p2;++p) { |
279 | const unsigned int dp = *p; |
280 | |
281 | // the current vertex won't have any free triangles after this step |
282 | if (ivdx != (int)dp) { |
283 | // append the vertex to the dead-end stack |
284 | sDeadEndVStack.push(dp); |
285 | |
286 | // register as candidate for the next step |
287 | *piCurCandidate++ = dp; |
288 | |
289 | // decrease the per-vertex triangle counts |
290 | piNumTriPtr[dp]--; |
291 | } |
292 | |
293 | // append the vertex to the output index buffer |
294 | *piCSIter++ = dp; |
295 | |
296 | // if the vertex is not yet in cache, set its cache count |
297 | if (iStampCnt-piCachingStamps[dp] > configCacheDepth) { |
298 | piCachingStamps[dp] = iStampCnt++; |
299 | ++iCacheMisses; |
300 | } |
301 | } |
302 | // flag triangle as emitted |
303 | abEmitted[fidx] = true; |
304 | } |
305 | } |
306 | |
307 | // the vertex has now no living adjacent triangles anymore |
308 | piNumTriPtr[ivdx] = 0; |
309 | |
310 | // get next fanning vertex |
311 | ivdx = -1; |
312 | int max_priority = -1; |
313 | for (unsigned int* piCur = piCandidates;piCur != piCurCandidate;++piCur) { |
314 | const unsigned int dp = *piCur; |
315 | |
316 | // must have live triangles |
317 | if (piNumTriPtr[dp] > 0) { |
318 | int priority = 0; |
319 | |
320 | // will the vertex be in cache, even after fanning occurs? |
321 | unsigned int tmp; |
322 | if ((tmp = iStampCnt-piCachingStamps[dp]) + 2*piNumTriPtr[dp] <= configCacheDepth) { |
323 | priority = tmp; |
324 | } |
325 | |
326 | // keep best candidate |
327 | if (priority > max_priority) { |
328 | max_priority = priority; |
329 | ivdx = dp; |
330 | } |
331 | } |
332 | } |
333 | // did we reach a dead end? |
334 | if (-1 == ivdx) { |
335 | // need to get a non-local vertex for which we have a good chance that it is still |
336 | // in the cache ... |
337 | while (!sDeadEndVStack.empty()) { |
338 | unsigned int iCachedIdx = sDeadEndVStack.top(); |
339 | sDeadEndVStack.pop(); |
340 | if (piNumTriPtr[ iCachedIdx ] > 0) { |
341 | ivdx = iCachedIdx; |
342 | break; |
343 | } |
344 | } |
345 | |
346 | if (-1 == ivdx) { |
347 | // well, there isn't such a vertex. Simply get the next vertex in input order and |
348 | // hope it is not too bad ... |
349 | while (ics < (int)pMesh->mNumVertices) { |
350 | ++ics; |
351 | if (piNumTriPtr[ics] > 0) { |
352 | ivdx = ics; |
353 | break; |
354 | } |
355 | } |
356 | } |
357 | } |
358 | } |
359 | float fACMR2 = 0.0f; |
360 | if (!DefaultLogger::isNullLogger()) { |
361 | fACMR2 = (float)iCacheMisses / pMesh->mNumFaces; |
362 | |
363 | // very intense verbose logging ... prepare for much text if there are many meshes |
364 | if ( DefaultLogger::get()->getLogSeverity() == Logger::VERBOSE) { |
365 | char szBuff[128]; // should be sufficiently large in every case |
366 | |
367 | ai_snprintf(szBuff,128,"Mesh %u | ACMR in: %f out: %f | ~%.1f%%" ,meshNum,fACMR,fACMR2, |
368 | ((fACMR - fACMR2) / fACMR) * 100.f); |
369 | DefaultLogger::get()->debug(szBuff); |
370 | } |
371 | |
372 | fACMR2 *= pMesh->mNumFaces; |
373 | } |
374 | // sort the output index buffer back to the input array |
375 | piCSIter = piIBOutput; |
376 | for (aiFace* pcFace = pMesh->mFaces; pcFace != pcEnd;++pcFace) { |
377 | pcFace->mIndices[0] = *piCSIter++; |
378 | pcFace->mIndices[1] = *piCSIter++; |
379 | pcFace->mIndices[2] = *piCSIter++; |
380 | } |
381 | |
382 | // delete temporary storage |
383 | delete[] piCachingStamps; |
384 | delete[] piIBOutput; |
385 | delete[] piCandidates; |
386 | |
387 | return fACMR2; |
388 | } |
389 | |