1//
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions
4// are met:
5// * Redistributions of source code must retain the above copyright
6// notice, this list of conditions and the following disclaimer.
7// * Redistributions in binary form must reproduce the above copyright
8// notice, this list of conditions and the following disclaimer in the
9// documentation and/or other materials provided with the distribution.
10// * Neither the name of NVIDIA CORPORATION nor the names of its
11// contributors may be used to endorse or promote products derived
12// from this software without specific prior written permission.
13//
14// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ''AS IS'' AND ANY
15// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
18// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25//
26// Copyright (c) 2008-2021 NVIDIA Corporation. All rights reserved.
27
28
29#ifndef PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H
30#define PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H
31
32#include "foundation/PxPreprocessor.h"
33
34#if PX_SUPPORT_GPU_PHYSX
35
36#include "foundation/PxSimpleTypes.h"
37#include "foundation/PxErrorCallback.h"
38#include "foundation/PxFlags.h"
39#include "task/PxTaskDefine.h"
40#include "cudamanager/PxCudaMemoryManager.h"
41
42/* Forward decl to avoid inclusion of cuda.h */
43typedef struct CUctx_st *CUcontext;
44typedef struct CUgraphicsResource_st *CUgraphicsResource;
45typedef int CUdevice;
46
47namespace physx
48{
49
/** \brief Possible graphic/CUDA interoperability modes for context */
struct PxCudaInteropMode
{
	/**
	 * \brief Enumeration of the supported graphics-API interop targets.
	 */
	enum Enum
	{
		NO_INTEROP    = 0,	//!< no graphics interoperability
		D3D10_INTEROP = 1,	//!< Direct3D 10 interoperability
		D3D11_INTEROP = 2,	//!< Direct3D 11 interoperability
		OGL_INTEROP   = 3,	//!< OpenGL interoperability

		COUNT         = 4	//!< number of interop modes
	};
};
66
/**
\brief Flag bits used when registering a graphics resource with CUDA.

Values presumably mirror the CUDA graphics-resource register flags — confirm
against the CUDA driver API when mapping them through.
*/
struct PxCudaInteropRegisterFlag
{
	enum Enum
	{
		eNONE           = 0,
		eREAD_ONLY      = 1 << 0,
		eWRITE_DISCARD  = 1 << 1,
		eSURFACE_LDST   = 1 << 2,
		eTEXTURE_GATHER = 1 << 3
	};
};
78
/**
\brief collection of set bits defined in PxCudaInteropRegisterFlag.

@see PxCudaInteropRegisterFlag
*/
typedef PxFlags<PxCudaInteropRegisterFlag::Enum, uint32_t> PxCudaInteropRegisterFlags;
PX_FLAGS_OPERATORS(PxCudaInteropRegisterFlag::Enum, uint32_t)
86
87//! \brief Descriptor used to create a PxCudaContextManager
88class PxCudaContextManagerDesc
89{
90public:
91 /**
92 * \brief The CUDA context to manage
93 *
94 * If left NULL, the PxCudaContextManager will create a new context. If
95 * graphicsDevice is also not NULL, this new CUDA context will be bound to
96 * that graphics device, enabling the use of CUDA/Graphics interop features.
97 *
98 * If ctx is not NULL, the specified context must be applied to the thread
99 * that is allocating the PxCudaContextManager at creation time (aka, it
100 * cannot be popped). The PxCudaContextManager will take ownership of the
101 * context until the manager is released. All access to the context must be
102 * gated by lock acquisition.
103 *
104 * If the user provides a context for the PxCudaContextManager, the context
105 * _must_ have either been created on the GPU ordinal returned by
106 * PxGetSuggestedCudaDeviceOrdinal() or on your graphics device.
107 *
108 * It is perfectly acceptable to allocate device or host pinned memory from
109 * the context outside the scope of the PxCudaMemoryManager, so long as you
110 * manage its eventual cleanup.
111 */
112 CUcontext *ctx;
113
114 /**
115 * \brief D3D device pointer or OpenGl context handle
116 *
117 * Only applicable when ctx is NULL, thus forcing a new context to be
118 * created. In that case, the created context will be bound to this
119 * graphics device.
120 */
121 void *graphicsDevice;
122
123#if PX_SUPPORT_GPU_PHYSX
124 /**
125 * \brief Application-specific GUID
126 *
127 * If your application employs PhysX modules that use CUDA you need to use a GUID
128 * so that patches for new architectures can be released for your game.You can obtain a GUID for your
129 * application from Nvidia.
130 */
131 const char* appGUID;
132#endif
133 /**
134 * \brief The CUDA/Graphics interop mode of this context
135 *
136 * If ctx is NULL, this value describes the nature of the graphicsDevice
137 * pointer provided by the user. Else it describes the nature of the
138 * context provided by the user.
139 */
140 PxCudaInteropMode::Enum interopMode;
141
142
143 /**
144 * \brief Size of persistent memory
145 *
146 * This memory is allocated up front and stays allocated until the
147 * PxCudaContextManager is released. Size is in bytes, has to be power of two
148 * and bigger than the page size. Set to 0 to only use dynamic pages.
149 *
150 * Note: On Vista O/S and above, there is a per-memory allocation overhead
151 * to every CUDA work submission, so we recommend that you carefully tune
152 * this initial base memory size to closely approximate the amount of
153 * memory your application will consume.
154
155 Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured
156 for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
157 */
158 uint32_t memoryBaseSize[PxCudaBufferMemorySpace::COUNT];
159
160 /**
161 * \brief Size of memory pages
162 *
163 * The memory manager will dynamically grow and shrink in blocks multiple of
164 * this page size. Size has to be power of two and bigger than 0.
165
166 Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured
167 for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
168 */
169 uint32_t memoryPageSize[PxCudaBufferMemorySpace::COUNT];
170
171 /**
172 * \brief Maximum size of memory that the memory manager will allocate
173
174 Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS. Memory allocation properties are configured
175 for GPU rigid bodies using PxSceneDesc::gpuDynamicsConfig.
176 */
177 uint32_t maxMemorySize[PxCudaBufferMemorySpace::COUNT];
178
179 PX_INLINE PxCudaContextManagerDesc()
180 {
181 ctx = NULL;
182 interopMode = PxCudaInteropMode::NO_INTEROP;
183 graphicsDevice = 0;
184#if PX_SUPPORT_GPU_PHYSX
185 appGUID = NULL;
186#endif
187 for(uint32_t i = 0; i < PxCudaBufferMemorySpace::COUNT; i++)
188 {
189 memoryBaseSize[i] = 0;
190 memoryPageSize[i] = 2 * 1024*1024;
191 maxMemorySize[i] = UINT32_MAX;
192 }
193 }
194};
195
196
/**
 * \brief Manages memory, thread locks, and task scheduling for a CUDA context
 *
 * A PxCudaContextManager manages access to a single CUDA context, allowing it to
 * be shared between multiple scenes. Memory allocations are dynamic: starting
 * with an initial heap size and growing on demand by a configurable page size.
 * The context must be acquired from the manager before using any CUDA APIs.
 *
 * The PxCudaContextManager is based on the CUDA driver API and explicitly does not
 * support the CUDA runtime API (aka, CUDART).
 */
class PxCudaContextManager
{
public:
	/**
	 * \brief Acquire the CUDA context for the current thread
	 *
	 * Acquisitions are allowed to be recursive within a single thread.
	 * You can acquire the context multiple times so long as you release
	 * it the same count.
	 *
	 * The context must be acquired before using most CUDA functions.
	 */
	virtual void acquireContext() = 0;

	/**
	 * \brief Release the CUDA context from the current thread
	 *
	 * The CUDA context should be released as soon as practically
	 * possible, to allow other CPU threads to work efficiently.
	 */
	virtual void releaseContext() = 0;

	/**
	 * \brief Return the managed CUcontext handle
	 */
	virtual CUcontext getContext() = 0;

	/**
	 * \brief Return the PxCudaMemoryManager instance associated with this
	 * CUDA context
	 *
	 * Note: This is currently not used by PxSceneFlag::eENABLE_GPU_DYNAMICS.
	 * Memory allocation properties are configured for GPU rigid bodies
	 * using PxSceneDesc::gpuDynamicsConfig.
	 */
	virtual PxCudaMemoryManager *getMemoryManager() = 0;

	/**
	 * \brief Returns whether the context manager has a valid CUDA context
	 *
	 * This method should be called after creating a PxCudaContextManager,
	 * especially if the manager was responsible for allocating its own
	 * CUDA context (desc.ctx == NULL).
	 */
	virtual bool contextIsValid() const = 0;

	/* Query CUDA context and device properties, without acquiring context */

	virtual bool supportsArchSM10() const = 0;  //!< G80
	virtual bool supportsArchSM11() const = 0;  //!< G92
	virtual bool supportsArchSM12() const = 0;  //!< GT200
	virtual bool supportsArchSM13() const = 0;  //!< GT260
	virtual bool supportsArchSM20() const = 0;  //!< GF100
	virtual bool supportsArchSM30() const = 0;  //!< GK100
	virtual bool supportsArchSM35() const = 0;  //!< GK110
	virtual bool supportsArchSM50() const = 0;  //!< GM100
	virtual bool supportsArchSM52() const = 0;  //!< GM200
	virtual bool supportsArchSM60() const = 0;  //!< GP100
	virtual bool isIntegrated() const = 0;      //!< true if GPU is an integrated (MCP) part
	virtual bool canMapHostMemory() const = 0;  //!< true if GPU can map host memory to GPU (0-copy)
	virtual int  getDriverVersion() const = 0;  //!< returns cached value of cuGetDriverVersion()
	virtual size_t getDeviceTotalMemBytes() const = 0; //!< returns cached value of device memory size
	virtual int	getMultiprocessorCount() const = 0; //!< returns cached value of SM unit count
	virtual unsigned int getClockRate() const = 0; //!< returns cached value of SM clock frequency
	virtual int    getSharedMemPerBlock() const = 0; //!< returns total amount of shared memory available per block in bytes
	virtual int    getSharedMemPerMultiprocessor() const = 0; //!< returns total amount of shared memory available per multiprocessor in bytes
	virtual unsigned int getMaxThreadsPerBlock() const = 0; //!< returns the maximum number of threads per block
	virtual const char *getDeviceName() const = 0; //!< returns device name retrieved from driver
	virtual CUdevice getDevice() const = 0; //!< returns device handle retrieved from driver
	virtual PxCudaInteropMode::Enum getInteropMode() const = 0; //!< interop mode the context was created with

	virtual void setUsingConcurrentStreams(bool) = 0; //!< turn on/off using concurrent streams for GPU work
	virtual bool getUsingConcurrentStreams() const = 0; //!< true if GPU work can run in concurrent streams
	/* End query methods that don't require context to be acquired */

	/**
	 * \brief Register a rendering resource with CUDA
	 *
	 * This function is called to register render resources (allocated
	 * from OpenGL) with CUDA so that the memory may be shared
	 * between the two systems. This is only required for render
	 * resources that are designed for interop use. In APEX, each
	 * render resource descriptor that could support interop has a
	 * 'registerInCUDA' boolean variable.
	 *
	 * The function must be called again any time your graphics device
	 * is reset, to re-register the resource.
	 *
	 * Returns true if the registration succeeded. A registered
	 * resource must be unregistered before it can be released.
	 *
	 * \param resource [OUT] the handle to the resource that can be used with CUDA
	 * \param buffer [IN] GLuint buffer index to be mapped to cuda
	 * \param flags [IN] cuda interop registration flags
	 */
	virtual bool registerResourceInCudaGL(CUgraphicsResource &resource, uint32_t buffer, PxCudaInteropRegisterFlags flags = PxCudaInteropRegisterFlags()) = 0;

	/**
	 * \brief Register a rendering resource with CUDA
	 *
	 * This function is called to register render resources (allocated
	 * from Direct3D) with CUDA so that the memory may be shared
	 * between the two systems. This is only required for render
	 * resources that are designed for interop use. In APEX, each
	 * render resource descriptor that could support interop has a
	 * 'registerInCUDA' boolean variable.
	 *
	 * The function must be called again any time your graphics device
	 * is reset, to re-register the resource.
	 *
	 * Returns true if the registration succeeded. A registered
	 * resource must be unregistered before it can be released.
	 *
	 * \param resource [OUT] the handle to the resource that can be used with CUDA
	 * \param resourcePointer [IN] A pointer to either IDirect3DResource9, or ID3D10Device, or ID3D11Resource to be registered.
	 * \param flags [IN] cuda interop registration flags
	 */
	virtual bool registerResourceInCudaD3D(CUgraphicsResource &resource, void *resourcePointer, PxCudaInteropRegisterFlags flags = PxCudaInteropRegisterFlags()) = 0;

	/**
	 * \brief Unregister a rendering resource with CUDA
	 *
	 * If a render resource was successfully registered with CUDA using
	 * the registerResourceInCuda***() methods, this function must be
	 * called to unregister the resource before it can be released.
	 */
	virtual bool unregisterResourceInCuda(CUgraphicsResource resource) = 0;

	/**
	 * \brief Determine if the user has configured a dedicated PhysX GPU in the NV Control Panel
	 * \note If using CUDA Interop, this will always return false
	 * \returns 1 if there is a dedicated GPU
	 *          0 if there is NOT a dedicated GPU
	 *         -1 if the routine is not implemented
	 */
	virtual int usingDedicatedGPU() const = 0;

	/**
	 * \brief Release the PxCudaContextManager
	 *
	 * When the manager instance is released, it also releases its
	 * PxCudaMemoryManager. Before the memory manager is released, it
	 * frees all allocated memory pages. If the PxCudaContextManager
	 * created the CUDA context it was responsible for, it also frees
	 * that context.
	 *
	 * Do not release the PxCudaContextManager if there are any scenes
	 * using it. Those scenes must be released first.
	 *
	 */
	virtual void release() = 0;

protected:

	/**
	 * \brief protected destructor, use release() method
	 */
	virtual ~PxCudaContextManager() {}
};
365
366/**
367 * \brief Convenience class for holding CUDA lock within a scope
368 */
369class PxScopedCudaLock
370{
371public:
372 /**
373 * \brief ScopedCudaLock constructor
374 */
375 PxScopedCudaLock(PxCudaContextManager& ctx) : mCtx(&ctx)
376 {
377 mCtx->acquireContext();
378 }
379
380 /**
381 * \brief ScopedCudaLock destructor
382 */
383 ~PxScopedCudaLock()
384 {
385 mCtx->releaseContext();
386 }
387
388protected:
389
390 /**
391 * \brief CUDA context manager pointer (initialized in the constructor)
392 */
393 PxCudaContextManager* mCtx;
394};
395
396} // end physx namespace
397
398#endif // PX_SUPPORT_GPU_PHYSX
399#endif // PXCUDACONTEXTMANAGER_PXCUDACONTEXTMANAGER_H
400

source code of qtquick3dphysics/src/3rdparty/PhysX/include/cudamanager/PxCudaContextManager.h