|
FZGPUModules 2.0
GPU-accelerated modular compression pipelines
|
#include <mempool.h>
Public Member Functions | |
| void * | allocate (size_t size, cudaStream_t stream, const std::string &tag="", bool persistent=false) |
| void | free (void *ptr, cudaStream_t stream) |
| void * | allocatePersistentDevice (size_t bytes, const std::string &tag="") |
| void * | allocatePersistentPinned (size_t bytes, const std::string &tag="") |
| void | freePersistentDevice (void *ptr) |
| void | freePersistentPinned (void *ptr) |
| void | reset (cudaStream_t stream) |
| void | trim () |
| void | setReleaseThreshold (size_t bytes) |
| void | synchronize (cudaStream_t stream) |
| size_t | getPersistentDeviceBytes () const |
| size_t | getPersistentPinnedBytes () const |
| size_t | getCurrentUsage () const |
| size_t | getPeakUsage () const |
| size_t | getAllocationCount () const |
| size_t | getConfiguredSize () const |
| cudaMemPool_t | getMemPool () const |
| bool | isFallbackMode () const |
Stream-ordered CUDA memory pool.
Uses cudaMallocAsync/cudaFreeAsync over a cudaMemPool_t for efficient reuse and CUDA Graph compatibility. All allocations are tracked for overflow warnings and debug printing.
| void * fz::MemoryPool::allocate | ( | size_t | size, |
| cudaStream_t | stream, | ||
| const std::string & | tag = "", |
||
| bool | persistent = false |
||
| ) |
Allocate size bytes from the pool on stream.
| size | Bytes to allocate. |
| stream | CUDA stream ordering the allocation. |
| tag | Debug label stored in AllocationInfo. |
| persistent | If true, allocation survives reset() (graph replay); if false, reset() will free it. |
| void fz::MemoryPool::free | ( | void * | ptr, |
| cudaStream_t | stream | ||
| ) |
Free ptr back to the pool, ordered on stream.
| void * fz::MemoryPool::allocatePersistentDevice | ( | size_t | bytes, |
| const std::string & | tag = "" |
||
| ) |
Allocate `bytes` bytes of persistent device memory via cudaMalloc.
Use for stage-internal buffers that live for the stage's lifetime: codebooks, histograms, partition metadata. Not stream-ordered; not subject to MINIMAL/PREALLOCATE policy; safe across CUDA Graph captures (stable device address).
Freed explicitly via freePersistentDevice() or in bulk by the pool destructor. Tracked for getPersistentDeviceBytes() reporting.
| void * fz::MemoryPool::allocatePersistentPinned | ( | size_t | bytes, |
| const std::string & | tag = "" |
||
| ) |
Allocate `bytes` bytes of persistent pinned host memory via cudaMallocHost.
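Use for host-side stage buffers that participate in async D2H/H2D transfers (codebook tables, partition metadata arrays). Pinned (page-locked) memory lets the GPU DMA directly to and from the host buffer without an intermediate staging copy; copies involving pageable host memory may not overlap with host execution, so pinned buffers are needed for transfers to be truly asynchronous.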
Freed explicitly via freePersistentPinned() or in bulk by the pool destructor. Tracked for getPersistentPinnedBytes() reporting.
| void fz::MemoryPool::freePersistentDevice | ( | void * | ptr | ) |
Return a persistent-device allocation, previously obtained from allocatePersistentDevice(), to the pool.
| void fz::MemoryPool::freePersistentPinned | ( | void * | ptr | ) |
Return a persistent-pinned allocation, previously obtained from allocatePersistentPinned(), to the pool.
| void fz::MemoryPool::reset | ( | cudaStream_t | stream | ) |
Free all non-persistent allocations. Call between compression runs.
| void fz::MemoryPool::trim | ( | ) |
Release pool memory back to the OS if usage exceeds the release threshold.
| void fz::MemoryPool::setReleaseThreshold | ( | size_t | bytes | ) |
Update the CUDA pool's release threshold and keep config in sync.
Called by Pipeline::finalize() after topology-aware sizing to replace the blunt input_size × multiplier estimate with a tighter bound.
| bytes | New threshold in bytes. |
| void fz::MemoryPool::synchronize | ( | cudaStream_t | stream | ) |
Block until all stream-ordered operations on stream complete.
| size_t fz::MemoryPool::getPersistentDeviceBytes | ( | ) const |
inline |
Bytes currently held in persistent device allocations.
| size_t fz::MemoryPool::getPersistentPinnedBytes | ( | ) const |
inline |
Bytes currently held in persistent pinned-host allocations.
| size_t fz::MemoryPool::getCurrentUsage | ( | ) const |
inline |
Current live bytes (queries cudaMemPoolAttrUsedMemCurrent).
| size_t fz::MemoryPool::getPeakUsage | ( | ) const |
inline |
Peak live bytes since last reset (queries cudaMemPoolAttrUsedMemHigh).
| size_t fz::MemoryPool::getAllocationCount | ( | ) const |
inline |
Total number of currently live allocations (stream + graph).
| size_t fz::MemoryPool::getConfiguredSize | ( | ) const |
inline |
Soft-capacity hint passed at construction (used only for overflow warnings; the CUDA pool itself is not hard-capped).
| cudaMemPool_t fz::MemoryPool::getMemPool | ( | ) const |
inline |
Raw cudaMemPool_t handle for advanced usage.
| bool fz::MemoryPool::isFallbackMode | ( | ) const |
inline |
Returns true if operating in cudaMalloc fallback mode (pool creation failed or was forced).