|
FZGPUModules 1.0
GPU-accelerated modular compression pipeline
|
#include <mempool.h>
Public Member Functions
| void * | allocate (size_t size, cudaStream_t stream, const std::string &tag="", bool persistent=false) |
| void | free (void *ptr, cudaStream_t stream) |
| void | reset (cudaStream_t stream) |
| void | trim () |
| void | setReleaseThreshold (size_t bytes) |
| void | synchronize (cudaStream_t stream) |
| size_t | getCurrentUsage () const |
| size_t | getPeakUsage () const |
| size_t | getAllocationCount () const |
| size_t | getConfiguredSize () const |
| cudaMemPool_t | getMemPool () const |
Stream-ordered CUDA memory pool.
Uses cudaMallocAsync/cudaFreeAsync over a cudaMemPool_t for efficient reuse and CUDA Graph compatibility. All allocations are tracked for overflow warnings and debug printing.
| void * fz::MemoryPool::allocate | ( | size_t | size, |
| cudaStream_t | stream, | ||
| const std::string & | tag = "", | ||
| bool | persistent = false | ||
| ) |
Allocate size bytes from the pool on stream.
| size | Bytes to allocate. |
| stream | CUDA stream ordering the allocation. |
| tag | Debug label stored in AllocationInfo. |
| persistent | If true, allocation survives reset() (graph replay); if false, reset() will free it. |
| void fz::MemoryPool::free | ( | void * | ptr, |
| cudaStream_t | stream | ||
| ) |
Free ptr back to the pool, ordered on stream.
| void fz::MemoryPool::reset | ( | cudaStream_t | stream | ) |
Free all non-persistent allocations. Call between compression runs.
| void fz::MemoryPool::trim | ( | ) |
Release pool memory back to the OS if usage exceeds the release threshold.
| void fz::MemoryPool::setReleaseThreshold | ( | size_t | bytes | ) |
Update the CUDA pool's release threshold and keep config in sync.
Called by Pipeline::finalize() after topology-aware sizing to replace the blunt input_size × multiplier estimate with a tighter bound.
| bytes | New threshold in bytes. |
| void fz::MemoryPool::synchronize | ( | cudaStream_t | stream | ) |
Block until all stream-ordered operations on stream complete.
| size_t fz::MemoryPool::getCurrentUsage | ( | ) | const |
inline |
Current live bytes (queries cudaMemPoolAttrUsedMemCurrent).
| size_t fz::MemoryPool::getPeakUsage | ( | ) | const |
inline |
Peak live bytes since last reset (queries cudaMemPoolAttrUsedMemHigh).
| size_t fz::MemoryPool::getAllocationCount | ( | ) | const |
inline |
Total number of currently live allocations (stream + graph).
| size_t fz::MemoryPool::getConfiguredSize | ( | ) | const |
inline |
Soft-capacity hint passed at construction (used only for overflow warnings; the CUDA pool itself is not hard-capped).
| cudaMemPool_t fz::MemoryPool::getMemPool | ( | ) | const |
inline |
Raw cudaMemPool_t handle for advanced usage.