FZGPUModules 1.0
GPU-accelerated modular compression pipeline
Loading...
Searching...
No Matches
fz::MemoryPool Class Reference

#include <mempool.h>

Public Member Functions

void * allocate (size_t size, cudaStream_t stream, const std::string &tag="", bool persistent=false)
 
void free (void *ptr, cudaStream_t stream)
 
void reset (cudaStream_t stream)
 
void trim ()
 
void setReleaseThreshold (size_t bytes)
 
void synchronize (cudaStream_t stream)
 
size_t getCurrentUsage () const
 
size_t getPeakUsage () const
 
size_t getAllocationCount () const
 
size_t getConfiguredSize () const
 
cudaMemPool_t getMemPool () const
 

Detailed Description

Stream-ordered CUDA memory pool.

Uses cudaMallocAsync/cudaFreeAsync over a cudaMemPool_t for efficient reuse and CUDA Graph compatibility. All allocations are tracked for overflow warnings and debug printing.

Note
Non-copyable. Not thread-safe.

Member Function Documentation

◆ allocate()

void * fz::MemoryPool::allocate (size_t size, cudaStream_t stream, const std::string &tag="", bool persistent=false)

Allocate size bytes from the pool on stream.

Parameters
size: Bytes to allocate.
stream: CUDA stream ordering the allocation.
tag: Debug label stored in AllocationInfo.
persistent: If true, the allocation survives reset() (graph replay); if false, reset() will free it.
Returns
Device pointer, or nullptr on failure.

◆ free()

void fz::MemoryPool::free (void *ptr, cudaStream_t stream)

Free ptr back to the pool, ordered on stream.

◆ reset()

void fz::MemoryPool::reset ( cudaStream_t  stream)

Free all non-persistent allocations. Call between compression runs.

◆ trim()

void fz::MemoryPool::trim ( )

Release pool memory back to the OS if usage exceeds the release threshold.

◆ setReleaseThreshold()

void fz::MemoryPool::setReleaseThreshold ( size_t  bytes)

Update the CUDA pool's release threshold and keep config in sync.

Called by Pipeline::finalize() after topology-aware sizing to replace the blunt input_size × multiplier estimate with a tighter bound.

Parameters
bytes: New threshold in bytes.

◆ synchronize()

void fz::MemoryPool::synchronize ( cudaStream_t  stream)

Block until all stream-ordered operations on stream complete.

◆ getCurrentUsage()

size_t fz::MemoryPool::getCurrentUsage ( ) const
inline

Current live bytes (queries cudaMemPoolAttrUsedMemCurrent).

◆ getPeakUsage()

size_t fz::MemoryPool::getPeakUsage ( ) const
inline

Peak live bytes since last reset (queries cudaMemPoolAttrUsedMemHigh).

◆ getAllocationCount()

size_t fz::MemoryPool::getAllocationCount ( ) const
inline

Total number of currently live allocations (stream + graph).

◆ getConfiguredSize()

size_t fz::MemoryPool::getConfiguredSize ( ) const
inline

Soft-capacity hint passed at construction (used only for overflow warnings; the CUDA pool itself is not hard-capped).

◆ getMemPool()

cudaMemPool_t fz::MemoryPool::getMemPool ( ) const
inline

Raw cudaMemPool_t handle for advanced usage.