#include <mempool.h>

Public Member Functions
void *	allocate (size_t size, cudaStream_t stream, const std::string &tag="", bool persistent=false)

void	free (void *ptr, cudaStream_t stream)

void *	allocatePersistentDevice (size_t bytes, const std::string &tag="")

void *	allocatePersistentPinned (size_t bytes, const std::string &tag="")

void	freePersistentDevice (void *ptr)

void	freePersistentPinned (void *ptr)

void	reset (cudaStream_t stream)

void	trim ()

void	setReleaseThreshold (size_t bytes)

void	synchronize (cudaStream_t stream)

size_t	getPersistentDeviceBytes () const

size_t	getPersistentPinnedBytes () const

size_t	getCurrentUsage () const

size_t	getPeakUsage () const

size_t	getAllocationCount () const

size_t	getConfiguredSize () const

cudaMemPool_t	getMemPool () const

bool	isFallbackMode () const

Detailed Description

Stream-ordered CUDA memory pool.

Uses cudaMallocAsync/cudaFreeAsync over a cudaMemPool_t for efficient reuse and CUDA Graph compatibility. All allocations are tracked for overflow warnings and debug printing.

Note: Non-copyable. Not thread-safe.

Member Function Documentation

◆ allocate()

void * fz::MemoryPool::allocate ( size_t size, cudaStream_t stream, const std::string & tag = "", bool persistent = false )

Allocate `size` bytes from the pool, ordered on `stream`.

Parameters

size	Bytes to allocate.
stream	CUDA stream ordering the allocation.
tag	Debug label stored in `AllocationInfo`.
persistent	If true, allocation survives `reset()` (graph replay); if false, `reset()` will free it.

Returns: Device pointer, or nullptr on failure.

◆ free()

void fz::MemoryPool::free ( void * ptr, cudaStream_t stream )

Free `ptr` back to the pool, ordered on `stream`.

◆ allocatePersistentDevice()

void * fz::MemoryPool::allocatePersistentDevice ( size_t bytes, const std::string & tag = "" )

Allocate `bytes` bytes of persistent device memory via `cudaMalloc`.

Use for stage-internal buffers that live for the stage's lifetime: codebooks, histograms, partition metadata. Not stream-ordered; not subject to MINIMAL/PREALLOCATE policy; safe across CUDA Graph captures (stable device address).

Freed explicitly via freePersistentDevice() or in bulk by the pool destructor. Tracked for getPersistentDeviceBytes() reporting.

◆ allocatePersistentPinned()

void * fz::MemoryPool::allocatePersistentPinned ( size_t bytes, const std::string & tag = "" )

Allocate `bytes` bytes of persistent pinned host memory via `cudaMallocHost`.

Use for host-side stage buffers that participate in async D2H/H2D transfers (codebook tables, partition metadata arrays). Pinned memory enables DMA without staging and is required for reliable async transfers.

Freed explicitly via freePersistentPinned() or in bulk by the pool destructor. Tracked for getPersistentPinnedBytes() reporting.

◆ freePersistentDevice()

void fz::MemoryPool::freePersistentDevice ( void * ptr )

Return a persistent device allocation, previously obtained from allocatePersistentDevice(), to the pool.

◆ freePersistentPinned()

void fz::MemoryPool::freePersistentPinned ( void * ptr )

Return a persistent pinned-host allocation, previously obtained from allocatePersistentPinned(), to the pool.

◆ reset()

void fz::MemoryPool::reset ( cudaStream_t stream )

Free all non-persistent allocations. Call between compression runs.

◆ trim()

void fz::MemoryPool::trim ( )

Release pool memory back to the OS if usage exceeds the release threshold.

◆ setReleaseThreshold()

void fz::MemoryPool::setReleaseThreshold ( size_t bytes )

Update the CUDA pool's release threshold and keep config in sync.

Called by Pipeline::finalize() after topology-aware sizing to replace the blunt input_size × multiplier estimate with a tighter bound.

Parameters

bytes New threshold in bytes.

◆ synchronize()

void fz::MemoryPool::synchronize ( cudaStream_t stream )

Block until all stream-ordered operations on `stream` have completed.

◆ getPersistentDeviceBytes()

size_t fz::MemoryPool::getPersistentDeviceBytes ( ) const

inline

Bytes currently held in persistent device allocations.

◆ getPersistentPinnedBytes()

size_t fz::MemoryPool::getPersistentPinnedBytes ( ) const

inline

Bytes currently held in persistent pinned-host allocations.

◆ getCurrentUsage()

size_t fz::MemoryPool::getCurrentUsage ( ) const

inline

Current live bytes (queries cudaMemPoolAttrUsedMemCurrent).

◆ getPeakUsage()

size_t fz::MemoryPool::getPeakUsage ( ) const

inline

Peak live bytes since last reset (queries cudaMemPoolAttrUsedMemHigh).

◆ getAllocationCount()

size_t fz::MemoryPool::getAllocationCount ( ) const

inline

Total number of currently live allocations (stream + graph).

◆ getConfiguredSize()

size_t fz::MemoryPool::getConfiguredSize ( ) const

inline

Soft-capacity hint passed at construction (used only for overflow warnings; the CUDA pool itself is not hard-capped).

◆ getMemPool()

cudaMemPool_t fz::MemoryPool::getMemPool ( ) const

inline

Raw cudaMemPool_t handle for advanced usage.

◆ isFallbackMode()

bool fz::MemoryPool::isFallbackMode ( ) const

inline

Returns true if operating in cudaMalloc fallback mode (pool creation failed or was forced).

Public Member Functions

Detailed Description

Member Function Documentation

◆ allocate()

◆ free()

◆ allocatePersistentDevice()

◆ allocatePersistentPinned()

◆ freePersistentDevice()

◆ freePersistentPinned()

◆ reset()

◆ trim()

◆ setReleaseThreshold()

◆ synchronize()

◆ getPersistentDeviceBytes()

◆ getPersistentPinnedBytes()

◆ getCurrentUsage()

◆ getPeakUsage()

◆ getAllocationCount()

◆ getConfiguredSize()

◆ getMemPool()

◆ isFallbackMode()