8 #include <cuda_runtime.h>
13 #include <unordered_map>
25 AllocationInfo(
void* p =
nullptr,
size_t s = 0,
const std::string& t =
"")
55 size_t input_size = 0,
56 float multiplier = 3.0f,
103 const std::string& tag =
"",
bool persistent =
false);
106 void free(
void* ptr, cudaStream_t stream);
172 if (!mem_pool_)
return current_allocated_bytes_;
174 cudaMemPoolGetAttribute(mem_pool_, cudaMemPoolAttrUsedMemCurrent, &used);
175 return static_cast<size_t>(used);
180 if (!mem_pool_)
return current_allocated_bytes_;
182 cudaMemPoolGetAttribute(mem_pool_, cudaMemPoolAttrUsedMemHigh, &high);
183 return static_cast<size_t>(high);
189 void printStats()
const;
203 int getDeviceId()
const {
return config_.
device_id; }
206 MemoryPoolConfig config_;
207 cudaMemPool_t mem_pool_;
209 std::unordered_map<void*, AllocationInfo> allocations_;
210 std::unordered_map<void*, AllocationInfo> graph_allocations_;
213 std::vector<PersistentAllocInfo> persistent_allocs_;
214 size_t persistent_device_bytes_ = 0;
215 size_t persistent_pinned_bytes_ = 0;
217 size_t total_allocations_;
221 size_t current_allocated_bytes_;
224 bool overflow_warned_;
227 void initializeMemPool();
void reset(cudaStream_t stream)
size_t getPersistentPinnedBytes() const
Definition mempool.h:168
cudaMemPool_t getMemPool() const
Definition mempool.h:198
void freePersistentPinned(void *ptr)
bool isFallbackMode() const
Definition mempool.h:201
void free(void *ptr, cudaStream_t stream)
void synchronize(cudaStream_t stream)
size_t getPeakUsage() const
Definition mempool.h:179
void * allocatePersistentPinned(size_t bytes, const std::string &tag="")
size_t getCurrentUsage() const
Definition mempool.h:171
size_t getAllocationCount() const
Definition mempool.h:187
void setReleaseThreshold(size_t bytes)
void * allocatePersistentDevice(size_t bytes, const std::string &tag="")
size_t getPersistentDeviceBytes() const
Definition mempool.h:165
void * allocate(size_t size, cudaStream_t stream, const std::string &tag="", bool persistent=false)
size_t getConfiguredSize() const
Definition mempool.h:195
void freePersistentDevice(void *ptr)
Definition fzm_format.h:25
size_t size
Size in bytes.
Definition mempool.h:21
std::string tag
Debug label (e.g. "lorenzo_output").
Definition mempool.h:22
void * ptr
Device pointer.
Definition mempool.h:20
bool in_use
True while allocated.
Definition mempool.h:23
int device_id
CUDA device index.
Definition mempool.h:49
bool enable_reuse
Enable opportunistic buffer reuse.
Definition mempool.h:50
size_t getPoolSize() const
Definition mempool.h:67
size_t input_data_size
Input byte count used to size the pool.
Definition mempool.h:47
float pool_size_multiplier
Pool capacity = input_data_size × pool_size_multiplier.
Definition mempool.h:48
bool force_fallback
Skip pool creation and use cudaMalloc; for vGPU or testing.
Definition mempool.h:52
bool is_pinned
True if allocated with cudaMallocHost (pinned host memory); false if allocated with cudaMalloc (device memory).
Definition mempool.h:42