8 #include <cuda_runtime.h>
13 #include <unordered_map>
25 AllocationInfo(void* p = nullptr, size_t s = 0, const std::string& t = "")
39 size_t input_size = 0,
40 float multiplier = 3.0f,
86 void* allocate(size_t size, cudaStream_t stream,
87 const std::string& tag = "", bool persistent = false);
90 void free(void* ptr, cudaStream_t stream);
95 void reset(cudaStream_t stream);
117 if (!mem_pool_) return current_allocated_bytes_;
119 cudaMemPoolGetAttribute(mem_pool_, cudaMemPoolAttrUsedMemCurrent, &used);
120 return static_cast<size_t>(used);
125 if (!mem_pool_) return current_allocated_bytes_;
127 cudaMemPoolGetAttribute(mem_pool_, cudaMemPoolAttrUsedMemHigh, &high);
128 return static_cast<size_t>(high);
134 void printStats() const;
148 int getDeviceId() const { return config_.device_id; }
151 MemoryPoolConfig config_;
152 cudaMemPool_t mem_pool_;
154 std::unordered_map<void*, AllocationInfo> allocations_;
155 std::unordered_map<void*, AllocationInfo> graph_allocations_;
157 size_t total_allocations_;
161 size_t current_allocated_bytes_;
164 bool overflow_warned_;
167 void initializeMemPool();
void reset(cudaStream_t stream)
cudaMemPool_t getMemPool() const
Definition mempool.h:143
bool isFallbackMode() const
Definition mempool.h:146
void free(void *ptr, cudaStream_t stream)
void synchronize(cudaStream_t stream)
size_t getPeakUsage() const
Definition mempool.h:124
size_t getCurrentUsage() const
Definition mempool.h:116
size_t getAllocationCount() const
Definition mempool.h:132
void setReleaseThreshold(size_t bytes)
void * allocate(size_t size, cudaStream_t stream, const std::string &tag="", bool persistent=false)
size_t getConfiguredSize() const
Definition mempool.h:140
size_t size
Size in bytes.
Definition mempool.h:21
std::string tag
Debug label (e.g. "lorenzo_output").
Definition mempool.h:22
void * ptr
Device pointer.
Definition mempool.h:20
bool in_use
True while allocated.
Definition mempool.h:23
int device_id
CUDA device index.
Definition mempool.h:33
bool enable_reuse
Enable opportunistic buffer reuse.
Definition mempool.h:34
size_t getPoolSize() const
Definition mempool.h:51
size_t input_data_size
Input byte count used to size the pool.
Definition mempool.h:31
float pool_size_multiplier
Pool capacity = input_data_size × pool_size_multiplier.
Definition mempool.h:32
bool force_fallback
Skip pool creation and use cudaMalloc; for vGPU or testing.
Definition mempool.h:36