FZGPUModules 2.0
GPU-accelerated modular compression pipelines
Loading...
Searching...
No Matches
mempool.h
Go to the documentation of this file.
1#pragma once
2
8#include <cuda_runtime.h>
9
10#include <cstddef>
11#include <memory>
12#include <string>
13#include <unordered_map>
14#include <vector>
15
16namespace fz {
17
// Bookkeeping record for one device allocation tracked by MemoryPool.
20 void* ptr;    // Device pointer.
21 size_t size;    // Size in bytes.
22 std::string tag;    // Debug label (e.g. "lorenzo_output").
23 bool in_use;    // True while allocated.
24
// A freshly constructed record is always marked live (in_use = true).
25 AllocationInfo(void* p = nullptr, size_t s = 0, const std::string& t = "")
26 : ptr(p), size(s), tag(t), in_use(true) {}
27};
28
35
37
39 size_t input_size = 0,
40 float multiplier = 3.0f,
41 int device = 0,
42 bool reuse = true,
43 bool force_fallback = false)
44 : input_data_size(input_size),
45 pool_size_multiplier(multiplier),
46 device_id(device),
47 enable_reuse(reuse),
49
/// Computes the pool capacity in bytes.
/// Returns a fixed 1 GiB when no input size was configured; otherwise
/// input_data_size scaled by pool_size_multiplier.
size_t getPoolSize() const {
    if (input_data_size == 0) return 1024ULL * 1024 * 1024;  // 1 GiB default
    // Widen to double before scaling: multiplying a size_t by a float
    // converts the size to float (24-bit mantissa), losing precision for
    // inputs larger than 16 MiB and mis-sizing multi-GiB pools.
    return static_cast<size_t>(static_cast<double>(input_data_size) *
                               static_cast<double>(pool_size_multiplier));
}
55};
56
67public:
// Builds the pool from `config` (defaults: see MemoryPoolConfig).
68 explicit MemoryPool(const MemoryPoolConfig& config = MemoryPoolConfig());
70
// Non-copyable.
71 MemoryPool(const MemoryPool&) = delete;
72 MemoryPool& operator=(const MemoryPool&) = delete;
73
74 // ── Allocation ────────────────────────────────────────────────────────────
75
// Returns a device pointer of `size` bytes, ordered on `stream`.
// `tag` is a debug label (see AllocationInfo::tag). NOTE(review): the
// semantics of `persistent` are not visible in this header — presumably the
// buffer survives reset(); confirm in the implementation file.
86 void* allocate(size_t size, cudaStream_t stream,
87 const std::string& tag = "", bool persistent = false);
88
// Returns `ptr` (previously obtained from allocate()) to the pool, ordered
// on `stream`.
90 void free(void* ptr, cudaStream_t stream);
91
92 // ── Lifecycle ─────────────────────────────────────────────────────────────
93
// Declaration only — presumably releases tracked allocations; confirm how it
// interacts with `persistent` buffers in the implementation file.
95 void reset(cudaStream_t stream);
96
// Declaration only — presumably returns unused pool memory to the driver.
98 void trim();
99
// Declaration only — presumably maps to cudaMemPoolAttrReleaseThreshold;
// confirm in the implementation file.
108 void setReleaseThreshold(size_t bytes);
109
// Declaration only — presumably synchronizes `stream`; confirm in the
// implementation file.
111 void synchronize(cudaStream_t stream);
112
113 // ── Stats & debug ─────────────────────────────────────────────────────────
114
// Current device-memory usage in bytes (cudaMemPoolAttrUsedMemCurrent).
// Fallback mode (no CUDA mem pool): reports the host-side running counter.
116 size_t getCurrentUsage() const {
117 if (!mem_pool_) return current_allocated_bytes_;
118 uint64_t used = 0;
// NOTE(review): the cudaError_t returned here is ignored — on failure `used`
// stays 0 and is reported as the usage; consider checking against cudaSuccess.
119 cudaMemPoolGetAttribute(mem_pool_, cudaMemPoolAttrUsedMemCurrent, &used);
120 return static_cast<size_t>(used);
121 }
122
// High-water device-memory usage in bytes (cudaMemPoolAttrUsedMemHigh).
// NOTE(review): in fallback mode this returns current_allocated_bytes_ — the
// CURRENT total, not a peak; no host-side high-water counter is visible in
// this header. Confirm whether that is intended.
124 size_t getPeakUsage() const {
125 if (!mem_pool_) return current_allocated_bytes_;
126 uint64_t high = 0;
// NOTE(review): the cudaError_t returned here is ignored — on failure `high`
// stays 0 and is reported as the peak; consider checking against cudaSuccess.
127 cudaMemPoolGetAttribute(mem_pool_, cudaMemPoolAttrUsedMemHigh, &high);
128 return static_cast<size_t>(high);
129 }
130
// Number of live tracked allocations across both bookkeeping maps.
132 size_t getAllocationCount() const { return allocations_.size() + graph_allocations_.size(); }
133
// Declaration only — presumably logs the statistics exposed above.
134 void printStats() const;
135
// Capacity the pool was configured for (MemoryPoolConfig::getPoolSize()).
140 size_t getConfiguredSize() const { return config_.getPoolSize(); }
141
// Raw CUDA mem-pool handle; nullptr in fallback mode.
143 cudaMemPool_t getMemPool() const { return mem_pool_; }
144
// True when no CUDA mem pool exists (cudaMalloc fallback path — see
// MemoryPoolConfig::force_fallback).
146 bool isFallbackMode() const { return mem_pool_ == nullptr; }
147
// CUDA device index this pool targets.
148 int getDeviceId() const { return config_.device_id; }
149
150private:
151 MemoryPoolConfig config_;    // Pool configuration (see MemoryPoolConfig).
152 cudaMemPool_t mem_pool_;    // CUDA mem-pool handle; nullptr in fallback mode.
153
154 std::unordered_map<void*, AllocationInfo> allocations_;    // Live allocations keyed by device pointer.
155 std::unordered_map<void*, AllocationInfo> graph_allocations_;    // presumably allocations tied to CUDA graphs — confirm in the implementation
156
157 size_t total_allocations_;    // presumably cumulative allocate() calls — confirm in the implementation
158 size_t total_frees_;    // presumably cumulative free() calls — confirm in the implementation
159 // Host-side running total of live bytes — used for overflow detection without
160 // querying a CUDA attribute on every hot-path allocation.
161 size_t current_allocated_bytes_;
162 // Set the first time current_allocated_bytes_ exceeds configured pool size
163 // so the overflow warning fires only once per reset() cycle.
164 bool overflow_warned_;
165 bool initialized_;    // presumably set once initializeMemPool() succeeds — confirm in the implementation
166
// Declaration only — presumably creates mem_pool_ per config_ (or leaves it
// null for fallback mode); confirm in the implementation file.
167 void initializeMemPool();
168};
169
170} // namespace fz
Definition mempool.h:66
void reset(cudaStream_t stream)
cudaMemPool_t getMemPool() const
Definition mempool.h:143
bool isFallbackMode() const
Definition mempool.h:146
void free(void *ptr, cudaStream_t stream)
void synchronize(cudaStream_t stream)
size_t getPeakUsage() const
Definition mempool.h:124
size_t getCurrentUsage() const
Definition mempool.h:116
size_t getAllocationCount() const
Definition mempool.h:132
void setReleaseThreshold(size_t bytes)
void * allocate(size_t size, cudaStream_t stream, const std::string &tag="", bool persistent=false)
size_t getConfiguredSize() const
Definition mempool.h:140
Definition mempool.h:19
size_t size
Size in bytes.
Definition mempool.h:21
std::string tag
Debug label (e.g. "lorenzo_output").
Definition mempool.h:22
void * ptr
Device pointer.
Definition mempool.h:20
bool in_use
True while allocated.
Definition mempool.h:23
Definition mempool.h:30
int device_id
CUDA device index.
Definition mempool.h:33
bool enable_reuse
Enable opportunistic buffer reuse.
Definition mempool.h:34
size_t getPoolSize() const
Definition mempool.h:51
size_t input_data_size
Input byte count used to size the pool.
Definition mempool.h:31
float pool_size_multiplier
Pool capacity = input_data_size × multiplier.
Definition mempool.h:32
bool force_fallback
Skip pool creation and use cudaMalloc; for vGPU or testing.
Definition mempool.h:36