FZGPUModules 1.0
GPU-accelerated modular compression pipeline
Loading...
Searching...
No Matches
mempool.h
Go to the documentation of this file.
1#pragma once
2
8#include <cuda_runtime.h>
9
10#include <cstddef>
11#include <memory>
12#include <string>
13#include <unordered_map>
14#include <vector>
15
16namespace fz {
17
20 void* ptr;
21 size_t size;
22 std::string tag;
23 bool in_use;
24
25 AllocationInfo(void* p = nullptr, size_t s = 0, const std::string& t = "")
26 : ptr(p), size(s), tag(t), in_use(true) {}
27};
28
35
37 size_t input_size = 0,
38 float multiplier = 3.0f,
39 int device = 0,
40 bool reuse = true)
41 : input_data_size(input_size),
42 pool_size_multiplier(multiplier),
43 device_id(device),
44 enable_reuse(reuse) {}
45
47 size_t getPoolSize() const {
48 if (input_data_size == 0) return 1024ULL * 1024 * 1024;
49 return static_cast<size_t>(input_data_size * pool_size_multiplier);
50 }
51};
52
63public:
64 explicit MemoryPool(const MemoryPoolConfig& config = MemoryPoolConfig());
66
67 MemoryPool(const MemoryPool&) = delete;
68 MemoryPool& operator=(const MemoryPool&) = delete;
69
70 // ── Allocation ────────────────────────────────────────────────────────────
71
82 void* allocate(size_t size, cudaStream_t stream,
83 const std::string& tag = "", bool persistent = false);
84
86 void free(void* ptr, cudaStream_t stream);
87
88 // ── Lifecycle ─────────────────────────────────────────────────────────────
89
91 void reset(cudaStream_t stream);
92
94 void trim();
95
104 void setReleaseThreshold(size_t bytes);
105
107 void synchronize(cudaStream_t stream);
108
109 // ── Stats & debug ─────────────────────────────────────────────────────────
110
112 size_t getCurrentUsage() const {
113 uint64_t used = 0;
114 cudaMemPoolGetAttribute(mem_pool_, cudaMemPoolAttrUsedMemCurrent, &used);
115 return static_cast<size_t>(used);
116 }
117
119 size_t getPeakUsage() const {
120 uint64_t high = 0;
121 cudaMemPoolGetAttribute(mem_pool_, cudaMemPoolAttrUsedMemHigh, &high);
122 return static_cast<size_t>(high);
123 }
124
126 size_t getAllocationCount() const { return allocations_.size() + graph_allocations_.size(); }
127
128 void printStats() const;
129
134 size_t getConfiguredSize() const { return config_.getPoolSize(); }
135
137 cudaMemPool_t getMemPool() const { return mem_pool_; }
138
139 int getDeviceId() const { return config_.device_id; }
140
141private:
142 MemoryPoolConfig config_;
143 cudaMemPool_t mem_pool_;
144
145 std::unordered_map<void*, AllocationInfo> allocations_;
146 std::unordered_map<void*, AllocationInfo> graph_allocations_;
147
148 size_t total_allocations_;
149 size_t total_frees_;
150 // Host-side running total of live bytes — used for overflow detection without
151 // querying a CUDA attribute on every hot-path allocation.
152 size_t current_allocated_bytes_;
153 // Set the first time current_allocated_bytes_ exceeds configured pool size
154 // so the overflow warning fires only once per reset() cycle.
155 bool overflow_warned_;
156 bool initialized_;
157
158 void initializeMemPool();
159};
160
161} // namespace fz
Definition mempool.h:62
void reset(cudaStream_t stream)
cudaMemPool_t getMemPool() const
Definition mempool.h:137
void free(void *ptr, cudaStream_t stream)
void synchronize(cudaStream_t stream)
size_t getPeakUsage() const
Definition mempool.h:119
size_t getCurrentUsage() const
Definition mempool.h:112
size_t getAllocationCount() const
Definition mempool.h:126
void setReleaseThreshold(size_t bytes)
void * allocate(size_t size, cudaStream_t stream, const std::string &tag="", bool persistent=false)
size_t getConfiguredSize() const
Definition mempool.h:134
Definition fzm_format.h:25
Definition mempool.h:19
size_t size
Size in bytes.
Definition mempool.h:21
std::string tag
Debug label (e.g. "lorenzo_output").
Definition mempool.h:22
void * ptr
Device pointer.
Definition mempool.h:20
bool in_use
True while allocated.
Definition mempool.h:23
Definition mempool.h:30
int device_id
CUDA device index.
Definition mempool.h:33
bool enable_reuse
Enable opportunistic buffer reuse.
Definition mempool.h:34
size_t getPoolSize() const
Definition mempool.h:47
size_t input_data_size
Input byte count used to size the pool.
Definition mempool.h:31
float pool_size_multiplier
Pool capacity = input_data_size × multiplier (a fixed 1 GiB is used instead when input_data_size is 0).
Definition mempool.h:32