FZGPUModules 2.0
GPU-accelerated modular compression pipelines
Loading...
Searching...
No Matches
huffman_stage.h
Go to the documentation of this file.
1#pragma once
2
27#include "stage/stage.h"
28#include "fzm_format.h"
29#include "coders/huffman/phf/hf.h" // phf_header, phf_stream_t
30
31#include <cuda_runtime.h>
32#include <cstdint>
33#include <cstring>
34#include <memory>
35#include <stdexcept>
36#include <string>
37#include <type_traits>
38#include <unordered_map>
39#include <vector>
40
41// Forward-declare phf::Buf<T> to avoid pulling hf_buf.h (CUDA-only) into this header.
42// HuffmanStage<T>::~HuffmanStage() is defined in huffman_stage.cu where the type is complete.
43namespace phf { template<typename E> struct Buf; }
44
45namespace fz {
46
49 Coarse,
50 Fine,
51};
52
67template <typename T>
68class HuffmanStage : public Stage {
69 static_assert(
70 std::is_same_v<T, uint8_t> ||
71 std::is_same_v<T, uint16_t> ||
72 std::is_same_v<T, uint32_t>,
73 "HuffmanStage: T must be uint8_t, uint16_t, or uint32_t.");
74
75public:
77 ~HuffmanStage() override;
78
79 // ── Configuration ─────────────────────────────────────────────────────────
80
96 void setBklen(uint32_t bklen) { bklen_ = bklen; }
97 uint32_t getBklen() const { return bklen_; }
98
106 void setEncodeMode(HuffmanEncodeMode mode) { encode_mode_ = mode; }
107 HuffmanEncodeMode getEncodeMode() const { return encode_mode_; }
108
109 // ── Stage control ─────────────────────────────────────────────────────────
110 void setInverse(bool inv) override { is_inverse_ = inv; }
111 bool isInverse() const override { return is_inverse_; }
112
113 // Histogram D2H makes this graph-incompatible regardless of encode mode.
114 bool isGraphCompatible() const override { return false; }
115
116 // ── Pool lifecycle ────────────────────────────────────────────────────────
117
126 void onFinalize(size_t estimated_inlen, MemoryPool* pool) override;
127
128 size_t estimateDeviceFootprintBytes(size_t inlen) const override;
129 size_t estimatePinnedFootprintBytes(size_t inlen) const override;
130
131 // ── Execution ─────────────────────────────────────────────────────────────
133 cudaStream_t stream,
134 MemoryPool* pool,
135 const std::vector<void*>& inputs,
136 const std::vector<void*>& outputs,
137 const std::vector<size_t>& sizes
138 ) override;
139
140 // ── Metadata ──────────────────────────────────────────────────────────────
141 std::string getName() const override { return "Huffman"; }
142 size_t getNumInputs() const override { return 1; }
143 size_t getNumOutputs()const override { return 1; }
144
145 std::vector<size_t> estimateOutputSizes(
146 const std::vector<size_t>& input_sizes
147 ) const override {
148 if (input_sizes.empty()) return {0};
149 if (!is_inverse_) {
150 // Upper bound: generous 2× input for worst-case bitstream + header overhead.
151 return {input_sizes[0] * 2 + 4096};
152 }
153 // Inverse: exact decoded size restored from the serialized header.
154 return {original_len_ * sizeof(T)};
155 }
156
157 std::unordered_map<std::string, size_t>
158 getActualOutputSizesByName() const override {
159 return {{"output", actual_output_size_}};
160 }
161
162 size_t getActualOutputSize(int index) const override {
163 return (index == 0) ? actual_output_size_ : 0;
164 }
165
166 // ── Type system ───────────────────────────────────────────────────────────
167 uint16_t getStageTypeId() const override {
168 return static_cast<uint16_t>(StageType::HUFFMAN);
169 }
170
171 // Byte-transparent output: opt out of pipeline type-compatibility checking.
172 uint8_t getOutputDataType(size_t /*output_index*/) const override {
173 return static_cast<uint8_t>(DataType::UNKNOWN);
174 }
175 uint8_t getInputDataType(size_t /*input_index*/) const override {
176 return static_cast<uint8_t>(DataType::UNKNOWN);
177 }
178
179 // ── Serialization ─────────────────────────────────────────────────────────
181 size_t /*output_index*/, uint8_t* buf, size_t max_size
182 ) const override {
183 if (max_size < 11) return 0;
184 buf[0] = static_cast<uint8_t>(dataTypeOf<T>());
185 uint16_t bk = static_cast<uint16_t>(bklen_);
186 std::memcpy(buf + 1, &bk, sizeof(uint16_t));
187 std::memcpy(buf + 3, &original_len_, sizeof(uint64_t));
188 return 11;
189 }
190
191 void deserializeHeader(const uint8_t* buf, size_t size) override {
192 if (size >= 3) {
193 uint16_t bk;
194 std::memcpy(&bk, buf + 1, sizeof(uint16_t));
195 bklen_ = bk;
196 }
197 if (size >= 11)
198 std::memcpy(&original_len_, buf + 3, sizeof(uint64_t));
199 }
200
201 size_t getMaxHeaderSize(size_t /*output_index*/) const override { return 11; }
202
203 void saveState() override {
204 saved_bklen_ = bklen_;
205 saved_original_len_ = original_len_;
206 saved_output_size_ = actual_output_size_;
207 }
208
209 void restoreState() override {
210 bklen_ = saved_bklen_;
211 original_len_ = saved_original_len_;
212 actual_output_size_ = saved_output_size_;
213 }
214
215private:
216 bool is_inverse_ = false;
217 uint32_t bklen_ = defaultBklen();
219 uint64_t original_len_ = 0; // element count set by forward execute
220 size_t actual_output_size_ = 0;
221 size_t cap_inlen_ = 0; // allocated capacity (elements); grow-only
222 uint32_t last_bklen_ = 0; // bklen_ when buf_ was last allocated
223 HuffmanEncodeMode last_encode_mode_ = HuffmanEncodeMode::Coarse; // encode_mode_ when buf_ was last allocated
224
225 // Histogram launch params — computed once in initBuf(), reused every execute()
226 int hist_grid_dim_ = 0;
227 int hist_block_dim_ = 0;
228 int hist_shmem_use_ = 0;
229 int hist_r_per_block_ = 0;
230
231 // PHF working buffers — allocated from pool_ on first execute() or in onFinalize()
232 std::unique_ptr<phf::Buf<T>> buf_;
233 phf_header header_ {};
234
235 // Pool used for buf_ allocations. Set by onFinalize() or captured from the
236 // pool parameter on the first execute() call. Raw non-owning pointer; the
237 // pool outlives the stage when used inside a Pipeline.
238 MemoryPool* pool_ = nullptr;
239
240 // saveState / restoreState snapshots
241 uint32_t saved_bklen_ = defaultBklen();
242 uint64_t saved_original_len_ = 0;
243 size_t saved_output_size_ = 0;
244
245 static constexpr uint32_t defaultBklen() {
246 if constexpr (std::is_same_v<T, uint8_t>) return 256;
247 return 1024;
248 }
249
250 template<typename U>
251 static constexpr DataType dataTypeOf() {
252 if constexpr (std::is_same_v<U, uint8_t>) return DataType::UINT8;
253 if constexpr (std::is_same_v<U, uint16_t>) return DataType::UINT16;
254 return DataType::UINT32;
255 }
256
257 // Allocates buf_ from pool and computes histogram launch params for the given
258 // element count. Must be in huffman_stage.cu: calls cudaFuncSetAttribute with
259 // a __global__ pointer. If buf_ already exists, destroys it first (returning
260 // its allocations to the pool) before creating the new one.
261 void initBuf(size_t inlen, MemoryPool* pool);
262};
263
264extern template class HuffmanStage<uint8_t>;
265extern template class HuffmanStage<uint16_t>;
266extern template class HuffmanStage<uint32_t>;
267
268} // namespace fz
Definition huffman_stage.h:68
size_t estimateDeviceFootprintBytes(size_t inlen) const override
size_t getMaxHeaderSize(size_t) const override
Definition huffman_stage.h:201
size_t serializeHeader(size_t, uint8_t *buf, size_t max_size) const override
Definition huffman_stage.h:180
void execute(cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
uint16_t getStageTypeId() const override
Definition huffman_stage.h:167
bool isGraphCompatible() const override
Definition huffman_stage.h:114
void onFinalize(size_t estimated_inlen, MemoryPool *pool) override
size_t getActualOutputSize(int index) const override
Definition huffman_stage.h:162
void saveState() override
Definition huffman_stage.h:203
void setInverse(bool inv) override
Definition huffman_stage.h:110
std::vector< size_t > estimateOutputSizes(const std::vector< size_t > &input_sizes) const override
Definition huffman_stage.h:145
void deserializeHeader(const uint8_t *buf, size_t size) override
Definition huffman_stage.h:191
size_t estimatePinnedFootprintBytes(size_t inlen) const override
std::string getName() const override
Definition huffman_stage.h:141
uint8_t getOutputDataType(size_t) const override
Definition huffman_stage.h:172
std::unordered_map< std::string, size_t > getActualOutputSizesByName() const override
Definition huffman_stage.h:158
void setBklen(uint32_t bklen)
Definition huffman_stage.h:96
void setEncodeMode(HuffmanEncodeMode mode)
Definition huffman_stage.h:106
uint8_t getInputDataType(size_t) const override
Definition huffman_stage.h:175
Definition mempool.h:82
Definition stage.h:30
FZM binary file format definitions — structs, enums, and helpers.
Definition fzm_format.h:25
DataType
Element data type identifiers used in buffer and stage descriptors.
Definition fzm_format.h:104
@ UNKNOWN
Byte-transparent stages: skip type checking at finalize()
HuffmanEncodeMode
Definition huffman_stage.h:48
@ Coarse
Multi-kernel coarse path; CPU prefix-sum sync in phase 3 (default).
@ Fine
ReVISIT-lite single kernel; fully GPU-async phase 3, no mid-encode CPU sync.
Base class interface for all compression stages.