FZGPUModules 2.0
GPU-accelerated modular compression pipelines
Loading...
Searching...
No Matches
quantizer.h
Go to the documentation of this file.
1#pragma once
2
8#include "stage/stage.h"
9#include "fzm_format.h"
10#include "fused/lorenzo_quant/lorenzo_quant.h" // for ErrorBoundMode
11#include <cuda_runtime.h>
12#include <array>
13#include <cmath>
14#include <cstdint>
15#include <cstring>
16#include <limits>
17
18namespace fz {
19
28 float value_base;
29 uint32_t quant_radius;
30 uint32_t num_elements;
31 uint32_t outlier_count;
34 uint8_t eb_mode;
35 uint8_t zigzag_codes;
38 uint8_t linear_mode;
39 uint8_t _pad[2];
40
42 : abs_error_bound(0.0f), user_error_bound(0.0f), value_base(0.0f),
44 input_type(DataType::FLOAT32), code_type(DataType::UINT16),
45 eb_mode(0), zigzag_codes(0),
46 outlier_threshold(std::numeric_limits<float>::infinity()),
48};
49static_assert(sizeof(QuantizerConfig) <= FZM_STAGE_CONFIG_SIZE,
50 "QuantizerConfig must fit in FZM_STAGE_CONFIG_SIZE");
51
106template<typename TInput = float, typename TCode = uint16_t>
107class QuantizerStage : public Stage {
108public:
110 struct Config {
111 float error_bound = 1e-4f;
112 int quant_radius = 32768;
113 float outlier_capacity = 0.05f;
119 bool zigzag_codes = false;
121 float outlier_threshold = std::numeric_limits<float>::infinity();
125 bool inplace_outliers = false;
132 bool linear_mode = false;
133
134 Config() = default;
135 Config(TInput eb, ErrorBoundMode mode = ErrorBoundMode::ABS,
136 int radius = 32768, float outlier_cap = 0.05f)
137 : error_bound(static_cast<float>(eb)), quant_radius(radius),
138 outlier_capacity(outlier_cap), eb_mode(mode) {}
139 };
140
141 explicit QuantizerStage(const Config& config = Config());
142 ~QuantizerStage() override;
143
145 cudaStream_t stream,
146 MemoryPool* pool,
147 const std::vector<void*>& inputs,
148 const std::vector<void*>& outputs,
149 const std::vector<size_t>& sizes
150 ) override;
151
152 void postStreamSync(cudaStream_t stream) override;
153
158 void onFinalize(size_t estimated_inlen, MemoryPool* pool) override;
159
160 size_t estimateDeviceFootprintBytes(size_t /*estimated_inlen*/) const override {
161 return (isLinearMode() || isInplaceMode()) ? 0 : sizeof(uint32_t);
162 }
163
164 std::string getName() const override { return "Quantizer"; }
165
166 size_t getNumInputs() const override {
167 if (!is_inverse_) return 1;
168 return (isLinearMode() || isInplaceMode()) ? 1 : 3;
169 }
170 size_t getNumOutputs() const override {
171 if (is_inverse_) return 1;
172 return (isLinearMode() || isInplaceMode()) ? 1 : 3;
173 }
174
175 std::vector<std::string> getOutputNames() const override {
176 if (is_inverse_) return {"reconstructed"};
177 if (isLinearMode() || isInplaceMode()) return {"codes"};
178 return {"codes", "outlier_vals", "outlier_idxs"};
179 }
180
181 std::vector<size_t> estimateOutputSizes(
182 const std::vector<size_t>& input_sizes
183 ) const override;
184
185 std::unordered_map<std::string, size_t> getActualOutputSizesByName() const override {
186 auto names = getOutputNames();
187 std::unordered_map<std::string, size_t> result;
188 for (size_t i = 0; i < names.size() && i < actual_output_sizes_.size(); i++)
189 result[names[i]] = actual_output_sizes_[i];
190 return result;
191 }
192 size_t getActualOutputSize(int index) const override {
193 return (index >= 0 && index < static_cast<int>(actual_output_sizes_.size()))
194 ? actual_output_sizes_[index] : 0;
195 }
196
197 void setInverse(bool inverse) override { is_inverse_ = inverse; }
198 bool isInverse() const override { return is_inverse_; }
199
200 uint16_t getStageTypeId() const override {
201 return static_cast<uint16_t>(StageType::QUANTIZER);
202 }
203
204 uint8_t getOutputDataType(size_t output_index) const override {
205 if (is_inverse_) return static_cast<uint8_t>(getInputDataType());
206 // Linear mode: codes hold two's-complement signed q — declare the signed type
207 // so the DAG connects cleanly to LorenzoStage<intN>.
208 if (isLinearMode()) return static_cast<uint8_t>(signedOf(getCodeDataType()));
209 if (isInplaceMode()) return static_cast<uint8_t>(getCodeDataType()); // only codes
210 switch (output_index) {
211 case 0: return static_cast<uint8_t>(getCodeDataType());
212 case 1: return static_cast<uint8_t>(getInputDataType());
213 case 2: return static_cast<uint8_t>(DataType::UINT32);
214 default: return static_cast<uint8_t>(DataType::UINT8);
215 }
216 }
217
218 uint8_t getInputDataType(size_t /*input_index*/) const override {
219 return static_cast<uint8_t>(getInputDataType());
220 }
221
222 size_t serializeHeader(size_t output_index, uint8_t* buf, size_t max_size) const override;
223 size_t getMaxHeaderSize(size_t) const override { return sizeof(QuantizerConfig); }
224 void deserializeHeader(const uint8_t* buf, size_t size) override;
225
226 void saveState() override {
227 saved_config_ = config_;
228 saved_num_elements_ = num_elements_;
229 saved_actual_outlier_count_ = actual_outlier_count_;
230 saved_computed_abs_eb_ = computed_abs_eb_;
231 saved_computed_value_base_ = computed_value_base_;
232 saved_actual_output_sizes_ = actual_output_sizes_;
233 }
234
235 void restoreState() override {
236 config_ = saved_config_;
237 num_elements_ = saved_num_elements_;
238 actual_outlier_count_ = saved_actual_outlier_count_;
239 computed_abs_eb_ = saved_computed_abs_eb_;
240 computed_value_base_ = saved_computed_value_base_;
241 actual_output_sizes_ = saved_actual_output_sizes_;
242 }
243
244 void setErrorBound(TInput eb) { config_.error_bound = static_cast<float>(eb); }
245 void setQuantRadius(int r) { config_.quant_radius = r; }
246 void setOutlierCapacity(float c) { config_.outlier_capacity = c; }
247 void setErrorBoundMode(ErrorBoundMode m) { config_.eb_mode = m; }
248 void setValueBase(float vb) { config_.precomputed_value_base = vb; }
249 void setZigzagCodes(bool enable) { config_.zigzag_codes = enable; }
251 void setOutlierThreshold(float t) { config_.outlier_threshold = t; }
253 void setInplaceOutliers(bool enable) { config_.inplace_outliers = enable; }
255 void setLinearMode(bool enable) { config_.linear_mode = enable; }
256
257 TInput getErrorBound() const { return static_cast<TInput>(config_.error_bound); }
258 int getQuantRadius() const { return config_.quant_radius; }
259 ErrorBoundMode getErrorBoundMode() const { return config_.eb_mode; }
260 float getValueBase() const { return config_.precomputed_value_base; }
261 float getOutlierCapacity() const { return config_.outlier_capacity; }
262 bool getZigzagCodes() const { return config_.zigzag_codes; }
263 float getOutlierThreshold() const { return config_.outlier_threshold; }
264 bool getInplaceOutliers() const { return config_.inplace_outliers; }
265 bool getLinearMode() const { return config_.linear_mode; }
266
267private:
268 Config config_;
269 Config saved_config_;
270 std::vector<size_t> actual_output_sizes_;
271 std::vector<size_t> saved_actual_output_sizes_;
272 size_t num_elements_ = 0;
273 size_t saved_num_elements_ = 0;
274 uint32_t actual_outlier_count_= 0;
275 uint32_t saved_actual_outlier_count_ = 0;
276 bool is_inverse_ = false;
277 TInput computed_abs_eb_ = static_cast<TInput>(1e-4);
278 TInput saved_computed_abs_eb_ = static_cast<TInput>(1e-4);
279 float computed_value_base_ = 0.0f;
280 float saved_computed_value_base_ = 0.0f;
288 uint32_t* d_outlier_count_scratch_ = nullptr;
291 MemoryPool* persistent_pool_ = nullptr;
292
296 void initOutlierCountScratch(MemoryPool* pool);
297
298 bool isInplaceMode() const {
299 return config_.inplace_outliers
300 && config_.eb_mode != ErrorBoundMode::REL;
301 }
302
303 bool isLinearMode() const { return config_.linear_mode; }
304
307 static DataType signedOf(DataType d) {
308 switch (d) {
309 case DataType::UINT8: return DataType::INT8;
310 case DataType::UINT16: return DataType::INT16;
311 case DataType::UINT32: return DataType::INT32;
312 default: return d;
313 }
314 }
315
316 DataType getInputDataType() const {
317 if (std::is_same<TInput, float>::value) return DataType::FLOAT32;
318 if (std::is_same<TInput, double>::value) return DataType::FLOAT64;
319 return DataType::FLOAT32;
320 }
321 DataType getCodeDataType() const {
322 if (std::is_same<TCode, uint8_t>::value) return DataType::UINT8;
323 if (std::is_same<TCode, uint16_t>::value) return DataType::UINT16;
324 if (std::is_same<TCode, uint32_t>::value) return DataType::UINT32;
325 return DataType::UINT16;
326 }
327 size_t getMaxOutlierCount(size_t n) const {
328 return static_cast<size_t>(std::ceil(n * config_.outlier_capacity));
329 }
330};
331
332extern template class QuantizerStage<float, uint16_t>;
333extern template class QuantizerStage<float, uint32_t>;
334extern template class QuantizerStage<double, uint16_t>;
335extern template class QuantizerStage<double, uint32_t>;
336
337} // namespace fz
Definition mempool.h:82
Definition quantizer.h:107
void saveState() override
Definition quantizer.h:226
uint8_t getInputDataType(size_t) const override
Definition quantizer.h:218
uint8_t getOutputDataType(size_t output_index) const override
Definition quantizer.h:204
void setInplaceOutliers(bool enable)
ABS/NOA: encode outliers in-place (raw float bits in codes array; no scatter buffers).
Definition quantizer.h:253
std::vector< std::string > getOutputNames() const override
Definition quantizer.h:175
void setInverse(bool inverse) override
Definition quantizer.h:197
void postStreamSync(cudaStream_t stream) override
void deserializeHeader(const uint8_t *buf, size_t size) override
size_t estimateDeviceFootprintBytes(size_t) const override
Definition quantizer.h:160
void setOutlierThreshold(float t)
ABS/NOA: |x| >= threshold → lossless outlier regardless of bin (LC reference parameter).
Definition quantizer.h:251
void onFinalize(size_t estimated_inlen, MemoryPool *pool) override
void execute(cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
std::string getName() const override
Definition quantizer.h:164
size_t serializeHeader(size_t output_index, uint8_t *buf, size_t max_size) const override
uint16_t getStageTypeId() const override
Definition quantizer.h:200
std::unordered_map< std::string, size_t > getActualOutputSizesByName() const override
Definition quantizer.h:185
std::vector< size_t > estimateOutputSizes(const std::vector< size_t > &input_sizes) const override
size_t getMaxHeaderSize(size_t) const override
Definition quantizer.h:223
void setLinearMode(bool enable)
ABS/NOA: linear / no-outlier mode (cuSZp-style signed codes; see Config::linear_mode).
Definition quantizer.h:255
size_t getActualOutputSize(int index) const override
Definition quantizer.h:192
Definition stage.h:30
FZM binary file format definitions — structs, enums, and helpers.
Fused Lorenzo predictor and quantizer stage.
Definition fzm_format.h:25
ErrorBoundMode
Definition lorenzo_quant.h:30
@ ABS
Absolute error bound.
@ REL
Global-approximate point-wise relative bound.
constexpr size_t FZM_STAGE_CONFIG_SIZE
Per-stage serialized config slot (bytes)
Definition fzm_format.h:65
DataType
Element data type identifiers used in buffer and stage descriptors.
Definition fzm_format.h:109
Base class interface for all compression stages.
Definition quantizer.h:25
uint8_t inplace_outliers
1 if outliers are encoded in-place in the codes array.
Definition quantizer.h:37
uint8_t linear_mode
1 if linear/no-outlier mode (signed codes, no outlier ports).
Definition quantizer.h:38
uint8_t eb_mode
ErrorBoundMode cast to uint8_t.
Definition quantizer.h:34
uint32_t outlier_count
Actual number of outliers.
Definition quantizer.h:31
DataType code_type
Quantization code type (1B).
Definition quantizer.h:33
uint8_t zigzag_codes
1 if ABS/NOA codes are zigzag-encoded.
Definition quantizer.h:35
uint32_t num_elements
Total element count.
Definition quantizer.h:30
uint8_t _pad[2]
Alignment padding — must be zero.
Definition quantizer.h:39
uint32_t quant_radius
Quantization radius.
Definition quantizer.h:29
float user_error_bound
Original user-specified EB.
Definition quantizer.h:27
DataType input_type
Original input type (1B).
Definition quantizer.h:32
float abs_error_bound
Absolute EB after mode conversion (0 for REL).
Definition quantizer.h:26
float outlier_threshold
ABS/NOA: |x| >= threshold → forced outlier (inf = disabled).
Definition quantizer.h:36
float value_base
value_range (NOA); 0 for ABS/REL.
Definition quantizer.h:28
Definition quantizer.h:110
float outlier_threshold
ABS/NOA: |x| >= threshold → lossless outlier (LC reference threshold). Default: ∞.
Definition quantizer.h:121
float outlier_capacity
Fraction of input size reserved for outliers.
Definition quantizer.h:113
bool zigzag_codes
Definition quantizer.h:119
float precomputed_value_base
Pre-computed value_base > 0 to skip the NOA data scan; 0 = auto.
Definition quantizer.h:116
bool inplace_outliers
Definition quantizer.h:125
int quant_radius
Quantization radius.
Definition quantizer.h:112
bool linear_mode
Definition quantizer.h:132
float error_bound
Error bound (interpretation set by eb_mode).
Definition quantizer.h:111