13#include <cuda_runtime.h>
19#include <unordered_map>
40 "AdaptiveBitpackConfig must fit in FZM_STAGE_CONFIG_SIZE");
76 static_assert(std::is_same_v<T, int16_t> || std::is_same_v<T, int32_t>,
77 "AdaptiveBitpackStage: T must be int16_t or int32_t.");
83 void setInverse(
bool inv)
override { is_inverse_ = inv; }
84 bool isInverse()
const override {
return is_inverse_; }
95 if (n == 0 || n > 1024)
96 throw std::invalid_argument(
97 "AdaptiveBitpackStage::setBlockSize: n must be in [1, 1024], got "
101 uint32_t getBlockSize()
const {
return block_size_; }
108 bool getOutlierSelection()
const {
return outlier_selection_; }
114 const std::vector<void*>& inputs,
115 const std::vector<void*>& outputs,
116 const std::vector<size_t>& sizes
126 std::string
getName()
const override {
return "AdaptiveBitpack"; }
127 size_t getNumInputs()
const override {
return 1; }
128 size_t getNumOutputs()
const override {
return 1; }
131 const std::vector<size_t>& input_sizes
139 const std::vector<size_t>& input_sizes
142 std::unordered_map<std::string, size_t>
144 return {{
"output", actual_output_size_}};
147 return (index == 0) ? actual_output_size_ : 0;
156 return static_cast<uint8_t
>(is_inverse_ ? getElementDataType()
160 return static_cast<uint8_t
>(is_inverse_ ? DataType::UINT8
161 : getElementDataType());
171 cfg.
num_elements =
static_cast<uint64_t
>(num_elements_);
172 std::memcpy(buf, &cfg,
sizeof(cfg));
177 throw std::runtime_error(
"AdaptiveBitpackStage: header too small");
179 std::memcpy(&cfg, buf,
sizeof(cfg));
189 saved_block_size_ = block_size_;
190 saved_num_elements_ = num_elements_;
191 saved_actual_size_ = actual_output_size_;
192 saved_outlier_select_ = outlier_selection_;
194 void restoreState()
override {
195 block_size_ = saved_block_size_;
196 num_elements_ = saved_num_elements_;
197 actual_output_size_ = saved_actual_size_;
198 outlier_selection_ = saved_outlier_select_;
201 size_t getNumElements()
const {
return num_elements_; }
204 bool is_inverse_ =
false;
205 uint32_t block_size_ = 32;
206 bool outlier_selection_ =
false;
207 size_t num_elements_ = 0;
208 size_t actual_output_size_ = 0;
214 uint32_t* d_cost_ =
nullptr;
215 uint32_t* d_offset_ =
nullptr;
216 size_t scratch_blocks_ = 0;
217 MemoryPool* scratch_pool_ =
nullptr;
218 size_t fwd_num_blocks_ = 0;
219 size_t fwd_meta_region_ = 0;
221 uint32_t saved_block_size_ = 32;
222 bool saved_outlier_select_ =
false;
223 size_t saved_num_elements_ = 0;
224 size_t saved_actual_size_ = 0;
226 static DataType getElementDataType() {
227 if (std::is_same<T, int16_t>::value)
return DataType::INT16;
228 return DataType::INT32;
232extern template class AdaptiveBitpackStage<int16_t>;
233extern template class AdaptiveBitpackStage<int32_t>;
Definition adaptive_bitpack_stage.h:75
void setBlockSize(uint32_t n)
Definition adaptive_bitpack_stage.h:94
void saveState() override
Definition adaptive_bitpack_stage.h:188
size_t serializeHeader(size_t, uint8_t *buf, size_t max_size) const override
Definition adaptive_bitpack_stage.h:165
uint8_t getInputDataType(size_t) const override
Definition adaptive_bitpack_stage.h:159
void deserializeHeader(const uint8_t *buf, size_t size) override
Definition adaptive_bitpack_stage.h:175
std::string getName() const override
Definition adaptive_bitpack_stage.h:126
void execute(cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
uint16_t getStageTypeId() const override
Definition adaptive_bitpack_stage.h:150
size_t getMaxHeaderSize(size_t) const override
Definition adaptive_bitpack_stage.h:184
uint8_t getOutputDataType(size_t) const override
Definition adaptive_bitpack_stage.h:155
std::unordered_map< std::string, size_t > getActualOutputSizesByName() const override
Definition adaptive_bitpack_stage.h:143
size_t getActualOutputSize(int index) const override
Definition adaptive_bitpack_stage.h:146
size_t estimateScratchBytes(const std::vector< size_t > &input_sizes) const override
bool isGraphCompatible() const override
Definition adaptive_bitpack_stage.h:90
void setInverse(bool inv) override
Definition adaptive_bitpack_stage.h:83
std::vector< size_t > estimateOutputSizes(const std::vector< size_t > &input_sizes) const override
void postStreamSync(cudaStream_t stream) override
void setOutlierSelection(bool enable)
Definition adaptive_bitpack_stage.h:107
Definition fzm_format.h:25
constexpr size_t FZM_STAGE_CONFIG_SIZE
Per-stage serialized config slot (bytes)
Definition fzm_format.h:65
@ ADAPTIVE_BITPACK
Per-block adaptive fixed-rate bit-plane coder (cuSZp plain mode)
DataType
Element data type identifiers used in buffer and stage descriptors.
Definition fzm_format.h:109
Base class interface for all compression stages.
Definition adaptive_bitpack_stage.h:28
uint8_t _pad[2]
Must be zero.
Definition adaptive_bitpack_stage.h:31
uint8_t outlier_selection
1 = cuSZp2 per-block plain/outlier selection.
Definition adaptive_bitpack_stage.h:30
uint32_t block_size
Elements per logical block (reset period).
Definition adaptive_bitpack_stage.h:32
DataType data_type
Signed element type (1B): INT16 / INT32.
Definition adaptive_bitpack_stage.h:29
uint64_t num_elements
Original element count (sizes the inverse output).
Definition adaptive_bitpack_stage.h:33