FZGPUModules 2.0
GPU-accelerated modular compression pipelines
Loading...
Searching...
No Matches
bitshuffle_stage.h
Go to the documentation of this file.
1#pragma once
2
19#include "stage/stage.h"
20#include "fzm_format.h"
21#include <cuda_runtime.h>
22#include <cstdint>
23#include <cstring>
24#include <stdexcept>
25#include <string>
26#include <unordered_map>
27#include <vector>
28
29namespace fz {
30
43class BitshuffleStage : public Stage {
44public:
46 : is_inverse_(false)
47 , block_size_(16384)
48 , element_width_(4)
49 , actual_output_size_(0)
50 {}
51
52 // ── Stage control ──────────────────────────────────────────────────────
53 void setInverse(bool inv) override { is_inverse_ = inv; }
54 bool isInverse() const override { return is_inverse_; }
55
56 void setBlockSize(size_t bytes) { block_size_ = static_cast<uint32_t>(bytes); }
57 void setElementWidth(size_t bytes){ element_width_ = static_cast<uint8_t>(bytes); }
58
59 size_t getBlockSize() const { return block_size_; }
60 size_t getRequiredInputAlignment() const override { return block_size_; }
61 size_t getElementWidth() const { return element_width_; }
62
63 // ── Execution ──────────────────────────────────────────────────────────
64 void execute(
65 cudaStream_t stream,
66 MemoryPool* pool,
67 const std::vector<void*>& inputs,
68 const std::vector<void*>& outputs,
69 const std::vector<size_t>& sizes
70 ) override;
71
72 // ── Metadata ───────────────────────────────────────────────────────────
73 std::string getName() const override { return "Bitshuffle"; }
74 size_t getNumInputs() const override { return 1; }
75 size_t getNumOutputs() const override { return 1; }
76
77 std::vector<size_t> estimateOutputSizes(
78 const std::vector<size_t>& input_sizes
79 ) const override {
80 // Size-preserving transform.
81 return {input_sizes[0]};
82 }
83
84 std::unordered_map<std::string, size_t>
85 getActualOutputSizesByName() const override {
86 return {{"output", actual_output_size_}};
87 }
88 size_t getActualOutputSize(int index) const override {
89 return (index == 0) ? actual_output_size_ : 0;
90 }
91
92 uint16_t getStageTypeId() const override {
93 return static_cast<uint16_t>(StageType::BITSHUFFLE);
94 }
95
96 uint8_t getOutputDataType(size_t) const override {
97 // Raw byte stream — report as UINT8.
98 return static_cast<uint8_t>(DataType::UINT8);
99 }
100
101 // ── Serialization ──────────────────────────────────────────────────────
102 // Header: [0..3] block_size (uint32_t LE), [4] element_width (uint8_t)
104 size_t output_index, uint8_t* buf, size_t max_size
105 ) const override {
106 (void)output_index;
107 if (max_size < 5) return 0;
108 std::memcpy(buf, &block_size_, sizeof(uint32_t));
109 buf[4] = element_width_;
110 return 5;
111 }
112
113 void deserializeHeader(const uint8_t* buf, size_t size) override {
114 if (size >= 4) std::memcpy(&block_size_, buf, sizeof(uint32_t));
115 if (size >= 5) element_width_ = buf[4];
116 }
117
118 size_t getMaxHeaderSize(size_t) const override { return 5; }
119
120 void saveState() override {
121 saved_block_size_ = block_size_;
122 saved_element_width_ = element_width_;
123 saved_actual_output_size_ = actual_output_size_;
124 }
125
126 void restoreState() override {
127 block_size_ = saved_block_size_;
128 element_width_ = saved_element_width_;
129 actual_output_size_ = saved_actual_output_size_;
130 }
131
132private:
133 bool is_inverse_;
134 uint32_t block_size_;
135 uint32_t saved_block_size_ = 0;
136 uint8_t element_width_;
137 uint8_t saved_element_width_ = 0;
138 size_t actual_output_size_ = 0;
139 size_t saved_actual_output_size_ = 0;
140
141 // Validate config and return N_chunk (elements per chunk).
142 // block_size must be a multiple of 1024*element_width so that butterfly
143 // kernels always have full warps in every __shfl_xor_sync call.
144 size_t validateConfig() const {
145 if (element_width_ != 1 && element_width_ != 2 &&
146 element_width_ != 4 && element_width_ != 8)
147 throw std::invalid_argument(
148 "BitshuffleStage: element_width must be 1, 2, 4, or 8");
149 if (block_size_ == 0 || block_size_ % (1024u * element_width_) != 0)
150 throw std::invalid_argument(
151 "BitshuffleStage: block_size must be a positive multiple of "
152 "1024 * element_width (default 16384 satisfies this for all "
153 "supported element widths)");
154 return block_size_ / element_width_;
155 }
156};
157
158} // namespace fz
fz::BitshuffleStage
Definition bitshuffle_stage.h:43
std::vector< size_t > estimateOutputSizes(const std::vector< size_t > &input_sizes) const override
Definition bitshuffle_stage.h:77
size_t getMaxHeaderSize(size_t) const override
Definition bitshuffle_stage.h:118
uint8_t getOutputDataType(size_t) const override
Definition bitshuffle_stage.h:96
void execute(cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
size_t getActualOutputSize(int index) const override
Definition bitshuffle_stage.h:88
std::string getName() const override
Definition bitshuffle_stage.h:73
void setInverse(bool inv) override
Definition bitshuffle_stage.h:53
void saveState() override
Definition bitshuffle_stage.h:120
uint16_t getStageTypeId() const override
Definition bitshuffle_stage.h:92
size_t getRequiredInputAlignment() const override
Definition bitshuffle_stage.h:60
size_t serializeHeader(size_t output_index, uint8_t *buf, size_t max_size) const override
Definition bitshuffle_stage.h:103
void deserializeHeader(const uint8_t *buf, size_t size) override
Definition bitshuffle_stage.h:113
std::unordered_map< std::string, size_t > getActualOutputSizesByName() const override
Definition bitshuffle_stage.h:85
Definition mempool.h:82
Definition stage.h:30
FZM binary file format definitions — structs, enums, and helpers.
Definition fzm_format.h:25
Base class interface for all compression stages.