FZGPUModules 1.0
GPU-accelerated modular compression pipeline
Loading...
Searching...
No Matches
bitshuffle_stage.h
Go to the documentation of this file.
1#pragma once
2
19#include "stage/stage.h"
20#include "fzm_format.h"
21#include <cuda_runtime.h>
22#include <cstdint>
23#include <cstring>
24#include <stdexcept>
25#include <string>
26#include <unordered_map>
27#include <vector>
28
29namespace fz {
30
38class BitshuffleStage : public Stage {
39public:
41 : is_inverse_(false)
42 , block_size_(16384)
43 , element_width_(4)
44 , actual_output_size_(0)
45 {}
46
47 // ── Stage control ──────────────────────────────────────────────────────
48 void setInverse(bool inv) override { is_inverse_ = inv; }
49 bool isInverse() const override { return is_inverse_; }
50
51 void setBlockSize(size_t bytes) { block_size_ = static_cast<uint32_t>(bytes); }
52 void setElementWidth(size_t bytes){ element_width_ = static_cast<uint8_t>(bytes); }
53
54 size_t getBlockSize() const { return block_size_; }
55 size_t getRequiredInputAlignment() const override { return block_size_; }
56 size_t getElementWidth() const { return element_width_; }
57
58 // ── Execution ──────────────────────────────────────────────────────────
59 void execute(
60 cudaStream_t stream,
61 MemoryPool* pool,
62 const std::vector<void*>& inputs,
63 const std::vector<void*>& outputs,
64 const std::vector<size_t>& sizes
65 ) override;
66
67 // ── Metadata ───────────────────────────────────────────────────────────
68 std::string getName() const override { return "Bitshuffle"; }
69 size_t getNumInputs() const override { return 1; }
70 size_t getNumOutputs() const override { return 1; }
71
72 std::vector<size_t> estimateOutputSizes(
73 const std::vector<size_t>& input_sizes
74 ) const override {
75 // Size-preserving transform.
76 return {input_sizes[0]};
77 }
78
79 std::unordered_map<std::string, size_t>
80 getActualOutputSizesByName() const override {
81 return {{"output", actual_output_size_}};
82 }
83 size_t getActualOutputSize(int index) const override {
84 return (index == 0) ? actual_output_size_ : 0;
85 }
86
87 uint16_t getStageTypeId() const override {
88 return static_cast<uint16_t>(StageType::BITSHUFFLE);
89 }
90
91 uint8_t getOutputDataType(size_t) const override {
92 // Raw byte stream — report as UINT8.
93 return static_cast<uint8_t>(DataType::UINT8);
94 }
95
96 // ── Serialization ──────────────────────────────────────────────────────
97 // Header: [0..3] block_size (uint32_t LE), [4] element_width (uint8_t)
99 size_t output_index, uint8_t* buf, size_t max_size
100 ) const override {
101 (void)output_index;
102 if (max_size < 5) return 0;
103 std::memcpy(buf, &block_size_, sizeof(uint32_t));
104 buf[4] = element_width_;
105 return 5;
106 }
107
108 void deserializeHeader(const uint8_t* buf, size_t size) override {
109 if (size >= 4) std::memcpy(&block_size_, buf, sizeof(uint32_t));
110 if (size >= 5) element_width_ = buf[4];
111 }
112
113 size_t getMaxHeaderSize(size_t) const override { return 5; }
114
115 void saveState() override {
116 saved_block_size_ = block_size_;
117 saved_element_width_ = element_width_;
118 saved_actual_output_size_ = actual_output_size_;
119 }
120
121 void restoreState() override {
122 block_size_ = saved_block_size_;
123 element_width_ = saved_element_width_;
124 actual_output_size_ = saved_actual_output_size_;
125 }
126
127private:
128 bool is_inverse_;
129 uint32_t block_size_;
130 uint32_t saved_block_size_ = 0;
131 uint8_t element_width_;
132 uint8_t saved_element_width_ = 0;
133 size_t actual_output_size_ = 0;
134 size_t saved_actual_output_size_ = 0;
135
136 // Validate config and return N_chunk (elements per chunk).
137 // block_size must be a multiple of 1024*element_width so that butterfly
138 // kernels always have full warps in every __shfl_xor_sync call.
139 size_t validateConfig() const {
140 if (element_width_ != 1 && element_width_ != 2 &&
141 element_width_ != 4 && element_width_ != 8)
142 throw std::invalid_argument(
143 "BitshuffleStage: element_width must be 1, 2, 4, or 8");
144 if (block_size_ == 0 || block_size_ % (1024u * element_width_) != 0)
145 throw std::invalid_argument(
146 "BitshuffleStage: block_size must be a positive multiple of "
147 "1024 * element_width (default 16384 satisfies this for all "
148 "supported element widths)");
149 return block_size_ / element_width_;
150 }
151};
152
153} // namespace fz
Definition bitshuffle_stage.h:38
std::vector< size_t > estimateOutputSizes(const std::vector< size_t > &input_sizes) const override
Definition bitshuffle_stage.h:72
size_t getMaxHeaderSize(size_t) const override
Definition bitshuffle_stage.h:113
uint8_t getOutputDataType(size_t) const override
Definition bitshuffle_stage.h:91
void execute(cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
size_t getActualOutputSize(int index) const override
Definition bitshuffle_stage.h:83
std::string getName() const override
Definition bitshuffle_stage.h:68
void setInverse(bool inv) override
Definition bitshuffle_stage.h:48
void saveState() override
Definition bitshuffle_stage.h:115
uint16_t getStageTypeId() const override
Definition bitshuffle_stage.h:87
size_t getRequiredInputAlignment() const override
Definition bitshuffle_stage.h:55
size_t serializeHeader(size_t output_index, uint8_t *buf, size_t max_size) const override
Definition bitshuffle_stage.h:98
void deserializeHeader(const uint8_t *buf, size_t size) override
Definition bitshuffle_stage.h:108
std::unordered_map< std::string, size_t > getActualOutputSizesByName() const override
Definition bitshuffle_stage.h:80
Definition mempool.h:62
Definition stage.h:28
FZM binary file format definitions — structs, enums, and helpers.
Definition fzm_format.h:25
@ BITSHUFFLE
BitshuffleStage — GPU bit-matrix transpose.