FZGPUModules 2.0
GPU-accelerated modular compression pipelines
Loading...
Searching...
No Matches
tiled_lorenzo_stage.h
Go to the documentation of this file.
1#pragma once
2
19#include "stage/stage.h"
20#include "fzm_format.h"
21#include <cuda_runtime.h>
22#include <algorithm>
23#include <array>
24#include <cstdint>
25#include <cstring>
26#include <stdexcept>
27#include <string>
28#include <type_traits>
29#include <unordered_map>
30#include <vector>
31
32namespace fz {
33
// Members of TiledLorenzoConfig, the fixed-layout record that
// TiledLorenzoStage copies to/from its serialized header with memcpy.
// The struct's opening line and its first member (data_type) are not part of
// this excerpt.
40 uint8_t ndim; ///< Spatial dimensionality 1/2/3.
41 uint8_t tile_x; ///< Tile extent in x (fast dim).
42 uint8_t tile_y; ///< Tile extent in y (1 for 1-D).
43 uint8_t tile_z; ///< Tile extent in z (1 for 1-D/2-D).
44 uint8_t reserved[3]; ///< Must be zero.
45 uint32_t dim_x; ///< X (fast) dimension.
46 uint32_t dim_y; ///< Y dimension (1 for 1-D).
47 uint32_t dim_z; ///< Z dimension (1 for 1-D/2-D).
48
// Default constructor initialiser list: INT32 elements, 2-D, 8x8x1 tile,
// zeroed reserved bytes, and dims (0, 1, 1).
50 : data_type(DataType::INT32), ndim(2),
51 tile_x(8), tile_y(8), tile_z(1), reserved{0, 0, 0},
52 dim_x(0), dim_y(1), dim_z(1) {}
53};
// The record is written into a per-stage serialized config slot, so it must
// not be larger than FZM_STAGE_CONFIG_SIZE bytes.
54static_assert(sizeof(TiledLorenzoConfig) <= FZM_STAGE_CONFIG_SIZE,
55 "TiledLorenzoConfig must fit in FZM_STAGE_CONFIG_SIZE");
56
69template<typename T>
70class TiledLorenzoStage : public Stage {
71 static_assert(std::is_same_v<T, int16_t> || std::is_same_v<T, int32_t>,
72 "TiledLorenzoStage: T must be int16_t or int32_t.");
73public:
74 TiledLorenzoStage() = default;
75 ~TiledLorenzoStage() override = default;
76
77 void setInverse(bool inv) override { is_inverse_ = inv; }
78 bool isInverse() const override { return is_inverse_; }
79
80 void setDims(const std::array<size_t, 3>& dims) override { dims_ = dims; }
81 void setDims(size_t x, size_t y = 1, size_t z = 1) { dims_ = {x, y, z}; }
82 std::array<size_t, 3> getDims() const { return dims_; }
83
90 void setTileShape(uint32_t tx, uint32_t ty = 1, uint32_t tz = 1) {
91 auto chk = [](uint32_t v, const char* nm) {
92 if (v > 255)
93 throw std::invalid_argument(
94 std::string("TiledLorenzoStage::setTileShape: ") + nm
95 + " must be in [0, 255], got " + std::to_string(v));
96 };
97 chk(tx, "tx"); chk(ty, "ty"); chk(tz, "tz");
98 const uint32_t prod = (tx ? tx : 1) * (ty ? ty : 1) * (tz ? tz : 1);
99 if (prod > 1024)
100 throw std::invalid_argument(
101 "TiledLorenzoStage::setTileShape: tx*ty*tz must be in [1, 1024], got "
102 + std::to_string(prod));
103 tile_ = {tx, ty, tz};
104 tile_set_ = true;
105 }
106 std::array<uint32_t, 3> getTileShape() const { return effectiveTile(); }
107
109 uint32_t getTileElems() const {
110 auto t = effectiveTile();
111 return t[0] * t[1] * t[2];
112 }
113
114 int ndim() const {
115 if (dims_[2] > 1) return 3;
116 if (dims_[1] > 1) return 2;
117 return 1;
118 }
119
121 cudaStream_t stream,
122 MemoryPool* pool,
123 const std::vector<void*>& inputs,
124 const std::vector<void*>& outputs,
125 const std::vector<size_t>& sizes
126 ) override;
127
128 std::string getName() const override { return "TiledLorenzo"; }
129 size_t getNumInputs() const override { return 1; }
130 size_t getNumOutputs() const override { return 1; }
131
132 std::vector<size_t> estimateOutputSizes(
133 const std::vector<size_t>& input_sizes
134 ) const override {
135 // Forward: natural n -> padded tile-major (num_tiles * tile_elems).
136 // Inverse: padded tile-major -> natural n.
137 const size_t n = naturalElems(input_sizes);
138 const size_t out_elems = is_inverse_ ? n : paddedElems();
139 return {out_elems * sizeof(T)};
140 }
141
142 std::unordered_map<std::string, size_t>
143 getActualOutputSizesByName() const override {
144 return {{"output", actual_output_size_}};
145 }
146 size_t getActualOutputSize(int index) const override {
147 return (index == 0) ? actual_output_size_ : 0;
148 }
149
150 uint16_t getStageTypeId() const override {
151 return static_cast<uint16_t>(StageType::TILED_LORENZO);
152 }
153
154 uint8_t getOutputDataType(size_t /*output_index*/) const override {
155 return static_cast<uint8_t>(getElementDataType());
156 }
157 uint8_t getInputDataType(size_t /*input_index*/) const override {
158 return static_cast<uint8_t>(getElementDataType());
159 }
160
161 size_t serializeHeader(size_t /*output_index*/, uint8_t* buf, size_t max_size) const override {
162 if (max_size < sizeof(TiledLorenzoConfig))
163 throw std::runtime_error("TiledLorenzoStage: header buffer too small");
164 auto t = effectiveTile();
166 cfg.data_type = getElementDataType();
167 cfg.ndim = static_cast<uint8_t>(ndim());
168 cfg.tile_x = static_cast<uint8_t>(t[0]);
169 cfg.tile_y = static_cast<uint8_t>(t[1]);
170 cfg.tile_z = static_cast<uint8_t>(t[2]);
171 cfg.dim_x = static_cast<uint32_t>(dims_[0]);
172 cfg.dim_y = static_cast<uint32_t>(dims_[1]);
173 cfg.dim_z = static_cast<uint32_t>(dims_[2]);
174 std::memcpy(buf, &cfg, sizeof(cfg));
175 return sizeof(cfg);
176 }
177
178 void deserializeHeader(const uint8_t* buf, size_t size) override {
179 if (size < sizeof(TiledLorenzoConfig))
180 throw std::runtime_error("TiledLorenzoStage: header too small");
182 std::memcpy(&cfg, buf, sizeof(cfg));
183 int eff_ndim = (cfg.ndim == 0) ? 1 : static_cast<int>(cfg.ndim);
184 dims_[0] = cfg.dim_x;
185 dims_[1] = (eff_ndim >= 2) ? cfg.dim_y : 1;
186 dims_[2] = (eff_ndim >= 3) ? cfg.dim_z : 1;
187 tile_ = {cfg.tile_x, cfg.tile_y, cfg.tile_z};
188 tile_set_ = (cfg.tile_x != 0);
189 }
190
191 size_t getMaxHeaderSize(size_t /*output_index*/) const override {
192 return sizeof(TiledLorenzoConfig);
193 }
194
195private:
196 bool is_inverse_ = false;
197 size_t actual_output_size_ = 0;
198 std::array<size_t, 3> dims_ = {0, 1, 1};
199 std::array<uint32_t, 3> tile_ = {8, 8, 1};
200 bool tile_set_ = false;
201
203 std::array<uint32_t, 3> effectiveTile() const {
204 if (tile_set_) {
205 return {tile_[0] ? tile_[0] : 1u,
206 tile_[1] ? tile_[1] : 1u,
207 tile_[2] ? tile_[2] : 1u};
208 }
209 switch (ndim()) {
210 case 3: return {4, 4, 4};
211 case 2: return {8, 8, 1};
212 default: return {64, 1, 1};
213 }
214 }
215
217 size_t naturalElems(const std::vector<size_t>& input_sizes) const {
218 if (dims_[0] > 0) return dims_[0] * dims_[1] * dims_[2];
219 return input_sizes.empty() ? 0 : input_sizes[0] / sizeof(T);
220 }
221
223 size_t paddedElems() const {
224 auto t = effectiveTile();
225 const size_t dx = (dims_[0] > 0) ? dims_[0] : 0;
226 const size_t dy = dims_[1], dz = dims_[2];
227 if (dx == 0) return 0;
228 const size_t ntx = (dx + t[0] - 1) / t[0];
229 const size_t nty = (dy + t[1] - 1) / t[1];
230 const size_t ntz = (dz + t[2] - 1) / t[2];
231 return ntx * nty * ntz * (size_t)t[0] * t[1] * t[2];
232 }
233
234 static DataType getElementDataType() {
235 if (std::is_same<T, int16_t>::value) return DataType::INT16;
236 return DataType::INT32;
237 }
238};
239
// Explicit instantiation declarations for the two element types the class's
// static_assert admits. They tell including translation units not to
// implicitly instantiate the non-inline members (e.g. execute); the matching
// explicit instantiation definitions are presumably in the stage's
// implementation file — TODO confirm.
240extern template class TiledLorenzoStage<int16_t>;
241extern template class TiledLorenzoStage<int32_t>;
242
243} // namespace fz
Definition mempool.h:82
Definition stage.h:30
Definition tiled_lorenzo_stage.h:70
void deserializeHeader(const uint8_t *buf, size_t size) override
Definition tiled_lorenzo_stage.h:178
std::string getName() const override
Definition tiled_lorenzo_stage.h:128
uint16_t getStageTypeId() const override
Definition tiled_lorenzo_stage.h:150
void execute(cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
uint32_t getTileElems() const
Elements per tile = the AdaptiveBitpack block_size that aligns blocks to tiles.
Definition tiled_lorenzo_stage.h:109
size_t getActualOutputSize(int index) const override
Definition tiled_lorenzo_stage.h:146
std::vector< size_t > estimateOutputSizes(const std::vector< size_t > &input_sizes) const override
Definition tiled_lorenzo_stage.h:132
void setDims(const std::array< size_t, 3 > &dims) override
Definition tiled_lorenzo_stage.h:80
uint8_t getInputDataType(size_t) const override
Definition tiled_lorenzo_stage.h:157
uint8_t getOutputDataType(size_t) const override
Definition tiled_lorenzo_stage.h:154
void setTileShape(uint32_t tx, uint32_t ty=1, uint32_t tz=1)
Definition tiled_lorenzo_stage.h:90
void setInverse(bool inv) override
Definition tiled_lorenzo_stage.h:77
size_t getMaxHeaderSize(size_t) const override
Definition tiled_lorenzo_stage.h:191
std::unordered_map< std::string, size_t > getActualOutputSizesByName() const override
Definition tiled_lorenzo_stage.h:143
size_t serializeHeader(size_t, uint8_t *buf, size_t max_size) const override
Definition tiled_lorenzo_stage.h:161
FZM binary file format definitions — structs, enums, and helpers.
Definition fzm_format.h:25
constexpr size_t FZM_STAGE_CONFIG_SIZE
Per-stage serialized config slot (bytes)
Definition fzm_format.h:65
@ TILED_LORENZO
Dimension-aware (tiled separable) Lorenzo predictor (cuSZp3 delta)
DataType
Element data type identifiers used in buffer and stage descriptors.
Definition fzm_format.h:109
Base class interface for all compression stages.
Definition tiled_lorenzo_stage.h:38
uint32_t dim_z
Z dimension (1 for 1-D/2-D).
Definition tiled_lorenzo_stage.h:47
uint8_t reserved[3]
Must be zero.
Definition tiled_lorenzo_stage.h:44
uint8_t tile_z
Tile extent in z (1 for 1-D/2-D).
Definition tiled_lorenzo_stage.h:43
DataType data_type
Signed integer element type (1B): INT16 / INT32.
Definition tiled_lorenzo_stage.h:39
uint8_t tile_y
Tile extent in y (1 for 1-D).
Definition tiled_lorenzo_stage.h:42
uint8_t tile_x
Tile extent in x (fast dim).
Definition tiled_lorenzo_stage.h:41
uint32_t dim_y
Y dimension (1 for 1-D).
Definition tiled_lorenzo_stage.h:46
uint8_t ndim
Spatial dimensionality 1/2/3.
Definition tiled_lorenzo_stage.h:40
uint32_t dim_x
X (fast) dimension.
Definition tiled_lorenzo_stage.h:45