FZGPUModules 1.0
GPU-accelerated modular compression pipeline
Loading...
Searching...
No Matches
fzm_format.h
Go to the documentation of this file.
1#pragma once
2
20#include <cstdint>
21#include <cstring>
22#include <stdexcept>
23#include <string>
24
25namespace fz {
26
/// Magic number identifying an FZM file; FZMHeaderCore::magic must equal this value.
constexpr uint32_t FZM_MAGIC = 0x464D5A32;

/// Major component of the format version written by this header.
constexpr uint8_t FZM_VERSION_MAJOR = 3;
/// Minor component of the format version written by this header.
constexpr uint8_t FZM_VERSION_MINOR = 1;
/// Packed version word: major in the high byte, minor in the low byte.
/// The outer cast makes the narrowing from the promoted `int` result explicit.
constexpr uint16_t FZM_VERSION = static_cast<uint16_t>(
    (static_cast<uint16_t>(FZM_VERSION_MAJOR) << 8)
    | static_cast<uint16_t>(FZM_VERSION_MINOR));

/// NOTE(review): presumably the size in bytes of the header core in an older
/// format revision (the current FZMHeaderCore is asserted to be 80 bytes) — confirm.
constexpr size_t FZM_LEGACY_HEADER_CORE_SIZE = 72;

/// Bit in FZMHeaderCore::flags: the data_checksum field is valid.
constexpr uint16_t FZM_FLAG_HAS_DATA_CHECKSUM   = 0x0001u;
/// Bit in FZMHeaderCore::flags: the header_checksum field is valid.
constexpr uint16_t FZM_FLAG_HAS_HEADER_CHECKSUM = 0x0002u;
50
/// Extracts the major version from a stored version word.
///
/// Words of the form (major << 8) | minor yield their high byte. A word that
/// fits in one byte (<= 0xFF) is returned unchanged as the major version.
/// NOTE(review): the one-byte form is presumably how older files stored a bare
/// major number — confirm against the reader.
///
/// @param v Version word as stored in the header.
/// @return Major version component.
constexpr uint8_t fzmVersionMajor(uint16_t v) noexcept {
    return (v <= 0xFF) ? static_cast<uint8_t>(v)
                       : static_cast<uint8_t>(v >> 8);
}
/// Extracts the minor version from a stored version word.
///
/// Words of the form (major << 8) | minor yield their low byte. A word that
/// fits in one byte (<= 0xFF) is treated as a bare major version, so the
/// minor version reported for it is 0.
///
/// @param v Version word as stored in the header.
/// @return Minor version component.
constexpr uint8_t fzmVersionMinor(uint16_t v) noexcept {
    // Both arms are uint8_t so the conditional does not silently widen to unsigned int.
    return (v <= 0xFF) ? static_cast<uint8_t>(0)
                       : static_cast<uint8_t>(v & 0xFF);
}
62
constexpr size_t FZM_MAX_BUFFERS       = 32;   ///< Maximum pipeline output buffers per file.
constexpr size_t FZM_MAX_NAME_LEN      = 64;   ///< Maximum output port name length (bytes, null-terminated).
constexpr size_t FZM_STAGE_CONFIG_SIZE = 128;  ///< Per-stage serialized config slot (bytes).
constexpr size_t FZM_MAX_SOURCES       = 4;    ///< Maximum source stages per pipeline.
67
68// ─────────────────────────────────────────────────────────────────────────────
69
/// Stage type identifiers written into the FZM header.
///
/// The numeric values are stored in the file (2 bytes each), so existing
/// values must never be renumbered. No enumerator is defined for 8 or 9.
enum class StageType : uint16_t {
    UNKNOWN     = 0,   ///< Default value set by the FZMStageInfo / FZMBufferEntry constructors.
    LORENZO_1D  = 1,   ///< LorenzoStage — 1-D predictor.
    DIFFERENCE  = 2,   ///< DifferenceStage — first-order differencing.
    SCALE       = 3,   ///< ScaleStage (test utility).
    PASSTHROUGH = 4,   ///< PassThroughStage (test utility).
    RLE         = 5,   ///< RLEStage — run-length encoding.
    HUFFMAN     = 6,   ///< Reserved (not yet implemented).
    BITPACK     = 7,   ///< Reserved (not yet implemented).
    SPLIT       = 10,  ///< SplitStage (test utility).
    MERGE       = 11,  ///< MergeStage (test utility).
    LORENZO_2D  = 12,  ///< LorenzoStage — 2-D predictor.
    LORENZO_3D  = 13,  ///< LorenzoStage — 3-D predictor.
    QUANTIZER   = 14,  ///< QuantizerStage — direct-value quantization.
    ZIGZAG      = 15,  ///< ZigzagStage — zigzag encode/decode.
    NEGABINARY  = 16,  ///< NegabinaryStage — negabinary encode/decode.
    BITSHUFFLE  = 17,  ///< BitshuffleStage — GPU bit-matrix transpose.
    RZE         = 18,  ///< RZEStage — recursive zero-byte elimination.
};
95
/// Element data type identifiers used in buffer and stage descriptors.
///
/// Stored as a single byte; the numeric values are part of the file format
/// and must not be renumbered.
enum class DataType : uint8_t {
    UINT8   = 0,     ///< Unsigned 8-bit integer.
    UINT16  = 1,     ///< Unsigned 16-bit integer.
    UINT32  = 2,     ///< Unsigned 32-bit integer.
    UINT64  = 3,     ///< Unsigned 64-bit integer.
    INT8    = 4,     ///< Signed 8-bit integer.
    INT16   = 5,     ///< Signed 16-bit integer.
    INT32   = 6,     ///< Signed 32-bit integer.
    INT64   = 7,     ///< Signed 64-bit integer.
    FLOAT32 = 8,     ///< 32-bit floating point.
    FLOAT64 = 9,     ///< 64-bit floating point.
    UNKNOWN = 0xFF,  ///< No known element type; getDataTypeSize() throws for this value.
};
116
117// ─────────────────────────────────────────────────────────────────────────────
118
/// Number of slots in FZMStageInfo::input_buffer_ids.
constexpr size_t FZM_MAX_STAGE_INPUTS  = 8;
/// Number of slots in FZMStageInfo::output_buffer_ids.
constexpr size_t FZM_MAX_STAGE_OUTPUTS = 8;
121
133 uint16_t stage_version;
134 uint8_t num_inputs;
135 uint8_t num_outputs;
136 uint16_t reserved1;
137
138 uint16_t input_buffer_ids[FZM_MAX_STAGE_INPUTS];
139 uint16_t output_buffer_ids[FZM_MAX_STAGE_OUTPUTS];
140
142 uint32_t config_size;
143
144 uint8_t reserved2[84];
145 // Total: 2+2+1+1+2+16+16+128+4+84 = 256 bytes
146
147 FZMStageInfo() {
148 stage_type = StageType::UNKNOWN;
149 stage_version = 0;
150 num_inputs = 0;
151 num_outputs = 0;
152 reserved1 = 0;
153 memset(input_buffer_ids, 0xFF, sizeof(input_buffer_ids));
154 memset(output_buffer_ids, 0xFF, sizeof(output_buffer_ids));
156 config_size = 0;
157 memset(reserved2, 0, 84);
158 }
159};
160static_assert(sizeof(FZMStageInfo) == 256, "FZMStageInfo must be 256 bytes");
161
172 uint16_t stage_version;
175 uint16_t dag_buffer_id;
177
178 uint64_t data_size;
179 uint64_t allocated_size;
181 uint64_t byte_offset;
182
184 uint32_t config_size;
185
186 uint8_t reserved2[14];
187 // Declared fields: 2+2+1+1+2+64+8+8+8+8+128+4+14 = 250 bytes, i.e.
188 // stage_type(2)+stage_version(2)+data_type(1)+producer_output_idx(1)+dag_buffer_id(2)+name(64)
189 // +data_size(8)+allocated_size(8)+uncompressed_size(8)+byte_offset(8)
190 // +stage_config(128)+config_size(4)+reserved2(14) = 250.
191 // Every field already sits on its natural boundary, so there is no interior padding.
192 // With 8-byte-aligned uint64_t members the struct size is rounded up to a multiple of 8: 6 trailing padding bytes give the 256 the static_assert requires.
193 // NOTE(review): the constructor zeroes only reserved2[14], not those 6 padding bytes; declaring reserved2[20] would make all 256 bytes explicit.
194
196 stage_type = StageType::UNKNOWN;
197 stage_version = 0;
198 data_type = DataType::UINT8;
200 dag_buffer_id = 0xFFFF;
201 memset(name, 0, FZM_MAX_NAME_LEN);
202 data_size = 0;
203 allocated_size = 0;
205 byte_offset = 0;
207 config_size = 0;
208 memset(reserved2, 0, 14);
209 }
210};
211static_assert(sizeof(FZMBufferEntry) == 256, "FZMBufferEntry must be 256 bytes");
212
226 uint32_t magic;
227 uint16_t version;
228 uint16_t num_buffers;
229
232 uint64_t header_size;
233
234 uint32_t num_stages;
235 uint16_t num_sources;
236 uint16_t flags;
237
244
245 uint32_t data_checksum;
247 // Total: 4+2+2+8+8+8+4+2+2+32+4+4 = 80 bytes
248
249 FZMHeaderCore() {
251 version = FZM_VERSION;
252 num_buffers = 0;
254 compressed_size = 0;
255 header_size = sizeof(FZMHeaderCore);
256 num_stages = 0;
257 num_sources = 0;
258 flags = 0;
260 data_checksum = 0;
261 header_checksum = 0;
262 }
263
265 uint64_t computeHeaderSize() const {
266 return sizeof(FZMHeaderCore)
267 + num_stages * sizeof(FZMStageInfo)
268 + num_buffers * sizeof(FZMBufferEntry);
269 }
270};
271static_assert(sizeof(FZMHeaderCore) == 80, "FZMHeaderCore must be 80 bytes");
272
273// ─────────────────────────────────────────────────────────────────────────────
274// Helper functions
275// ─────────────────────────────────────────────────────────────────────────────
276
278inline size_t getDataTypeSize(DataType type) {
279 switch (type) {
280 case DataType::UINT8: case DataType::INT8: return 1;
281 case DataType::UINT16: case DataType::INT16: return 2;
282 case DataType::UINT32: case DataType::INT32: case DataType::FLOAT32: return 4;
283 case DataType::UINT64: case DataType::INT64: case DataType::FLOAT64: return 8;
284 default: throw std::runtime_error("Unknown data type");
285 }
286}
287
289inline std::string dataTypeToString(DataType type) {
290 switch (type) {
291 case DataType::UINT8: return "uint8";
292 case DataType::UINT16: return "uint16";
293 case DataType::UINT32: return "uint32";
294 case DataType::UINT64: return "uint64";
295 case DataType::INT8: return "int8";
296 case DataType::INT16: return "int16";
297 case DataType::INT32: return "int32";
298 case DataType::INT64: return "int64";
299 case DataType::FLOAT32: return "float32";
300 case DataType::FLOAT64: return "float64";
301 default: return "unknown";
302 }
303}
304
306inline std::string stageTypeToString(StageType type) {
307 switch (type) {
308 case StageType::LORENZO_1D: return "Lorenzo1D";
309 case StageType::LORENZO_2D: return "Lorenzo2D";
310 case StageType::LORENZO_3D: return "Lorenzo3D";
311 case StageType::DIFFERENCE: return "Difference";
312 case StageType::SCALE: return "Scale";
313 case StageType::PASSTHROUGH: return "PassThrough";
314 case StageType::RLE: return "RLE";
315 case StageType::HUFFMAN: return "Huffman";
316 case StageType::BITPACK: return "BitPack";
317 case StageType::SPLIT: return "Split";
318 case StageType::MERGE: return "Merge";
319 case StageType::QUANTIZER: return "Quantizer";
320 case StageType::ZIGZAG: return "Zigzag";
321 case StageType::NEGABINARY: return "Negabinary";
322 case StageType::BITSHUFFLE: return "Bitshuffle";
323 case StageType::RZE: return "RZE";
324 default: return "Unknown";
325 }
326}
327
328} // namespace fz
fz
Definition fzm_format.h:25
size_t getDataTypeSize(DataType type)
Definition fzm_format.h:278
constexpr uint8_t FZM_VERSION_MAJOR
Definition fzm_format.h:40
std::string dataTypeToString(DataType type)
Definition fzm_format.h:289
std::string stageTypeToString(StageType type)
Definition fzm_format.h:306
constexpr uint8_t fzmVersionMinor(uint16_t v)
Definition fzm_format.h:59
constexpr size_t FZM_MAX_SOURCES
Maximum source stages per pipeline.
Definition fzm_format.h:66
constexpr size_t FZM_STAGE_CONFIG_SIZE
Per-stage serialized config slot (bytes)
Definition fzm_format.h:65
constexpr uint8_t fzmVersionMajor(uint16_t v)
Definition fzm_format.h:55
constexpr uint16_t FZM_FLAG_HAS_DATA_CHECKSUM
data_checksum field is valid
Definition fzm_format.h:48
constexpr uint32_t FZM_MAGIC
Definition fzm_format.h:28
StageType
Stage type identifiers written into the FZM header.
Definition fzm_format.h:76
@ BITSHUFFLE
BitshuffleStage — GPU bit-matrix transpose.
@ SCALE
ScaleStage (test utility)
@ RLE
RLEStage — run-length encoding.
@ LORENZO_1D
LorenzoStage — 1-D predictor.
@ LORENZO_3D
LorenzoStage — 3-D predictor.
@ RZE
RZEStage — recursive zero-byte elimination.
@ BITPACK
Reserved (not yet implemented)
@ LORENZO_2D
LorenzoStage — 2-D predictor.
@ HUFFMAN
Reserved (not yet implemented)
@ SPLIT
SplitStage (test utility)
@ MERGE
MergeStage (test utility)
@ ZIGZAG
ZigzagStage — zigzag encode/decode.
@ DIFFERENCE
DifferenceStage — first-order differencing.
@ NEGABINARY
NegabinaryStage — negabinary encode/decode.
@ QUANTIZER
QuantizerStage — direct-value quantization.
@ PASSTHROUGH
PassThroughStage (test utility)
constexpr uint16_t FZM_FLAG_HAS_HEADER_CHECKSUM
header_checksum field is valid
Definition fzm_format.h:49
DataType
Element data type identifiers used in buffer and stage descriptors.
Definition fzm_format.h:103
constexpr size_t FZM_LEGACY_HEADER_CORE_SIZE
Definition fzm_format.h:46
constexpr size_t FZM_MAX_NAME_LEN
Maximum output port name length (bytes, null-terminated)
Definition fzm_format.h:64
constexpr size_t FZM_MAX_BUFFERS
Maximum pipeline output buffers per file.
Definition fzm_format.h:63
FZMBufferEntry
Per-buffer metadata record written into the FZM header (256 bytes).
Definition fzm_format.h:170
StageType stage_type
Producer stage type (2B)
Definition fzm_format.h:171
uint64_t data_size
Actual compressed bytes in this segment (8B)
Definition fzm_format.h:178
uint8_t producer_output_idx
Which output port of the producer (1B)
Definition fzm_format.h:174
uint64_t uncompressed_size
Bytes after fully decompressing this stage's output (8B)
Definition fzm_format.h:180
uint8_t stage_config[FZM_STAGE_CONFIG_SIZE]
Producer stage config, see Stage::serializeHeader() (128B)
Definition fzm_format.h:183
char name[FZM_MAX_NAME_LEN]
Output port name, null-terminated (64B)
Definition fzm_format.h:176
DataType data_type
Element data type in this buffer (1B)
Definition fzm_format.h:173
uint64_t allocated_size
Buffer capacity required for decompression (8B)
Definition fzm_format.h:179
uint8_t reserved2[14]
Reserved for future use (14B)
Definition fzm_format.h:186
uint32_t config_size
Valid bytes in stage_config (4B)
Definition fzm_format.h:184
uint16_t dag_buffer_id
DAG buffer ID used for inverse routing; 0xFFFF = unassigned (2B)
Definition fzm_format.h:175
uint16_t stage_version
Producer stage config version (2B)
Definition fzm_format.h:172
uint64_t byte_offset
Byte offset of this segment within the compressed payload (8B)
Definition fzm_format.h:181
FZMHeaderCore
Fixed-size FZM file header core (80 bytes).
Definition fzm_format.h:225
uint64_t computeHeaderSize() const
Definition fzm_format.h:265
uint16_t num_buffers
Number of FZMBufferEntry records (2B)
Definition fzm_format.h:228
uint32_t header_checksum
CRC32 of header bytes (v3.1+; 0 if flag not set) (4B)
Definition fzm_format.h:246
uint16_t flags
Feature flags: FZM_FLAG_* constants (2B)
Definition fzm_format.h:236
uint32_t data_checksum
CRC32 of compressed payload (v3.1+; 0 if flag not set) (4B)
Definition fzm_format.h:245
uint32_t num_stages
Number of FZMStageInfo records (4B)
Definition fzm_format.h:234
uint64_t compressed_size
Total compressed payload size in bytes (8B)
Definition fzm_format.h:231
uint64_t source_uncompressed_sizes[FZM_MAX_SOURCES]
Uncompressed size in bytes of each source, one slot per source up to FZM_MAX_SOURCES (32B)
Definition fzm_format.h:243
uint16_t version
FZM_VERSION (2B)
Definition fzm_format.h:227
uint64_t uncompressed_size
Sum of all source uncompressed sizes in bytes (8B)
Definition fzm_format.h:230
uint16_t num_sources
Number of source (input) stages in the pipeline (2B)
Definition fzm_format.h:235
uint32_t magic
Must equal FZM_MAGIC (4B)
Definition fzm_format.h:226
uint64_t header_size
Total header size; compressed payload starts at this offset (8B)
Definition fzm_format.h:232
FZMStageInfo
Per-stage metadata record written into the FZM header (256 bytes).
Definition fzm_format.h:131
uint8_t stage_config[FZM_STAGE_CONFIG_SIZE]
Serialized stage config, see Stage::serializeHeader() (128B)
Definition fzm_format.h:141
uint8_t num_inputs
Number of input ports (1B)
Definition fzm_format.h:134
uint8_t num_outputs
Number of output ports (1B)
Definition fzm_format.h:135
StageType stage_type
Stage type (2B)
Definition fzm_format.h:132
uint8_t reserved2[84]
Reserved for future use (84B)
Definition fzm_format.h:144
uint16_t input_buffer_ids[FZM_MAX_STAGE_INPUTS]
Input buffer indices (16B); 0xFFFF = unused.
Definition fzm_format.h:138
uint16_t stage_version
Config format version (2B)
Definition fzm_format.h:133
uint32_t config_size
Valid bytes in stage_config (4B)
Definition fzm_format.h:142
uint16_t reserved1
Padding (2B)
Definition fzm_format.h:136
uint16_t output_buffer_ids[FZM_MAX_STAGE_OUTPUTS]
Output buffer indices (16B); 0xFFFF = unused.
Definition fzm_format.h:139