FZGPUModules 2.0
GPU-accelerated modular compression pipelines
Loading...
Searching...
No Matches
lorenzo_stage.h
Go to the documentation of this file.
1#pragma once
2
8#include "stage/stage.h"
9#include "fzm_format.h"
10#include <cuda_runtime.h>
11#include <array>
12#include <cstdint>
13#include <cstring>
14#include <stdexcept>
15#include <string>
16#include <type_traits>
17#include <unordered_map>
18#include <vector>
19
20namespace fz {
21
29 uint8_t ndim;
30 uint8_t reserved[2];
31 uint32_t dim_x;
32 uint32_t dim_y;
33 uint32_t dim_z;
34
36 : data_type(DataType::INT32), ndim(1), reserved{0, 0},
37 dim_x(0), dim_y(1), dim_z(1) {}
38};
39static_assert(sizeof(LorenzoConfig) <= FZM_STAGE_CONFIG_SIZE,
40 "LorenzoConfig must fit in FZM_STAGE_CONFIG_SIZE");
41
50template<typename T>
51class LorenzoStage : public Stage {
52 static_assert(std::is_integral<T>::value && std::is_signed<T>::value,
53 "LorenzoStage requires a signed integer type");
54public:
55 LorenzoStage() = default;
56
57 void setInverse(bool inv) override { is_inverse_ = inv; }
58 bool isInverse() const override { return is_inverse_; }
59
60 void setDims(const std::array<size_t, 3>& dims) override { dims_ = dims; }
61 void setDims(size_t x, size_t y = 1, size_t z = 1) { dims_ = {x, y, z}; }
62 std::array<size_t, 3> getDims() const { return dims_; }
63
64 int ndim() const {
65 if (dims_[2] > 1) return 3;
66 if (dims_[1] > 1) return 2;
67 return 1;
68 }
69
70 void execute(
71 cudaStream_t stream,
72 MemoryPool* pool,
73 const std::vector<void*>& inputs,
74 const std::vector<void*>& outputs,
75 const std::vector<size_t>& sizes
76 ) override;
77
78 std::string getName() const override { return "Lorenzo"; }
79 size_t getNumInputs() const override { return 1; }
80 size_t getNumOutputs() const override { return 1; }
81
82 std::vector<size_t> estimateOutputSizes(
83 const std::vector<size_t>& input_sizes
84 ) const override {
85 return {input_sizes.empty() ? 0 : input_sizes[0]};
86 }
87
88 std::unordered_map<std::string, size_t>
89 getActualOutputSizesByName() const override {
90 return {{"output", actual_output_size_}};
91 }
92
93 size_t getActualOutputSize(int index) const override {
94 return (index == 0) ? actual_output_size_ : 0;
95 }
96
97 uint16_t getStageTypeId() const override {
98 return static_cast<uint16_t>(StageType::LORENZO);
99 }
100
101 uint8_t getOutputDataType(size_t /*output_index*/) const override {
102 return static_cast<uint8_t>(getElementDataType());
103 }
104
105 uint8_t getInputDataType(size_t /*input_index*/) const override {
106 return static_cast<uint8_t>(getElementDataType());
107 }
108
109 size_t serializeHeader(size_t /*output_index*/, uint8_t* buf, size_t max_size) const override {
110 if (max_size < sizeof(LorenzoConfig))
111 throw std::runtime_error("LorenzoStage: header buffer too small");
112 LorenzoConfig cfg;
113 cfg.data_type = getElementDataType();
114 cfg.ndim = static_cast<uint8_t>(ndim());
115 cfg.dim_x = static_cast<uint32_t>(dims_[0]);
116 cfg.dim_y = static_cast<uint32_t>(dims_[1]);
117 cfg.dim_z = static_cast<uint32_t>(dims_[2]);
118 std::memcpy(buf, &cfg, sizeof(LorenzoConfig));
119 return sizeof(LorenzoConfig);
120 }
121
122 void deserializeHeader(const uint8_t* buf, size_t size) override {
123 if (size < sizeof(LorenzoConfig))
124 throw std::runtime_error("LorenzoStage: header too small");
125 LorenzoConfig cfg;
126 std::memcpy(&cfg, buf, sizeof(LorenzoConfig));
127 int eff_ndim = (cfg.ndim == 0) ? 1 : static_cast<int>(cfg.ndim);
128 dims_[0] = cfg.dim_x;
129 dims_[1] = (eff_ndim >= 2) ? cfg.dim_y : 1;
130 dims_[2] = (eff_ndim >= 3) ? cfg.dim_z : 1;
131 }
132
133 size_t getMaxHeaderSize(size_t /*output_index*/) const override {
134 return sizeof(LorenzoConfig);
135 }
136
137private:
138 bool is_inverse_ = false;
139 size_t actual_output_size_ = 0;
140 std::array<size_t, 3> dims_ = {0, 1, 1};
141
142 static DataType getElementDataType() {
143 if (std::is_same<T, int8_t>::value) return DataType::INT8;
144 if (std::is_same<T, int16_t>::value) return DataType::INT16;
145 if (std::is_same<T, int32_t>::value) return DataType::INT32;
146 if (std::is_same<T, int64_t>::value) return DataType::INT64;
147 return DataType::INT32;
148 }
149};
150
151extern template class LorenzoStage<int8_t>;
152extern template class LorenzoStage<int16_t>;
153extern template class LorenzoStage<int32_t>;
154extern template class LorenzoStage<int64_t>;
155
156// Kernel launcher declarations — defined in lorenzo_stage.cu.
157
158template<typename T>
159void launchLorenzoDeltaKernel1D(
160 const T* d_input, T* d_output, size_t n, cudaStream_t stream);
161
162template<typename T>
163void launchLorenzoPrefixSumKernel1D(
164 const T* d_input, T* d_output, size_t n, cudaStream_t stream);
165
166template<typename T>
167void launchLorenzoDeltaKernel2D(
168 const T* d_input, T* d_output, size_t nx, size_t ny, cudaStream_t stream);
169
170template<typename T>
171void launchLorenzoPrefixSumKernel2D(
172 const T* d_input, T* d_output, size_t nx, size_t ny, cudaStream_t stream);
173
174template<typename T>
175void launchLorenzoDeltaKernel3D(
176 const T* d_input, T* d_output, size_t nx, size_t ny, size_t nz, cudaStream_t stream);
177
178template<typename T>
179void launchLorenzoPrefixSumKernel3D(
180 const T* d_input, T* d_output, size_t nx, size_t ny, size_t nz, cudaStream_t stream);
181
182} // namespace fz
Definition lorenzo_stage.h:51
void execute(cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
std::vector< size_t > estimateOutputSizes(const std::vector< size_t > &input_sizes) const override
Definition lorenzo_stage.h:82
size_t getMaxHeaderSize(size_t) const override
Definition lorenzo_stage.h:133
std::string getName() const override
Definition lorenzo_stage.h:78
size_t getActualOutputSize(int index) const override
Definition lorenzo_stage.h:93
uint16_t getStageTypeId() const override
Definition lorenzo_stage.h:97
void setInverse(bool inv) override
Definition lorenzo_stage.h:57
size_t serializeHeader(size_t, uint8_t *buf, size_t max_size) const override
Definition lorenzo_stage.h:109
std::unordered_map< std::string, size_t > getActualOutputSizesByName() const override
Definition lorenzo_stage.h:89
uint8_t getInputDataType(size_t) const override
Definition lorenzo_stage.h:105
uint8_t getOutputDataType(size_t) const override
Definition lorenzo_stage.h:101
void deserializeHeader(const uint8_t *buf, size_t size) override
Definition lorenzo_stage.h:122
void setDims(const std::array< size_t, 3 > &dims) override
Definition lorenzo_stage.h:60
Definition mempool.h:66
Definition stage.h:30
FZM binary file format definitions — structs, enums, and helpers.
DataType
Element data type identifiers used in buffer and stage descriptors.
Definition fzm_format.h:102
Base class interface for all compression stages.
Definition lorenzo_stage.h:27
uint32_t dim_z
Z dimension (1 for 1-D/2-D).
Definition lorenzo_stage.h:33
DataType data_type
Signed integer element type (1B).
Definition lorenzo_stage.h:28
uint32_t dim_y
Y dimension (1 for 1-D).
Definition lorenzo_stage.h:32
uint8_t ndim
Spatial dimensionality 1/2/3 (0 treated as 1).
Definition lorenzo_stage.h:29
uint32_t dim_x
X (fast) dimension.
Definition lorenzo_stage.h:31
uint8_t reserved[2]
Must be zero.
Definition lorenzo_stage.h:30