FZGPUModules 1.0
GPU-accelerated modular compression pipeline
Loading...
Searching...
No Matches
diff.h
Go to the documentation of this file.
1#pragma once
2
8#include "stage/stage.h"
9#include "fzm_format.h"
10#include <cuda_runtime.h>
11#include <cstdint>
12#include <cstring>
13#include <type_traits>
14
15namespace fz {
16
47template<typename T = float, typename TOut = T>
48class DifferenceStage : public Stage {
49 static_assert(
50 std::is_same_v<T, TOut> ||
51 (std::is_integral_v<T> && std::is_signed_v<T> &&
52 std::is_integral_v<TOut> && std::is_unsigned_v<TOut> &&
53 sizeof(T) == sizeof(TOut)),
54 "DifferenceStage: TOut must equal T, or T must be a signed integer "
55 "and TOut its unsigned counterpart of the same width (negabinary fusion).");
56public:
57 DifferenceStage() : actual_output_size_(0), is_inverse_(false), chunk_size_(0) {}
58
59 void setInverse(bool inverse) override { is_inverse_ = inverse; }
60 bool isInverse() const override { return is_inverse_; }
61
69 void setChunkSize(size_t bytes) { chunk_size_ = bytes; }
70 size_t getChunkSize() const { return chunk_size_; }
71 size_t getRequiredInputAlignment() const override {
72 return chunk_size_ > 0 ? chunk_size_ : 1;
73 }
74
75 void execute(
76 cudaStream_t stream,
77 MemoryPool* pool,
78 const std::vector<void*>& inputs,
79 const std::vector<void*>& outputs,
80 const std::vector<size_t>& sizes
81 ) override;
82
83
84 std::string getName() const override { return "Difference"; }
85 size_t getNumInputs() const override { return 1; }
86 size_t getNumOutputs() const override { return 1; }
87
88 std::vector<size_t> estimateOutputSizes(
89 const std::vector<size_t>& input_sizes
90 ) const override {
91 return {input_sizes[0]}; // size-preserving (sizeof(T)==sizeof(TOut))
92 }
93
94 std::unordered_map<std::string, size_t> getActualOutputSizesByName() const override {
95 return {{"output", actual_output_size_}};
96 }
97 size_t getActualOutputSize(int index) const override {
98 return (index == 0) ? actual_output_size_ : 0;
99 }
100
101 uint16_t getStageTypeId() const override {
102 return static_cast<uint16_t>(StageType::DIFFERENCE);
103 }
104
105 uint8_t getOutputDataType(size_t output_index) const override {
106 (void)output_index;
107 return static_cast<uint8_t>(getOutDataTypeEnum());
108 }
109
110 uint8_t getInputDataType(size_t /*input_index*/) const override {
111 return static_cast<uint8_t>(getInDataTypeEnum());
112 }
113
114 size_t serializeHeader(size_t output_index, uint8_t* buf, size_t max_size) const override {
115 (void)output_index;
116 if (max_size < 6) return 0;
117 buf[0] = static_cast<uint8_t>(getInDataTypeEnum());
118 buf[1] = static_cast<uint8_t>(getOutDataTypeEnum());
119 uint32_t cs = static_cast<uint32_t>(chunk_size_);
120 std::memcpy(buf + 2, &cs, sizeof(uint32_t));
121 return 6;
122 }
123
124 void deserializeHeader(const uint8_t* buf, size_t size) override {
125 // DataTypes are baked into the template; factory picks the right instantiation.
126 // Only chunk_size needs to be restored at runtime.
127 if (size >= 6) {
128 uint32_t cs = 0;
129 std::memcpy(&cs, buf + 2, sizeof(uint32_t));
130 chunk_size_ = cs;
131 }
132 }
133
134 size_t getMaxHeaderSize(size_t output_index) const override {
135 (void)output_index;
136 return 6;
137 }
138
139 void saveState() override {
140 saved_chunk_size_ = chunk_size_;
141 saved_actual_output_size_ = actual_output_size_;
142 }
143
144 void restoreState() override {
145 chunk_size_ = saved_chunk_size_;
146 actual_output_size_ = saved_actual_output_size_;
147 }
148
149private:
150 size_t actual_output_size_;
151 size_t saved_actual_output_size_ = 0;
152 bool is_inverse_;
153 size_t chunk_size_;
154 size_t saved_chunk_size_ = 0;
155
156
157 DataType getInDataTypeEnum() const {
158 if (std::is_same_v<T, uint8_t>) return DataType::UINT8;
159 if (std::is_same_v<T, uint16_t>) return DataType::UINT16;
160 if (std::is_same_v<T, uint32_t>) return DataType::UINT32;
161 if (std::is_same_v<T, uint64_t>) return DataType::UINT64;
162 if (std::is_same_v<T, int8_t>) return DataType::INT8;
163 if (std::is_same_v<T, int16_t>) return DataType::INT16;
164 if (std::is_same_v<T, int32_t>) return DataType::INT32;
165 if (std::is_same_v<T, int64_t>) return DataType::INT64;
166 if (std::is_same_v<T, float>) return DataType::FLOAT32;
167 if (std::is_same_v<T, double>) return DataType::FLOAT64;
168 return DataType::UINT8;
169 }
170
171 DataType getOutDataTypeEnum() const {
172 if (std::is_same_v<TOut, uint8_t>) return DataType::UINT8;
173 if (std::is_same_v<TOut, uint16_t>) return DataType::UINT16;
174 if (std::is_same_v<TOut, uint32_t>) return DataType::UINT32;
175 if (std::is_same_v<TOut, uint64_t>) return DataType::UINT64;
176 if (std::is_same_v<TOut, int8_t>) return DataType::INT8;
177 if (std::is_same_v<TOut, int16_t>) return DataType::INT16;
178 if (std::is_same_v<TOut, int32_t>) return DataType::INT32;
179 if (std::is_same_v<TOut, int64_t>) return DataType::INT64;
180 if (std::is_same_v<TOut, float>) return DataType::FLOAT32;
181 if (std::is_same_v<TOut, double>) return DataType::FLOAT64;
182 return DataType::UINT8;
183 }
184};
185
186// ─── Same-type instantiations (original API, TOut = T) ───────────────────────
187extern template class DifferenceStage<float>;
188extern template class DifferenceStage<double>;
189extern template class DifferenceStage<int32_t>;
190extern template class DifferenceStage<int64_t>;
191extern template class DifferenceStage<uint16_t>;
192extern template class DifferenceStage<uint8_t>;
193extern template class DifferenceStage<uint32_t>;
194
195// ─── Negabinary-fused instantiations (TOut = unsigned counterpart of T) ───────
196extern template class DifferenceStage<int8_t, uint8_t>;
197extern template class DifferenceStage<int16_t, uint16_t>;
198extern template class DifferenceStage<int32_t, uint32_t>;
199extern template class DifferenceStage<int64_t, uint64_t>;
200
201} // namespace fz
Definition diff.h:48
void deserializeHeader(const uint8_t *buf, size_t size) override
Definition diff.h:124
void setChunkSize(size_t bytes)
Definition diff.h:69
void setInverse(bool inverse) override
Definition diff.h:59
std::vector< size_t > estimateOutputSizes(const std::vector< size_t > &input_sizes) const override
Definition diff.h:88
size_t serializeHeader(size_t output_index, uint8_t *buf, size_t max_size) const override
Definition diff.h:114
void saveState() override
Definition diff.h:139
std::string getName() const override
Definition diff.h:84
uint16_t getStageTypeId() const override
Definition diff.h:101
size_t getRequiredInputAlignment() const override
Definition diff.h:71
uint8_t getOutputDataType(size_t output_index) const override
Definition diff.h:105
size_t getActualOutputSize(int index) const override
Definition diff.h:97
void execute(cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
std::unordered_map< std::string, size_t > getActualOutputSizesByName() const override
Definition diff.h:94
uint8_t getInputDataType(size_t) const override
Definition diff.h:110
size_t getMaxHeaderSize(size_t output_index) const override
Definition diff.h:134
Definition mempool.h:62
Definition stage.h:28
FZM binary file format definitions — structs, enums, and helpers.
Definition fzm_format.h:25
@ DIFFERENCE
DifferenceStage — first-order differencing.
DataType
Element data type identifiers used in buffer and stage descriptors.
Definition fzm_format.h:103