47#include <cuda_runtime.h>
53#include <unordered_map>
77 throw std::runtime_error(
"MergeStage: at least one segment name required");
78 if (names.size() > kMaxSegments)
79 throw std::runtime_error(
"MergeStage: too many segments (max "
80 + std::to_string(kMaxSegments) +
")");
81 segment_names_ = names;
82 segment_sizes_.assign(names.size(), 0);
84 const std::vector<std::string>& getSegmentNames()
const {
return segment_names_; }
85 size_t getNumSegments()
const {
return segment_names_.size(); }
88 void setInverse(
bool inv)
override { is_inverse_ = inv; }
89 bool isInverse()
const override {
return is_inverse_; }
97 size_t getNumInputs()
const override {
return is_inverse_ ? 1 : segment_names_.size(); }
98 size_t getNumOutputs()
const override {
return is_inverse_ ? segment_names_.size() : 1; }
100 return is_inverse_ ? segment_names_ : std::vector<std::string>{
"output"};
103 std::string
getName()
const override {
return "Merge"; }
106 return static_cast<uint16_t
>(StageType::MERGE);
121 const std::vector<void*>& inputs,
122 const std::vector<void*>& outputs,
123 const std::vector<size_t>& sizes
128 const std::vector<size_t>& input_sizes
133 return segment_sizes_;
137 for (
size_t s : input_sizes) total += s;
141 std::unordered_map<std::string, size_t>
144 std::unordered_map<std::string, size_t> m;
145 for (
size_t i = 0; i < segment_names_.size(); i++)
146 m[segment_names_[i]] = (i < segment_sizes_.size()) ? segment_sizes_[i] : 0;
149 return {{
"output", merged_total_}};
154 if (index < 0 || index >= (
int)segment_sizes_.size())
return 0;
155 return segment_sizes_[index];
157 return (index == 0) ? merged_total_ : 0;
162 const size_t N = segment_names_.size();
163 size_t need = 1 + 4 * N;
164 for (
const auto& nm : segment_names_) need += 1 + nm.size();
165 if (need > max_size)
return 0;
167 buf[off++] =
static_cast<uint8_t
>(N);
168 for (
size_t i = 0; i < N; i++) {
169 uint32_t sz = (i < segment_sizes_.size()) ?
static_cast<uint32_t
>(segment_sizes_[i]) : 0u;
170 std::memcpy(buf + off, &sz,
sizeof(uint32_t));
171 off +=
sizeof(uint32_t);
173 for (
const auto& nm : segment_names_) {
174 buf[off++] =
static_cast<uint8_t
>(nm.size());
175 std::memcpy(buf + off, nm.data(), nm.size());
182 if (size < 1)
return;
184 const size_t N = buf[off++];
185 segment_sizes_.assign(N, 0);
186 for (
size_t i = 0; i < N && off + 4 <= size; i++) {
188 std::memcpy(&sz, buf + off,
sizeof(uint32_t));
189 off +=
sizeof(uint32_t);
190 segment_sizes_[i] = sz;
192 segment_names_.assign(N,
"");
193 for (
size_t i = 0; i < N && off < size; i++) {
194 const size_t len = buf[off++];
195 if (off + len > size)
break;
196 segment_names_[i].assign(
reinterpret_cast<const char*
>(buf + off), len);
204 saved_names_ = segment_names_;
205 saved_sizes_ = segment_sizes_;
207 void restoreState()
override {
208 segment_names_ = saved_names_;
209 segment_sizes_ = saved_sizes_;
/// Upper bound on the segment count; a longer name list is rejected with
/// a std::runtime_error when the segment names are set.
static constexpr size_t kMaxSegments = 16;

/// Direction flag: false = forward (N inputs -> 1 output),
/// true = inverse (1 input -> N outputs).
bool is_inverse_{false};

/// Segment names, one entry per segment.
std::vector<std::string> segment_names_;

/// Per-segment sizes, index-aligned with segment_names_; serialized as
/// 32-bit values in the stage header.
std::vector<size_t> segment_sizes_;

/// Size reported for the single "output" buffer.
size_t merged_total_{0};

/// Snapshot of segment_names_ / segment_sizes_ used by saveState() and
/// restoreState().
/// NOTE(review): merged_total_ does not appear in the visible save/restore
/// lines — confirm that is intentional.
std::vector<std::string> saved_names_;
std::vector<size_t> saved_sizes_;
Definition merge_stage.h:68
size_t serializeHeader(size_t, uint8_t *buf, size_t max_size) const override
Definition merge_stage.h:161
void setSegmentNames(const std::vector< std::string > &names)
Define the N segments (concatenation order = inverse output order).
Definition merge_stage.h:75
bool isGraphCompatible() const override
Pure stream-ordered D2D memcpy in both directions — no host sync.
Definition merge_stage.h:92
void saveState() override
Definition merge_stage.h:203
size_t getActualOutputSize(int index) const override
Definition merge_stage.h:152
std::unordered_map< std::string, size_t > getActualOutputSizesByName() const override
Definition merge_stage.h:142
size_t getMaxHeaderSize(size_t) const override
Definition merge_stage.h:201
std::string getName() const override
Definition merge_stage.h:103
void deserializeHeader(const uint8_t *buf, size_t size) override
Definition merge_stage.h:181
uint16_t getStageTypeId() const override
Definition merge_stage.h:105
uint8_t getInputDataType(size_t) const override
Definition merge_stage.h:113
std::vector< size_t > estimateOutputSizes(const std::vector< size_t > &input_sizes) const override
Definition merge_stage.h:127
uint8_t getOutputDataType(size_t) const override
Definition merge_stage.h:110
std::vector< std::string > getOutputNames() const override
Definition merge_stage.h:99
void execute(cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
void setInverse(bool inv) override
Definition merge_stage.h:88
Definition fzm_format.h:25
constexpr size_t FZM_STAGE_CONFIG_SIZE
Per-stage serialized config slot (bytes)
Definition fzm_format.h:65
@ UNKNOWN
Byte-transparent stages: skip type checking at finalize()
Base class interface for all compression stages.