19#include "stage/stage.h"
21#include <cuda_runtime.h>
26#include <unordered_map>
44 , actual_output_size_(0)
48 void setInverse(
bool inv)
override { is_inverse_ = inv; }
49 bool isInverse()
const override {
return is_inverse_; }
51 void setBlockSize(
size_t bytes) { block_size_ =
static_cast<uint32_t
>(bytes); }
52 void setElementWidth(
size_t bytes){ element_width_ =
static_cast<uint8_t
>(bytes); }
54 size_t getBlockSize()
const {
return block_size_; }
56 size_t getElementWidth()
const {
return element_width_; }
62 const std::vector<void*>& inputs,
63 const std::vector<void*>& outputs,
64 const std::vector<size_t>& sizes
68 std::string
getName()
const override {
return "Bitshuffle"; }
69 size_t getNumInputs()
const override {
return 1; }
70 size_t getNumOutputs()
const override {
return 1; }
73 const std::vector<size_t>& input_sizes
76 return {input_sizes[0]};
79 std::unordered_map<std::string, size_t>
81 return {{
"output", actual_output_size_}};
84 return (index == 0) ? actual_output_size_ : 0;
93 return static_cast<uint8_t
>(DataType::UINT8);
99 size_t output_index, uint8_t* buf,
size_t max_size
102 if (max_size < 5)
return 0;
103 std::memcpy(buf, &block_size_,
sizeof(uint32_t));
104 buf[4] = element_width_;
109 if (size >= 4) std::memcpy(&block_size_, buf,
sizeof(uint32_t));
110 if (size >= 5) element_width_ = buf[4];
116 saved_block_size_ = block_size_;
117 saved_element_width_ = element_width_;
118 saved_actual_output_size_ = actual_output_size_;
121 void restoreState()
override {
122 block_size_ = saved_block_size_;
123 element_width_ = saved_element_width_;
124 actual_output_size_ = saved_actual_output_size_;
129 uint32_t block_size_;
130 uint32_t saved_block_size_ = 0;
131 uint8_t element_width_;
132 uint8_t saved_element_width_ = 0;
133 size_t actual_output_size_ = 0;
134 size_t saved_actual_output_size_ = 0;
139 size_t validateConfig()
const {
140 if (element_width_ != 1 && element_width_ != 2 &&
141 element_width_ != 4 && element_width_ != 8)
142 throw std::invalid_argument(
143 "BitshuffleStage: element_width must be 1, 2, 4, or 8");
144 if (block_size_ == 0 || block_size_ % (1024u * element_width_) != 0)
145 throw std::invalid_argument(
146 "BitshuffleStage: block_size must be a positive multiple of "
147 "1024 * element_width (default 16384 satisfies this for all "
148 "supported element widths)");
149 return block_size_ / element_width_;
Definition bitshuffle_stage.h:38
std::vector< size_t > estimateOutputSizes(const std::vector< size_t > &input_sizes) const override
Definition bitshuffle_stage.h:72
size_t getMaxHeaderSize(size_t) const override
Definition bitshuffle_stage.h:113
uint8_t getOutputDataType(size_t) const override
Definition bitshuffle_stage.h:91
void execute(cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
size_t getActualOutputSize(int index) const override
Definition bitshuffle_stage.h:83
std::string getName() const override
Definition bitshuffle_stage.h:68
void setInverse(bool inv) override
Definition bitshuffle_stage.h:48
void saveState() override
Definition bitshuffle_stage.h:115
uint16_t getStageTypeId() const override
Definition bitshuffle_stage.h:87
size_t getRequiredInputAlignment() const override
Definition bitshuffle_stage.h:55
size_t serializeHeader(size_t output_index, uint8_t *buf, size_t max_size) const override
Definition bitshuffle_stage.h:98
void deserializeHeader(const uint8_t *buf, size_t size) override
Definition bitshuffle_stage.h:108
std::unordered_map< std::string, size_t > getActualOutputSizesByName() const override
Definition bitshuffle_stage.h:80
Definition fzm_format.h:25
@ BITSHUFFLE
BitshuffleStage — GPU bit-matrix transpose.