FZGPUModules 1.0
GPU-accelerated modular compression pipeline
Loading...
Searching...
No Matches
fz::DifferenceStage< T, TOut > Class Template Reference

#include <diff.h>

+ Inheritance diagram for fz::DifferenceStage< T, TOut >:

Public Member Functions

void setInverse (bool inverse) override
 
void setChunkSize (size_t bytes)
 
size_t getRequiredInputAlignment () const override
 
void execute (cudaStream_t stream, MemoryPool *pool, const std::vector< void * > &inputs, const std::vector< void * > &outputs, const std::vector< size_t > &sizes) override
 
std::string getName () const override
 
std::vector< size_t > estimateOutputSizes (const std::vector< size_t > &input_sizes) const override
 
std::unordered_map< std::string, size_t > getActualOutputSizesByName () const override
 
size_t getActualOutputSize (int index) const override
 
uint16_t getStageTypeId () const override
 
uint8_t getOutputDataType (size_t output_index) const override
 
uint8_t getInputDataType (size_t) const override
 
size_t serializeHeader (size_t output_index, uint8_t *buf, size_t max_size) const override
 
void deserializeHeader (const uint8_t *buf, size_t size) override
 
size_t getMaxHeaderSize (size_t output_index) const override
 
void saveState () override
 
- Public Member Functions inherited from fz::Stage
virtual std::vector< std::string > getOutputNames () const
 
int getOutputIndex (const std::string &name) const
 
virtual void setDims (const std::array< size_t, 3 > &dims)
 
virtual void postStreamSync (cudaStream_t stream)
 
virtual bool isGraphCompatible () const
 
virtual size_t estimateScratchBytes (const std::vector< size_t > &input_sizes) const
 

Detailed Description

template<typename T = float, typename TOut = T>
class fz::DifferenceStage< T, TOut >

Difference coding stage.

Forward (compression): first-order differences with optional negabinary output:
  output[0] = input[0]
  output[i] = input[i] - input[i-1]          (when TOut == T)
  output[i] = Negabinary<T>::encode(diff)    (when TOut != T; diff is the first-order difference above)

Inverse (decompression): cumulative sum with optional negabinary decode first

Optional chunking (setChunkSize > 0): The difference/cumsum resets at each chunk boundary. Each chunk is a fully independent context — the first element of each chunk is stored as-is (previous = 0 implied). This enables parallel decompression and is required for the PFPL pipeline where 16 KB chunks flow independently through Bitshuffle and RZE.

Template parameters:
  T    — input element type (signed/unsigned integer or float).
  TOut — output element type (defaults to T). When TOut != T, T must be a signed integer and TOut its unsigned counterpart of the same width; negabinary encoding is fused into the final write of the forward kernel, and the decode is the first step of the inverse kernel.

Serialized header layout (6 bytes):
  [0]     DataType of T (1 byte)
  [1]     DataType of TOut (1 byte)
  [2..5]  chunk_size (uint32_t, little-endian; 0 = no chunking)

Member Function Documentation

◆ setInverse()

template<typename T = float, typename TOut = T>
void fz::DifferenceStage< T, TOut >::setInverse ( bool  inverse)
inline override virtual

Switch between forward (compression) and inverse (decompression) mode. Affects getNumInputs()/getNumOutputs() for stages with asymmetric port counts.

Reimplemented from fz::Stage.

◆ setChunkSize()

template<typename T = float, typename TOut = T>
void fz::DifferenceStage< T, TOut >::setChunkSize ( size_t  bytes)
inline

Set chunk size in bytes (default 0 = no chunking).

When > 0, differences and cumulative sums reset at each chunk boundary; a nonzero value must be a multiple of sizeof(T). Pass 0 to disable chunking and process the whole array as a single context (this is the default).

◆ getRequiredInputAlignment()

template<typename T = float, typename TOut = T>
size_t fz::DifferenceStage< T, TOut >::getRequiredInputAlignment ( ) const
inline override virtual

Minimum input size alignment in bytes. Chunked stages return their chunk size; the pipeline uses the LCM of all stage alignments at finalize() to transparently zero-pad the input. Default: 1 (no alignment requirement).

Reimplemented from fz::Stage.

◆ execute()

template<typename T = float, typename TOut = T>
void fz::DifferenceStage< T, TOut >::execute ( cudaStream_t  stream,
MemoryPool *  pool,
const std::vector< void * > &  inputs,
const std::vector< void * > &  outputs,
const std::vector< size_t > &  sizes 
)
override virtual

Execute the stage. The inputs and outputs vectors hold device pointers; the sizes vector holds sizes in bytes.

Implements fz::Stage.

◆ getName()

template<typename T = float, typename TOut = T>
std::string fz::DifferenceStage< T, TOut >::getName ( ) const
inline override virtual

Human-readable name used in error messages and debug output.

Implements fz::Stage.

◆ estimateOutputSizes()

template<typename T = float, typename TOut = T>
std::vector< size_t > fz::DifferenceStage< T, TOut >::estimateOutputSizes ( const std::vector< size_t > &  input_sizes) const
inline override virtual

Estimate output buffer sizes given input sizes. Used for buffer allocation planning in PREALLOCATE mode — must be a safe upper bound; under-estimation causes buffer overruns.

Implements fz::Stage.

◆ getActualOutputSizesByName()

template<typename T = float, typename TOut = T>
std::unordered_map< std::string, size_t > fz::DifferenceStage< T, TOut >::getActualOutputSizesByName ( ) const
inline override virtual

Actual output sizes after execute(), keyed by output port name.

Implements fz::Stage.

◆ getActualOutputSize()

template<typename T = float, typename TOut = T>
size_t fz::DifferenceStage< T, TOut >::getActualOutputSize ( int  index) const
inline override virtual

Actual size of a single output by index after execute(). Avoids constructing the map for the common single-output case. Default delegates to getActualOutputSizesByName(); override to return directly from an internal field.

Reimplemented from fz::Stage.

◆ getStageTypeId()

template<typename T = float, typename TOut = T>
uint16_t fz::DifferenceStage< T, TOut >::getStageTypeId ( ) const
inline override virtual

Stage type identifier written into the FZM file header.

Implements fz::Stage.

◆ getOutputDataType()

template<typename T = float, typename TOut = T>
uint8_t fz::DifferenceStage< T, TOut >::getOutputDataType ( size_t  output_index) const
inline override virtual

DataType enum of the given output port.

Implements fz::Stage.

◆ getInputDataType()

template<typename T = float, typename TOut = T>
uint8_t fz::DifferenceStage< T, TOut >::getInputDataType ( size_t  ) const
inline override virtual

Expected DataType of the given input port.

Used by Pipeline::finalize() to detect type mismatches between connected stages before any execution. Return DataType::UNKNOWN to opt out of checking — byte-transparent stages (Bitshuffle, RZE) and mock stages must return UNKNOWN; finalize() skips any connection where either side is UNKNOWN.

Reimplemented from fz::Stage.

◆ serializeHeader()

template<typename T = float, typename TOut = T>
size_t fz::DifferenceStage< T, TOut >::serializeHeader ( size_t  output_index,
uint8_t *  header_buffer,
size_t  max_size 
) const
inline override virtual

Serialize stage config into header_buffer (max 128 bytes) for the FZM file. Return the number of bytes written, or 0 if the stage has no config.

Reimplemented from fz::Stage.

◆ deserializeHeader()

template<typename T = float, typename TOut = T>
void fz::DifferenceStage< T, TOut >::deserializeHeader ( const uint8_t *  header_buffer,
size_t  size 
)
inline override virtual

Restore stage config from header_buffer during decompression.

Reimplemented from fz::Stage.

◆ getMaxHeaderSize()

template<typename T = float, typename TOut = T>
size_t fz::DifferenceStage< T, TOut >::getMaxHeaderSize ( size_t  output_index) const
inline override virtual

Maximum bytes this stage writes into its per-output FZM header slot.

Reimplemented from fz::Stage.

◆ saveState()

template<typename T = float, typename TOut = T>
void fz::DifferenceStage< T, TOut >::saveState ( )
inline override virtual

Save/restore config state around a decompression pass. deserializeHeader() overwrites the stage's forward-pass config; saveState() is called before and restoreState() after so the stage returns to its original configuration.

Reimplemented from fz::Stage.