6#include <cuda_runtime.h>
// Compile-time layout constants.
// NOTE(review): the roles given below are inferred from the identifiers only;
// confirm them against the kernels that consume these values.
static constexpr int kBlockThreads = 32;  // presumably threads per block -- confirm
static constexpr int kChunk = 16;         // presumably elements per thread -- confirm
static constexpr int kDecmpChunk = 32;    // presumably the decompression-side chunk -- confirm

// Product of the two constants above: 32 * 16 = 512.
static constexpr int kBlockElems = kBlockThreads * kChunk;
// kBlockElems is used as a divisor by adm_gsize(), so it must stay positive.
static_assert(kBlockElems > 0, "kBlockElems must be positive");

static constexpr int kDecoupledMaxGsize = 1024;
static constexpr int kShift = 1;

// NOTE(review): these match sizeof(uint16_t) / sizeof(uint32_t); whether that
// is the intended meaning is not visible here -- confirm.
static constexpr int kMaxSignalBytesU16 = 2;
static constexpr int kMaxSignalBytesU32 = 4;
33inline size_t adm_gsize(
size_t n) {
34 return (n +
static_cast<size_t>(kBlockElems) - 1) / kBlockElems;
// Returns ceil(gsize / 8). Returns 0 when gsize == 0.
// NOTE(review): presumably the byte count for one flag bit per group -- confirm
// against the code that fills the flag buffer.
inline size_t adm_flags_bytes(size_t gsize) {
    // Overflow-safe ceiling: (gsize + 7) would wrap for gsize > SIZE_MAX - 7.
    return gsize / 8 + (gsize % 8 != 0 ? 1 : 0);
}
41inline size_t adm_flags_words(
size_t gsize) {
42 return (adm_flags_bytes(gsize) +
sizeof(uint32_t) - 1) /
sizeof(uint32_t);
51 int* d_output_lengths;
53 uint32_t* d_block_flags;
55 uint8_t* d_concat_signals;
56 uint8_t* d_bit_signals;
59 unsigned int* d_overflow_flag;
65 const uint16_t* d_input,
size_t num_elements,
66 uint8_t* d_output,
size_t& output_size,
67 const AdmScratch& s, cudaStream_t stream);
70 const uint8_t* d_input,
size_t input_size,
71 uint16_t* d_output,
size_t num_elements,
72 const AdmScratch& s, cudaStream_t stream);
// Declaration only; the definition is not part of this listing.
// NOTE(review): presumably an upper bound, in bytes, on the payload produced
// for num_elements uint16_t inputs -- confirm against the definition.
size_t get_max_u16_payload_bytes(size_t num_elements);
79 const uint32_t* d_input,
size_t num_elements,
80 uint8_t* d_output,
size_t& output_size,
81 const AdmScratch& s, cudaStream_t stream);
84 const uint8_t* d_input,
size_t input_size,
85 uint32_t* d_output,
size_t num_elements,
86 const AdmScratch& s, cudaStream_t stream);
// Declaration only; the definition is not part of this listing.
// NOTE(review): presumably an upper bound, in bytes, on the payload produced
// for num_elements uint32_t inputs -- confirm against the definition.
size_t get_max_u32_payload_bytes(size_t num_elements);
Definition fzm_format.h:25