14#include <cuda_runtime.h>
// Compile-time integer constants for the encode and deflate stages.
// NOTE(review): the names suggest threads-per-block (BLOCK_DIM_*) and
// per-thread work factors (ENC_SEQUENTIALITY, DEFLATE_CONSTANT); the code
// that consumes them is not visible here -- confirm before changing values.
25 static const int BLOCK_DIM_ENCODE = 256;
26 static const int BLOCK_DIM_DEFLATE = 256;
27 static const int ENC_SEQUENTIALITY = 4;
28 static const int DEFLATE_CONSTANT = 4;
// HF_SPACE expands to phf::Buf<E>, so it is only usable where a type named E
// is in scope at the point of expansion.
// HF_STREAM expands to void*; it is the type of the `stream` parameter of
// build_book/encode/decode below.
// NOTE(review): presumably an opaque stream handle cast back by the
// implementation -- confirm the concrete type there.
33#define HF_SPACE phf::Buf<E>
34#define HF_STREAM void*
// Short alias for PHF_METADATA (declared elsewhere).
43 using M = PHF_METADATA;
// Consecutive integer tags 0..7, followed by END = 8 (one past the last tag).
// NOTE(review): these presumably index named sections of a table declared
// elsewhere (scratch, frequency, codebook, reverse codebook, per-partition
// nbit/ncell/entry, bitstream), with END serving as the section count --
// confirm against the users of these constants before reordering.
46 static const int SCRATCH = 0;
47 static const int FREQ = 1;
48 static const int BK = 2;
49 static const int REVBK = 3;
50 static const int PAR_NBIT = 4;
51 static const int PAR_NCELL = 5;
52 static const int PAR_ENTRY = 6;
53 static const int BITSTREAM = 7;
54 static const int END = 8;
57 struct memcpy_helper {
// Local alias for phf_header (declared elsewhere).
64 using Header = phf_header;
// Const size members; they have no default initializer here, so they must be
// set where the object is constructed (not visible in this excerpt).
// NOTE(review): revbk4_bytes is presumably the value computed by
// _revbk4_bytes(bklen); bitstream_max_len presumably bounds the encoded
// output -- confirm units (bytes vs. elements) in the constructor.
72 const size_t revbk4_bytes;
73 const size_t bitstream_max_len;
// Running totals, both starting at 0.
// NOTE(review): the _d / _h suffixes presumably mean device / host memory.
77 size_t total_footprint_d = 0;
78 size_t total_footprint_h = 0;
// Pointers to 64-bit counters for total bit count and total cell count.
// NOTE(review): the d_ / h_ prefixes presumably mark device-side and
// host-side copies; allocation and ownership are not visible here -- confirm
// before dereferencing on either side.
108 uint64_t* d_total_nbit;
109 uint64_t* d_total_ncell;
110 uint64_t* h_total_nbit;
111 uint64_t* h_total_ncell;
// NOTE(review): presumably the size in bytes of temporary storage requested
// by a CUB device-wide primitive; no initializer is given here -- confirm
// where it is assigned.
115 size_t cub_temp_bytes;
// Static size helpers taking a codebook length (`bklen`) and returning int.
// Definitions are not visible in this excerpt.
// NOTE(review): the return type is int while the revbk4_bytes member is
// size_t; if one initializes the other this is an implicit signed-to-unsigned
// conversion -- confirm the result can never be negative or overflow int.
118 static int _revbk4_bytes(
int bklen);
119 static int _revbk8_bytes(
int bklen);
// Copy construction is disabled.
122 Buf(
const Buf&) =
delete;
// Copy assignment is disabled.
123 Buf& operator=(
const Buf&) =
delete;
// Move assignment is disabled as well.
// NOTE(review): the move constructor's declaration is not visible in this
// excerpt, so whether Buf is move-constructible cannot be determined here.
125 Buf& operator=(Buf&&) =
delete;
135 int _pardeg = -1,
bool _use_HFR =
false);
// Stores `_rt_bklen` into the `rt_bklen` member; no validation is performed.
// NOTE(review): build_book() takes its rt_bklen argument as uint16_t while
// this setter takes int -- confirm the intended value range.
139 void register_runtime_bklen(
int _rt_bklen) { rt_bklen = _rt_bklen; }
// Takes the header by non-const reference, so the callee may modify it.
// NOTE(review): this method uses phf_stream_t for the stream while
// build_book/encode/decode use HF_STREAM (void*) -- confirm the two are
// interchangeable. The definition is not visible in this excerpt.
141 void memcpy_merge(phf_header& header, phf_stream_t stream);
// Static entry point operating on a caller-supplied Buf<E>; returns int.
// NOTE(review): `h_hist` is presumably a host-resident histogram with at
// least `rt_bklen` entries, and the int result is presumably a status code --
// neither is established by this declaration; confirm in the definition.
153 static int build_book(Buf<E>* buf, uint32_t* h_hist,
154 uint16_t rt_bklen, HF_STREAM stream);
// Static entry point operating on a caller-supplied Buf<E>; returns int.
// `out_encoded` and `encoded_len` are pointer parameters and `header` is a
// non-const reference, so all three can be written by the callee.
// NOTE(review): presumably `in_data` holds `data_len` elements of E,
// `*out_encoded` receives a pointer to the encoded bytes and `*encoded_len`
// their length; memory space and ownership of the output are not visible
// here -- confirm in the definition.
159 static int encode(Buf<E>* buf, E* in_data,
size_t data_len,
160 uint8_t** out_encoded,
size_t* encoded_len,
161 phf_header& header, HF_STREAM stream);
// Static entry point operating on a caller-supplied Buf<E>; returns int.
// `header` is taken by non-const reference.
// NOTE(review): presumably reads the encoded bytes at `in_encoded` and writes
// decoded symbols to `out_decoded`; the required capacity of `out_decoded`
// is not expressed in the signature -- confirm how it is derived (likely
// from `header`).
165 static int decode(Buf<E>* buf, phf_header& header,
166 PHF_BYTE* in_encoded, E* out_decoded, HF_STREAM stream);
Stream-ordered CUDA memory pool for pipeline buffer management.