9#include <cuda_runtime.h>
12#include <unordered_map>
13#include <unordered_set>
32 size_t allocated_size;
36 int remaining_consumers;
37 std::vector<int> consumer_stage_ids;
38 int producer_stage_id;
39 int producer_output_index;
46 : size(0), initial_size(0), allocated_size(0), d_ptr(nullptr), tag(
""),
47 remaining_consumers(0), producer_stage_id(-1), producer_output_index(0),
57 std::vector<int> input_buffer_ids;
58 std::vector<int> output_buffer_ids;
59 std::unordered_map<int, int> output_index_to_buffer_id;
61 std::vector<DAGNode*> dependencies;
62 std::vector<DAGNode*> dependents;
69 cudaEvent_t completion_event;
74 std::vector<void*> exec_inputs;
75 std::vector<void*> exec_outputs;
76 std::vector<size_t> exec_sizes;
79 : id(-1), stage(s), level(-1), execution_order(-1),
80 stream(nullptr), is_executed(false), completion_event(nullptr),
start_event(nullptr) {}
111 size_t buffer_size = 0,
int output_index = 0);
113 void setInputBuffer(
DAGNode* node,
size_t size,
const std::string& tag =
"input");
114 void setOutputBuffer(
DAGNode* node,
size_t size,
const std::string& tag =
"output");
128 void updateBufferTag(
int buffer_id,
const std::string& tag);
129 void setBufferPersistent(
int buffer_id,
bool persistent);
139 void execute(cudaStream_t stream);
148 void reset(cudaStream_t stream = 0);
152 void* getBuffer(
int buffer_id)
const;
160 void updateBufferSize(
int buffer_id,
size_t new_size);
/// Read-only accessor: returns the current value of peak_memory_usage_.
/// Declared const, so it does not modify the object.
177 size_t getPeakMemoryUsage()
const {
return peak_memory_usage_; }
/// Read-only accessor: returns the current value of current_memory_usage_.
/// Declared const, so it does not modify the object.
178 size_t getCurrentMemoryUsage()
const {
return current_memory_usage_; }
179 size_t getBufferSize(
int buffer_id)
const;
180 const BufferInfo& getBufferInfo(
int buffer_id)
const;
/// Returns a const reference to levels_ (a vector of vectors of DAGNode*).
/// No copy is made; the reference aliases the member itself.
/// NOTE(review): presumably one inner vector per topological level — confirm
/// against the code that fills levels_.
181 const std::vector<std::vector<DAGNode*>>& getLevels()
const {
return levels_; }
/// Returns a const reference to nodes_ (the vector of DAGNode* held by this
/// object). No copy is made; the reference aliases the member itself.
182 const std::vector<DAGNode*>& getNodes()
const {
return nodes_; }
190 void printDAG()
const;
191 void printBufferLifetimes()
const;
/// Returns the bounds_check_enabled_ flag unchanged.
/// NOTE(review): presumably toggled via enableBoundsCheck(bool) — confirm.
199 bool isBoundsCheckEnabled()
const {
return bounds_check_enabled_; }
/// Returns the coloring_applied_ flag unchanged.
/// NOTE(review): despite the "Enabled" name, this reports coloring_applied_,
/// not the separate coloring_disabled_ flag — confirm that callers expect
/// "coloring was applied" rather than "coloring is permitted".
207 bool isColoringEnabled()
const {
return coloring_applied_; }
/// Returns the number of entries in color_region_sizes_ (its size()).
208 size_t getColorRegionCount()
const {
return color_region_sizes_.size(); }
/// Returns the capture_mode_ flag unchanged.
/// NOTE(review): presumably set through setCaptureMode(bool) — confirm; the
/// member declaration is not visible in this excerpt.
218 bool isCaptureMode()
const {
return capture_mode_; }
/// Returns the profiling_enabled_ flag unchanged.
/// NOTE(review): presumably toggled via enableProfiling(bool) — confirm.
227 bool isProfilingEnabled()
const {
return profiling_enabled_; }
236 std::vector<DAGNode*> nodes_;
237 std::unordered_map<int, BufferInfo> buffers_;
242 std::vector<cudaStream_t> streams_;
245 std::vector<std::vector<DAGNode*>> levels_;
248 size_t current_memory_usage_;
249 size_t peak_memory_usage_;
251 bool profiling_enabled_;
252 bool bounds_check_enabled_;
257 bool coloring_disabled_;
258 bool coloring_applied_;
259 std::unordered_map<int, int> buffer_color_;
260 std::vector<size_t> color_region_sizes_;
261 std::vector<void*> color_region_ptrs_;
264 void assignStreams();
265 void allocateBuffer(
int buffer_id, cudaStream_t stream);
266 void freeBuffer(
int buffer_id, cudaStream_t stream);
267 void planPreallocation();
void preallocateBuffers(cudaStream_t stream=0)
DAGNode * addStage(Stage *stage, std::string name="")
void setCaptureMode(bool capture)
int addDependency(DAGNode *dependent, DAGNode *dependency, size_t buffer_size=0, int output_index=0)
void setColoringEnabled(bool enable)
Definition dag.h:206
size_t computeTopoPoolSize() const
size_t getTotalBufferSize() const
int addUnconnectedOutput(DAGNode *node, size_t size, int output_index, const std::string &tag)
void setExternalPointer(int buffer_id, void *external_ptr)
size_t getStreamCount() const
Definition dag.h:188
void enableBoundsCheck(bool enable)
Definition dag.h:198
void enableProfiling(bool enable)
std::vector< StageTimingResult > collectTimings()
void configureStreams(int num_streams)
int getMaxParallelism() const
bool connectExistingOutput(DAGNode *producer, DAGNode *consumer, int output_index)
void reset(cudaStream_t stream=0)
MemoryStrategy
Definition dag.h:23
@ MINIMAL
Allocate on demand and free once the last consumer is done. Gives the lowest peak memory.
@ PREALLOCATE
Allocate everything upfront at finalize(). Required for graph mode.
Pipeline and per-stage profiling result types.
bool is_external
If true, the pointer is caller-owned — the DAG never allocates or frees it.
Definition dag.h:43
bool is_persistent
If true, survives reset() and is kept until the DAG is destroyed.
Definition dag.h:42
cudaEvent_t start_event
Non-null only when profiling is enabled.
Definition dag.h:70