25 #include "ThirdParty/robin_hood/robin_hood.h"
54 const bool output_columnar,
57 const int64_t num_rows,
58 const std::vector<std::vector<const int8_t*>>& col_buffers,
59 const std::vector<std::vector<uint64_t>>& frag_offsets,
62 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
64 const size_t thread_idx,
72 const int64_t num_rows,
73 const std::vector<std::vector<const int8_t*>>& col_buffers,
74 const std::vector<std::vector<uint64_t>>& frag_offsets,
75 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
130 const unsigned block_size_x,
131 const unsigned grid_size_x,
132 const bool zero_initialize_buffers);
136 const size_t entry_count,
139 const unsigned block_size_x,
140 const unsigned grid_size_x);
144 const size_t entry_count,
147 const unsigned block_size_x,
148 const unsigned grid_size_x,
150 const bool prepend_index_buffer)
const;
156 TargetAggOpsMetadata& agg_expr_metadata,
158 const bool output_columnar,
162 int64_t* groups_buffer,
163 const std::vector<int64_t>& init_vals,
164 TargetAggOpsMetadata& agg_expr_metadata,
165 const int32_t groups_buffer_entry_count,
166 const size_t warp_size,
171 int64_t* groups_buffer,
172 const std::vector<int64_t>& init_vals,
178 const std::vector<int64_t>& init_vals,
179 const TargetAggOpsMetadata& agg_op_metadata);
208 const int8_t* init_agg_vals_dev_ptr,
211 const unsigned block_size_x,
212 const unsigned grid_size_x);
217 const int8_t* init_agg_vals_dev_ptr,
220 const unsigned block_size_x,
221 const unsigned grid_size_x,
222 const int8_t warp_size,
223 const bool can_sort_on_gpu,
224 const bool output_columnar,
232 const size_t projection_count);
236 const size_t projection_count,
237 const int device_id);
246 const unsigned total_thread_count,
247 const int device_id);
GpuGroupByBuffers setupTableFunctionGpuBuffers(const QueryMemoryDescriptor &query_mem_desc, const int device_id, const unsigned block_size_x, const unsigned grid_size_x, const bool zero_initialize_buffers)
ModeIndexSet initializeModeIndexSet(const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
CUdeviceptr count_distinct_bitmap_device_mem_ptr_
GpuGroupByBuffers prepareTopNHeapsDevBuffer(const QueryMemoryDescriptor &query_mem_desc, const int8_t *init_agg_vals_dev_ptr, const size_t n, const int device_id, const unsigned block_size_x, const unsigned grid_size_x)
void resetResultSet(const size_t index)
GpuGroupByBuffers createAndInitializeGroupByBufferGpu(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const int8_t *init_agg_vals_dev_ptr, const int device_id, const ExecutorDispatchMode dispatch_mode, const unsigned block_size_x, const unsigned grid_size_x, const int8_t warp_size, const bool can_sort_on_gpu, const bool output_columnar, RenderAllocator *render_allocator)
DeviceAllocator * device_allocator_
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
void allocateCountDistinctBuffers(const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
QueryMemoryInitializer(const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const int device_id, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const bool output_columnar, const bool sort_on_gpu, const shared::TableKey &outer_table_key, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, RenderAllocatorMap *render_allocator_map, RenderInfo *render_info, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, DeviceAllocator *gpu_allocator, const size_t thread_idx, const Executor *executor)
unsigned long long CUdeviceptr
const auto getGroupByBuffersPtr()
std::optional< double > QuantileParam
const auto getCountDistinctBitmapHostPtr() const
size_t computeNumberOfBuffers(const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const Executor *executor) const
std::vector< QuantileParam > initializeQuantileParams(const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
size_t count_distinct_bitmap_mem_size_
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
void compactProjectionBuffersGpu(const QueryMemoryDescriptor &query_mem_desc, Data_Namespace::DataMgr *data_mgr, const GpuGroupByBuffers &gpu_group_by_buffers, const size_t projection_count, const int device_id)
std::vector< int64_t > calculateCountDistinctBufferSize(const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit) const
int8_t * count_distinct_bitmap_host_mem_ptr_
std::vector< int64_t > init_agg_vals_
std::unique_ptr< ResultSet > getResultSetOwned(const size_t index)
void applyStreamingTopNOffsetCpu(const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
int64_t getAggInitValForIndex(const size_t index) const
void initColumnarGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit)
int8_t * varlen_output_buffer_host_ptr_
int64_t allocateCountDistinctSet()
void compactProjectionBuffersCpu(const QueryMemoryDescriptor &query_mem_desc, const size_t projection_count)
std::vector< int64_t * > group_by_buffers_
void initColumnsPerRow(const QueryMemoryDescriptor &query_mem_desc, int8_t *row_ptr, const std::vector< int64_t > &init_vals, const TargetAggOpsMetadata &agg_op_metadata)
void copyGroupByBuffersFromGpu(DeviceAllocator &device_allocator, const QueryMemoryDescriptor &query_mem_desc, const size_t entry_count, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit *ra_exe_unit, const unsigned block_size_x, const unsigned grid_size_x, const int device_id, const bool prepend_index_buffer) const
std::shared_ptr< VarlenOutputInfo > getVarlenOutputInfo()
void initGroupByBuffer(int64_t *buffer, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, TargetAggOpsMetadata &agg_expr_metadata, const ExecutorDeviceType device_type, const bool output_columnar, const Executor *executor)
const auto getNumBuffers() const
void allocateTDigestsBuffer(const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
void allocateModeBuffer(const QueryMemoryDescriptor &query_mem_desc, const RelAlgExecutionUnit &ra_exe_unit)
robin_hood::unordered_set< size_t > ModeIndexSet
Abstract class for managing device memory allocations.
const auto getCountDistinctBitmapBytes() const
Descriptor for the result set buffer layout.
const auto getVarlenOutputPtr() const
std::vector< Data_Namespace::AbstractBuffer * > temporary_buffers_
void copyFromTableFunctionGpuBuffers(Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const size_t entry_count, const GpuGroupByBuffers &gpu_group_by_buffers, const int device_id, const unsigned block_size_x, const unsigned grid_size_x)
void applyStreamingTopNOffsetGpu(Data_Namespace::DataMgr *data_mgr, const QueryMemoryDescriptor &query_mem_desc, const GpuGroupByBuffers &gpu_group_by_buffers, const RelAlgExecutionUnit &ra_exe_unit, const unsigned total_thread_count, const int device_id)
const auto getGroupByBuffersSize() const
Basic constructors and methods of the row set interface.
const auto getCountDistinctBitmapDevicePtr() const
std::shared_ptr< VarlenOutputInfo > varlen_output_info_
int8_t * count_distinct_bitmap_host_crt_ptr_
CUdeviceptr varlen_output_buffer_
ResultSet * getResultSet(const size_t index) const
std::vector< std::unique_ptr< ResultSet > > result_sets_
void initRowGroups(const QueryMemoryDescriptor &query_mem_desc, int64_t *groups_buffer, const std::vector< int64_t > &init_vals, TargetAggOpsMetadata &agg_expr_metadata, const int32_t groups_buffer_entry_count, const size_t warp_size, const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit)
void allocateCountDistinctGpuMem(const QueryMemoryDescriptor &query_mem_desc)
const auto getVarlenOutputHostPtr() const
int64_t allocateCountDistinctBitmap(const size_t bitmap_byte_sz)