17 #ifndef QUERYENGINE_GROUPBYANDAGGREGATE_H
18 #define QUERYENGINE_GROUPBYANDAGGREGATE_H
32 #include "../Shared/sqltypes.h"
35 #include <llvm/IR/Function.h>
36 #include <llvm/IR/Instructions.h>
37 #include <llvm/IR/Value.h>
38 #include <boost/algorithm/string/join.hpp>
39 #include <boost/make_unique.hpp>
66 const std::vector<InputTableInfo>& query_infos,
67 std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
68 const std::optional<int64_t>& group_cardinality_estimation);
72 bool codegen(llvm::Value* filter_result,
73 llvm::BasicBlock* sc_false,
86 const bool allow_multifrag,
87 const size_t max_groups_buffer_entry_count,
88 const int8_t crt_min_byte_width,
90 const bool output_columnar_hint);
93 const bool allow_multifrag,
94 const size_t max_groups_buffer_entry_count,
95 const int8_t crt_min_byte_width,
96 const bool sort_on_gpu_hint,
98 const bool must_use_baseline_sort,
99 const bool output_columnar_hint);
102 const size_t shard_count)
const;
119 llvm::Value* groups_buffer,
120 llvm::Value* group_expr_lv_translated,
121 llvm::Value* group_expr_lv_original,
122 const int32_t row_size_quad);
125 llvm::Value* groups_buffer,
126 llvm::Value* group_key,
127 llvm::Value* key_size_lv,
129 const int32_t row_size_quad);
134 llvm::Value* groups_buffer,
135 llvm::Value* group_key,
136 llvm::Value* key_size_lv,
138 const size_t key_width,
139 const int32_t row_size_quad);
147 llvm::Value* target);
149 bool codegenAggCalls(
const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
150 llvm::Value* varlen_output_buffer,
151 const std::vector<llvm::Value*>& agg_out_vec,
163 llvm::Value* output_buffer_byte_stream,
164 llvm::Value* out_row_idx,
165 const std::tuple<llvm::Value*, llvm::Value*>& agg_out_ptr_w_idx,
167 const size_t chosen_bytes,
168 const size_t agg_out_off,
169 const size_t target_idx);
178 std::vector<llvm::Value*>& agg_args,
184 std::vector<llvm::Value*>& agg_args,
190 std::vector<llvm::Value*>& agg_args,
199 llvm::Value*
emitCall(
const std::string& fname,
const std::vector<llvm::Value*>&
args);
204 const std::string& agg_base_name,
205 const bool threads_share_memory,
234 return bitmap_sz_bits < 50000 && ra_exe_unit.
groupby_exprs.empty() &&
240 #endif // QUERYENGINE_GROUPBYANDAGGREGATE_H
const RelAlgExecutionUnit & ra_exe_unit
ApproxQuantileDescriptors initApproxQuantileDescriptors()
bool g_enable_smem_group_by
bool gpuCanHandleOrderEntries(const std::list< Analyzer::OrderEntry > &order_entries)
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
llvm::Value * getAdditionalLiteral(const int32_t off)
llvm::Value * codegenAggColumnPtr(llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
: returns the pointer to where the aggregation should be stored.
bool codegen(llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl(const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
void codegenMode(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
ColRangeInfo getColRangeInfo()
QueryDescriptionType hash_type_
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Macros and functions for groupby buffer compaction.
llvm::Value * codegenVarlenOutputBuffer(const QueryMemoryDescriptor &query_mem_desc)
void codegenApproxQuantile(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
void checkErrorCode(llvm::Value *retCode)
std::vector< ApproxQuantileDescriptor > ApproxQuantileDescriptors
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool needsUnnestDoublePatch(llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash(const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
size_t get_count_distinct_sub_bitmap_count(const size_t bitmap_sz_bits, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
GroupByAndAggregate(Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
llvm::Value * convertNullIfAny(const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash(const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
bool codegenAggCalls(const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, llvm::Value *varlen_output_buffer, const std::vector< llvm::Value * > &agg_out_vec, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy(const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
void codegenCountDistinct(const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash(llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
int64_t getShardedTopBucket(const ColRangeInfo &col_range_info, const size_t shard_count) const
const int32_t target_index
const std::vector< InputTableInfo > & query_infos_
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor(const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
const ExecutorDeviceType device_type_
void codegenEstimator(std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
static size_t shard_count_for_top_groups(const RelAlgExecutionUnit &ra_exe_unit)
std::vector< llvm::Value * > codegenAggArg(const Analyzer::Expr *target_expr, const CompilationOptions &co)
llvm::Function * codegenPerfectHashFunction()
llvm::Value * codegenWindowRowPointer(const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
const std::optional< int64_t > group_cardinality_estimation_
llvm::Value * codegenOutputSlot(llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
const RelAlgExecutionUnit & ra_exe_unit_