OmniSciDB  a5dc49c757
GroupByAndAggregate Class Reference

#include <GroupByAndAggregate.h>


Public Member Functions

 GroupByAndAggregate (Executor *executor, const ExecutorDeviceType device_type, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::optional< int64_t > &group_cardinality_estimation)
 
bool codegen (llvm::Value *filter_result, llvm::BasicBlock *sc_false, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context)
 

Static Public Member Functions

static size_t shard_count_for_top_groups (const RelAlgExecutionUnit &ra_exe_unit)
 

Private Member Functions

bool gpuCanHandleOrderEntries (const std::list< Analyzer::OrderEntry > &order_entries)
 
ApproxQuantileDescriptors initApproxQuantileDescriptors ()
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptor (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, RenderInfo *render_info, const bool output_columnar_hint)
 
std::unique_ptr< QueryMemoryDescriptor > initQueryMemoryDescriptorImpl (const bool allow_multifrag, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, RenderInfo *render_info, const bool must_use_baseline_sort, const bool output_columnar_hint)
 
int64_t getShardedTopBucket (const ColRangeInfo &col_range_info, const size_t shard_count) const
 
llvm::Value * codegenOutputSlot (llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
 
llvm::Value * codegenVarlenOutputBuffer (const QueryMemoryDescriptor &query_mem_desc)
 
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash (const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash (llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
 
llvm::Function * codegenPerfectHashFunction ()
 
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash (const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
 
ColRangeInfo getColRangeInfo ()
 
llvm::Value * convertNullIfAny (const SQLTypeInfo &arg_type, const TargetInfo &agg_info, llvm::Value *target)
 
bool codegenAggCalls (const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, llvm::Value *varlen_output_buffer, const std::vector< llvm::Value * > &agg_out_vec, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenWindowRowPointer (const Analyzer::WindowFunction *window_func, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
 
llvm::Value * codegenAggColumnPtr (llvm::Value *output_buffer_byte_stream, llvm::Value *out_row_idx, const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, const QueryMemoryDescriptor &query_mem_desc, const size_t chosen_bytes, const size_t agg_out_off, const size_t target_idx)
 Returns the pointer to where the aggregation should be stored. More...
 
void codegenEstimator (std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
 
void codegenCountDistinct (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &, const ExecutorDeviceType)
 
void codegenApproxQuantile (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
void codegenMode (const size_t target_idx, const Analyzer::Expr *target_expr, std::vector< llvm::Value * > &agg_args, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type)
 
llvm::Value * getAdditionalLiteral (const int32_t off)
 
std::vector< llvm::Value * > codegenAggArg (const Analyzer::Expr *target_expr, const CompilationOptions &co)
 
llvm::Value * emitCall (const std::string &fname, const std::vector< llvm::Value * > &args)
 
void checkErrorCode (llvm::Value *retCode)
 
bool needsUnnestDoublePatch (llvm::Value const *val_ptr, const std::string &agg_base_name, const bool threads_share_memory, const CompilationOptions &co) const
 
void prependForceSync ()
 

Static Private Member Functions

static int64_t getBucketedCardinality (const ColRangeInfo &col_range_info)
 

Private Attributes

Executor * executor_
 
const RelAlgExecutionUnit & ra_exe_unit_
 
const std::vector< InputTableInfo > & query_infos_
 
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
 
bool output_columnar_
 
const ExecutorDeviceType device_type_
 
const std::optional< int64_t > group_cardinality_estimation_
 

Friends

class Executor
 
class QueryMemoryDescriptor
 
class CodeGenerator
 
class ExecutionKernel
 
struct TargetExprCodegen
 
struct TargetExprCodegenBuilder
 

Detailed Description

Definition at line 61 of file GroupByAndAggregate.h.

Constructor & Destructor Documentation

GroupByAndAggregate::GroupByAndAggregate ( Executor *  executor,
const ExecutorDeviceType  device_type,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
std::shared_ptr< RowSetMemoryOwner >  row_set_mem_owner,
const std::optional< int64_t > &  group_cardinality_estimation 
)

Definition at line 394 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::groupby_exprs, and ra_exe_unit_.

401  : executor_(executor)
402  , ra_exe_unit_(ra_exe_unit)
403  , query_infos_(query_infos)
404  , row_set_mem_owner_(row_set_mem_owner)
405  , device_type_(device_type)
406  , group_cardinality_estimation_(group_cardinality_estimation) {
407  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
408  if (!groupby_expr) {
409  continue;
410  }
411  const auto& groupby_ti = groupby_expr->get_type_info();
412  if (groupby_ti.is_text_encoding_none()) {
413  throw std::runtime_error(
414  "Cannot group by string columns which are not dictionary encoded.");
415  }
416  if (groupby_ti.is_buffer()) {
417  throw std::runtime_error("Group by buffer not supported");
418  }
419  if (groupby_ti.is_geometry()) {
420  throw std::runtime_error("Group by geometry not supported");
421  }
422  }
423 }
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
const std::vector< InputTableInfo > & query_infos_
const ExecutorDeviceType device_type_
const std::optional< int64_t > group_cardinality_estimation_
const RelAlgExecutionUnit & ra_exe_unit_

Member Function Documentation

void GroupByAndAggregate::checkErrorCode ( llvm::Value *  retCode)
private

Definition at line 2234 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

2234  {
2235  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2236  auto zero_const = llvm::ConstantInt::get(retCode->getType(), 0, true);
2237  auto rc_check_condition = executor_->cgen_state_->ir_builder_.CreateICmp(
2238  llvm::ICmpInst::ICMP_EQ, retCode, zero_const);
2239 
2240  executor_->cgen_state_->emitErrorCheck(rc_check_condition, retCode, "rc");
2241 }
#define AUTOMATIC_IR_METADATA(CGENSTATE)


bool GroupByAndAggregate::codegen ( llvm::Value *  filter_result,
llvm::BasicBlock *  sc_false,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context 
)

Definition at line 1047 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenAggCalls(), codegenEstimator(), codegenGroupBy(), codegenVarlenOutputBuffer(), DiamondCodegen::cond_false_, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), RelAlgExecutionUnit::estimator, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_agg_count(), get_arg_by_name(), get_int_type(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getQueryDescriptionType(), GPU, RelAlgExecutionUnit::groupby_exprs, heavyai::GroupByPerfectHash, RelAlgExecutionUnit::join_quals, LL_BUILDER, LL_CONTEXT, LL_INT, LLVM_ALIGN, CodeGenerator::posArg(), prependForceSync(), heavyai::Projection, query_mem_desc, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::target_exprs, QueryMemoryDescriptor::usesGetGroupValueFast(), and QueryMemoryDescriptor::useStreamingTopN().

1051  {
1052  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1053  CHECK(filter_result);
1054 
1055  bool can_return_error = false;
1056  llvm::BasicBlock* filter_false{nullptr};
1057 
1058  {
1059  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
1060 
1061  if (executor_->isArchMaxwell(co.device_type)) {
1062  prependForceSync();
1063  }
1064  DiamondCodegen filter_cfg(filter_result,
1065  executor_,
1066  !is_group_by || query_mem_desc.usesGetGroupValueFast(),
1067  "filter", // filter_true and filter_false basic blocks
1068  nullptr,
1069  false);
1070  filter_false = filter_cfg.cond_false_;
1071 
1072  if (is_group_by) {
1074  !query_mem_desc.useStreamingTopN()) {
1075  const auto crt_matched = get_arg_by_name(ROW_FUNC, "crt_matched");
1076  LL_BUILDER.CreateStore(LL_INT(int32_t(1)), crt_matched);
1077  auto total_matched_ptr = get_arg_by_name(ROW_FUNC, "total_matched");
1078  llvm::Value* old_total_matched_val{nullptr};
1080  old_total_matched_val =
1081  LL_BUILDER.CreateAtomicRMW(llvm::AtomicRMWInst::Add,
1082  total_matched_ptr,
1083  LL_INT(int32_t(1)),
1084 #if LLVM_VERSION_MAJOR > 12
1085  LLVM_ALIGN(8),
1086 #endif
1087  llvm::AtomicOrdering::Monotonic);
1088  } else {
1089  old_total_matched_val = LL_BUILDER.CreateLoad(
1090  total_matched_ptr->getType()->getPointerElementType(), total_matched_ptr);
1091  LL_BUILDER.CreateStore(
1092  LL_BUILDER.CreateAdd(old_total_matched_val, LL_INT(int32_t(1))),
1093  total_matched_ptr);
1094  }
1095  auto old_total_matched_ptr = get_arg_by_name(ROW_FUNC, "old_total_matched");
1096  LL_BUILDER.CreateStore(old_total_matched_val, old_total_matched_ptr);
1097  }
1098 
1099  auto agg_out_ptr_w_idx = codegenGroupBy(query_mem_desc, co, filter_cfg);
1100  auto varlen_output_buffer = codegenVarlenOutputBuffer(query_mem_desc);
1101  if (query_mem_desc.usesGetGroupValueFast() ||
1102  query_mem_desc.getQueryDescriptionType() ==
1104  if (query_mem_desc.getGroupbyColCount() > 1) {
1105  filter_cfg.setChainToNext();
1106  }
1107  // Don't generate null checks if the group slot is guaranteed to be non-null,
1108  // as it's the case for get_group_value_fast* family.
1109  can_return_error = codegenAggCalls(agg_out_ptr_w_idx,
1110  varlen_output_buffer,
1111  {},
1113  co,
1114  gpu_smem_context,
1115  filter_cfg);
1116  } else {
1117  {
1118  llvm::Value* nullcheck_cond{nullptr};
1119  if (query_mem_desc.didOutputColumnar()) {
1120  nullcheck_cond = LL_BUILDER.CreateICmpSGE(std::get<1>(agg_out_ptr_w_idx),
1121  LL_INT(int32_t(0)));
1122  } else {
1123  nullcheck_cond = LL_BUILDER.CreateICmpNE(
1124  std::get<0>(agg_out_ptr_w_idx),
1125  llvm::ConstantPointerNull::get(
1126  llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)));
1127  }
1128  DiamondCodegen nullcheck_cfg(
1129  nullcheck_cond, executor_, false, "groupby_nullcheck", &filter_cfg, false);
1130  codegenAggCalls(agg_out_ptr_w_idx,
1131  varlen_output_buffer,
1132  {},
1134  co,
1135  gpu_smem_context,
1136  filter_cfg);
1137  }
1138  can_return_error = true;
1139  if (query_mem_desc.getQueryDescriptionType() ==
1141  query_mem_desc.useStreamingTopN()) {
1142  // Ignore rejection on pushing current row to top-K heap.
1143  LL_BUILDER.CreateRet(LL_INT(int32_t(0)));
1144  } else {
1145  CodeGenerator code_generator(executor_);
1146  LL_BUILDER.CreateRet(LL_BUILDER.CreateNeg(LL_BUILDER.CreateTrunc(
1147  // TODO(alex): remove the trunc once pos is converted to 32 bits
1148  code_generator.posArg(nullptr),
1149  get_int_type(32, LL_CONTEXT))));
1150  }
1151  }
1152  } else {
1153  if (ra_exe_unit_.estimator) {
1154  std::stack<llvm::BasicBlock*> array_loops;
1155  codegenEstimator(array_loops, filter_cfg, query_mem_desc, co);
1156  } else {
1157  auto arg_it = ROW_FUNC->arg_begin();
1158  std::vector<llvm::Value*> agg_out_vec;
1159  for (int32_t i = 0; i < get_agg_count(ra_exe_unit_.target_exprs); ++i) {
1160  agg_out_vec.push_back(&*arg_it++);
1161  }
1162  can_return_error = codegenAggCalls(std::make_tuple(nullptr, nullptr),
1163  /*varlen_output_buffer=*/nullptr,
1164  agg_out_vec,
1165  query_mem_desc,
1166  co,
1167  gpu_smem_context,
1168  filter_cfg);
1169  }
1170  }
1171  }
1172 
1173  if (ra_exe_unit_.join_quals.empty()) {
1174  executor_->cgen_state_->ir_builder_.CreateRet(LL_INT(int32_t(0)));
1175  } else if (sc_false) {
1176  const auto saved_insert_block = LL_BUILDER.GetInsertBlock();
1177  LL_BUILDER.SetInsertPoint(sc_false);
1178  LL_BUILDER.CreateBr(filter_false);
1179  LL_BUILDER.SetInsertPoint(saved_insert_block);
1180  }
1181 
1182  return can_return_error;
1183 }
GroupByPerfectHash
Definition: enums.h:58
std::vector< Analyzer::Expr * > target_exprs
#define ROW_FUNC
llvm::BasicBlock * cond_false_
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Value * codegenVarlenOutputBuffer(const QueryMemoryDescriptor &query_mem_desc)
Projection
Definition: enums.h:58
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
#define LLVM_ALIGN(alignment)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:168
size_t getGroupbyColCount() const
const JoinQualsPerNestingLevel join_quals
bool codegenAggCalls(const std::tuple< llvm::Value *, llvm::Value * > &agg_out_ptr_w_idx, llvm::Value *varlen_output_buffer, const std::vector< llvm::Value * > &agg_out_vec, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context, DiamondCodegen &diamond_codegen)
std::tuple< llvm::Value *, llvm::Value * > codegenGroupBy(const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &codegen)
const std::shared_ptr< Analyzer::Estimator > estimator
#define AUTOMATIC_IR_METADATA(CGENSTATE)
QueryDescriptionType getQueryDescriptionType() const
ExecutorDeviceType device_type
void codegenEstimator(std::stack< llvm::BasicBlock * > &array_loops, DiamondCodegen &diamond_codegen, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &)
int32_t get_agg_count(const std::vector< Analyzer::Expr * > &target_exprs)
#define CHECK(condition)
Definition: Logger.h:291
const RelAlgExecutionUnit & ra_exe_unit_

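For projections that are not handled by streaming top-N, the body above keeps two per-row counters in the row function: crt_matched is set to 1 for the current row, and total_matched is incremented, atomically when the generated code targets a shared output buffer. The previous total is kept as the row's output slot index. A minimal host-side sketch of that bookkeeping, assuming hypothetical names (RowFunctionState, on_row_matched, threads_share_memory) rather than the generated IR:

#include <atomic>
#include <cstdint>

// Hedged sketch of the match-counter bookkeeping; names are illustrative.
struct RowFunctionState {
  int32_t crt_matched = 0;                 // did the current row pass the filter?
  std::atomic<int32_t> total_matched{0};   // rows matched so far (possibly shared)
  int32_t old_total_matched = 0;           // output slot index for this row
};

inline void on_row_matched(RowFunctionState& s, bool threads_share_memory) {
  s.crt_matched = 1;
  if (threads_share_memory) {
    // atomic fetch-add mirrors the CreateAtomicRMW(Add, ..., Monotonic) path
    s.old_total_matched = s.total_matched.fetch_add(1, std::memory_order_relaxed);
  } else {
    // plain load/add/store mirrors the non-shared branch
    s.old_total_matched = s.total_matched.load(std::memory_order_relaxed);
    s.total_matched.store(s.old_total_matched + 1, std::memory_order_relaxed);
  }
}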

std::vector< llvm::Value * > GroupByAndAggregate::codegenAggArg ( const Analyzer::Expr *  target_expr,
const CompilationOptions &  co 
)
private

Definition at line 2044 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CodeGenerator::cgen_state_, CHECK, CHECK_EQ, CodeGenerator::codegen(), CgenState::context_, CUR_FUNC, executor_, get_int_type(), Analyzer::Expr::get_type_info(), SQLTypeInfo::is_geometry(), kARRAY, kPOINT, kSAMPLE, LL_BUILDER, LL_CONTEXT, log2_bytes(), and CodeGenerator::posArg().

Referenced by TargetExprCodegen::codegen(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

2046  {
2047  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2048  const auto agg_expr = dynamic_cast<const Analyzer::AggExpr*>(target_expr);
2049  const auto func_expr = dynamic_cast<const Analyzer::FunctionOper*>(target_expr);
2050  const auto arr_expr = dynamic_cast<const Analyzer::ArrayExpr*>(target_expr);
2051 
2052  // TODO(alex): handle arrays uniformly?
2053  CodeGenerator code_generator(executor_);
2054  if (target_expr) {
2055  const auto& target_ti = target_expr->get_type_info();
2056  if (target_ti.is_buffer() &&
2057  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
2058  const auto target_lvs =
2059  agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
2060  : code_generator.codegen(
2061  target_expr, !executor_->plan_state_->allow_lazy_fetch_, co);
2062  if (!func_expr && !arr_expr) {
2063  // Something with the chunk transport is code that was generated from a source
2064  // other than an ARRAY[] expression
2065  if (target_ti.is_text_encoding_none()) {
2066  CHECK_EQ(size_t(3), target_lvs.size());
2067  return {target_lvs[1], target_lvs[2]};
2068  }
2069  CHECK(target_ti.is_array());
2070  CHECK_EQ(size_t(1), target_lvs.size());
2071  CHECK(!agg_expr || agg_expr->get_aggtype() == kSAMPLE);
2072  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
2073  const auto i8p_ty =
2074  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
2075  const auto& elem_ti = target_ti.get_elem_type();
2076  return {
2077  executor_->cgen_state_->emitExternalCall(
2078  "array_buff",
2079  i8p_ty,
2080  {target_lvs.front(), code_generator.posArg(target_expr)}),
2081  executor_->cgen_state_->emitExternalCall(
2082  "array_size",
2083  i32_ty,
2084  {target_lvs.front(),
2085  code_generator.posArg(target_expr),
2086  executor_->cgen_state_->llInt(log2_bytes(elem_ti.get_logical_size()))})};
2087  } else {
2088  if (agg_expr) {
2089  throw std::runtime_error(
2090  "Using array[] operator as argument to an aggregate operator is not "
2091  "supported");
2092  }
2093  CHECK(func_expr || arr_expr);
2094  if (dynamic_cast<const Analyzer::FunctionOper*>(target_expr)) {
2095  CHECK_EQ(size_t(1), target_lvs.size());
2096  const auto prefix = target_ti.get_buffer_name();
2097  CHECK(target_ti.is_array() || target_ti.is_text_encoding_none());
2098  const auto target_lv = LL_BUILDER.CreateLoad(
2099  target_lvs[0]->getType()->getPointerElementType(), target_lvs[0]);
2100  // const auto target_lv_type = target_lvs[0]->getType();
2101  // CHECK(target_lv_type->isStructTy());
2102  // CHECK_EQ(target_lv_type->getNumContainedTypes(), 3u);
2103  const auto i8p_ty = llvm::PointerType::get(
2104  get_int_type(8, executor_->cgen_state_->context_), 0);
2105  const auto ptr = LL_BUILDER.CreatePointerCast(
2106  LL_BUILDER.CreateExtractValue(target_lv, 0), i8p_ty);
2107  const auto size = LL_BUILDER.CreateExtractValue(target_lv, 1);
2108  const auto null_flag = LL_BUILDER.CreateExtractValue(target_lv, 2);
2109  const auto nullcheck_ok_bb =
2110  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_nullcheck_ok_bb", CUR_FUNC);
2111  const auto nullcheck_fail_bb = llvm::BasicBlock::Create(
2112  LL_CONTEXT, prefix + "_nullcheck_fail_bb", CUR_FUNC);
2113 
2114  // TODO(adb): probably better to zext the bool
2115  const auto nullcheck = LL_BUILDER.CreateICmpEQ(
2116  null_flag, executor_->cgen_state_->llInt(static_cast<int8_t>(1)));
2117  LL_BUILDER.CreateCondBr(nullcheck, nullcheck_fail_bb, nullcheck_ok_bb);
2118 
2119  const auto ret_bb =
2120  llvm::BasicBlock::Create(LL_CONTEXT, prefix + "_return", CUR_FUNC);
2121  LL_BUILDER.SetInsertPoint(ret_bb);
2122  auto result_phi = LL_BUILDER.CreatePHI(i8p_ty, 2, prefix + "_ptr_return");
2123  result_phi->addIncoming(ptr, nullcheck_ok_bb);
2124  const auto null_arr_sentinel = LL_BUILDER.CreateIntToPtr(
2125  executor_->cgen_state_->llInt(static_cast<int8_t>(0)), i8p_ty);
2126  result_phi->addIncoming(null_arr_sentinel, nullcheck_fail_bb);
2127  LL_BUILDER.SetInsertPoint(nullcheck_ok_bb);
2128  executor_->cgen_state_->emitExternalCall(
2129  "register_buffer_with_executor_rsm",
2130  llvm::Type::getVoidTy(executor_->cgen_state_->context_),
2131  {executor_->cgen_state_->llInt(reinterpret_cast<int64_t>(executor_)), ptr});
2132  LL_BUILDER.CreateBr(ret_bb);
2133  LL_BUILDER.SetInsertPoint(nullcheck_fail_bb);
2134  LL_BUILDER.CreateBr(ret_bb);
2135 
2136  LL_BUILDER.SetInsertPoint(ret_bb);
2137  return {result_phi, size};
2138  }
2139  CHECK_EQ(size_t(2), target_lvs.size());
2140  return {target_lvs[0], target_lvs[1]};
2141  }
2142  }
2143  if (target_ti.is_geometry() &&
2144  !executor_->plan_state_->isLazyFetchColumn(target_expr)) {
2145  auto generate_coord_lvs =
2146  [&](auto* selected_target_expr,
2147  bool const fetch_columns) -> std::vector<llvm::Value*> {
2148  const auto target_lvs =
2149  code_generator.codegen(selected_target_expr, fetch_columns, co);
2150  if (dynamic_cast<const Analyzer::GeoOperator*>(target_expr) &&
2151  target_expr->get_type_info().is_geometry()) {
2152  // return a pointer to the temporary alloca
2153  return target_lvs;
2154  }
2155  const auto geo_uoper = dynamic_cast<const Analyzer::GeoUOper*>(target_expr);
2156  const auto geo_binoper = dynamic_cast<const Analyzer::GeoBinOper*>(target_expr);
2157  if (geo_uoper || geo_binoper) {
2158  CHECK(target_expr->get_type_info().is_geometry());
2159  CHECK_EQ(2 * static_cast<size_t>(target_ti.get_physical_coord_cols()),
2160  target_lvs.size());
2161  return target_lvs;
2162  }
2163  CHECK_EQ(static_cast<size_t>(target_ti.get_physical_coord_cols()),
2164  target_lvs.size());
2165 
2166  const auto i32_ty = get_int_type(32, executor_->cgen_state_->context_);
2167  const auto i8p_ty =
2168  llvm::PointerType::get(get_int_type(8, executor_->cgen_state_->context_), 0);
2169  std::vector<llvm::Value*> coords;
2170  size_t ctr = 0;
2171  for (const auto& target_lv : target_lvs) {
2172  // TODO(adb): consider adding a utility to sqltypes so we can get the types of
2173  // the physical coords cols based on the sqltype (e.g. TINYINT for col 0, INT
2174  // for col 1 for pols / mpolys, etc). Hardcoding for now. first array is the
2175  // coords array (TINYINT). Subsequent arrays are regular INT.
2176 
2177  const size_t elem_sz = ctr == 0 ? 1 : 4;
2178  ctr++;
2179  int32_t fixlen = -1;
2180  if (target_ti.get_type() == kPOINT) {
2181  const auto col_var = dynamic_cast<const Analyzer::ColumnVar*>(target_expr);
2182  if (col_var) {
2183  const auto coords_cd = executor_->getPhysicalColumnDescriptor(col_var, 1);
2184  if (coords_cd && coords_cd->columnType.get_type() == kARRAY) {
2185  fixlen = coords_cd->columnType.get_size();
2186  }
2187  }
2188  }
2189  if (fixlen > 0) {
2190  coords.push_back(executor_->cgen_state_->emitExternalCall(
2191  "fast_fixlen_array_buff",
2192  i8p_ty,
2193  {target_lv, code_generator.posArg(selected_target_expr)}));
2194  auto fixed_len_lv = executor_->cgen_state_->emitExternalCall(
2195  "determine_fixed_array_len",
2196  llvm::IntegerType::get(code_generator.cgen_state_->context_, 64),
2197  {target_lv, executor_->cgen_state_->llInt(int64_t(fixlen))});
2198  coords.push_back(fixed_len_lv);
2199  continue;
2200  }
2201  coords.push_back(executor_->cgen_state_->emitExternalCall(
2202  "array_buff",
2203  i8p_ty,
2204  {target_lv, code_generator.posArg(selected_target_expr)}));
2205  coords.push_back(executor_->cgen_state_->emitExternalCall(
2206  "array_size",
2207  i32_ty,
2208  {target_lv,
2209  code_generator.posArg(selected_target_expr),
2210  executor_->cgen_state_->llInt(log2_bytes(elem_sz))}));
2211  }
2212  return coords;
2213  };
2214 
2215  if (agg_expr) {
2216  return generate_coord_lvs(agg_expr->get_arg(), true);
2217  } else {
2218  return generate_coord_lvs(target_expr,
2219  !executor_->plan_state_->allow_lazy_fetch_);
2220  }
2221  }
2222  }
2223  bool fetch_column = !executor_->plan_state_->allow_lazy_fetch_;
2224  return agg_expr ? code_generator.codegen(agg_expr->get_arg(), true, co)
2225  : code_generator.codegen(target_expr, fetch_column, co);
2226 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define LL_BUILDER
#define LL_CONTEXT
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
#define CHECK(condition)
Definition: Logger.h:291
bool is_geometry() const
Definition: sqltypes.h:597
#define CUR_FUNC
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:198

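For function-operator and ARRAY[] buffers, the listing above unpacks the value into a pointer, a size, and a null flag, branches on the flag, and returns either the real buffer (after registering it with the executor's row-set memory owner) or a null sentinel pointer via a PHI node. A scalar sketch of that diamond, with hypothetical names (BufferWithNull, buffer_or_null_sentinel, register_buffer) standing in for the emitted IR and runtime calls:

#include <cstdint>

// Hedged scalar model of the null-check diamond in codegenAggArg.
struct BufferWithNull {
  int8_t* ptr;
  int32_t size;
  int8_t is_null;  // 1 when the buffer value is NULL
};

inline int8_t* buffer_or_null_sentinel(const BufferWithNull& v,
                                       void (*register_buffer)(int8_t*)) {
  if (v.is_null == 1) {
    return nullptr;        // mirrors the inttoptr(0) sentinel branch
  }
  register_buffer(v.ptr);  // mirrors register_buffer_with_executor_rsm
  return v.ptr;
}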

bool GroupByAndAggregate::codegenAggCalls ( const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
llvm::Value *  varlen_output_buffer,
const std::vector< llvm::Value * > &  agg_out_vec,
QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
const GpuSharedMemoryContext &  gpu_smem_context,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1681 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, TargetExprCodegenBuilder::codegen(), QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, heavyai::Projection, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by codegen().

1688  {
1689  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1690  auto agg_out_ptr_w_idx = agg_out_ptr_w_idx_in;
1691  // TODO(alex): unify the two cases, the output for non-group by queries
1692  // should be a contiguous buffer
1693  const bool is_group_by = std::get<0>(agg_out_ptr_w_idx);
1694  bool can_return_error = false;
1695  if (is_group_by) {
1696  CHECK(agg_out_vec.empty());
1697  } else {
1698  CHECK(!agg_out_vec.empty());
1699  }
1700 
1701  // output buffer is casted into a byte stream to be able to handle data elements of
1702  // different sizes (only used when actual column width sizes are used)
1703  llvm::Value* output_buffer_byte_stream{nullptr};
1704  llvm::Value* out_row_idx{nullptr};
1705  if (query_mem_desc.didOutputColumnar() && !g_cluster &&
1707  output_buffer_byte_stream = LL_BUILDER.CreateBitCast(
1708  std::get<0>(agg_out_ptr_w_idx),
1709  llvm::PointerType::get(llvm::Type::getInt8Ty(LL_CONTEXT), 0));
1710  output_buffer_byte_stream->setName("out_buff_b_stream");
1711  CHECK(std::get<1>(agg_out_ptr_w_idx));
1712  out_row_idx = LL_BUILDER.CreateZExt(std::get<1>(agg_out_ptr_w_idx),
1713  llvm::Type::getInt64Ty(LL_CONTEXT));
1714  out_row_idx->setName("out_row_idx");
1715  }
1716 
1717  TargetExprCodegenBuilder target_builder(ra_exe_unit_, is_group_by);
1718  for (size_t target_idx = 0; target_idx < ra_exe_unit_.target_exprs.size();
1719  ++target_idx) {
1720  auto target_expr = ra_exe_unit_.target_exprs[target_idx];
1721  CHECK(target_expr);
1722 
1723  target_builder(target_expr, executor_, query_mem_desc, co);
1724  }
1725 
1726  target_builder.codegen(this,
1727  executor_,
1728  query_mem_desc,
1729  co,
1730  gpu_smem_context,
1731  agg_out_ptr_w_idx,
1732  agg_out_vec,
1733  output_buffer_byte_stream,
1734  out_row_idx,
1735  varlen_output_buffer,
1736  diamond_codegen);
1737 
1738  return can_return_error;
1739 }
std::vector< Analyzer::Expr * > target_exprs
#define LL_BUILDER
#define LL_CONTEXT
Projection
Definition: enums.h:58
#define AUTOMATIC_IR_METADATA(CGENSTATE)
QueryDescriptionType getQueryDescriptionType() const
#define CHECK(condition)
Definition: Logger.h:291
bool g_cluster
const RelAlgExecutionUnit & ra_exe_unit_


llvm::Value * GroupByAndAggregate::codegenAggColumnPtr ( llvm::Value *  output_buffer_byte_stream,
llvm::Value *  out_row_idx,
const std::tuple< llvm::Value *, llvm::Value * > &  agg_out_ptr_w_idx,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  chosen_bytes,
const size_t  agg_out_off,
const size_t  target_idx 
)
private

Returns the pointer to where the aggregation should be stored.

Definition at line 1744 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, shared::bit_cast(), CHECK, CHECK_EQ, QueryMemoryDescriptor::didOutputColumnar(), executor_, g_cluster, get_int_type(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getColOnlyOffInBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), LL_BUILDER, LL_CONTEXT, LL_INT, heavyai::Projection, and to_string().

Referenced by TargetExprCodegen::codegenAggregate(), and TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions().

1751  {
1752  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1753  llvm::Value* agg_col_ptr{nullptr};
1754  if (query_mem_desc.didOutputColumnar()) {
1755  // TODO(Saman): remove the second columnar branch, and support all query description
1756  // types through the first branch. Then, input arguments should also be cleaned up
1757  if (!g_cluster &&
1759  CHECK(chosen_bytes == 1 || chosen_bytes == 2 || chosen_bytes == 4 ||
1760  chosen_bytes == 8);
1761  CHECK(output_buffer_byte_stream);
1762  CHECK(out_row_idx);
1763  size_t col_off = query_mem_desc.getColOffInBytes(agg_out_off);
1764  // multiplying by chosen_bytes, i.e., << log2(chosen_bytes)
1765  auto out_per_col_byte_idx =
1766 #ifdef _WIN32
1767  LL_BUILDER.CreateShl(out_row_idx, __lzcnt(chosen_bytes) - 1);
1768 #else
1769  LL_BUILDER.CreateShl(out_row_idx, __builtin_ffs(chosen_bytes) - 1);
1770 #endif
1771  auto byte_offset = LL_BUILDER.CreateAdd(out_per_col_byte_idx,
1772  LL_INT(static_cast<int64_t>(col_off)));
1773  byte_offset->setName("out_byte_off_target_" + std::to_string(target_idx));
1774  auto output_ptr = LL_BUILDER.CreateGEP(
1775  output_buffer_byte_stream->getType()->getScalarType()->getPointerElementType(),
1776  output_buffer_byte_stream,
1777  byte_offset);
1778  agg_col_ptr = LL_BUILDER.CreateBitCast(
1779  output_ptr,
1780  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1781  agg_col_ptr->setName("out_ptr_target_" + std::to_string(target_idx));
1782  } else {
1783  auto const col_off_in_bytes = query_mem_desc.getColOffInBytes(agg_out_off);
1784  auto const col_off = col_off_in_bytes / chosen_bytes;
1785  auto const col_rem = col_off_in_bytes % chosen_bytes;
1786  CHECK_EQ(col_rem, 0u) << col_off_in_bytes << " % " << chosen_bytes;
1787  CHECK(std::get<1>(agg_out_ptr_w_idx));
1788  auto* agg_out_idx = LL_BUILDER.CreateZExt(
1789  std::get<1>(agg_out_ptr_w_idx),
1790  get_int_type(8 * sizeof(col_off), executor_->cgen_state_->context_));
1791  auto* offset = LL_BUILDER.CreateAdd(agg_out_idx, LL_INT(col_off));
1792  auto* bit_cast = LL_BUILDER.CreateBitCast(
1793  std::get<0>(agg_out_ptr_w_idx),
1794  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1795  agg_col_ptr = LL_BUILDER.CreateGEP(
1796  bit_cast->getType()->getScalarType()->getPointerElementType(),
1797  bit_cast,
1798  offset);
1799  }
1800  } else {
1801  auto const col_off_in_bytes = query_mem_desc.getColOnlyOffInBytes(agg_out_off);
1802  auto const col_off = col_off_in_bytes / chosen_bytes;
1803  auto const col_rem = col_off_in_bytes % chosen_bytes;
1804  CHECK_EQ(col_rem, 0u) << col_off_in_bytes << " % " << chosen_bytes;
1805  auto* bit_cast = LL_BUILDER.CreateBitCast(
1806  std::get<0>(agg_out_ptr_w_idx),
1807  llvm::PointerType::get(get_int_type((chosen_bytes << 3), LL_CONTEXT), 0));
1808  agg_col_ptr = LL_BUILDER.CreateGEP(
1809  bit_cast->getType()->getScalarType()->getPointerElementType(),
1810  bit_cast,
1811  LL_INT(col_off));
1812  }
1813  CHECK(agg_col_ptr);
1814  return agg_col_ptr;
1815 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
Projection
Definition: enums.h:58
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
std::string to_string(char const *&&v)
size_t getColOnlyOffInBytes(const size_t col_idx) const
#define AUTOMATIC_IR_METADATA(CGENSTATE)
QueryDescriptionType getQueryDescriptionType() const
TO bit_cast(FROM &&from)
Definition: misc.h:307
#define CHECK(condition)
Definition: Logger.h:291
bool g_cluster
size_t getColOffInBytes(const size_t col_idx) const

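In the columnar branch above, the per-target output address is col_off + out_row_idx * chosen_bytes; the generated IR performs the multiplication as a left shift, using __builtin_ffs(chosen_bytes) - 1 as log2 of the power-of-two slot width. A small host-side sketch of the same arithmetic, with illustrative names:

#include <cstdint>

// Hedged sketch of the columnar byte-offset computation in codegenAggColumnPtr.
// chosen_bytes is the padded slot width (1, 2, 4, or 8); col_off is the byte
// offset of the target's column inside the output buffer.
inline int64_t agg_col_byte_offset(int64_t out_row_idx,
                                   int chosen_bytes,
                                   int64_t col_off) {
  // __builtin_ffs(chosen_bytes) - 1 == log2(chosen_bytes) for powers of two,
  // so the shift below is the multiplication by chosen_bytes from the IR.
  const int shift = __builtin_ffs(chosen_bytes) - 1;
  return (out_row_idx << shift) + col_off;
}

// e.g. row 10 of a 4-byte slot whose column starts at byte 160: 10 * 4 + 160 == 200.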

void GroupByAndAggregate::codegenApproxQuantile ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1951 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, g_bigint_count, SQLTypeInfo::get_notnull(), get_target_info(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1956  {
1957  if (device_type == ExecutorDeviceType::GPU) {
1958  throw QueryMustRunOnCpu();
1959  }
1960  llvm::BasicBlock *calc, *skip{nullptr};
1961  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1962  auto const arg_ti =
1963  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1964  bool const nullable = !arg_ti.get_notnull();
1965 
1966  auto* cs = executor_->cgen_state_.get();
1967  auto& irb = cs->ir_builder_;
1968  if (nullable) {
1969  auto* const null_value = cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
1970  auto* const skip_cond = arg_ti.is_fp()
1971  ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
1972  : irb.CreateICmpEQ(agg_args.back(), null_value);
1973  calc = llvm::BasicBlock::Create(cs->context_, "calc_approx_quantile");
1974  skip = llvm::BasicBlock::Create(cs->context_, "skip_approx_quantile");
1975  irb.CreateCondBr(skip_cond, skip, calc);
1976  cs->current_func_->getBasicBlockList().push_back(calc);
1977  irb.SetInsertPoint(calc);
1978  }
1979  if (!arg_ti.is_fp()) {
1980  auto const agg_info = get_target_info(target_expr, g_bigint_count);
1981  agg_args.back() = executor_->castToFP(agg_args.back(), arg_ti, agg_info.sql_type);
1982  }
1983  cs->emitExternalCall(
1984  "agg_approx_quantile", llvm::Type::getVoidTy(cs->context_), agg_args);
1985  if (nullable) {
1986  irb.CreateBr(skip);
1987  cs->current_func_->getBasicBlockList().push_back(skip);
1988  irb.SetInsertPoint(skip);
1989  }
1990 }
TargetInfo get_target_info(const Analyzer::Expr *target_expr, const bool bigint_count)
Definition: TargetInfo.h:92
bool g_bigint_count
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398

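The generated control flow above skips the agg_approx_quantile call when a nullable argument holds the null sentinel, and converts non-floating-point arguments to double first. A scalar sketch of that behavior, assuming a hypothetical accumulator type in place of the runtime's t-digest:

#include <cstdint>
#include <optional>
#include <vector>

// Hypothetical stand-in for the quantile accumulator updated by
// agg_approx_quantile; only the null-skip and cast behavior is modeled.
struct QuantileAccumulatorSketch {
  std::vector<double> values;
  void add(double v) { values.push_back(v); }
};

// Mirrors the calc/skip diamond: null inputs bypass the accumulation,
// integer/decimal inputs are widened to double before being added.
inline void approx_quantile_row(QuantileAccumulatorSketch& acc,
                                std::optional<int64_t> int_arg) {
  if (!int_arg) {
    return;  // "skip_approx_quantile" branch
  }
  acc.add(static_cast<double>(*int_arg));  // castToFP + agg_approx_quantile
}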

void GroupByAndAggregate::codegenCountDistinct ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1881 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, Bitmap, CHECK, CHECK_EQ, emitCall(), executor_, g_bigint_count, get_int_type(), get_target_info(), Analyzer::Expr::get_type_info(), getAdditionalLiteral(), QueryMemoryDescriptor::getCountDistinctDescriptor(), GPU, Invalid, kAPPROX_COUNT_DISTINCT, LL_CONTEXT, and LL_INT.

Referenced by TargetExprCodegen::codegenAggregate().

1886  {
1887  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1888  const auto agg_info = get_target_info(target_expr, g_bigint_count);
1889  const auto& arg_ti =
1890  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
1891  if (arg_ti.is_fp()) {
1892  agg_args.back() = executor_->cgen_state_->ir_builder_.CreateBitCast(
1893  agg_args.back(), get_int_type(64, executor_->cgen_state_->context_));
1894  }
1895  const auto& count_distinct_descriptor =
1896  query_mem_desc.getCountDistinctDescriptor(target_idx);
1897  CHECK(count_distinct_descriptor.impl_type_ != CountDistinctImplType::Invalid);
1898  if (agg_info.agg_kind == kAPPROX_COUNT_DISTINCT) {
1899  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1900  agg_args.push_back(LL_INT(int32_t(count_distinct_descriptor.bitmap_sz_bits)));
1901  if (device_type == ExecutorDeviceType::GPU) {
1902  const auto base_dev_addr = getAdditionalLiteral(-1);
1903  const auto base_host_addr = getAdditionalLiteral(-2);
1904  agg_args.push_back(base_dev_addr);
1905  agg_args.push_back(base_host_addr);
1906  emitCall("agg_approximate_count_distinct_gpu", agg_args);
1907  } else {
1908  emitCall("agg_approximate_count_distinct", agg_args);
1909  }
1910  return;
1911  }
1912  std::string agg_fname{"agg_count_distinct"};
1913  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1914  agg_fname += "_bitmap";
1915  agg_args.push_back(LL_INT(count_distinct_descriptor.min_val));
1916  agg_args.push_back(LL_INT(count_distinct_descriptor.bucket_size));
1917  }
1918  if (agg_info.skip_null_val) {
1919  auto null_lv = executor_->cgen_state_->castToTypeIn(
1920  (arg_ti.is_fp()
1921  ? static_cast<llvm::Value*>(executor_->cgen_state_->inlineFpNull(arg_ti))
1922  : static_cast<llvm::Value*>(executor_->cgen_state_->inlineIntNull(arg_ti))),
1923  64);
1924  null_lv = executor_->cgen_state_->ir_builder_.CreateBitCast(
1925  null_lv, get_int_type(64, executor_->cgen_state_->context_));
1926  agg_fname += "_skip_val";
1927  agg_args.push_back(null_lv);
1928  }
1929  if (device_type == ExecutorDeviceType::GPU) {
1930  CHECK(count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap);
1931  agg_fname += "_gpu";
1932  const auto base_dev_addr = getAdditionalLiteral(-1);
1933  const auto base_host_addr = getAdditionalLiteral(-2);
1934  agg_args.push_back(base_dev_addr);
1935  agg_args.push_back(base_host_addr);
1936  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.sub_bitmap_count)));
1937  CHECK_EQ(size_t(0),
1938  count_distinct_descriptor.bitmapPaddedSizeBytes() %
1939  count_distinct_descriptor.sub_bitmap_count);
1940  agg_args.push_back(LL_INT(int64_t(count_distinct_descriptor.bitmapPaddedSizeBytes() /
1941  count_distinct_descriptor.sub_bitmap_count)));
1942  }
1943  if (count_distinct_descriptor.impl_type_ == CountDistinctImplType::Bitmap) {
1944  emitCall(agg_fname, agg_args);
1945  } else {
1946  executor_->cgen_state_->emitExternalCall(
1947  agg_fname, llvm::Type::getVoidTy(LL_CONTEXT), agg_args);
1948  }
1949 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
llvm::Value * getAdditionalLiteral(const int32_t off)
#define LL_CONTEXT
#define LL_INT(v)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
TargetInfo get_target_info(const Analyzer::Expr *target_expr, const bool bigint_count)
Definition: TargetInfo.h:92
bool g_bigint_count
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
#define CHECK(condition)
Definition: Logger.h:291

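For the exact (non-APPROX) path above, the runtime function name is assembled by suffixing: _bitmap for bitmap-based descriptors, _skip_val when nulls must be ignored, and _gpu on GPU (which requires the bitmap implementation). A short sketch of that name assembly, with an illustrative flags struct standing in for the CountDistinctDescriptor and TargetInfo inputs:

#include <string>

// Illustrative flags; the real inputs come from the descriptor and target info.
struct CountDistinctFlags {
  bool bitmap_impl;    // CountDistinctImplType::Bitmap
  bool skip_null_val;  // nullable argument
  bool on_gpu;         // ExecutorDeviceType::GPU
};

inline std::string count_distinct_runtime_name(const CountDistinctFlags& f) {
  std::string name{"agg_count_distinct"};
  if (f.bitmap_impl) {
    name += "_bitmap";    // also gains min_val and bucket_size arguments
  }
  if (f.skip_null_val) {
    name += "_skip_val";  // also gains the null sentinel argument
  }
  if (f.on_gpu) {
    name += "_gpu";       // GPU path requires the bitmap implementation
  }
  return name;
}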

void GroupByAndAggregate::codegenEstimator ( std::stack< llvm::BasicBlock * > &  array_loops,
DiamondCodegen &  diamond_codegen,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co 
)
private

Definition at line 1817 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, emitCall(), RelAlgExecutionUnit::estimator, executor_, get_int_type(), QueryMemoryDescriptor::getEffectiveKeyWidth(), LL_BUILDER, LL_CONTEXT, LL_INT, ra_exe_unit_, and ROW_FUNC.

Referenced by codegen().

1820  {
1821  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1822  const auto& estimator_arg = ra_exe_unit_.estimator->getArgument();
1823  auto estimator_comp_count_lv = LL_INT(static_cast<int32_t>(estimator_arg.size()));
1824  auto estimator_key_lv = LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT),
1825  estimator_comp_count_lv);
1826  int32_t subkey_idx = 0;
1827  for (const auto& estimator_arg_comp : estimator_arg) {
1828  const auto estimator_arg_comp_lvs =
1829  executor_->groupByColumnCodegen(estimator_arg_comp.get(),
1830  query_mem_desc.getEffectiveKeyWidth(),
1831  co,
1832  false,
1833  0,
1834  diamond_codegen,
1835  array_loops,
1836  true);
1837  CHECK(!estimator_arg_comp_lvs.original_value);
1838  const auto estimator_arg_comp_lv = estimator_arg_comp_lvs.translated_value;
1839  // store the sub-key to the buffer
1840  LL_BUILDER.CreateStore(
1841  estimator_arg_comp_lv,
1842  LL_BUILDER.CreateGEP(
1843  estimator_key_lv->getType()->getScalarType()->getPointerElementType(),
1844  estimator_key_lv,
1845  LL_INT(subkey_idx++)));
1846  }
1847  const auto int8_ptr_ty = llvm::PointerType::get(get_int_type(8, LL_CONTEXT), 0);
1848  const auto bitmap = LL_BUILDER.CreateBitCast(&*ROW_FUNC->arg_begin(), int8_ptr_ty);
1849  const auto key_bytes = LL_BUILDER.CreateBitCast(estimator_key_lv, int8_ptr_ty);
1850  const auto estimator_comp_bytes_lv =
1851  LL_INT(static_cast<int32_t>(estimator_arg.size() * sizeof(int64_t)));
1852  const auto bitmap_size_lv =
1853  LL_INT(static_cast<uint32_t>(ra_exe_unit_.estimator->getBufferSize()));
1854  emitCall(ra_exe_unit_.estimator->getRuntimeFunctionName(),
1855  {bitmap, &*bitmap_size_lv, key_bytes, &*estimator_comp_bytes_lv});
1856 }
#define ROW_FUNC
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
size_t getEffectiveKeyWidth() const
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
const std::shared_ptr< Analyzer::Estimator > estimator
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291
const RelAlgExecutionUnit & ra_exe_unit_

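The estimator path above packs one translated 64-bit sub-key per estimator component into a contiguous buffer, then calls the estimator's runtime function with the bitmap pointer, the bitmap size, the key bytes, and the key byte count. A host-side sketch of that packing step, with hypothetical names:

#include <cstdint>
#include <cstring>
#include <vector>

// Hedged sketch: the generated code allocates estimator_arg.size() int64 slots,
// stores each sub-key, and passes the buffer to the estimator as raw bytes.
inline std::vector<int8_t> pack_estimator_key(const std::vector<int64_t>& sub_keys) {
  std::vector<int8_t> key_bytes(sub_keys.size() * sizeof(int64_t));
  std::memcpy(key_bytes.data(), sub_keys.data(), key_bytes.size());
  return key_bytes;  // corresponds to the key_bytes argument of the emitted call
}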

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenGroupBy ( const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  codegen 
)
private

Definition at line 1273 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), QueryMemoryDescriptor::didOutputColumnar(), executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getEffectiveKeyWidth(), QueryMemoryDescriptor::getGroupbyColCount(), QueryMemoryDescriptor::getMaxVal(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, QueryMemoryDescriptor::hasNulls(), QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash(), LL_BUILDER, LL_CONTEXT, LL_INT, heavyai::Projection, query_infos_, ra_exe_unit_, ROW_FUNC, and QueryMemoryDescriptor::threadsShareMemory().

Referenced by codegen().

1276  {
1277  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1278  auto arg_it = ROW_FUNC->arg_begin();
1279  auto groups_buffer = arg_it++;
1280 
1281  std::stack<llvm::BasicBlock*> array_loops;
1282 
1283  // TODO(Saman): move this logic outside of this function.
1285  if (query_mem_desc.didOutputColumnar()) {
1286  return std::make_tuple(
1287  &*groups_buffer,
1288  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen));
1289  } else {
1290  return std::make_tuple(
1291  codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen),
1292  nullptr);
1293  }
1294  }
1295 
1296  CHECK(query_mem_desc.getQueryDescriptionType() ==
1298  query_mem_desc.getQueryDescriptionType() ==
1300 
1301  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1302  ? 0
1303  : query_mem_desc.getRowSize() / sizeof(int64_t);
1304 
1305  const auto col_width_size = query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1306  ? sizeof(int64_t)
1307  : query_mem_desc.getEffectiveKeyWidth();
1308  // for multi-column group by
1309  llvm::Value* group_key = nullptr;
1310  llvm::Value* key_size_lv = nullptr;
1311 
1312  if (!query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1313  key_size_lv = LL_INT(static_cast<int32_t>(query_mem_desc.getGroupbyColCount()));
1314  if (query_mem_desc.getQueryDescriptionType() ==
1316  group_key =
1317  LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1318  } else if (query_mem_desc.getQueryDescriptionType() ==
1320  group_key =
1321  col_width_size == sizeof(int32_t)
1322  ? LL_BUILDER.CreateAlloca(llvm::Type::getInt32Ty(LL_CONTEXT), key_size_lv)
1323  : LL_BUILDER.CreateAlloca(llvm::Type::getInt64Ty(LL_CONTEXT), key_size_lv);
1324  }
1325  CHECK(group_key);
1326  CHECK(key_size_lv);
1327  }
1328 
1329  int32_t subkey_idx = 0;
1330  CHECK(query_mem_desc.getGroupbyColCount() == ra_exe_unit_.groupby_exprs.size());
1331  for (const auto& group_expr : ra_exe_unit_.groupby_exprs) {
1332  const auto col_range_info =
1334  const auto translated_null_value = static_cast<int64_t>(
1335  query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1336  ? checked_int64_t(query_mem_desc.getMaxVal()) +
1337  (query_mem_desc.getBucket() ? query_mem_desc.getBucket() : 1)
1338  : checked_int64_t(col_range_info.max) +
1339  (col_range_info.bucket ? col_range_info.bucket : 1));
1340 
1341  const bool col_has_nulls =
1342  query_mem_desc.getQueryDescriptionType() ==
1344  ? (query_mem_desc.isSingleColumnGroupByWithPerfectHash()
1345  ? query_mem_desc.hasNulls()
1346  : col_range_info.has_nulls)
1347  : false;
1348 
1349  const auto group_expr_lvs =
1350  executor_->groupByColumnCodegen(group_expr.get(),
1351  col_width_size,
1352  co,
1353  col_has_nulls,
1354  translated_null_value,
1355  diamond_codegen,
1356  array_loops,
1357  query_mem_desc.threadsShareMemory());
1358  const auto group_expr_lv = group_expr_lvs.translated_value;
1359  if (query_mem_desc.isSingleColumnGroupByWithPerfectHash()) {
1360  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1361  return codegenSingleColumnPerfectHash(query_mem_desc,
1362  co,
1363  &*groups_buffer,
1364  group_expr_lv,
1365  group_expr_lvs.original_value,
1366  row_size_quad);
1367  } else {
1368  // store the sub-key to the buffer
1369  LL_BUILDER.CreateStore(
1370  group_expr_lv,
1371  LL_BUILDER.CreateGEP(
1372  group_key->getType()->getScalarType()->getPointerElementType(),
1373  group_key,
1374  LL_INT(subkey_idx++)));
1375  }
1376  }
1377  if (query_mem_desc.getQueryDescriptionType() ==
1379  CHECK(ra_exe_unit_.groupby_exprs.size() != 1);
1381  &*groups_buffer, group_key, key_size_lv, query_mem_desc, row_size_quad);
1382  } else if (query_mem_desc.getQueryDescriptionType() ==
1385  &*groups_buffer,
1386  group_key,
1387  key_size_lv,
1388  query_mem_desc,
1389  col_width_size,
1390  row_size_quad);
1391  }
1392  CHECK(false);
1393  return std::make_tuple(nullptr, nullptr);
1394 }
GroupByPerfectHash
Definition: enums.h:58
#define CHECK_EQ(x, y)
Definition: Logger.h:301
#define ROW_FUNC
ColRangeInfo get_expr_range_info(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Analyzer::Expr *expr, Executor *executor)
#define LL_BUILDER
boost::multiprecision::number< boost::multiprecision::cpp_int_backend< 64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void >> checked_int64_t
#define LL_CONTEXT
#define LL_INT(v)
Projection
Definition: enums.h:58
size_t getEffectiveKeyWidth() const
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnBaselineHash(const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const size_t key_width, const int32_t row_size_quad)
size_t getGroupbyColCount() const
std::tuple< llvm::Value *, llvm::Value * > codegenSingleColumnPerfectHash(const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, llvm::Value *groups_buffer, llvm::Value *group_expr_lv_translated, llvm::Value *group_expr_lv_original, const int32_t row_size_quad)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
std::tuple< llvm::Value *, llvm::Value * > codegenMultiColumnPerfectHash(llvm::Value *groups_buffer, llvm::Value *group_key, llvm::Value *key_size_lv, const QueryMemoryDescriptor &query_mem_desc, const int32_t row_size_quad)
QueryDescriptionType getQueryDescriptionType() const
const std::vector< InputTableInfo > & query_infos_
bool isSingleColumnGroupByWithPerfectHash() const
GroupByBaselineHash
Definition: enums.h:58
llvm::Value * codegenOutputSlot(llvm::Value *groups_buffer, const QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, DiamondCodegen &diamond_codegen)
#define CHECK(condition)
Definition: Logger.h:291
const RelAlgExecutionUnit & ra_exe_unit_

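For perfect-hash group-by columns, the translated null value above is placed one bucket past the column's maximum, i.e. max + (bucket ? bucket : 1), and the addition uses checked 64-bit arithmetic so an overflow surfaces instead of wrapping. A minimal sketch of the computation, assuming a manual overflow check in place of checked_int64_t:

#include <cstdint>
#include <limits>
#include <stdexcept>

// Hedged sketch of the translated-null-value computation; the real code uses a
// boost::multiprecision checked integer (checked_int64_t).
inline int64_t translated_null_value(int64_t max_val, int64_t bucket) {
  const int64_t step = bucket ? bucket : 1;
  if (max_val > std::numeric_limits<int64_t>::max() - step) {
    throw std::overflow_error("translated null value overflows int64_t");
  }
  return max_val + step;  // one bucket past the valid range marks NULL
}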

void GroupByAndAggregate::codegenMode ( const size_t  target_idx,
const Analyzer::Expr *  target_expr,
std::vector< llvm::Value * > &  agg_args,
const QueryMemoryDescriptor &  query_mem_desc,
const ExecutorDeviceType  device_type 
)
private

Definition at line 1992 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, executor_, get_int_type(), SQLTypeInfo::get_notnull(), Analyzer::Expr::get_type_info(), and GPU.

Referenced by TargetExprCodegen::codegenAggregate().

1996  {
1997  if (device_type == ExecutorDeviceType::GPU) {
1998  throw QueryMustRunOnCpu();
1999  }
2000  llvm::BasicBlock *calc, *skip{nullptr};
2001  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2002  auto const arg_ti =
2003  static_cast<const Analyzer::AggExpr*>(target_expr)->get_arg()->get_type_info();
2004  bool const nullable = !arg_ti.get_notnull();
2005  bool const is_fp = arg_ti.is_fp();
2006  auto* cs = executor_->cgen_state_.get();
2007  auto& irb = cs->ir_builder_;
2008  if (nullable) {
2009  auto* const null_value =
2010  is_fp ? cs->inlineNull(arg_ti) : cs->castToTypeIn(cs->inlineNull(arg_ti), 64);
2011  auto* const skip_cond = is_fp ? irb.CreateFCmpOEQ(agg_args.back(), null_value)
2012  : irb.CreateICmpEQ(agg_args.back(), null_value);
2013  calc = llvm::BasicBlock::Create(cs->context_, "calc_mode");
2014  skip = llvm::BasicBlock::Create(cs->context_, "skip_mode");
2015  irb.CreateCondBr(skip_cond, skip, calc);
2016  cs->current_func_->getBasicBlockList().push_back(calc);
2017  irb.SetInsertPoint(calc);
2018  }
2019  if (is_fp) {
2020  auto* const int_type = get_int_type(8 * arg_ti.get_size(), cs->context_);
2021  agg_args.back() = irb.CreateBitCast(agg_args.back(), int_type);
2022  }
2023  // "agg_mode" collides with existing names, so non-standard suffix "_func" is added.
2024  cs->emitExternalCall("agg_mode_func", llvm::Type::getVoidTy(cs->context_), agg_args);
2025  if (nullable) {
2026  irb.CreateBr(skip);
2027  cs->current_func_->getBasicBlockList().push_back(skip);
2028  irb.SetInsertPoint(skip);
2029  }
2030 }
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398

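When the MODE argument is floating point, the listing above bitcasts it to a same-width integer before calling agg_mode_func, so the mode bookkeeping keys on a stable bit pattern rather than a float value. A small sketch of that reinterpretation on the host side:

#include <cstdint>
#include <cstring>

// Mirrors the CreateBitCast of a double argument to i64 before the
// agg_mode_func call; memcpy is the portable equivalent of a bit cast.
inline int64_t mode_key_from_double(double v) {
  int64_t bits;
  static_assert(sizeof(bits) == sizeof(v), "same width required");
  std::memcpy(&bits, &v, sizeof(bits));
  return bits;  // identical doubles map to identical keys
}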

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnBaselineHash ( const CompilationOptions &  co,
llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const size_t  key_width,
const int32_t  row_size_quad 
)
private

Definition at line 1505 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getEntryCount(), LL_BUILDER, LL_CONTEXT, LL_INT, and CompilationOptions::with_dynamic_watchdog.

Referenced by codegenGroupBy().

1512  {
1513  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1514  if (group_key->getType() != llvm::Type::getInt64PtrTy(LL_CONTEXT)) {
1515  CHECK(key_width == sizeof(int32_t));
1516  group_key =
1517  LL_BUILDER.CreatePointerCast(group_key, llvm::Type::getInt64PtrTy(LL_CONTEXT));
1518  }
1519  std::vector<llvm::Value*> func_args{
1520  groups_buffer,
1521  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount())),
1522  &*group_key,
1523  &*key_size_lv,
1524  LL_INT(static_cast<int32_t>(key_width))};
1525  std::string func_name{"get_group_value"};
1526  if (query_mem_desc.didOutputColumnar()) {
1527  func_name += "_columnar_slot";
1528  } else {
1529  func_args.push_back(LL_INT(row_size_quad));
1530  }
1531  if (co.with_dynamic_watchdog) {
1532  func_name += "_with_watchdog";
1533  }
1534  if (query_mem_desc.didOutputColumnar()) {
1535  return std::make_tuple(groups_buffer, emitCall(func_name, func_args));
1536  } else {
1537  return std::make_tuple(emitCall(func_name, func_args), nullptr);
1538  }
1539 }
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
#define CHECK(condition)
Definition: Logger.h:291

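The baseline-hash entry point above is likewise chosen by suffixing: get_group_value gains _columnar_slot for columnar output (in which case the row size is not passed) and _with_watchdog when the dynamic watchdog is enabled. A short sketch of that selection, with illustrative parameter names:

#include <string>

// Illustrative sketch; the real inputs are the QueryMemoryDescriptor and
// CompilationOptions consulted in codegenMultiColumnBaselineHash.
inline std::string baseline_hash_runtime_name(bool output_columnar,
                                              bool with_dynamic_watchdog) {
  std::string name{"get_group_value"};
  if (output_columnar) {
    name += "_columnar_slot";  // row_size_quad is omitted in this case
  }
  if (with_dynamic_watchdog) {
    name += "_with_watchdog";
  }
  return name;
}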

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenMultiColumnPerfectHash ( llvm::Value *  groups_buffer,
llvm::Value *  group_key,
llvm::Value *  key_size_lv,
const QueryMemoryDescriptor &  query_mem_desc,
const int32_t  row_size_quad 
)
private

Definition at line 1461 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, codegenPerfectHashFunction(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getQueryDescriptionType(), heavyai::GroupByPerfectHash, QueryMemoryDescriptor::hasKeylessHash(), LL_BUILDER, LL_CONTEXT, and LL_INT.

Referenced by codegenGroupBy().

1466  {
1467  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1468  CHECK(query_mem_desc.getQueryDescriptionType() ==
1470  // compute the index (perfect hash)
1471  auto perfect_hash_func = codegenPerfectHashFunction();
1472  auto hash_lv =
1473  LL_BUILDER.CreateCall(perfect_hash_func, std::vector<llvm::Value*>{group_key});
1474 
1475  if (query_mem_desc.didOutputColumnar()) {
1476  if (!query_mem_desc.hasKeylessHash()) {
1477  const std::string set_matching_func_name{
1478  "set_matching_group_value_perfect_hash_columnar"};
1479  const std::vector<llvm::Value*> set_matching_func_arg{
1480  groups_buffer,
1481  hash_lv,
1482  group_key,
1483  key_size_lv,
1484  llvm::ConstantInt::get(get_int_type(32, LL_CONTEXT),
1485  query_mem_desc.getEntryCount())};
1486  emitCall(set_matching_func_name, set_matching_func_arg);
1487  }
1488  return std::make_tuple(groups_buffer, hash_lv);
1489  } else {
1490  if (query_mem_desc.hasKeylessHash()) {
1491  return std::make_tuple(emitCall("get_matching_group_value_perfect_hash_keyless",
1492  {groups_buffer, hash_lv, LL_INT(row_size_quad)}),
1493  nullptr);
1494  } else {
1495  return std::make_tuple(
1496  emitCall(
1497  "get_matching_group_value_perfect_hash",
1498  {groups_buffer, hash_lv, group_key, key_size_lv, LL_INT(row_size_quad)}),
1499  nullptr);
1500  }
1501  }
1502 }
GroupByPerfectHash
Definition: enums.h:58
#define LL_BUILDER
#define LL_CONTEXT
#define LL_INT(v)
llvm::Value * emitCall(const std::string &fname, const std::vector< llvm::Value * > &args)
llvm::Type * get_int_type(const int width, llvm::LLVMContext &context)
#define AUTOMATIC_IR_METADATA(CGENSTATE)
QueryDescriptionType getQueryDescriptionType() const
llvm::Function * codegenPerfectHashFunction()
#define CHECK(condition)
Definition: Logger.h:291


llvm::Value * GroupByAndAggregate::codegenOutputSlot ( llvm::Value *  groups_buffer,
const QueryMemoryDescriptor &  query_mem_desc,
const CompilationOptions &  co,
DiamondCodegen &  diamond_codegen 
)
private

Definition at line 1185 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, CHECK, CHECK_EQ, CHECK_GE, CHECK_LT, CodeGenerator::codegen(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_arg_by_name(), get_heap_key_slot_index(), QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getQueryDescriptionType(), QueryMemoryDescriptor::getRowSize(), RelAlgExecutionUnit::groupby_exprs, inline_fp_null_val(), inline_int_null_val(), SortInfo::limit, LL_BOOL, LL_BUILDER, LL_FP, LL_INT, anonymous_namespace{Utm.h}::n, SortInfo::offset, SortInfo::order_entries, CodeGenerator::posArg(), heavyai::Projection, ra_exe_unit_, ROW_FUNC, RelAlgExecutionUnit::sort_info, RelAlgExecutionUnit::target_exprs, to_string(), and QueryMemoryDescriptor::useStreamingTopN().

Referenced by codegenGroupBy(), and codegenWindowRowPointer().

1189  {
1190  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1191  CHECK(query_mem_desc.getQueryDescriptionType() == QueryDescriptionType::Projection);
1192  CHECK_EQ(size_t(1), ra_exe_unit_.groupby_exprs.size());
1193  const auto group_expr = ra_exe_unit_.groupby_exprs.front();
1194  CHECK(!group_expr);
1195  if (!query_mem_desc.didOutputColumnar()) {
1196  CHECK_EQ(size_t(0), query_mem_desc.getRowSize() % sizeof(int64_t));
1197  }
1198  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1199  ? 0
1200  : query_mem_desc.getRowSize() / sizeof(int64_t);
1201  CodeGenerator code_generator(executor_);
1202  if (query_mem_desc.useStreamingTopN()) {
1203  const auto& only_order_entry = ra_exe_unit_.sort_info.order_entries.front();
1204  CHECK_GE(only_order_entry.tle_no, int(1));
1205  const size_t target_idx = only_order_entry.tle_no - 1;
1206  CHECK_LT(target_idx, ra_exe_unit_.target_exprs.size());
1207  const auto order_entry_expr = ra_exe_unit_.target_exprs[target_idx];
1208  const auto chosen_bytes =
1209  static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(target_idx));
1210  auto order_entry_lv = executor_->cgen_state_->castToTypeIn(
1211  code_generator.codegen(order_entry_expr, true, co).front(), chosen_bytes * 8);
1212  const uint32_t n =
1213  ra_exe_unit_.sort_info.offset + ra_exe_unit_.sort_info.limit.value_or(0);
1214  std::string fname = "get_bin_from_k_heap";
1215  const auto& oe_ti = order_entry_expr->get_type_info();
1216  llvm::Value* null_key_lv = nullptr;
1217  if (oe_ti.is_integer() || oe_ti.is_decimal() || oe_ti.is_time()) {
1218  const size_t bit_width = order_entry_lv->getType()->getIntegerBitWidth();
1219  switch (bit_width) {
1220  case 32:
1221  null_key_lv = LL_INT(static_cast<int32_t>(inline_int_null_val(oe_ti)));
1222  break;
1223  case 64:
1224  null_key_lv = LL_INT(static_cast<int64_t>(inline_int_null_val(oe_ti)));
1225  break;
1226  default:
1227  CHECK(false);
1228  }
1229  fname += "_int" + std::to_string(bit_width) + "_t";
1230  } else {
1231  CHECK(oe_ti.is_fp());
1232  if (order_entry_lv->getType()->isDoubleTy()) {
1233  null_key_lv = LL_FP(static_cast<double>(inline_fp_null_val(oe_ti)));
1234  } else {
1235  null_key_lv = LL_FP(static_cast<float>(inline_fp_null_val(oe_ti)));
1236  }
1237  fname += order_entry_lv->getType()->isDoubleTy() ? "_double" : "_float";
1238  }
1239  const auto key_slot_idx =
1240  get_heap_key_slot_index(ra_exe_unit_.target_exprs, target_idx);
1241  return emitCall(
1242  fname,
1243  {groups_buffer,
1244  LL_INT(n),
1245  LL_INT(row_size_quad),
1246  LL_INT(static_cast<uint32_t>(query_mem_desc.getColOffInBytes(key_slot_idx))),
1247  LL_BOOL(only_order_entry.is_desc),
1248  LL_BOOL(!order_entry_expr->get_type_info().get_notnull()),
1249  LL_BOOL(only_order_entry.nulls_first),
1250  null_key_lv,
1251  order_entry_lv});
1252  } else {
1253  auto* arg = get_arg_by_name(ROW_FUNC, "max_matched");
1254  const auto output_buffer_entry_count_lv =
1255  LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
1256  arg = get_arg_by_name(ROW_FUNC, "old_total_matched");
1257  const auto group_expr_lv =
1258  LL_BUILDER.CreateLoad(arg->getType()->getPointerElementType(), arg);
1259  std::vector<llvm::Value*> args{groups_buffer,
1260  output_buffer_entry_count_lv,
1261  group_expr_lv,
1262  code_generator.posArg(nullptr)};
1263  if (query_mem_desc.didOutputColumnar()) {
1264  const auto columnar_output_offset =
1265  emitCall("get_columnar_scan_output_offset", args);
1266  return columnar_output_offset;
1267  }
1268  args.push_back(LL_INT(row_size_quad));
1269  return emitCall("get_scan_output_slot", args);
1270  }
1271 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
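
For the streaming top-N path, the name of the runtime heap function is assembled from the physical type of the single ORDER BY expression. A small sketch of that name assembly, assuming only the base name shown in the snippet (the boolean and width inputs stand in for the SQLTypeInfo checks):

#include <string>

// Rebuilds the "get_bin_from_k_heap" suffix the way codegenOutputSlot() does:
// integer, decimal and time keys dispatch on their bit width, floating-point
// keys on float vs. double.
std::string bin_from_k_heap_name(const bool is_fp, const size_t bit_width) {
  std::string fname = "get_bin_from_k_heap";
  if (!is_fp) {
    fname += "_int" + std::to_string(bit_width) + "_t";
  } else {
    fname += bit_width == 64 ? "_double" : "_float";
  }
  return fname;
}

So a BIGINT sort key resolves to get_bin_from_k_heap_int64_t, while a FLOAT key resolves to get_bin_from_k_heap_float.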

llvm::Function * GroupByAndAggregate::codegenPerfectHashFunction ( )
private

Definition at line 1541 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CHECK_GT, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), get_int_type(), getBucketedCardinality(), RelAlgExecutionUnit::groupby_exprs, heavyai::GroupByPerfectHash, LL_CONTEXT, LL_INT, mark_function_always_inline(), query_infos_, and ra_exe_unit_.

Referenced by codegenMultiColumnPerfectHash().

1541  {
1542  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1543  CHECK_GT(ra_exe_unit_.groupby_exprs.size(), size_t(1));
1544  auto ft = llvm::FunctionType::get(
1545  get_int_type(32, LL_CONTEXT),
1546  std::vector<llvm::Type*>{llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0)},
1547  false);
1548  auto key_hash_func = llvm::Function::Create(ft,
1549  llvm::Function::ExternalLinkage,
1550  "perfect_key_hash",
1551  executor_->cgen_state_->module_);
1552  executor_->cgen_state_->helper_functions_.push_back(key_hash_func);
1553  mark_function_always_inline(key_hash_func);
1554  auto& key_buff_arg = *key_hash_func->args().begin();
1555  llvm::Value* key_buff_lv = &key_buff_arg;
1556  auto bb = llvm::BasicBlock::Create(LL_CONTEXT, "entry", key_hash_func);
1557  llvm::IRBuilder<> key_hash_func_builder(bb);
1558  llvm::Value* hash_lv{llvm::ConstantInt::get(get_int_type(64, LL_CONTEXT), 0)};
1559  std::vector<int64_t> cardinalities;
1560  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1561  auto col_range_info =
1562  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1563  CHECK(col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash);
1564  cardinalities.push_back(getBucketedCardinality(col_range_info));
1565  }
1566  size_t dim_idx = 0;
1567  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
1568  auto* gep = key_hash_func_builder.CreateGEP(
1569  key_buff_lv->getType()->getScalarType()->getPointerElementType(),
1570  key_buff_lv,
1571  LL_INT(dim_idx));
1572  auto key_comp_lv =
1573  key_hash_func_builder.CreateLoad(gep->getType()->getPointerElementType(), gep);
1574  auto col_range_info =
1575  get_expr_range_info(ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
1576  auto crt_term_lv =
1577  key_hash_func_builder.CreateSub(key_comp_lv, LL_INT(col_range_info.min));
1578  if (col_range_info.bucket) {
1579  crt_term_lv =
1580  key_hash_func_builder.CreateSDiv(crt_term_lv, LL_INT(col_range_info.bucket));
1581  }
1582  for (size_t prev_dim_idx = 0; prev_dim_idx < dim_idx; ++prev_dim_idx) {
1583  crt_term_lv = key_hash_func_builder.CreateMul(crt_term_lv,
1584  LL_INT(cardinalities[prev_dim_idx]));
1585  }
1586  hash_lv = key_hash_func_builder.CreateAdd(hash_lv, crt_term_lv);
1587  ++dim_idx;
1588  }
1589  key_hash_func_builder.CreateRet(
1590  key_hash_func_builder.CreateTrunc(hash_lv, get_int_type(32, LL_CONTEXT)));
1591  return key_hash_func;
1592 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
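
The generated perfect_key_hash helper computes a row-major index over the bucketed key ranges: each key component is shifted by its column minimum, divided by its bucket width, and scaled by the product of the cardinalities of all preceding dimensions. A standalone C++ sketch of the same arithmetic (the struct and the sample values are illustrative, not taken from the codebase):

#include <cstdint>
#include <vector>

struct Dim {
  int64_t min;          // lower bound of the column range
  int64_t bucket;       // bucket width, 0 means unbucketed
  int64_t cardinality;  // bucketed cardinality, cf. getBucketedCardinality()
};

// Host-side equivalent of the IR built by codegenPerfectHashFunction().
int64_t perfect_key_hash(const std::vector<int64_t>& key,
                         const std::vector<Dim>& dims) {
  int64_t hash = 0;
  for (size_t i = 0; i < key.size(); ++i) {
    int64_t term = key[i] - dims[i].min;
    if (dims[i].bucket) {
      term /= dims[i].bucket;
    }
    for (size_t j = 0; j < i; ++j) {
      term *= dims[j].cardinality;
    }
    hash += term;
  }
  return hash;  // the generated function truncates this to 32 bits
}

For example, an unbucketed two-column key (7, 3) whose columns start at 5 and 0, with a first-column cardinality of 10, hashes to (7 - 5) + 3 * 10 = 32.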

std::tuple< llvm::Value *, llvm::Value * > GroupByAndAggregate::codegenSingleColumnPerfectHash ( const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
llvm::Value *  groups_buffer,
llvm::Value *  group_expr_lv_translated,
llvm::Value *  group_expr_lv_original,
const int32_t  row_size_quad 
)
private

Definition at line 1411 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, CompilationOptions::device_type, QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, QueryMemoryDescriptor::getBucket(), QueryMemoryDescriptor::getMinVal(), QueryMemoryDescriptor::hasKeylessHash(), QueryMemoryDescriptor::interleavedBins(), LL_INT, QueryMemoryDescriptor::mustUseBaselineSort(), and QueryMemoryDescriptor::usesGetGroupValueFast().

Referenced by codegenGroupBy().

1417  {
1418  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1419  CHECK(query_mem_desc.usesGetGroupValueFast());
1420  std::string get_group_fn_name{query_mem_desc.didOutputColumnar()
1421  ? "get_columnar_group_bin_offset"
1422  : "get_group_value_fast"};
1423  if (!query_mem_desc.didOutputColumnar() && query_mem_desc.hasKeylessHash()) {
1424  get_group_fn_name += "_keyless";
1425  }
1426  if (query_mem_desc.interleavedBins(co.device_type)) {
1427  CHECK(!query_mem_desc.didOutputColumnar());
1428  CHECK(query_mem_desc.hasKeylessHash());
1429  get_group_fn_name += "_semiprivate";
1430  }
1431  std::vector<llvm::Value*> get_group_fn_args{&*groups_buffer,
1432  &*group_expr_lv_translated};
1433  if (group_expr_lv_original && get_group_fn_name == "get_group_value_fast" &&
1434  query_mem_desc.mustUseBaselineSort()) {
1435  get_group_fn_name += "_with_original_key";
1436  get_group_fn_args.push_back(group_expr_lv_original);
1437  }
1438  get_group_fn_args.push_back(LL_INT(query_mem_desc.getMinVal()));
1439  get_group_fn_args.push_back(LL_INT(query_mem_desc.getBucket()));
1440  if (!query_mem_desc.hasKeylessHash()) {
1441  if (!query_mem_desc.didOutputColumnar()) {
1442  get_group_fn_args.push_back(LL_INT(row_size_quad));
1443  }
1444  } else {
1445  if (!query_mem_desc.didOutputColumnar()) {
1446  get_group_fn_args.push_back(LL_INT(row_size_quad));
1447  }
1448  if (query_mem_desc.interleavedBins(co.device_type)) {
1449  auto warp_idx = emitCall("thread_warp_idx", {LL_INT(executor_->warpSize())});
1450  get_group_fn_args.push_back(warp_idx);
1451  get_group_fn_args.push_back(LL_INT(executor_->warpSize()));
1452  }
1453  }
1454  if (get_group_fn_name == "get_columnar_group_bin_offset") {
1455  return std::make_tuple(&*groups_buffer,
1456  emitCall(get_group_fn_name, get_group_fn_args));
1457  }
1458  return std::make_tuple(emitCall(get_group_fn_name, get_group_fn_args), nullptr);
1459 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
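
The runtime entry point for the single-column case is likewise assembled from descriptor flags. A compact sketch of that selection, assuming only the function names visible in the snippet (the boolean parameters are illustrative stand-ins for the QueryMemoryDescriptor queries):

#include <string>

// Rebuilds the function name chosen by codegenSingleColumnPerfectHash().
// Note: in the real code "_semiprivate" implies keyless row-wise output, and
// "_with_original_key" additionally requires the untranslated key value.
std::string group_value_fast_name(const bool output_columnar,
                                  const bool keyless_hash,
                                  const bool interleaved_bins,
                                  const bool must_use_baseline_sort) {
  std::string name = output_columnar ? "get_columnar_group_bin_offset"
                                     : "get_group_value_fast";
  if (!output_columnar && keyless_hash) {
    name += "_keyless";
  }
  if (interleaved_bins) {
    name += "_semiprivate";
  }
  if (name == "get_group_value_fast" && must_use_baseline_sort) {
    name += "_with_original_key";
  }
  return name;
}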

llvm::Value * GroupByAndAggregate::codegenVarlenOutputBuffer ( const QueryMemoryDescriptor query_mem_desc)
private

Definition at line 1396 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, CHECK, executor_, QueryMemoryDescriptor::hasVarlenOutput(), LL_CONTEXT, and ROW_FUNC.

Referenced by codegen().

1397  {
1398  if (!query_mem_desc.hasVarlenOutput()) {
1399  return nullptr;
1400  }
1401 
1402  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1403  auto arg_it = ROW_FUNC->arg_begin();
1404  arg_it++; /* groups_buffer */
1405  auto varlen_output_buffer = arg_it++;
1406  CHECK(varlen_output_buffer->getType() == llvm::Type::getInt64PtrTy(LL_CONTEXT));
1407  return varlen_output_buffer;
1408 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::codegenWindowRowPointer ( const Analyzer::WindowFunction window_func,
const QueryMemoryDescriptor query_mem_desc,
const CompilationOptions co,
DiamondCodegen diamond_codegen 
)
private

Definition at line 1645 of file GroupByAndAggregate.cpp.

References run_benchmark_import::args, AUTOMATIC_IR_METADATA, codegenOutputSlot(), CodeGenerator::codegenWindowPosition(), QueryMemoryDescriptor::didOutputColumnar(), emitCall(), executor_, get_int_type(), WindowProjectNodeContext::getActiveWindowFunctionContext(), QueryMemoryDescriptor::getEntryCount(), Analyzer::WindowFunction::getKind(), QueryMemoryDescriptor::getRowSize(), LL_BUILDER, LL_CONTEXT, LL_INT, CodeGenerator::posArg(), ROW_FUNC, and window_function_is_aggregate().

Referenced by TargetExprCodegen::codegen().

1649  {
1650  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1651  const auto window_func_context =
1652  WindowProjectNodeContext::getActiveWindowFunctionContext(executor_);
1653  if (window_func_context && window_function_is_aggregate(window_func->getKind())) {
1654  const int32_t row_size_quad = query_mem_desc.didOutputColumnar()
1655  ? 0
1656  : query_mem_desc.getRowSize() / sizeof(int64_t);
1657  auto arg_it = ROW_FUNC->arg_begin();
1658  auto groups_buffer = arg_it++;
1659  CodeGenerator code_generator(executor_);
1660  auto window_pos_lv = code_generator.codegenWindowPosition(
1661  window_func_context, code_generator.posArg(nullptr));
1662  const auto pos_in_window =
1663  LL_BUILDER.CreateTrunc(window_pos_lv, get_int_type(32, LL_CONTEXT));
1664  llvm::Value* entry_count_lv =
1665  LL_INT(static_cast<int32_t>(query_mem_desc.getEntryCount()));
1666  std::vector<llvm::Value*> args{
1667  &*groups_buffer, entry_count_lv, pos_in_window, code_generator.posArg(nullptr)};
1668  if (query_mem_desc.didOutputColumnar()) {
1669  const auto columnar_output_offset =
1670  emitCall("get_columnar_scan_output_offset", args);
1671  return LL_BUILDER.CreateSExt(columnar_output_offset, get_int_type(64, LL_CONTEXT));
1672  }
1673  args.push_back(LL_INT(row_size_quad));
1674  return emitCall("get_scan_output_slot", args);
1675  }
1676  auto arg_it = ROW_FUNC->arg_begin();
1677  auto groups_buffer = arg_it++;
1678  return codegenOutputSlot(&*groups_buffer, query_mem_desc, co, diamond_codegen);
1679 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::convertNullIfAny ( const SQLTypeInfo arg_type,
const TargetInfo agg_info,
llvm::Value *  target 
)
private

Definition at line 1594 of file GroupByAndAggregate.cpp.

References TargetInfo::agg_kind, AUTOMATIC_IR_METADATA, CHECK, executor_, SQLTypeInfo::get_size(), SQLTypeInfo::is_fp(), kAPPROX_COUNT_DISTINCT, kCOUNT, LL_BUILDER, and TargetInfo::sql_type.

Referenced by TargetExprCodegen::codegenAggregate().

1596  {
1597  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
1598  const auto& agg_type = agg_info.sql_type;
1599  const size_t chosen_bytes = agg_type.get_size();
1600 
1601  bool need_conversion{false};
1602  llvm::Value* arg_null{nullptr};
1603  llvm::Value* agg_null{nullptr};
1604  llvm::Value* target_to_cast{target};
1605  if (arg_type.is_fp()) {
1606  arg_null = executor_->cgen_state_->inlineFpNull(arg_type);
1607  if (agg_type.is_fp()) {
1608  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1609  if (!static_cast<llvm::ConstantFP*>(arg_null)->isExactlyValue(
1610  static_cast<llvm::ConstantFP*>(agg_null)->getValueAPF())) {
1611  need_conversion = true;
1612  }
1613  } else {
1614  CHECK(agg_info.agg_kind == kCOUNT || agg_info.agg_kind == kAPPROX_COUNT_DISTINCT);
1615  return target;
1616  }
1617  } else {
1618  arg_null = executor_->cgen_state_->inlineIntNull(arg_type);
1619  if (agg_type.is_fp()) {
1620  agg_null = executor_->cgen_state_->inlineFpNull(agg_type);
1621  need_conversion = true;
1622  target_to_cast = executor_->castToFP(target, arg_type, agg_type);
1623  } else {
1624  agg_null = executor_->cgen_state_->inlineIntNull(agg_type);
1625  if ((static_cast<llvm::ConstantInt*>(arg_null)->getBitWidth() !=
1626  static_cast<llvm::ConstantInt*>(agg_null)->getBitWidth()) ||
1627  (static_cast<llvm::ConstantInt*>(arg_null)->getValue() !=
1628  static_cast<llvm::ConstantInt*>(agg_null)->getValue())) {
1629  need_conversion = true;
1630  }
1631  }
1632  }
1633  if (need_conversion) {
1634  auto cmp = arg_type.is_fp() ? LL_BUILDER.CreateFCmpOEQ(target, arg_null)
1635  : LL_BUILDER.CreateICmpEQ(target, arg_null);
1636  return LL_BUILDER.CreateSelect(
1637  cmp,
1638  agg_null,
1639  executor_->cgen_state_->castToTypeIn(target_to_cast, chosen_bytes << 3));
1640  } else {
1641  return target;
1642  }
1643 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
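
The emitted IR amounts to a null-preserving widening cast: if the argument equals its own type's null sentinel, the aggregate type's sentinel is produced instead; otherwise the value is cast to the aggregate width. A scalar sketch of that behavior for an int32 argument feeding a double aggregate (the sentinel constants are hypothetical; the real ones come from inline_int_null_val() and inline_fp_null_val()):

#include <cstdint>
#include <limits>

constexpr int32_t kInt32Null = std::numeric_limits<int32_t>::min();  // hypothetical
constexpr double kDoubleNull = std::numeric_limits<double>::max();   // hypothetical

// Scalar analogue of the compare-and-select emitted by convertNullIfAny().
double convert_null_if_any(const int32_t target) {
  return target == kInt32Null ? kDoubleNull : static_cast<double>(target);
}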

llvm::Value * GroupByAndAggregate::emitCall ( const std::string &  fname,
const std::vector< llvm::Value * > &  args 
)
private

Definition at line 2228 of file GroupByAndAggregate.cpp.

References AUTOMATIC_IR_METADATA, and executor_.

Referenced by TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), codegenCountDistinct(), codegenEstimator(), codegenMultiColumnBaselineHash(), codegenMultiColumnPerfectHash(), codegenOutputSlot(), codegenSingleColumnPerfectHash(), and codegenWindowRowPointer().

2229  {
2230  AUTOMATIC_IR_METADATA(executor_->cgen_state_.get());
2231  return executor_->cgen_state_->emitCall(fname, args);
2232 }

+ Here is the caller graph for this function:

llvm::Value * GroupByAndAggregate::getAdditionalLiteral ( const int32_t  off)
private

Definition at line 2032 of file GroupByAndAggregate.cpp.

References shared::bit_cast(), CHECK_LT, get_arg_by_name(), get_int_type(), LL_BUILDER, LL_CONTEXT, LL_INT, and ROW_FUNC.

Referenced by codegenCountDistinct().

2032  {
2033  CHECK_LT(off, 0);
2034  const auto lit_buff_lv = get_arg_by_name(ROW_FUNC, "literals");
2035  auto* bit_cast = LL_BUILDER.CreateBitCast(
2036  lit_buff_lv, llvm::PointerType::get(get_int_type(64, LL_CONTEXT), 0));
2037  auto* gep =
2038  LL_BUILDER.CreateGEP(bit_cast->getType()->getScalarType()->getPointerElementType(),
2039  bit_cast,
2040  LL_INT(off));
2041  return LL_BUILDER.CreateLoad(gep->getType()->getPointerElementType(), gep);
2042 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
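
Additional literals live below the start of the literal buffer and are addressed with a negative 64-bit slot offset. A host-side sketch of what the generated bitcast, GEP and load do (buffer handling is illustrative; the requirement that off is negative matches the CHECK_LT above):

#include <cstdint>

// Reads the additional literal stored at a negative int64 slot offset
// relative to the start of the literal buffer.
int64_t get_additional_literal(const int8_t* lit_buff, const int32_t off) {
  return reinterpret_cast<const int64_t*>(lit_buff)[off];  // off < 0
}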

int64_t GroupByAndAggregate::getBucketedCardinality ( const ColRangeInfo col_range_info)
staticprivate

Definition at line 356 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, ColRangeInfo::has_nulls, ColRangeInfo::max, and ColRangeInfo::min.

Referenced by codegenPerfectHashFunction(), and getColRangeInfo().

356  {
357  checked_int64_t crt_col_cardinality =
358  checked_int64_t(col_range_info.max) - checked_int64_t(col_range_info.min);
359  if (col_range_info.bucket) {
360  crt_col_cardinality /= col_range_info.bucket;
361  }
362  return static_cast<int64_t>(crt_col_cardinality +
363  (1 + (col_range_info.has_nulls ? 1 : 0)));
364 }

+ Here is the caller graph for this function:
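
As a worked example (values illustrative): a column range [10, 1000] bucketed by 10 with nulls present yields (1000 - 10) / 10 + 1 + 1 = 101 entries, while the same range unbucketed would need 990 + 1 + 1 = 992.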

ColRangeInfo GroupByAndAggregate::getColRangeInfo ( )
private

Definition at line 218 of file GroupByAndAggregate.cpp.

References anonymous_namespace{GroupByAndAggregate.cpp}::cardinality_estimate_less_than_column_range(), CHECK, CHECK_GE, device_type_, executor_, anonymous_namespace{GroupByAndAggregate.cpp}::expr_is_rowid(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Executor::getBaselineThreshold(), getBucketedCardinality(), group_cardinality_estimation_, RelAlgExecutionUnit::groupby_exprs, heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, anonymous_namespace{GroupByAndAggregate.cpp}::has_count_distinct(), anonymous_namespace{GroupByAndAggregate.cpp}::is_column_range_too_big_for_perfect_hash(), kENCODING_DICT, MAX_BUFFER_SIZE, SortInfo::order_entries, RelAlgExecutionUnit::quals, query_infos_, ra_exe_unit_, RelAlgExecutionUnit::simple_quals, RelAlgExecutionUnit::sort_info, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptorImpl().

218  {
219  // Use baseline layout more eagerly on the GPU if the query uses count distinct,
220  // because our HyperLogLog implementation is 4x less memory efficient on GPU.
221  // Technically, this only applies to APPROX_COUNT_DISTINCT, but in practice we
222  // can expect this to be true anyway for grouped queries since the precise version
223  // uses significantly more memory.
224  const int64_t baseline_threshold =
225  Executor::getBaselineThreshold(has_count_distinct(ra_exe_unit_), device_type_);
226  // `group_cardinality_estimation_` is set as the result of (NDV) cardinality estimator
227  auto group_cardinality_estimation = group_cardinality_estimation_.value_or(0);
228  if (ra_exe_unit_.groupby_exprs.size() != 1) {
229  try {
230  checked_int64_t cardinality{1};
231  bool has_nulls{false};
232  for (const auto& groupby_expr : ra_exe_unit_.groupby_exprs) {
233  auto col_range_info = get_expr_range_info(
234  ra_exe_unit_, query_infos_, groupby_expr.get(), executor_);
235  if (col_range_info.hash_type_ != QueryDescriptionType::GroupByPerfectHash) {
236  // going through baseline hash if a non-integer type is encountered
237  return {QueryDescriptionType::GroupByBaselineHash,
238  0,
239  group_cardinality_estimation,
240  0,
241  false};
242  }
243  auto crt_col_cardinality = getBucketedCardinality(col_range_info);
244  CHECK_GE(crt_col_cardinality, 0);
245  cardinality *= crt_col_cardinality;
246  if (col_range_info.has_nulls) {
247  has_nulls = true;
248  }
249  }
250  // For zero or high cardinalities, use baseline layout.
251  if (!cardinality || cardinality > baseline_threshold) {
252  return {QueryDescriptionType::GroupByBaselineHash,
253  0,
254  group_cardinality_estimation,
255  0,
256  false};
257  }
258  // todo (yoonmin) : should we consider min(group_cardinality_estimation,
259  // cardinality) if we have `group_cardinality_estimation` value?
260  return {QueryDescriptionType::GroupByPerfectHash,
261  0,
262  int64_t(cardinality),
263  0,
264  has_nulls};
265  } catch (...) { // overflow when computing cardinality
266  return {QueryDescriptionType::GroupByBaselineHash,
267  0,
268  group_cardinality_estimation,
269  0,
270  false};
271  }
272  }
273  // For single column groupby on high timestamps, force baseline hash due to wide ranges
274  // we are likely to encounter when applying quals to the expression range
275  // TODO: consider allowing TIMESTAMP(9) (nanoseconds) with quals to use perfect hash if
276  // the range is small enough
277  if (ra_exe_unit_.groupby_exprs.front() &&
278  ra_exe_unit_.groupby_exprs.front()->get_type_info().is_high_precision_timestamp() &&
279  ra_exe_unit_.simple_quals.size() > 0) {
280  return {QueryDescriptionType::GroupByBaselineHash,
281  0,
282  group_cardinality_estimation,
283  0,
284  false};
285  }
286  const auto col_range_info = get_expr_range_info(
287  ra_exe_unit_, query_infos_, ra_exe_unit_.groupby_exprs.front().get(), executor_);
288  if (!ra_exe_unit_.groupby_exprs.front()) {
289  return col_range_info;
290  }
291  static const int64_t MAX_BUFFER_SIZE = 1 << 30;
292  const int64_t col_count =
293  ra_exe_unit_.groupby_exprs.size() + ra_exe_unit_.target_exprs.size();
294  int64_t max_entry_count = MAX_BUFFER_SIZE / (col_count * sizeof(int64_t));
295  if (has_count_distinct(ra_exe_unit_)) {
296  max_entry_count = std::min(max_entry_count, baseline_threshold);
297  }
298  const auto& groupby_expr_ti = ra_exe_unit_.groupby_exprs.front()->get_type_info();
299  if (groupby_expr_ti.is_string() && !col_range_info.bucket) {
300  CHECK(groupby_expr_ti.get_compression() == kENCODING_DICT);
301 
302  const bool has_filters =
303  !ra_exe_unit_.quals.empty() || !ra_exe_unit_.simple_quals.empty();
304  if (has_filters &&
305  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count)) {
306  // if filters are present, we can use the filter to narrow the cardinality of the
307  // group by in the case of ranges too big for perfect hash. Otherwise, we are better
308  // off attempting perfect hash (since we know the range will be made of
309  // monotonically increasing numbers from min to max for dictionary encoded strings)
310  // and failing later due to excessive memory use.
311  // Check the conditions where baseline hash can provide a performance increase and
312  // return baseline hash (potentially forcing an estimator query) as the range type.
313  // Otherwise, return col_range_info which will likely be perfect hash, though could
314  // be baseline from a previous call of this function prior to the estimator query.
315  if (!ra_exe_unit_.sort_info.order_entries.empty()) {
316  // TODO(adb): allow some sorts to pass through this block by centralizing sort
317  // algorithm decision making
318  if (has_count_distinct(ra_exe_unit_)) {
319  // always use baseline hash for column range too big for perfect hash with count
320  // distinct descriptors. We will need 8GB of CPU memory minimum for the perfect
321  // hash group by in this case.
322  return {QueryDescriptionType::GroupByBaselineHash,
323  col_range_info.min,
324  col_range_info.max,
325  0,
326  col_range_info.has_nulls};
327  } else {
328  // use original col range for sort
329  return col_range_info;
330  }
331  }
332  // if filters are present and the filtered range is less than the cardinality of
333  // the column, consider baseline hash
334  if (group_cardinality_estimation_ &&
335  cardinality_estimate_less_than_column_range(group_cardinality_estimation,
336  col_range_info)) {
337  return {QueryDescriptionType::GroupByBaselineHash,
338  col_range_info.min,
339  col_range_info.max,
340  0,
341  col_range_info.has_nulls};
342  }
343  }
344  } else if ((!expr_is_rowid(ra_exe_unit_.groupby_exprs.front().get())) &&
345  is_column_range_too_big_for_perfect_hash(col_range_info, max_entry_count) &&
346  !col_range_info.bucket) {
347  return {QueryDescriptionType::GroupByBaselineHash,
348  col_range_info.min,
349  col_range_info.max,
350  0,
351  col_range_info.has_nulls};
352  }
353  return col_range_info;
354 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
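
For the multi-column case, the layout decision boils down to multiplying the per-column bucketed cardinalities with checked 64-bit arithmetic: a zero product, a product above the baseline threshold, or an overflow all select the baseline hash layout. A self-contained sketch of that decision under illustrative inputs (the checked type mirrors the codebase's checked_int64_t alias):

#include <cstdint>
#include <vector>
#include <boost/multiprecision/cpp_int.hpp>

using checked_int64_t = boost::multiprecision::number<boost::multiprecision::cpp_int_backend<
    64, 64, boost::multiprecision::signed_magnitude, boost::multiprecision::checked, void>>;

// True when the product of per-column bucketed cardinalities forces the
// baseline (hash table) layout, mirroring the multi-column branch above.
bool multi_col_needs_baseline_hash(const std::vector<int64_t>& col_cardinalities,
                                   const int64_t baseline_threshold) {
  try {
    checked_int64_t cardinality{1};
    for (const auto c : col_cardinalities) {
      cardinality *= checked_int64_t(c);
    }
    return !cardinality || cardinality > baseline_threshold;
  } catch (...) {  // overflow while multiplying
    return true;
  }
}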

int64_t GroupByAndAggregate::getShardedTopBucket ( const ColRangeInfo col_range_info,
const size_t  shard_count 
) const
private

Definition at line 425 of file GroupByAndAggregate.cpp.

References ColRangeInfo::bucket, CHECK, CHECK_GT, device_type_, executor_, g_leaf_count, and GPU.

Referenced by initQueryMemoryDescriptorImpl().

426  {
427  size_t device_count{0};
428  if (device_type_ == ExecutorDeviceType::GPU) {
429  device_count = executor_->cudaMgr()->getDeviceCount();
430  CHECK_GT(device_count, 0u);
431  }
432 
433  int64_t bucket{col_range_info.bucket};
434 
435  if (shard_count) {
436  CHECK(!col_range_info.bucket);
437  /*
438  when a node has fewer devices than shard count,
439  a) In a distributed setup, the minimum distance between two keys would be
440  device_count because shards are stored consecutively across the physical tables,
441  i.e if a shard column has values 0 to 9, and 3 shards on each leaf, then node 1
442  would have values: 0,1,2,6,7,8 and node 2 would have values: 3,4,5,9. If each leaf
443  node has only 1 device, in this case, all the keys from each node are loaded on
444  the device each.
445 
446  b) In a single node setup, the distance would be minimum of device_count or
447  difference of device_count - shard_count. For example: If a single node server
448  running on 3 devices a shard column has values 0 to 9 in a table with 4 shards,
449  device to fragment keys mapping would be: device 1 - 4,8,3,7 device 2 - 1,5,9
450  device 3 - 2, 6 The bucket value would be 4(shards) - 3(devices) = 1 i.e. minimum
451  of device_count or difference.
452 
453  When a node has device count equal to or more than shard count then the
454  minimum distance is always at least shard_count * no of leaf nodes.
455  */
456  if (device_count < shard_count) {
457  bucket = g_leaf_count ? std::max(device_count, static_cast<size_t>(1))
458  : std::min(device_count, shard_count - device_count);
459  } else {
460  bucket = shard_count * std::max(g_leaf_count, static_cast<size_t>(1));
461  }
462  }
463 
464  return bucket;
465 }

+ Here is the caller graph for this function:
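
To make the comment concrete (numbers illustrative): on a single node with 3 GPUs and a table split into 4 shards, device_count < shard_count, so the bucket becomes min(3, 4 - 3) = 1; in a distributed setup with g_leaf_count = 2 and at least as many devices as shards, the bucket becomes shard_count * 2 = 8.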

bool GroupByAndAggregate::gpuCanHandleOrderEntries ( const std::list< Analyzer::OrderEntry > &  order_entries)
private

Definition at line 1004 of file GroupByAndAggregate.cpp.

References CHECK, CHECK_GE, CHECK_LE, executor_, Analyzer::AggExpr::get_arg(), anonymous_namespace{GroupByAndAggregate.cpp}::get_expr_range_info(), Analyzer::Expr::get_type_info(), heavyai::GroupByPerfectHash, kAPPROX_COUNT_DISTINCT, kAVG, kMAX, kMIN, query_infos_, ra_exe_unit_, and RelAlgExecutionUnit::target_exprs.

Referenced by initQueryMemoryDescriptor().

1005  {
1006  if (order_entries.size() > 1) { // TODO(alex): lift this restriction
1007  return false;
1008  }
1009  for (const auto& order_entry : order_entries) {
1010  CHECK_GE(order_entry.tle_no, 1);
1011  CHECK_LE(static_cast<size_t>(order_entry.tle_no), ra_exe_unit_.target_exprs.size());
1012  const auto target_expr = ra_exe_unit_.target_exprs[order_entry.tle_no - 1];
1013  if (!dynamic_cast<Analyzer::AggExpr*>(target_expr)) {
1014  return false;
1015  }
1016  // TODO(alex): relax the restrictions
1017  auto agg_expr = static_cast<Analyzer::AggExpr*>(target_expr);
1018  if (agg_expr->get_is_distinct() || agg_expr->get_aggtype() == kAVG ||
1019  agg_expr->get_aggtype() == kMIN || agg_expr->get_aggtype() == kMAX ||
1020  agg_expr->get_aggtype() == kAPPROX_COUNT_DISTINCT) {
1021  return false;
1022  }
1023  if (agg_expr->get_arg()) {
1024  const auto& arg_ti = agg_expr->get_arg()->get_type_info();
1025  if (arg_ti.is_fp()) {
1026  return false;
1027  }
1028  auto expr_range_info =
1029  get_expr_range_info(ra_exe_unit_, query_infos_, agg_expr->get_arg(), executor_);
1030  // TOD(adb): QMD not actually initialized here?
1031  if ((!(expr_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
1032  /* query_mem_desc.getGroupbyColCount() == 1 */ false) ||
1033  expr_range_info.has_nulls) &&
1034  order_entry.is_desc == order_entry.nulls_first) {
1035  return false;
1036  }
1037  }
1038  const auto& target_ti = target_expr->get_type_info();
1039  CHECK(!target_ti.is_buffer());
1040  if (!target_ti.is_integer()) {
1041  return false;
1042  }
1043  }
1044  return true;
1045 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

ApproxQuantileDescriptors GroupByAndAggregate::initApproxQuantileDescriptors ( )
private

Definition at line 894 of file GroupByAndAggregate.cpp.

References RelAlgExecutionUnit::eachAggTarget(), g_approx_quantile_buffer, g_approx_quantile_centroids, kAPPROX_QUANTILE, and ra_exe_unit_.

Referenced by initQueryMemoryDescriptorImpl().

894  {
895  // Count APPROX_QUANTILE targets
896  size_t target_count = 0u;
897  auto count_target = [&](Analyzer::AggExpr const*, size_t) { ++target_count; };
898  ra_exe_unit_.eachAggTarget<kAPPROX_QUANTILE>(count_target);
899  if (target_count == 0u) {
900  return {};
901  }
902 
903  // Reserve and fill descriptors
904  std::vector<ApproxQuantileDescriptor> descriptors;
905  descriptors.reserve(target_count);
906  auto add_descriptor = [&](Analyzer::AggExpr const*, size_t) {
907  descriptors.push_back({g_approx_quantile_buffer, g_approx_quantile_centroids});
908  };
909  ra_exe_unit_.eachAggTarget<kAPPROX_QUANTILE>(add_descriptor);
910  return descriptors;
911 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptor ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
RenderInfo render_info,
const bool  output_columnar_hint 
)
private

Definition at line 853 of file GroupByAndAggregate.cpp.

References align_to_int64(), CHECK, device_type_, GPU, gpuCanHandleOrderEntries(), initQueryMemoryDescriptorImpl(), SortInfo::order_entries, query_mem_desc, ra_exe_unit_, shard_count_for_top_groups(), and RelAlgExecutionUnit::sort_info.

858  {
859  const auto shard_count = device_type_ == ExecutorDeviceType::GPU
860  ? shard_count_for_top_groups(ra_exe_unit_)
861  : 0;
862  bool sort_on_gpu_hint =
863  device_type_ == ExecutorDeviceType::GPU && allow_multifrag &&
864  !ra_exe_unit_.sort_info.order_entries.empty() &&
865  gpuCanHandleOrderEntries(ra_exe_unit_.sort_info.order_entries) && !shard_count;
866  // must_use_baseline_sort is true iff we'd sort on GPU with the old algorithm
867  // but the total output buffer size would be too big or it's a sharded top query.
868  // For the sake of managing risk, use the new result set way very selectively for
869  // this case only (alongside the baseline layout we've enabled for a while now).
870  bool must_use_baseline_sort = shard_count;
871  std::unique_ptr<QueryMemoryDescriptor> query_mem_desc;
872  while (true) {
873  query_mem_desc = initQueryMemoryDescriptorImpl(allow_multifrag,
874  max_groups_buffer_entry_count,
875  crt_min_byte_width,
876  sort_on_gpu_hint,
877  render_info,
878  must_use_baseline_sort,
879  output_columnar_hint);
880  CHECK(query_mem_desc);
881  if (query_mem_desc->sortOnGpu() &&
882  (query_mem_desc->getBufferSizeBytes(device_type_) +
883  align_to_int64(query_mem_desc->getEntryCount() * sizeof(int32_t))) >
884  2 * 1024 * 1024 * 1024LL) {
885  must_use_baseline_sort = true;
886  sort_on_gpu_hint = false;
887  } else {
888  break;
889  }
890  }
891  return query_mem_desc;
892 }

+ Here is the call graph for this function:
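
The retry loop guards GPU sort against oversized output buffers: the descriptor's buffer size plus an int64-aligned int32 index per entry must stay under 2 GiB, otherwise the loop clears sort_on_gpu_hint, sets must_use_baseline_sort, and rebuilds the descriptor. As an illustrative figure, a GPU-sorted descriptor with 100M entries and 24-byte rows needs roughly 2.4 GB plus 0.4 GB of index space, which trips the fallback.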

std::unique_ptr< QueryMemoryDescriptor > GroupByAndAggregate::initQueryMemoryDescriptorImpl ( const bool  allow_multifrag,
const size_t  max_groups_buffer_entry_count,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
RenderInfo render_info,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint 
)
private

Definition at line 913 of file GroupByAndAggregate.cpp.

References CPU, device_type_, executor_, g_enable_watchdog, g_watchdog_baseline_max_groups, anonymous_namespace{GroupByAndAggregate.cpp}::get_keyless_info(), getColRangeInfo(), getShardedTopBucket(), GPU, RelAlgExecutionUnit::groupby_exprs, heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, ColRangeInfo::hash_type_, QueryMemoryDescriptor::init(), anonymous_namespace{GroupByAndAggregate.cpp}::init_count_distinct_descriptors(), initApproxQuantileDescriptors(), LOG, query_infos_, ra_exe_unit_, shard_count_for_top_groups(), and logger::WARNING.

Referenced by initQueryMemoryDescriptor().

920  {
921  const bool is_group_by{!ra_exe_unit_.groupby_exprs.empty()};
922 
923  const bool threads_can_reuse_group_by_buffers =
924  device_type_ == ExecutorDeviceType::CPU && is_group_by &&
925  ra_exe_unit_.groupby_exprs.front();
926 
927  auto col_range_info_nosharding = getColRangeInfo();
928 
929  const auto shard_count = device_type_ == ExecutorDeviceType::GPU
930  ? shard_count_for_top_groups(ra_exe_unit_)
931  : 0;
932 
933  const auto col_range_info =
934  ColRangeInfo{col_range_info_nosharding.hash_type_,
935  col_range_info_nosharding.min,
936  col_range_info_nosharding.max,
937  getShardedTopBucket(col_range_info_nosharding, shard_count),
938  col_range_info_nosharding.has_nulls};
939 
940  // Non-grouped aggregates do not support accessing aggregated ranges
941  // Keyless hash is currently only supported with single-column perfect hash
942  const auto keyless_info =
943  !(is_group_by &&
944  col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash)
945  ? KeylessInfo{false, -1}
946  : get_keyless_info(ra_exe_unit_, query_infos_, is_group_by, executor_);
947 
948  if (g_enable_watchdog &&
949  ((col_range_info.hash_type_ == QueryDescriptionType::GroupByBaselineHash &&
950  max_groups_buffer_entry_count > g_watchdog_baseline_max_groups) ||
951  (col_range_info.hash_type_ == QueryDescriptionType::GroupByPerfectHash &&
952  ra_exe_unit_.groupby_exprs.size() == 1 &&
953  (col_range_info.max - col_range_info.min) /
954  std::max(col_range_info.bucket, int64_t(1)) >
955  130000000))) {
956  throw WatchdogException("Query would use too much memory");
957  }
958 
959  const auto count_distinct_descriptors = init_count_distinct_descriptors(
960  ra_exe_unit_, query_infos_, col_range_info, device_type_, executor_);
961  auto approx_quantile_descriptors = initApproxQuantileDescriptors();
962  try {
963  return QueryMemoryDescriptor::init(executor_,
964  ra_exe_unit_,
965  query_infos_,
966  col_range_info,
967  keyless_info,
968  allow_multifrag,
969  device_type_,
970  crt_min_byte_width,
971  sort_on_gpu_hint,
972  shard_count,
973  max_groups_buffer_entry_count,
974  render_info,
975  approx_quantile_descriptors,
976  count_distinct_descriptors,
977  must_use_baseline_sort,
978  output_columnar_hint,
979  /*streaming_top_n_hint=*/true,
980  threads_can_reuse_group_by_buffers);
981  } catch (const StreamingTopNOOM& e) {
982  LOG(WARNING) << e.what() << " Disabling Streaming Top N.";
983  return QueryMemoryDescriptor::init(executor_,
984  ra_exe_unit_,
985  query_infos_,
986  col_range_info,
987  keyless_info,
988  allow_multifrag,
989  device_type_,
990  crt_min_byte_width,
991  sort_on_gpu_hint,
992  shard_count,
993  max_groups_buffer_entry_count,
994  render_info,
995  approx_quantile_descriptors,
996  count_distinct_descriptors,
997  must_use_baseline_sort,
998  output_columnar_hint,
999  /*streaming_top_n_hint=*/false,
1000  threads_can_reuse_group_by_buffers);
1001  }
1002 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool GroupByAndAggregate::needsUnnestDoublePatch ( llvm::Value const *  val_ptr,
const std::string &  agg_base_name,
const bool  threads_share_memory,
const CompilationOptions co 
) const
private

Definition at line 29 of file MaxwellCodegenPatch.cpp.

References CompilationOptions::device_type, and executor_.

Referenced by TargetExprCodegen::codegenAggregate().

32  {
33  return (executor_->isArchMaxwell(co.device_type) && threads_share_memory &&
34  llvm::isa<llvm::AllocaInst>(val_ptr) &&
35  val_ptr->getType() ==
36  llvm::Type::getDoublePtrTy(executor_->cgen_state_->context_) &&
37  "agg_id" == agg_base_name);
38 }

+ Here is the caller graph for this function:

void GroupByAndAggregate::prependForceSync ( )
private

Definition at line 40 of file MaxwellCodegenPatch.cpp.

References executor_.

Referenced by codegen().

40  {
41  executor_->cgen_state_->ir_builder_.CreateCall(
42  executor_->cgen_state_->module_->getFunction("force_sync"));
43 }

+ Here is the caller graph for this function:

size_t GroupByAndAggregate::shard_count_for_top_groups ( const RelAlgExecutionUnit ra_exe_unit)
static

Definition at line 2251 of file GroupByAndAggregate.cpp.

References Catalog_Namespace::get_metadata_for_table(), Analyzer::ColumnVar::getColumnKey(), RelAlgExecutionUnit::groupby_exprs, SortInfo::limit, TableDescriptor::nShards, SortInfo::order_entries, and RelAlgExecutionUnit::sort_info.

Referenced by Executor::collectAllDeviceResults(), RelAlgExecutor::executeRelAlgQuerySingleStep(), initQueryMemoryDescriptor(), and initQueryMemoryDescriptorImpl().

2252  {
2253  if (ra_exe_unit.sort_info.order_entries.size() != 1 || !ra_exe_unit.sort_info.limit) {
2254  return 0;
2255  }
2256  for (const auto& group_expr : ra_exe_unit.groupby_exprs) {
2257  const auto grouped_col_expr =
2258  dynamic_cast<const Analyzer::ColumnVar*>(group_expr.get());
2259  if (!grouped_col_expr) {
2260  continue;
2261  }
2262  const auto& column_key = grouped_col_expr->getColumnKey();
2263  if (column_key.table_id <= 0) {
2264  return 0;
2265  }
2266  const auto td = Catalog_Namespace::get_metadata_for_table(
2267  {column_key.db_id, column_key.table_id});
2268  if (td->shardedColumnId == column_key.column_id) {
2269  return td->nShards;
2270  }
2271  }
2272  return 0;
2273 }

+ Here is the call graph for this function:

+ Here is the caller graph for this function:
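
As a concrete illustration (schema hypothetical): a top-k query with exactly one ORDER BY entry and a LIMIT, grouping on the shard key of a table split into 4 shards, returns 4; with multiple order entries, no limit, or a grouped column that is not the shard column, it returns 0.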

Friends And Related Function Documentation

friend class CodeGenerator
friend

Definition at line 221 of file GroupByAndAggregate.h.

friend class ExecutionKernel
friend

Definition at line 222 of file GroupByAndAggregate.h.

friend class Executor
friend

Definition at line 219 of file GroupByAndAggregate.h.

friend class QueryMemoryDescriptor
friend

Definition at line 220 of file GroupByAndAggregate.h.

friend struct TargetExprCodegen
friend

Definition at line 223 of file GroupByAndAggregate.h.

friend struct TargetExprCodegenBuilder
friend

Definition at line 224 of file GroupByAndAggregate.h.

Member Data Documentation

const ExecutorDeviceType GroupByAndAggregate::device_type_
private
const std::optional<int64_t> GroupByAndAggregate::group_cardinality_estimation_
private

Definition at line 217 of file GroupByAndAggregate.h.

Referenced by getColRangeInfo().

bool GroupByAndAggregate::output_columnar_
private

Definition at line 214 of file GroupByAndAggregate.h.

const std::vector<InputTableInfo>& GroupByAndAggregate::query_infos_
private
std::shared_ptr<RowSetMemoryOwner> GroupByAndAggregate::row_set_mem_owner_
private

Definition at line 213 of file GroupByAndAggregate.h.


The documentation for this class was generated from the following files:

GroupByAndAggregate.h
GroupByAndAggregate.cpp
MaxwellCodegenPatch.cpp