#include "../Execute.h"
#include "../ExpressionRewrite.h"
#include "../GroupByAndAggregate.h"
#include "../StreamingTopN.h"
#include "../UsedColumnsVisitor.h"

#include <boost/algorithm/cxx11/any_of.hpp>

    const std::list<std::shared_ptr<Analyzer::Expr>>& groupby_exprs,
    const std::vector<Analyzer::Expr*>& target_exprs) {
  std::vector<int64_t> indices(target_exprs.size(), -1);
  for (size_t target_idx = 0; target_idx < target_exprs.size(); ++target_idx) {
    const auto target_expr = target_exprs[target_idx];
    if (dynamic_cast<const Analyzer::AggExpr*>(target_expr)) {
    const auto var_expr = dynamic_cast<const Analyzer::Var*>(target_expr);
      indices[target_idx] = var_expr->get_varno() - 1;
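
// Commentary (not in the original source): target_expr_group_by_indices maps each
// output target onto a slot of the group-by key. Targets that are plain
// Analyzer::Var references into the key resolve to get_varno() - 1, while
// aggregates keep the default -1 and get their own output slots. A hedged
// illustration:
//
//   // SELECT dept, COUNT(*) FROM emp GROUP BY dept;
//   // target 0 (dept)      -> index 0  (first group-by key component)
//   // target 1 (COUNT(*))  -> index -1 (materialized as an aggregate slot)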

  std::vector<int64_t> target_indices(ra_exe_unit.target_exprs.size(), -1);
  std::unordered_set<shared::ColumnKey> used_columns;
  for (const auto& simple_qual : ra_exe_unit.simple_quals) {
    const auto crt_used_columns = columns_visitor.visit(simple_qual.get());
    used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
  for (const auto& qual : ra_exe_unit.quals) {
    const auto crt_used_columns = columns_visitor.visit(qual.get());
    used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
    if (!cd || !cd->isVirtualCol) {
      const auto crt_used_columns = columns_visitor.visit(target);
      used_columns.insert(crt_used_columns.begin(), crt_used_columns.end());
  for (size_t target_idx = 0; target_idx < ra_exe_unit.target_exprs.size();
    const auto target_expr = ra_exe_unit.target_exprs[target_idx];
    const auto& ti = target_expr->get_type_info();
    if (!ti.is_varlen() &&
        used_columns.find(col_var->getColumnKey()) == used_columns.end()) {
      target_indices[target_idx] = 0;
  return target_indices;
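
// Commentary (not in the original source): target_expr_proj_indices first collects
// the columns referenced outside the projection itself (simple quals, quals, and
// similar non-projected expressions), then marks projection targets whose column is
// fixed-width and not referenced by any of those expressions, so they can be fetched
// lazily instead of being copied into the projection buffer.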

    const size_t group_col_width) {
  return sizeof(int64_t);
      if (group_col_width == sizeof(int64_t) && range.hasNulls()) {
        return sizeof(int64_t);
      return sizeof(int64_t);
      return sizeof(int64_t);

    const std::vector<InputTableInfo>& query_infos,
    const Executor* executor) {
  int8_t compact_width{4};
    const auto expr_range = getExpressionRange(groupby_expr.get(), query_infos, executor);
    compact_width = std::max(compact_width,
                             pick_baseline_key_component_width(
                                 expr_range, groupby_expr->get_type_info().get_size()));
  return compact_width;
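
// Commentary (not in the original source): pick_baseline_key_width starts from a
// 4-byte component width and widens to 8 bytes whenever the range or nullability of
// any group-by expression requires it; the resulting width is shared by every key
// component of the baseline hash layout.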

    const bool output_columnar) {
  for (const auto target_expr : ra_exe_unit.target_exprs) {
    if (dynamic_cast<const Analyzer::AggExpr*>(target_expr)) {
    if (dynamic_cast<const Analyzer::WindowFunction*>(target_expr)) {
    CHECK_GT(only_order_entry.tle_no, int(0));
    CHECK_LE(static_cast<size_t>(only_order_entry.tle_no),
    const auto order_entry_expr = ra_exe_unit.target_exprs[only_order_entry.tle_no - 1];
    if ((order_entry_expr->get_type_info().is_number() ||
         order_entry_expr->get_type_info().is_time()) &&
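
// Commentary (not in the original source): use_streaming_top_n rejects plans that
// contain aggregates or window functions and requires a single ORDER BY entry over a
// numeric or time target; when those conditions hold (together with a small enough
// LIMIT), the projection can be maintained in a fixed-size top-n heap instead of a
// full-size output buffer.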

  std::vector<int8_t> col_widths;
  size_t col_expr_idx = 0;
  for (const auto& col_expr : col_expr_list) {
      col_widths.push_back(sizeof(int64_t));
      if constexpr (std::is_same<T, std::list<std::shared_ptr<Analyzer::Expr>>>::value) {
        col_widths.push_back(sizeof(int64_t));
      if ((chosen_type.is_string() && chosen_type.get_compression() == kENCODING_NONE) ||
          chosen_type.is_array()) {
        col_widths.push_back(sizeof(int64_t));
        col_widths.push_back(sizeof(int64_t));
      if (chosen_type.is_geometry()) {
        for (auto i = 0; i < chosen_type.get_physical_coord_cols(); ++i) {
          col_widths.push_back(sizeof(int64_t));
          col_widths.push_back(sizeof(int64_t));
      CHECK_EQ(size_t(0), col_expr_bitwidth % 8);
      col_widths.push_back(static_cast<int8_t>(col_expr_bitwidth >> 3));
      if (agg_info.agg_kind == kAVG) {
        CHECK(agg_info.is_agg);
        col_widths.push_back(sizeof(int64_t));
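
// Commentary (not in the original source): get_col_byte_widths emits one slot per
// fixed-width output (its bit width divided by 8), two 8-byte slots (pointer plus
// length) for none-encoded strings and arrays, one pointer/length pair per physical
// coordinate column for geometry targets, and an extra 8-byte count slot after the
// sum slot when the aggregate is AVG.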

    const Executor* executor,
    const std::vector<InputTableInfo>& query_infos,
    const bool allow_multifrag,
    const int8_t crt_min_byte_width,
    const bool sort_on_gpu_hint,
    const size_t shard_count,
    const size_t max_groups_buffer_entry_count,
    const bool must_use_baseline_sort,
    const bool output_columnar_hint,
    const bool streaming_top_n_hint,
    const bool threads_can_reuse_group_by_buffers) {
  const bool is_group_by{!group_col_widths.empty()};
      ra_exe_unit, query_infos, crt_min_byte_width);
  col_slot_context.setAllSlotsPaddedSize(min_slot_size);
  col_slot_context.validate();
    CHECK(!must_use_baseline_sort);
    return std::make_unique<QueryMemoryDescriptor>(
        ColRangeInfo{ra_exe_unit.estimator ? QueryDescriptionType::Estimator
        std::vector<int8_t>{},
        std::vector<int64_t>{},
        approx_quantile_descriptors,
        count_distinct_descriptors,
        output_columnar_hint,
        render_info && render_info->isInSitu(),
        must_use_baseline_sort,
        threads_can_reuse_group_by_buffers);
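
// Commentary (not in the original source): when group_col_widths is empty (no GROUP
// BY), init() returns early with a minimal descriptor, using the Estimator
// description type when an estimator is attached to the execution unit; the
// per-hash-type sizing logic below only runs for grouped queries.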

  size_t entry_count = 1;
  auto actual_col_range_info = col_range_info;
  bool interleaved_bins_on_gpu = false;
  bool keyless_hash = false;
  bool streaming_top_n = false;
  int8_t group_col_compact_width = 0;
  int32_t idx_target_as_key = -1;
  auto output_columnar = output_columnar_hint;
  std::vector<int64_t> target_groupby_indices;

  switch (col_range_info.hash_type_) {
        render_info->setNonInSitu();
          (!sort_on_gpu_hint ||
               col_range_info.max, col_range_info.min, col_range_info.bucket)) &&
          !col_range_info.bucket && !must_use_baseline_sort && keyless_info.keyless;
        idx_target_as_key = keyless_info.target_index;
      if (group_col_widths.size() > 1) {
        entry_count = static_cast<size_t>(actual_col_range_info.max);
        actual_col_range_info.bucket = 0;
        entry_count = std::max(
      const size_t interleaved_max_threshold{512};
      if (must_use_baseline_sort) {
                                              ra_exe_unit.target_exprs);
      bool has_varlen_sample_agg = false;
      for (const auto& target_expr : ra_exe_unit.target_exprs) {
        if (target_expr->get_contains_agg()) {
          if (agg_expr->get_aggtype() == kSAMPLE &&
              agg_expr->get_type_info().is_varlen()) {
            has_varlen_sample_agg = true;
      interleaved_bins_on_gpu = keyless_hash && !has_varlen_sample_agg &&
                                (entry_count <= interleaved_max_threshold) &&
                                    count_distinct_descriptors) &&
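
// Commentary (not in the original source): in the perfect-hash case the group-by key
// can be dropped entirely (keyless hash) when sorting does not prevent it and one
// target slot can stand in for the key (idx_target_as_key); keyless buffers that are
// small enough (at most interleaved_max_threshold entries) and free of varlen SAMPLE
// aggregates may additionally be interleaved across GPU bins.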

        render_info->setNonInSitu();
      entry_count = shard_count
                        ? (max_groups_buffer_entry_count + shard_count - 1) / shard_count
                        : max_groups_buffer_entry_count;
                                                 ra_exe_unit.target_exprs);
      col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
      group_col_compact_width =
      actual_col_range_info =

      CHECK(!must_use_baseline_sort);
        streaming_top_n = true;
            ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit.value_or(0);
      if (ra_exe_unit.use_bump_allocator) {
        output_columnar = false;
        entry_count = ra_exe_unit.scan_limit
                          ? static_cast<size_t>(ra_exe_unit.scan_limit)
                          : max_groups_buffer_entry_count;
      target_groupby_indices = executor->plan_state_->allow_lazy_fetch_
                                   : std::vector<int64_t>{};
      col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
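
// Commentary (not in the original source): in the Projection case streaming top-n
// sizes the output as offset + limit heap entries, the bump allocator forces
// row-wise output, otherwise the entry count comes from scan_limit (when present)
// or max_groups_buffer_entry_count, and lazy-fetch projection indices are used only
// when the plan state allows lazy fetch.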

  return std::make_unique<QueryMemoryDescriptor>(executor,
                                                 interleaved_bins_on_gpu,
                                                 actual_col_range_info,
                                                 group_col_compact_width,
                                                 target_groupby_indices,
                                                 approx_quantile_descriptors,
                                                 count_distinct_descriptors,
                                                 render_info && render_info->isInSitu(),
                                                 must_use_baseline_sort,
                                                 threads_can_reuse_group_by_buffers);

template <SQLAgg... agg_types>
bool any_of(std::vector<Analyzer::Expr*> const& target_exprs) {
    return agg && (... || (agg_types == agg->get_aggtype()));
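
// Commentary (not in the original source): the fold expression above expands to a
// chain of comparisons against every listed aggregate kind. A hedged usage sketch:
//
//   if (any_of<kAPPROX_QUANTILE, kMODE>(ra_exe_unit.target_exprs)) {
//     // at least one target is an APPROX_QUANTILE or MODE aggregate
//   }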

    const Executor* executor,
    const std::vector<InputTableInfo>& query_infos,
    const bool allow_multifrag,
    const bool keyless_hash,
    const bool interleaved_bins_on_gpu,
    const int32_t idx_target_as_key,
    const std::vector<int8_t>& group_col_widths,
    const int8_t group_col_compact_width,
    const std::vector<int64_t>& target_groupby_indices,
    const size_t entry_count,
    const bool sort_on_gpu_hint,
    const bool output_columnar_hint,
    const bool render_output,
    const bool must_use_baseline_sort,
    const bool threads_can_reuse_group_by_buffers)
    , allow_multifrag_(allow_multifrag)
    , query_desc_type_(col_range_info.hash_type_)
    , keyless_hash_(keyless_hash)
    , interleaved_bins_on_gpu_(interleaved_bins_on_gpu)
    , idx_target_as_key_(idx_target_as_key)
    , group_col_widths_(group_col_widths)
    , group_col_compact_width_(group_col_compact_width)
    , target_groupby_indices_(target_groupby_indices)
    , entry_count_(entry_count)
    , min_val_(col_range_info.min)
    , max_val_(col_range_info.max)
    , bucket_(col_range_info.bucket)
    , has_nulls_(col_range_info.has_nulls)
    , approx_quantile_descriptors_(approx_quantile_descriptors)
    , count_distinct_descriptors_(count_distinct_descriptors)
    , output_columnar_(false)
    , render_output_(render_output)
    , must_use_baseline_sort_(must_use_baseline_sort)
    , use_streaming_top_n_(use_streaming_top_n)
    , threads_can_reuse_group_by_buffers_(threads_can_reuse_group_by_buffers)
    , force_4byte_float_(false)
    , col_slot_context_(col_slot_context)
      !any_of<kAPPROX_QUANTILE, kMODE>(ra_exe_unit.target_exprs);
      !any_of<kAPPROX_QUANTILE, kMODE>(ra_exe_unit.target_exprs);

    const auto thread_count = executor->blockSize() * executor->gridSize();
    const auto total_buff_size =
    if (total_buff_size > executor_->maxGpuSlabSize()) {

    , allow_multifrag_(false)
    , query_desc_type_(QueryDescriptionType::Projection)
    , keyless_hash_(false)
    , interleaved_bins_on_gpu_(false)
    , idx_target_as_key_(0)
    , group_col_compact_width_(0)
    , sort_on_gpu_(false)
    , output_columnar_(false)
    , render_output_(false)
    , must_use_baseline_sort_(false)
    , use_streaming_top_n_(false)
    , threads_can_reuse_group_by_buffers_(false)
    , force_4byte_float_(false) {}

    const size_t entry_count,
    const QueryDescriptionType query_desc_type)
    , allow_multifrag_(false)
    , query_desc_type_(query_desc_type)
    , keyless_hash_(false)
    , interleaved_bins_on_gpu_(false)
    , idx_target_as_key_(0)
    , group_col_compact_width_(0)
    , entry_count_(entry_count)
    , sort_on_gpu_(false)
    , output_columnar_(false)
    , render_output_(false)
    , must_use_baseline_sort_(false)
    , use_streaming_top_n_(false)
    , threads_can_reuse_group_by_buffers_(false)
    , force_4byte_float_(false)

    const int64_t min_val,
    const int64_t max_val,
    const bool has_nulls,
    const std::vector<int8_t>& group_col_widths)
    , allow_multifrag_(false)
    , query_desc_type_(query_desc_type)
    , keyless_hash_(false)
    , interleaved_bins_on_gpu_(false)
    , idx_target_as_key_(0)
    , group_col_widths_(group_col_widths)
    , group_col_compact_width_(0)
    , sort_on_gpu_(false)
    , output_columnar_(false)
    , render_output_(false)
    , must_use_baseline_sort_(false)
    , use_streaming_top_n_(false)
    , threads_can_reuse_group_by_buffers_(false)
    , force_4byte_float_(false)

    count_distinct_desc.device_type = ref_count_distinct_desc.device_type;
    if (ref_count_distinct_desc != count_distinct_desc) {

    const Executor* executor,
    const int64_t num_rows,
    const std::vector<std::vector<const int8_t*>>& col_buffers,
    const std::vector<std::vector<uint64_t>>& frag_offsets,
    std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
    const bool output_columnar,
    const size_t thread_idx,
  if (frag_offsets.empty()) {
  return std::unique_ptr<QueryExecutionContext>(

    const std::vector<InputTableInfo>& query_infos,
    const int8_t crt_min_byte_width) {
    return sizeof(int64_t);
  int8_t compact_width{0};
  int unnest_array_col_id{std::numeric_limits<int>::min()};
    if (uoper && uoper->get_optype() == kUNNEST) {
      CHECK(arg_ti.is_array());
      const auto& elem_ti = arg_ti.get_elem_type();
      if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
        unnest_array_col_id = (*col_it)->getColId();
        compact_width = crt_min_byte_width;
  if (!compact_width &&
    compact_width = crt_min_byte_width;
  if (!compact_width) {
      const auto& ti = target->get_type_info();
      if (agg && agg->get_arg()) {
        compact_width = crt_min_byte_width;
        CHECK(!agg->get_is_distinct());
      if (uoper && uoper->get_optype() == kUNNEST &&
          (*col_it)->getColId() == unnest_array_col_id) {
        CHECK(arg_ti.is_array());
        const auto& elem_ti = arg_ti.get_elem_type();
        if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
          compact_width = crt_min_byte_width;
  if (!compact_width) {
    size_t total_tuples{0};
    for (const auto& qi : query_infos) {
      total_tuples += qi.info.getNumTuples();
    return total_tuples <= static_cast<size_t>(std::numeric_limits<uint32_t>::max()) ||
                   unnest_array_col_id != std::numeric_limits<int>::min()
               : crt_min_byte_width;
  compact_width = std::max(compact_width, wid);
  return compact_width;
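
// Commentary (not in the original source): pick_target_compact_width returns 8 when
// logical column sizes must be preserved; otherwise it widens crt_min_byte_width
// based on the target and unnest expressions, and when nothing forces a width it
// falls back to a narrow 4-byte width for inputs that fit in 32 bits (or that unnest
// a dictionary-encoded array) and to crt_min_byte_width otherwise.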

  size_t total_bytes{0};

    const size_t num_entries_per_column) const {

    const size_t projection_count) const {
  constexpr size_t row_index_width = sizeof(int64_t);
         row_index_width * projection_count;
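
// Commentary (not in the original source): getTotalBytesOfColumnarProjections
// appears to add an 8-byte row index per projected row on top of the per-column
// columnar buffers, which is what the row_index_width * projection_count term above
// accounts for.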

    for (size_t index = 0; index < col_idx; ++index) {
      if (column_width > 0) {
    for (size_t index = 0; index < col_idx; ++index) {

    const size_t group_idx) const {
  for (size_t col_idx = 0; col_idx < group_idx; col_idx++) {
        std::max(groupColWidth(col_idx), static_cast<int8_t>(sizeof(int64_t))) *
  size_t buffer_size{0};
        std::max(groupColWidth(group_idx), static_cast<int8_t>(sizeof(int64_t))) *

    const size_t col_idx) const {
  if (col_idx + 1 == total_slot_count) {
    return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
    offset += bin * (next_chosen_bytes - chosen_bytes);
  if (next_chosen_bytes == sizeof(int64_t)) {
    return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
  return chosen_bytes;

    const size_t col_idx) const {
  if (col_idx + 1 == total_slot_count) {
    return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
  if (next_chosen_bytes == sizeof(int64_t)) {
    return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
  return chosen_bytes;
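
// Commentary (not in the original source): align_to_int64 rounds an address up to
// the next 8-byte boundary, so a 4-byte slot that is followed by an 8-byte slot
// effectively occupies 8 bytes of row width. A hedged arithmetic sketch:
//
//   col_ptr % 8 == 0, chosen_bytes == 4
//   align_to_int64(col_ptr + 4) - col_ptr == 8   // next column starts 8 bytes later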

    const unsigned thread_count,

    const size_t entry_count) const {
  constexpr size_t row_index_width = sizeof(int64_t);
  size_t total_bytes{0};

  return total_slot_count;
      [](const int64_t i) { return i >= 0; });

  return executor_->cudaMgr()->isArchVoltaOrGreaterForAll();

    const int8_t bytes) {

    const size_t slot_idx) const {

    const size_t col_idx) const {
  CHECK_EQ(col_slots.size(), size_t(1));
  return col_slots.front();

    const int8_t actual_min_byte_width) const {

    const std::vector<std::tuple<int8_t, int8_t>>& slots_for_col) {

      return "Perfect Hash";
      return "Baseline Hash";
      return "Projection";
      return "Table Function";
      return "Non-grouped Aggregate";
    case QueryDescriptionType::Estimator:

  auto const allow_lazy_fetch = executor_->plan_state_
                                    ? executor_->plan_state_->allow_lazy_fetch_
  str += "\tAllow Lazy Fetch: " + ::toString(allow_lazy_fetch) + "\n";

  str += "Query Memory Descriptor State\n";
  if (group_indices_size) {
    std::vector<std::string> group_indices_strings;
    for (size_t target_idx = 0; target_idx < group_indices_size; ++target_idx) {
    str += "\tTarget group by indices: " +

    const std::vector<Analyzer::Expr*>& targets,
  std::vector<TargetInfo> target_infos;
  for (const auto target_expr : targets) {
      target.sql_type.set_notnull(false);
    if (target.sql_type.supportsFlatBuffer()) {
      target.sql_type.setUsesFlatBuffer(
    target_infos.push_back(target);
  return target_infos;
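
// Commentary (not in the original source): target_exprs_to_infos builds one
// TargetInfo per target expression, in some cases relaxing nullability to match the
// buffer layout, and flags types that will be materialized in FlatBuffer format.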

  int64_t buffer_element_size{0};
    if (slot_element_size < 0) {
      return std::nullopt;
    buffer_element_size += slot_element_size;
  return buffer_element_size;

  int64_t buffer_element_size{0};
  for (size_t i = 0; i < slot_idx; i++) {
    if (slot_element_size < 0) {
    buffer_element_size += slot_element_size;
  return buffer_element_size;

  auto by_cardinality = [](auto& a, auto& b) { return a.second < b.second; };
  auto itr = std::max_element(pdc.begin(), pdc.end(), by_cardinality);
  if (itr != pdc.end() && itr->second > 0) {
  return std::nullopt;
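
// Commentary (not in the original source): getMaxPerDeviceCardinality scans the
// per-device cardinality pairs recorded in the execution unit and returns the
// largest positive cardinality, or std::nullopt when no device reported one.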

  auto is_left_join = [](auto& join_qual) { return join_qual.type == JoinType::LEFT; };
  return !std::any_of(join_quals.begin(), join_quals.end(), is_left_join);
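
// Commentary (not in the original source): canUsePerDeviceCardinality returns false
// as soon as any join level is a LEFT join, presumably because left joins can change
// the produced row count in ways the recorded per-device cardinality does not
// capture.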