OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
QueryMemoryDescriptor Class Reference

#include <QueryMemoryDescriptor.h>

+ Collaboration diagram for QueryMemoryDescriptor:

Public Member Functions

 QueryMemoryDescriptor ()
 
 QueryMemoryDescriptor (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool allow_multifrag, const bool keyless_hash, const bool interleaved_bins_on_gpu, const int32_t idx_target_as_key, const ColRangeInfo &col_range_info, const ColSlotContext &col_slot_context, const std::vector< int8_t > &group_col_widths, const int8_t group_col_compact_width, const std::vector< int64_t > &target_groupby_indices, const size_t entry_count, const ApproxQuantileDescriptors &, const CountDistinctDescriptors, const bool sort_on_gpu_hint, const bool output_columnar, const bool render_output, const bool must_use_baseline_sort, const bool use_streaming_top_n, const bool threads_can_reuse_group_by_buffers)
 
 QueryMemoryDescriptor (const Executor *executor, const size_t entry_count, const QueryDescriptionType query_desc_type)
 
 QueryMemoryDescriptor (const QueryDescriptionType query_desc_type, const int64_t min_val, const int64_t max_val, const bool has_nulls, const std::vector< int8_t > &group_col_widths)
 
 QueryMemoryDescriptor (const TResultSetBufferDescriptor &thrift_query_memory_descriptor)
 
bool operator== (const QueryMemoryDescriptor &other) const
 
std::unique_ptr< QueryExecutionContext > getQueryExecutionContext (const RelAlgExecutionUnit &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const shared::TableKey &outer_table_key, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner >, const bool output_columnar, const bool sort_on_gpu, const size_t thread_idx, RenderInfo *) const
 
bool countDistinctDescriptorsLogicallyEmpty () const
 
const Executor * getExecutor () const
 
QueryDescriptionType getQueryDescriptionType () const
 
void setQueryDescriptionType (const QueryDescriptionType val)
 
bool isSingleColumnGroupByWithPerfectHash () const
 
bool hasKeylessHash () const
 
void setHasKeylessHash (const bool val)
 
bool hasInterleavedBinsOnGpu () const
 
void setHasInterleavedBinsOnGpu (const bool val)
 
int32_t getTargetIdxForKey () const
 
void setTargetIdxForKey (const int32_t val)
 
int8_t groupColWidth (const size_t key_idx) const
 
size_t getPrependedGroupColOffInBytes (const size_t group_idx) const
 
size_t getPrependedGroupBufferSizeInBytes () const
 
const auto groupColWidthsBegin () const
 
const auto groupColWidthsEnd () const
 
void clearGroupColWidths ()
 
bool isGroupBy () const
 
void setGroupColCompactWidth (const int8_t val)
 
size_t getColCount () const
 
size_t getSlotCount () const
 
const int8_t getPaddedSlotWidthBytes (const size_t slot_idx) const
 
const int8_t getLogicalSlotWidthBytes (const size_t slot_idx) const
 
void setPaddedSlotWidthBytes (const size_t slot_idx, const int8_t bytes)
 
const int8_t getSlotIndexForSingleSlotCol (const size_t col_idx) const
 
size_t getPaddedColWidthForRange (const size_t offset, const size_t range) const
 
void useConsistentSlotWidthSize (const int8_t slot_width_size)
 
size_t getRowWidth () const
 
int8_t updateActualMinByteWidth (const int8_t actual_min_byte_width) const
 
void addColSlotInfo (const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)
 
void addColSlotInfoFlatBuffer (const int64_t flatbuffer_size)
 
int64_t getFlatBufferSize (const size_t slot_idx) const
 
bool checkSlotUsesFlatBufferFormat (const size_t slot_idx) const
 
int64_t getPaddedSlotBufferSize (const size_t slot_idx) const
 
void clearSlotInfo ()
 
void alignPaddedSlots ()
 
int64_t getTargetGroupbyIndex (const size_t target_idx) const
 
void setAllTargetGroupbyIndices (std::vector< int64_t > group_by_indices)
 
size_t targetGroupbyIndicesSize () const
 
size_t targetGroupbyNegativeIndicesSize () const
 
void clearTargetGroupbyIndices ()
 
size_t getEntryCount () const
 
void setEntryCount (const size_t val)
 
int64_t getMinVal () const
 
int64_t getMaxVal () const
 
int64_t getBucket () const
 
bool hasNulls () const
 
const ApproxQuantileDescriptors & getApproxQuantileDescriptors () const
 
const CountDistinctDescriptor & getCountDistinctDescriptor (const size_t idx) const
 
size_t getCountDistinctDescriptorsSize () const
 
bool sortOnGpu () const
 
bool canOutputColumnar () const
 
bool didOutputColumnar () const
 
void setOutputColumnar (const bool val)
 
bool useStreamingTopN () const
 
bool isLogicalSizedColumnsAllowed () const
 
bool mustUseBaselineSort () const
 
bool threadsCanReuseGroupByBuffers () const
 
void setThreadsCanReuseGroupByBuffers (const bool val)
 
bool forceFourByteFloat () const
 
void setForceFourByteFloat (const bool val)
 
size_t getGroupbyColCount () const
 
size_t getKeyCount () const
 
size_t getBufferColSlotCount () const
 
size_t getBufferSizeBytes (const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
 
size_t getBufferSizeBytes (const ExecutorDeviceType device_type) const
 
size_t getBufferSizeBytes (const ExecutorDeviceType device_type, const size_t override_entry_count) const
 
const ColSlotContext & getColSlotContext () const
 
bool usesGetGroupValueFast () const
 
bool blocksShareMemory () const
 
bool threadsShareMemory () const
 
bool lazyInitGroups (const ExecutorDeviceType) const
 
bool interleavedBins (const ExecutorDeviceType) const
 
size_t getColOffInBytes (const size_t col_idx) const
 
size_t getColOffInBytesInNextBin (const size_t col_idx) const
 
size_t getNextColOffInBytes (const int8_t *col_ptr, const size_t bin, const size_t col_idx) const
 
size_t getNextColOffInBytesRowOnly (const int8_t *col_ptr, const size_t col_idx) const
 
size_t getColOnlyOffInBytes (const size_t col_idx) const
 
size_t getRowSize () const
 
size_t getColsSize () const
 
size_t getWarpCount () const
 
size_t getCompactByteWidth () const
 
size_t getEffectiveKeyWidth () const
 
bool isWarpSyncRequired (const ExecutorDeviceType) const
 
std::string queryDescTypeToString () const
 
std::string toString () const
 
std::string reductionKey () const
 
bool hasVarlenOutput () const
 
std::optional< size_t > varlenOutputBufferElemSize () const
 
size_t varlenOutputRowSizeToSlot (const size_t slot_idx) const
 
bool slotIsVarlenOutput (const size_t slot_idx) const
 
size_t getAvailableCpuThreads () const
 
void setAvailableCpuThreads (size_t num_available_threads) const
 
std::optional< size_t > getMaxPerDeviceCardinality (const RelAlgExecutionUnit &ra_exe_unit) const
 
bool canUsePerDeviceCardinality (const RelAlgExecutionUnit &ra_exe_unit) const
 

Static Public Member Functions

static TResultSetBufferDescriptor toThrift (const QueryMemoryDescriptor &)
 
static std::unique_ptr< QueryMemoryDescriptor > init (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const ApproxQuantileDescriptors &, const CountDistinctDescriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint, const bool threads_can_reuse_group_by_buffers)
 
static bool many_entries (const int64_t max_val, const int64_t min_val, const int64_t bucket)
 
static bool countDescriptorsLogicallyEmpty (const CountDistinctDescriptors &count_distinct_descriptors)
 
static int8_t pick_target_compact_width (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)
 

Protected Member Functions

void resetGroupColWidths (const std::vector< int8_t > &new_group_col_widths)
 

Private Member Functions

size_t getTotalBytesOfColumnarBuffers () const
 
size_t getTotalBytesOfColumnarBuffers (const size_t num_entries_per_column) const
 
size_t getTotalBytesOfColumnarProjections (const size_t projection_count) const
 

Private Attributes

const Executor * executor_
 
bool allow_multifrag_
 
QueryDescriptionType query_desc_type_
 
bool keyless_hash_
 
bool interleaved_bins_on_gpu_
 
int32_t idx_target_as_key_
 
std::vector< int8_t > group_col_widths_
 
int8_t group_col_compact_width_
 
std::vector< int64_t > target_groupby_indices_
 
size_t entry_count_
 
int64_t min_val_
 
int64_t max_val_
 
int64_t bucket_
 
bool has_nulls_
 
ApproxQuantileDescriptors approx_quantile_descriptors_
 
CountDistinctDescriptors count_distinct_descriptors_
 
bool sort_on_gpu_
 
bool output_columnar_
 
bool render_output_
 
bool must_use_baseline_sort_
 
bool use_streaming_top_n_
 
bool threads_can_reuse_group_by_buffers_
 
bool force_4byte_float_
 
ColSlotContext col_slot_context_
 
size_t num_available_threads_ {1}
 

Friends

class ResultSet
 
class QueryExecutionContext
 

Detailed Description

Definition at line 68 of file QueryMemoryDescriptor.h.

Constructor & Destructor Documentation

QueryMemoryDescriptor::QueryMemoryDescriptor ( )

Definition at line 554 of file QueryMemoryDescriptor.cpp.

555  : executor_(nullptr)
556  , allow_multifrag_(false)
558  , keyless_hash_(false)
559  , interleaved_bins_on_gpu_(false)
560  , idx_target_as_key_(0)
562  , entry_count_(0)
563  , min_val_(0)
564  , max_val_(0)
565  , bucket_(0)
566  , has_nulls_(false)
567  , sort_on_gpu_(false)
568  , output_columnar_(false)
569  , render_output_(false)
570  , must_use_baseline_sort_(false)
571  , use_streaming_top_n_(false)
573  , force_4byte_float_(false) {}
Projection
Definition: enums.h:58
QueryDescriptionType query_desc_type_
QueryMemoryDescriptor::QueryMemoryDescriptor ( const Executor *  executor,
const RelAlgExecutionUnit &  ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const bool  allow_multifrag,
const bool  keyless_hash,
const bool  interleaved_bins_on_gpu,
const int32_t  idx_target_as_key,
const ColRangeInfo &  col_range_info,
const ColSlotContext &  col_slot_context,
const std::vector< int8_t > &  group_col_widths,
const int8_t  group_col_compact_width,
const std::vector< int64_t > &  target_groupby_indices,
const size_t  entry_count,
const ApproxQuantileDescriptors &  approx_quantile_descriptors,
const CountDistinctDescriptors  count_distinct_descriptors,
const bool  sort_on_gpu_hint,
const bool  output_columnar,
const bool  render_output,
const bool  must_use_baseline_sort,
const bool  use_streaming_top_n,
const bool  threads_can_reuse_group_by_buffers 
)

Definition at line 453 of file QueryMemoryDescriptor.cpp.

References canOutputColumnar(), CHECK, col_slot_context_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, streaming_top_n::get_heap_size(), getEntryCount(), getRowSize(), heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, isLogicalSizedColumnsAllowed(), keyless_hash_, heavyai::NonGroupedAggregate, output_columnar_, heavyai::Projection, query_desc_type_, ColSlotContext::setAllSlotsPaddedSizeToLogicalSize(), ColSlotContext::setAllUnsetSlotsPaddedSize(), sort_on_gpu_, heavyai::TableFunction, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::use_bump_allocator, use_streaming_top_n_, and ColSlotContext::validate().

475  : executor_(executor)
476  , allow_multifrag_(allow_multifrag)
477  , query_desc_type_(col_range_info.hash_type_)
478  , keyless_hash_(keyless_hash)
479  , interleaved_bins_on_gpu_(interleaved_bins_on_gpu)
480  , idx_target_as_key_(idx_target_as_key)
481  , group_col_widths_(group_col_widths)
482  , group_col_compact_width_(group_col_compact_width)
483  , target_groupby_indices_(target_groupby_indices)
484  , entry_count_(entry_count)
485  , min_val_(col_range_info.min)
486  , max_val_(col_range_info.max)
487  , bucket_(col_range_info.bucket)
488  , has_nulls_(col_range_info.has_nulls)
489  , approx_quantile_descriptors_(approx_quantile_descriptors)
490  , count_distinct_descriptors_(count_distinct_descriptors)
491  , output_columnar_(false)
492  , render_output_(render_output)
493  , must_use_baseline_sort_(must_use_baseline_sort)
495  , threads_can_reuse_group_by_buffers_(threads_can_reuse_group_by_buffers)
496  , force_4byte_float_(false)
497  , col_slot_context_(col_slot_context)
502 
503  sort_on_gpu_ = sort_on_gpu_hint && canOutputColumnar() && !keyless_hash_;
504  if (sort_on_gpu_) {
505  CHECK(!ra_exe_unit.use_bump_allocator);
506  output_columnar_ = true;
507  } else {
508  switch (query_desc_type_) {
510  output_columnar_ = output_columnar_hint;
511  break;
513  output_columnar_ = output_columnar_hint &&
516  !any_of<kAPPROX_QUANTILE, kMODE>(ra_exe_unit.target_exprs);
517  break;
519  output_columnar_ = output_columnar_hint;
520  break;
522  output_columnar_ = output_columnar_hint &&
525  !any_of<kAPPROX_QUANTILE, kMODE>(ra_exe_unit.target_exprs);
526  break;
527  default:
528  output_columnar_ = false;
529  break;
530  }
531  }
532 
534  // TODO(adb): Ensure fixed size buffer allocations are correct with all logical column
535  // sizes
536  CHECK(!ra_exe_unit.use_bump_allocator);
539  }
540 
541 #ifdef HAVE_CUDA
542  // Check Streaming Top N heap usage, bail if > max slab size, CUDA ONLY
543  if (use_streaming_top_n_ && executor->getDataMgr()->gpusPresent()) {
544  const auto thread_count = executor->blockSize() * executor->gridSize();
545  const auto total_buff_size =
547  if (total_buff_size > executor_->maxGpuSlabSize()) {
548  throw StreamingTopNOOM(total_buff_size);
549  }
550  }
551 #endif
552 }
GroupByPerfectHash
Definition: enums.h:58
std::vector< Analyzer::Expr * > target_exprs
NonGroupedAggregate
Definition: enums.h:58
bool isLogicalSizedColumnsAllowed() const
QueryDescriptionType hash_type_
Projection
Definition: enums.h:58
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
TableFunction
Definition: enums.h:58
std::vector< int64_t > target_groupby_indices_
CountDistinctDescriptors count_distinct_descriptors_
void validate() const
QueryDescriptionType query_desc_type_
GroupByBaselineHash
Definition: enums.h:58
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
void setAllSlotsPaddedSizeToLogicalSize()
#define CHECK(condition)
Definition: Logger.h:291
std::vector< int8_t > group_col_widths_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
void setAllUnsetSlotsPaddedSize(const int8_t padded_size)
int cpu_threads()
Definition: thread_count.h:25
ApproxQuantileDescriptors approx_quantile_descriptors_

+ Here is the call graph for this function:

QueryMemoryDescriptor::QueryMemoryDescriptor ( const Executor *  executor,
const size_t  entry_count,
const QueryDescriptionType  query_desc_type 
)

Definition at line 575 of file QueryMemoryDescriptor.cpp.

References output_columnar_, and heavyai::TableFunction.

578  : executor_(executor)
579  , allow_multifrag_(false)
580  , query_desc_type_(query_desc_type)
581  , keyless_hash_(false)
582  , interleaved_bins_on_gpu_(false)
583  , idx_target_as_key_(0)
585  , entry_count_(entry_count)
586  , min_val_(0)
587  , max_val_(0)
588  , bucket_(0)
589  , has_nulls_(false)
590  , sort_on_gpu_(false)
591  , output_columnar_(false)
592  , render_output_(false)
593  , must_use_baseline_sort_(false)
594  , use_streaming_top_n_(false)
596  , force_4byte_float_(false)
598  if (query_desc_type == QueryDescriptionType::TableFunction) {
599  // Table functions output columns are always columnar
600  output_columnar_ = true;
601  }
602 }
TableFunction
Definition: enums.h:58
QueryDescriptionType query_desc_type_
int cpu_threads()
Definition: thread_count.h:25
QueryMemoryDescriptor::QueryMemoryDescriptor ( const QueryDescriptionType  query_desc_type,
const int64_t  min_val,
const int64_t  max_val,
const bool  has_nulls,
const std::vector< int8_t > &  group_col_widths 
)

Definition at line 604 of file QueryMemoryDescriptor.cpp.

609  : executor_(nullptr)
610  , allow_multifrag_(false)
611  , query_desc_type_(query_desc_type)
612  , keyless_hash_(false)
613  , interleaved_bins_on_gpu_(false)
614  , idx_target_as_key_(0)
615  , group_col_widths_(group_col_widths)
617  , entry_count_(0)
618  , min_val_(min_val)
619  , max_val_(max_val)
620  , bucket_(0)
621  , has_nulls_(false)
622  , sort_on_gpu_(false)
623  , output_columnar_(false)
624  , render_output_(false)
625  , must_use_baseline_sort_(false)
626  , use_streaming_top_n_(false)
628  , force_4byte_float_(false)
QueryDescriptionType query_desc_type_
std::vector< int8_t > group_col_widths_
int cpu_threads()
Definition: thread_count.h:25
QueryMemoryDescriptor::QueryMemoryDescriptor ( const TResultSetBufferDescriptor &  thrift_query_memory_descriptor)

Member Function Documentation

void QueryMemoryDescriptor::addColSlotInfo ( const std::vector< std::tuple< int8_t, int8_t >> &  slots_for_col)

Definition at line 1224 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::addColumn(), and col_slot_context_.

Referenced by TableFunctionManager::allocate_output_buffers(), ResultSetLogicalValuesBuilder::create(), and TableFunctionExecutionContext::launchGpuCode().

1225  {
1226  col_slot_context_.addColumn(slots_for_col);
1227 }
void addColumn(const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::addColSlotInfoFlatBuffer ( const int64_t  flatbuffer_size)

Definition at line 1229 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::addColumnFlatBuffer(), and col_slot_context_.

Referenced by TableFunctionManager::allocate_output_buffers().

1229  {
1230  col_slot_context_.addColumnFlatBuffer(flatbuffer_size);
1231 }
void addColumnFlatBuffer(const int64_t flatbuffer_size)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::alignPaddedSlots ( )

Definition at line 1237 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::alignPaddedSlots(), col_slot_context_, and sortOnGpu().

1237  {
1239 }
void alignPaddedSlots(const bool sort_on_gpu)

+ Here is the call graph for this function:

bool QueryMemoryDescriptor::blocksShareMemory ( ) const

Definition at line 1144 of file QueryMemoryDescriptor.cpp.

References bucket_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, g_cluster, getGroupbyColCount(), heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, many_entries(), max_val_, min_val_, heavyai::Projection, query_desc_type_, render_output_, and heavyai::TableFunction.

Referenced by canOutputColumnar(), ResultSetReductionJIT::codegen(), QueryMemoryInitializer::computeNumberOfBuffers(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), and toString().

1144  {
1145  if (g_cluster) {
1146  return true;
1147  }
1149  return true;
1150  }
1151  if (executor_->isCPUOnly() || render_output_ ||
1156  getGroupbyColCount() > 1)) {
1157  return true;
1158  }
1161 }
GroupByPerfectHash
Definition: enums.h:58
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
Projection
Definition: enums.h:58
TableFunction
Definition: enums.h:58
size_t getGroupbyColCount() const
CountDistinctDescriptors count_distinct_descriptors_
QueryDescriptionType query_desc_type_
GroupByBaselineHash
Definition: enums.h:58
bool g_cluster
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::canOutputColumnar ( ) const

Definition at line 1241 of file QueryMemoryDescriptor.cpp.

References blocksShareMemory(), count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, interleavedBins(), threadsShareMemory(), and usesGetGroupValueFast().

Referenced by QueryMemoryDescriptor().

1241  {
1245 }
CountDistinctDescriptors count_distinct_descriptors_
bool interleavedBins(const ExecutorDeviceType) const
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::canUsePerDeviceCardinality ( const RelAlgExecutionUnit &  ra_exe_unit) const

Definition at line 1383 of file QueryMemoryDescriptor.cpp.

References anonymous_namespace{QueryMemoryDescriptor.cpp}::any_of(), RelAlgExecutionUnit::join_quals, LEFT, heavyai::Projection, query_desc_type_, and RelAlgExecutionUnit::target_exprs_union.

1384  {
1385  // union-query needs to consider the "SUM" of each subquery's result
1387  !ra_exe_unit.target_exprs_union.empty()) {
1388  return false;
1389  }
1390  auto is_left_join = [](auto& join_qual) { return join_qual.type == JoinType::LEFT; };
1391  auto& join_quals = ra_exe_unit.join_quals;
1392  return !std::any_of(join_quals.begin(), join_quals.end(), is_left_join);
1393 }
Projection
Definition: enums.h:58
std::vector< Analyzer::Expr * > target_exprs_union
const JoinQualsPerNestingLevel join_quals
QueryDescriptionType query_desc_type_
bool any_of(std::vector< Analyzer::Expr * > const &target_exprs)

+ Here is the call graph for this function:

bool QueryMemoryDescriptor::checkSlotUsesFlatBufferFormat ( const size_t  slot_idx) const
inline

Definition at line 234 of file QueryMemoryDescriptor.h.

References ColSlotContext::checkSlotUsesFlatBufferFormat(), and col_slot_context_.

Referenced by ResultSet::checkSlotUsesFlatBufferFormat(), getPaddedSlotBufferSize(), and target_exprs_to_infos().

234  {
236  }
bool checkSlotUsesFlatBufferFormat(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::clearGroupColWidths ( )
inline

Definition at line 198 of file QueryMemoryDescriptor.h.

References group_col_widths_.

198 { group_col_widths_.clear(); }
std::vector< int8_t > group_col_widths_
void QueryMemoryDescriptor::clearSlotInfo ( )

Definition at line 1233 of file QueryMemoryDescriptor.cpp.

References ColSlotContext::clear(), and col_slot_context_.

1233  {
1235 }

+ Here is the call graph for this function:

void QueryMemoryDescriptor::clearTargetGroupbyIndices ( )
inline

Definition at line 259 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

259 { target_groupby_indices_.clear(); }
std::vector< int64_t > target_groupby_indices_
static bool QueryMemoryDescriptor::countDescriptorsLogicallyEmpty ( const CountDistinctDescriptors &  count_distinct_descriptors)
inline static

Definition at line 153 of file QueryMemoryDescriptor.h.

References Invalid.

Referenced by blocksShareMemory(), canOutputColumnar(), countDistinctDescriptorsLogicallyEmpty(), lazyInitGroups(), and QueryMemoryDescriptor().

154  {
155  return std::all_of(count_distinct_descriptors.begin(),
156  count_distinct_descriptors.end(),
157  [](const CountDistinctDescriptor& desc) {
158  return desc.impl_type_ == CountDistinctImplType::Invalid;
159  });
160  }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty ( ) const
inline

Definition at line 162 of file QueryMemoryDescriptor.h.

References count_distinct_descriptors_, and countDescriptorsLogicallyEmpty().

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem(), and anonymous_namespace{QueryMemoryInitializer.cpp}::collect_target_expr_metadata().

162  {
164  }
CountDistinctDescriptors count_distinct_descriptors_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::didOutputColumnar ( ) const
inline

Definition at line 285 of file QueryMemoryDescriptor.h.

References output_columnar_.

Referenced by ResultSetStorage::binSearchRowCount(), TargetExprCodegen::codegen(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), GroupByAndAggregate::codegenWindowRowPointer(), copy_projection_buffer_from_gpu_columnar(), ResultSetStorage::copyKeyColWise(), ResultSet::createComparator(), ResultSet::didOutputColumnar(), ResultSet::eachCellInColumn(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), ResultSet::fixupQueryMemoryDescriptor(), get_cols_ptr(), ResultSet::getTargetValueFromBufferColwise(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), ResultSetStorage::initializeBaselineValueSlots(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSet::makeGeoTargetValue(), ResultSetStorage::moveOneEntryToBuffer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::forceFourByteFloat ( ) const
inline

Definition at line 304 of file QueryMemoryDescriptor.h.

References force_4byte_float_.

Referenced by ResultSet::makeTargetValue().

+ Here is the caller graph for this function:

const ApproxQuantileDescriptors& QueryMemoryDescriptor::getApproxQuantileDescriptors ( ) const
inline

Definition at line 270 of file QueryMemoryDescriptor.h.

References approx_quantile_descriptors_.

Referenced by QueryMemoryInitializer::allocateTDigestsBuffer(), QueryMemoryInitializer::initColumnsPerRow(), and QueryMemoryInitializer::QueryMemoryInitializer().

270  {
272  }
ApproxQuantileDescriptors approx_quantile_descriptors_

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getAvailableCpuThreads ( ) const
inline

Definition at line 372 of file QueryMemoryDescriptor.h.

References num_available_threads_.

Referenced by QueryMemoryInitializer::initRowGroups().

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getBucket ( ) const
inline

Definition at line 266 of file QueryMemoryDescriptor.h.

References bucket_.

Referenced by GroupByAndAggregate::codegenGroupBy(), and GroupByAndAggregate::codegenSingleColumnPerfectHash().

266 { return bucket_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferColSlotCount ( ) const

Definition at line 1124 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotCount(), and target_groupby_indices_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::advance_col_buff_to_slot(), QueryMemoryInitializer::copyFromTableFunctionGpuBuffers(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), and QueryMemoryInitializer::setupTableFunctionGpuBuffers().

1124  {
1125  size_t total_slot_count = col_slot_context_.getSlotCount();
1126 
1127  if (target_groupby_indices_.empty()) {
1128  return total_slot_count;
1129  }
1130  return total_slot_count - std::count_if(target_groupby_indices_.begin(),
1132  [](const int64_t i) { return i >= 0; });
1133 }
std::vector< int64_t > target_groupby_indices_
size_t getSlotCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const RelAlgExecutionUnit &  ra_exe_unit,
const unsigned  thread_count,
const ExecutorDeviceType  device_type 
) const

Definition at line 1047 of file QueryMemoryDescriptor.cpp.

References entry_count_, streaming_top_n::get_heap_size(), getRowSize(), SortInfo::limit, anonymous_namespace{Utm.h}::n, SortInfo::offset, RelAlgExecutionUnit::sort_info, and use_streaming_top_n_.

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getBufferSizeBytes(), Executor::launchKernelsViaResourceMgr(), and QueryMemoryInitializer::QueryMemoryInitializer().

1050  {
1051  if (use_streaming_top_n_) {
1052  const size_t n =
1053  ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit.value_or(0);
1054  return streaming_top_n::get_heap_size(getRowSize(), n, thread_count);
1055  }
1056  return getBufferSizeBytes(device_type, entry_count_);
1057 }
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
std::optional< size_t > limit
size_t get_heap_size(const size_t row_size, const size_t n, const size_t thread_count)
constexpr double n
Definition: Utm.h:38

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType  device_type) const

Definition at line 1100 of file QueryMemoryDescriptor.cpp.

References entry_count_, and getBufferSizeBytes().

1101  {
1102  return getBufferSizeBytes(device_type, entry_count_);
1103 }
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getBufferSizeBytes ( const ExecutorDeviceType  device_type,
const size_t  entry_count 
) const

Returns the total amount of output buffer memory required on each device (CPU/GPU).

Columnar: for a projection, it returns the index buffer plus the columnar buffer (all non-lazy columns); for a table function, only the columnar buffer; for a group by, the amount required for each group column (assumes 64-bit per group) plus the columnar buffer (all involved agg columns).

Row-wise: returns the required memory per row multiplied by the number of entries.

Definition at line 1071 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK_GE, executor_, getColsSize(), getRowSize(), getTotalBytesOfColumnarBuffers(), group_col_widths_, interleavedBins(), keyless_hash_, output_columnar_, heavyai::Projection, query_desc_type_, and heavyai::TableFunction.

1072  {
1073  if (keyless_hash_ && !output_columnar_) {
1074  CHECK_GE(group_col_widths_.size(), size_t(1));
1075  auto row_bytes = align_to_int64(getColsSize());
1076  return (interleavedBins(device_type) ? executor_->warpSize() : 1) * entry_count *
1077  row_bytes;
1078  }
1079  constexpr size_t row_index_width = sizeof(int64_t);
1080  size_t total_bytes{0};
1081  if (output_columnar_) {
1082  switch (query_desc_type_) {
1084  total_bytes = row_index_width * entry_count + getTotalBytesOfColumnarBuffers();
1085  break;
1087  total_bytes = getTotalBytesOfColumnarBuffers();
1088  break;
1089  default:
1090  total_bytes = sizeof(int64_t) * group_col_widths_.size() * entry_count +
1092  break;
1093  }
1094  } else {
1095  total_bytes = getRowSize() * entry_count;
1096  }
1097  return total_bytes;
1098 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
Projection
Definition: enums.h:58
TableFunction
Definition: enums.h:58
size_t getTotalBytesOfColumnarBuffers() const
QueryDescriptionType query_desc_type_
bool interleavedBins(const ExecutorDeviceType) const
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getColCount ( ) const

Definition at line 1181 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColCount().

1181  {
1182  return col_slot_context_.getColCount();
1183 }
size_t getColCount() const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getColOffInBytes ( const size_t  col_idx) const

Definition at line 905 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, CHECK_EQ, CHECK_GT, entry_count_, getColOnlyOffInBytes(), getEffectiveKeyWidth(), getFlatBufferSize(), getPaddedSlotWidthBytes(), getPrependedGroupBufferSizeInBytes(), getWarpCount(), group_col_widths_, heavyai::GroupByPerfectHash, keyless_hash_, output_columnar_, query_desc_type_, and heavyai::TableFunction.

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenOutputSlot(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), get_cols_ptr(), QueryExecutionContext::groupBufferToDeinterleavedResults(), QueryMemoryInitializer::initRowGroups(), inplace_sort_gpu(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

905  {
906  const auto warp_count = getWarpCount();
907  if (output_columnar_) {
908  CHECK_EQ(size_t(1), warp_count);
909  size_t offset{0};
910  if (!keyless_hash_) {
912  }
914  for (size_t index = 0; index < col_idx; ++index) {
915  int8_t column_width = getPaddedSlotWidthBytes(index);
916  if (column_width > 0) {
917  offset += align_to_int64(column_width * entry_count_);
918  } else {
919  int64_t flatbuffer_size = getFlatBufferSize(index);
920  CHECK_GT(flatbuffer_size, 0);
921  offset += align_to_int64(flatbuffer_size);
922  }
923  }
924  } else {
925  for (size_t index = 0; index < col_idx; ++index) {
927  }
928  }
929  return offset;
930  }
931 
932  size_t offset{0};
933  if (keyless_hash_) {
934  // ignore, there's no group column in the output buffer
936  } else {
937  offset += group_col_widths_.size() * getEffectiveKeyWidth();
938  offset = align_to_int64(offset);
939  }
940  offset += getColOnlyOffInBytes(col_idx);
941  return offset;
942 }
GroupByPerfectHash
Definition: enums.h:58
#define CHECK_EQ(x, y)
Definition: Logger.h:301
size_t getEffectiveKeyWidth() const
#define CHECK_GT(x, y)
Definition: Logger.h:305
size_t getColOnlyOffInBytes(const size_t col_idx) const
TableFunction
Definition: enums.h:58
size_t getPrependedGroupBufferSizeInBytes() const
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
QueryDescriptionType query_desc_type_
#define CHECK(condition)
Definition: Logger.h:291
std::vector< int8_t > group_col_widths_
int64_t getFlatBufferSize(const size_t slot_idx) const
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColOffInBytesInNextBin ( const size_t  col_idx) const

Definition at line 985 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, getPaddedSlotWidthBytes(), getRowSize(), getWarpCount(), group_col_widths_, and output_columnar_.

Referenced by QueryExecutionContext::groupBufferToDeinterleavedResults().

985  {
986  auto warp_count = getWarpCount();
987  if (output_columnar_) {
988  CHECK_EQ(size_t(1), group_col_widths_.size());
989  CHECK_EQ(size_t(1), warp_count);
990  return getPaddedSlotWidthBytes(col_idx);
991  }
992 
993  return warp_count * getRowSize();
994 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
std::vector< int8_t > group_col_widths_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColOnlyOffInBytes ( const size_t  col_idx) const

Definition at line 892 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getColOnlyOffInBytes().

Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), TargetExprCodegen::codegenAggregate(), getColOffInBytes(), and ResultSetStorage::reduceSingleRow().

892  {
893  return col_slot_context_.getColOnlyOffInBytes(col_idx);
894 }
size_t getColOnlyOffInBytes(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const ColSlotContext& QueryMemoryDescriptor::getColSlotContext ( ) const
inline

Definition at line 319 of file QueryMemoryDescriptor.h.

References col_slot_context_.

Referenced by QueryMemoryInitializer::copyFromTableFunctionGpuBuffers(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), and QueryMemoryInitializer::setupTableFunctionGpuBuffers().

319 { return col_slot_context_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getColsSize ( ) const

Definition at line 831 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsAlignedPaddedSize().

Referenced by QueryExecutionContext::copyInitAggValsToDevice(), getBufferSizeBytes(), getRowSize(), QueryExecutionContext::launchCpuCode(), and QueryExecutionContext::sizeofInitAggVals().

831  {
833 }
size_t getAllSlotsAlignedPaddedSize() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getCompactByteWidth ( ) const

Definition at line 853 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getCompactByteWidth().

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), and init_agg_val_vec().

853  {
855 }
size_t getCompactByteWidth() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getCountDistinctDescriptorsSize ( ) const
inline
size_t QueryMemoryDescriptor::getEntryCount ( ) const
inline

Definition at line 261 of file QueryMemoryDescriptor.h.

References entry_count_.

Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem(), QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), ResultSetStorage::binSearchRowCount(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GpuSharedMemCodeBuilder::codegenReduction(), GroupByAndAggregate::codegenWindowRowPointer(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), QueryMemoryInitializer::compactProjectionBuffersCpu(), QueryMemoryInitializer::compactProjectionBuffersGpu(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), Executor::createKernels(), ResultSet::entryCount(), Executor::executePlanWithGroupBy(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), anonymous_namespace{ResultSetReduction.cpp}::get_matching_group_value_reduction(), ResultSetStorage::getEntryCount(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSet::getTargetValueFromBufferColwise(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initGroupByBuffer(), ResultSetStorage::initializeBaselineValueSlots(), ResultSetStorage::initializeColWise(), ResultSetStorage::initializeRowWise(), inplace_sort_gpu(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), ResultSetStorage::moveOneEntryToBuffer(), QueryMemoryDescriptor(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), and ResultSetStorage::rewriteAggregateBufferOffsets().

261 { return entry_count_; }

+ Here is the caller graph for this function:

const Executor* QueryMemoryDescriptor::getExecutor ( ) const
inline

Definition at line 171 of file QueryMemoryDescriptor.h.

References executor_.

Referenced by anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), ResultSetReductionJIT::codegen(), anonymous_namespace{Execute.cpp}::fill_entries_for_empty_input(), ResultSet::getExecutor(), ResultSet::getVarlenOrderEntry(), ResultSet::makeGeoTargetValue(), and ResultSet::makeVarlenTargetValue().

171 { return executor_; }

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getFlatBufferSize ( const size_t  slot_idx) const
inline

Definition at line 231 of file QueryMemoryDescriptor.h.

References col_slot_context_, and ColSlotContext::getFlatBufferSize().

Referenced by getColOffInBytes(), and getPaddedSlotBufferSize().

231  {
232  return col_slot_context_.getFlatBufferSize(slot_idx);
233  }
int64_t getFlatBufferSize(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getKeyCount ( ) const
inline

Definition at line 309 of file QueryMemoryDescriptor.h.

References getGroupbyColCount(), and keyless_hash_.

Referenced by anonymous_namespace{Execute.cpp}::permute_storage_columnar().

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getLogicalSlotWidthBytes ( const size_t  slot_idx) const

Definition at line 1198 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::logical_size.

Referenced by QueryMemoryInitializer::allocateTDigestsBuffer(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_count_distinct_expr_metadata(), TargetExprCodegen::codegenAggregate(), ResultSet::getTargetValueFromBufferRowwise(), and QueryMemoryInitializer::initializeQuantileParams().

1199  {
1200  return col_slot_context_.getSlotInfo(slot_idx).logical_size;
1201 }
int8_t logical_size
const SlotSize & getSlotInfo(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::optional< size_t > QueryMemoryDescriptor::getMaxPerDeviceCardinality ( const RelAlgExecutionUnit ra_exe_unit) const

Definition at line 1372 of file QueryMemoryDescriptor.cpp.

References anonymous_namespace{Utm.h}::a, and RelAlgExecutionUnit::per_device_cardinality.

1373  {
1374  auto& pdc = ra_exe_unit.per_device_cardinality;
1375  auto by_cardinality = [](auto& a, auto& b) { return a.second < b.second; };
1376  auto itr = std::max_element(pdc.begin(), pdc.end(), by_cardinality);
1377  if (itr != pdc.end() && itr->second > 0) {
1378  return itr->second;
1379  }
1380  return std::nullopt;
1381 }
constexpr double a
Definition: Utm.h:32
std::vector< std::pair< std::vector< size_t >, size_t > > per_device_cardinality
int64_t QueryMemoryDescriptor::getMaxVal ( ) const
inline

Definition at line 265 of file QueryMemoryDescriptor.h.

References max_val_.

Referenced by GroupByAndAggregate::codegenGroupBy().

265 { return max_val_; }

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getMinVal ( ) const
inline

Definition at line 264 of file QueryMemoryDescriptor.h.

References min_val_.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

264 { return min_val_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getNextColOffInBytes ( const int8_t *  col_ptr,
const size_t  bin,
const size_t  col_idx 
) const

Definition at line 996 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, CHECK_EQ, entry_count_, getPaddedSlotWidthBytes(), getSlotCount(), getWarpCount(), group_col_widths_, and output_columnar_.

998  {
1000  size_t offset{0};
1001  auto warp_count = getWarpCount();
1002  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
1003  const auto total_slot_count = getSlotCount();
1004  if (col_idx + 1 == total_slot_count) {
1005  if (output_columnar_) {
1006  return (entry_count_ - bin) * chosen_bytes;
1007  } else {
1008  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
1009  }
1010  }
1011 
1012  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
1013  if (output_columnar_) {
1014  CHECK_EQ(size_t(1), group_col_widths_.size());
1015  CHECK_EQ(size_t(1), warp_count);
1016 
1017  offset = align_to_int64(entry_count_ * chosen_bytes);
1018 
1019  offset += bin * (next_chosen_bytes - chosen_bytes);
1020  return offset;
1021  }
1022 
1023  if (next_chosen_bytes == sizeof(int64_t)) {
1024  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
1025  } else {
1026  return chosen_bytes;
1027  }
1028 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
#define CHECK(condition)
Definition: Logger.h:291
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getNextColOffInBytesRowOnly ( const int8_t *  col_ptr,
const size_t  col_idx 
) const

Definition at line 1030 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), getPaddedSlotWidthBytes(), and getSlotCount().

Referenced by QueryMemoryInitializer::initColumnsPerRow().

1031  {
1032  const auto chosen_bytes = getPaddedSlotWidthBytes(col_idx);
1033  const auto total_slot_count = getSlotCount();
1034  if (col_idx + 1 == total_slot_count) {
1035  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
1036  }
1037 
1038  const auto next_chosen_bytes = getPaddedSlotWidthBytes(col_idx + 1);
1039 
1040  if (next_chosen_bytes == sizeof(int64_t)) {
1041  return static_cast<size_t>(align_to_int64(col_ptr + chosen_bytes) - col_ptr);
1042  } else {
1043  return chosen_bytes;
1044  }
1045 }
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPaddedColWidthForRange ( const size_t  offset,
const size_t  range 
) const
inline

Definition at line 214 of file QueryMemoryDescriptor.h.

References getPaddedSlotWidthBytes().

Referenced by result_set::get_byteoff_of_slot(), and ResultSet::makeGeoTargetValue().

214  {
215  size_t ret = 0;
216  for (size_t i = offset; i < offset + range; i++) {
217  ret += static_cast<size_t>(getPaddedSlotWidthBytes(i));
218  }
219  return ret;
220  }
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getPaddedSlotBufferSize ( const size_t  slot_idx) const

Definition at line 944 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), checkSlotUsesFlatBufferFormat(), entry_count_, getFlatBufferSize(), and getPaddedSlotWidthBytes().

Referenced by advance_to_next_columnar_target_buff().

944  {
945  if (checkSlotUsesFlatBufferFormat(slot_idx)) {
946  return align_to_int64(getFlatBufferSize(slot_idx));
947  }
948  int8_t column_width = getPaddedSlotWidthBytes(slot_idx);
949  return align_to_int64(column_width * entry_count_);
950 }
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
int64_t getFlatBufferSize(const size_t slot_idx) const
bool checkSlotUsesFlatBufferFormat(const size_t slot_idx) const
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getPaddedSlotWidthBytes ( const size_t  slot_idx) const

Definition at line 1189 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::padded_size.

Referenced by advance_target_ptr_row_wise(), TargetExprCodegen::codegen(), anonymous_namespace{GpuSharedMemoryUtils.cpp}::codegen_smem_dest_slot_ptr(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenOutputSlot(), compact_init_vals(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), ResultSet::copyColumnIntoBuffer(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), ResultSet::eachCellInColumn(), Executor::executePlanWithoutGroupBy(), result_set::get_width_for_slot(), getColOffInBytes(), getColOffInBytesInNextBin(), getNextColOffInBytes(), getNextColOffInBytesRowOnly(), getPaddedColWidthForRange(), getPaddedSlotBufferSize(), ResultSet::getPaddedSlotWidthBytes(), ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initColumnsPerRow(), inplace_sort_gpu(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSet::makeGeoTargetValue(), TargetExprCodegenBuilder::operator()(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneAggregateSlot(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().

1189  {
1190  return col_slot_context_.getSlotInfo(slot_idx).padded_size;
1191 }
const SlotSize & getSlotInfo(const size_t slot_idx) const
int8_t padded_size

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPrependedGroupBufferSizeInBytes ( ) const

Definition at line 974 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by getColOffInBytes().

974  {
976  size_t buffer_size{0};
977  for (size_t group_idx = 0; group_idx < getGroupbyColCount(); group_idx++) {
978  buffer_size += align_to_int64(
979  std::max(groupColWidth(group_idx), static_cast<int8_t>(sizeof(int64_t))) *
980  getEntryCount());
981  }
982  return buffer_size;
983 }
int8_t groupColWidth(const size_t key_idx) const
size_t getGroupbyColCount() const
#define CHECK(condition)
Definition: Logger.h:291
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getPrependedGroupColOffInBytes ( const size_t  group_idx) const

Definition at line 956 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.

Referenced by ResultSetStorage::copyKeyColWise(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

957  {
959  CHECK(group_idx < getGroupbyColCount());
960  size_t offset{0};
961  for (size_t col_idx = 0; col_idx < group_idx; col_idx++) {
962  // TODO(Saman): relax that int64_bit part immediately
963  offset += align_to_int64(
964  std::max(groupColWidth(col_idx), static_cast<int8_t>(sizeof(int64_t))) *
965  getEntryCount());
966  }
967  return offset;
968 }
int8_t groupColWidth(const size_t key_idx) const
size_t getGroupbyColCount() const
#define CHECK(condition)
Definition: Logger.h:291
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

QueryDescriptionType QueryMemoryDescriptor::getQueryDescriptionType ( ) const
inline

Definition at line 173 of file QueryMemoryDescriptor.h.

References query_desc_type_.

Referenced by ResultSetStorage::binSearchRowCount(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GpuReductionHelperJIT::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), Executor::collectAllDeviceResults(), copy_projection_buffer_from_gpu_columnar(), Executor::createKernels(), ResultSet::getQueryDescriptionType(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), isSingleColumnGroupByWithPerfectHash(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), TargetExprCodegenBuilder::operator()(), ResultSetStorage::reduce(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetReductionJIT::reduceOneEntryNoCollisionsIdx(), ExecutionKernel::run(), ExecutionKernel::runImpl(), target_exprs_to_infos(), and ResultSet::updateStorageEntryCount().

173 { return query_desc_type_; }
QueryDescriptionType query_desc_type_

+ Here is the caller graph for this function:

std::unique_ptr< QueryExecutionContext > QueryMemoryDescriptor::getQueryExecutionContext ( const RelAlgExecutionUnit ra_exe_unit,
const Executor executor,
const ExecutorDeviceType  device_type,
const ExecutorDispatchMode  dispatch_mode,
const int  device_id,
const shared::TableKey outer_table_key,
const int64_t  num_rows,
const std::vector< std::vector< const int8_t * >> &  col_buffers,
const std::vector< std::vector< uint64_t >> &  frag_offsets,
std::shared_ptr< RowSetMemoryOwner row_set_mem_owner,
const bool  output_columnar,
const bool  sort_on_gpu,
const size_t  thread_idx,
RenderInfo render_info 
) const

Definition at line 698 of file QueryMemoryDescriptor.cpp.

References DEBUG_TIMER, and QueryExecutionContext.

Referenced by ExecutionKernel::runImpl().

712  {
713  auto timer = DEBUG_TIMER(__func__);
714  if (frag_offsets.empty()) {
715  return nullptr;
716  }
717  return std::unique_ptr<QueryExecutionContext>(
718  new QueryExecutionContext(ra_exe_unit,
719  *this,
720  executor,
721  device_type,
722  dispatch_mode,
723  device_id,
724  outer_table_key,
725  num_rows,
726  col_buffers,
727  frag_offsets,
728  row_set_mem_owner,
729  output_columnar,
730  sort_on_gpu,
731  thread_idx,
732  render_info));
733 }
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
#define DEBUG_TIMER(name)
Definition: Logger.h:412

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getRowSize ( ) const

Definition at line 835 of file QueryMemoryDescriptor.cpp.

References align_to_int64(), CHECK, getColsSize(), getEffectiveKeyWidth(), group_col_widths_, heavyai::GroupByPerfectHash, keyless_hash_, output_columnar_, and query_desc_type_.

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), ResultSetLogicalValuesBuilder::build(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenWindowRowPointer(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getBufferSizeBytes(), getColOffInBytesInNextBin(), QueryMemoryInitializer::initRowGroups(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), QueryMemoryDescriptor(), QueryMemoryInitializer::QueryMemoryInitializer(), and ResultSetStorage::reduceSingleRow().

835  {
837  size_t total_bytes{0};
838  if (keyless_hash_) {
839  // ignore, there's no group column in the output buffer
841  } else {
842  total_bytes += group_col_widths_.size() * getEffectiveKeyWidth();
843  total_bytes = align_to_int64(total_bytes);
844  }
845  total_bytes += getColsSize();
846  return align_to_int64(total_bytes);
847 }
GroupByPerfectHash
Definition: enums.h:58
size_t getEffectiveKeyWidth() const
QueryDescriptionType query_desc_type_
#define CHECK(condition)
Definition: Logger.h:291
std::vector< int8_t > group_col_widths_
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getRowWidth ( ) const

Definition at line 1214 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getAllSlotsPaddedSize().

Referenced by get_row_bytes().

1214  {
1215  // Note: Actual row size may include padding (see ResultSetBufferAccessors.h)
1217 }
size_t getAllSlotsPaddedSize() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getSlotCount ( ) const

Definition at line 1185 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getSlotCount().

Referenced by QueryMemoryInitializer::allocateModeBuffer(), QueryMemoryInitializer::allocateTDigestsBuffer(), QueryMemoryInitializer::calculateCountDistinctBufferSize(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_count_distinct_expr_metadata(), compact_init_vals(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), ResultSet::copyColumnIntoBuffer(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getNextColOffInBytes(), getNextColOffInBytesRowOnly(), QueryExecutionContext::groupBufferToDeinterleavedResults(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initColumnsPerRow(), QueryMemoryInitializer::initializeModeIndexSet(), QueryMemoryInitializer::initializeQuantileParams(), inplace_sort_gpu(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), and ResultSetStorage::reduceSingleRow().

1185  {
1187 }
size_t getSlotCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const int8_t QueryMemoryDescriptor::getSlotIndexForSingleSlotCol ( const size_t  col_idx) const

Definition at line 1203 of file QueryMemoryDescriptor.cpp.

References CHECK_EQ, col_slot_context_, and ColSlotContext::getSlotsForCol().

Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), QueryMemoryInitializer::allocateModeBuffer(), QueryMemoryInitializer::allocateTDigestsBuffer(), QueryMemoryInitializer::calculateCountDistinctBufferSize(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_count_distinct_expr_metadata(), QueryMemoryInitializer::initializeModeIndexSet(), and QueryMemoryInitializer::initializeQuantileParams().

1204  {
1205  const auto& col_slots = col_slot_context_.getSlotsForCol(col_idx);
1206  CHECK_EQ(col_slots.size(), size_t(1));
1207  return col_slots.front();
1208 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const std::vector< size_t > & getSlotsForCol(const size_t col_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int64_t QueryMemoryDescriptor::getTargetGroupbyIndex ( const size_t  target_idx) const
inline

Definition at line 243 of file QueryMemoryDescriptor.h.

References CHECK_LT, and target_groupby_indices_.

Referenced by ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetReductionJIT::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetReductionJIT::reduceOneSlot(), ResultSetStorage::reduceOneSlot(), and reductionKey().

243  {
244  CHECK_LT(target_idx, target_groupby_indices_.size());
245  return target_groupby_indices_[target_idx];
246  }
std::vector< int64_t > target_groupby_indices_
#define CHECK_LT(x, y)
Definition: Logger.h:303

+ Here is the caller graph for this function:

int32_t QueryMemoryDescriptor::getTargetIdxForKey ( ) const
inline

Definition at line 186 of file QueryMemoryDescriptor.h.

References idx_target_as_key_.

Referenced by ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSetStorage::reduceSingleRow(), and reductionKey().

186 { return idx_target_as_key_; }

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( ) const
private

Returns the maximum total number of bytes (including required paddings) to store all non-lazy columns' results for columnar cases.

Definition at line 862 of file QueryMemoryDescriptor.cpp.

References CHECK, col_slot_context_, entry_count_, ColSlotContext::getTotalBytesOfColumnarBuffers(), and output_columnar_.

Referenced by getBufferSizeBytes(), and getTotalBytesOfColumnarProjections().

862  {
865 }
size_t getTotalBytesOfColumnarBuffers(const size_t entry_count) const
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarBuffers ( const size_t  num_entries_per_column) const
private

This is a helper function that returns the total number of bytes (including required paddings) to store all non-lazy columns' results for columnar cases.

Definition at line 871 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getTotalBytesOfColumnarBuffers().

872  {
873  return col_slot_context_.getTotalBytesOfColumnarBuffers(num_entries_per_column);
874 }
size_t getTotalBytesOfColumnarBuffers(const size_t entry_count) const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getTotalBytesOfColumnarProjections ( const size_t  projection_count) const
private

Returns the effective total number of bytes from columnar projections, which includes: 1) the total number of bytes used to store all non-lazy columns, and 2) the total number of bytes used to store row indices (for lazy fetches, etc.).

NOTE: this function does not return the size of the buffers dedicated to the results; it returns the memory required to fill all valid results into a new compact buffer (with no holes in it).

Definition at line 885 of file QueryMemoryDescriptor.cpp.

References getTotalBytesOfColumnarBuffers().

886  {
887  constexpr size_t row_index_width = sizeof(int64_t);
888  return getTotalBytesOfColumnarBuffers(projection_count) +
889  row_index_width * projection_count;
890 }
size_t getTotalBytesOfColumnarBuffers() const

+ Here is the call graph for this function:

size_t QueryMemoryDescriptor::getWarpCount ( ) const

Definition at line 849 of file QueryMemoryDescriptor.cpp.

References executor_, and interleaved_bins_on_gpu_.

Referenced by getColOffInBytes(), getColOffInBytesInNextBin(), and getNextColOffInBytes().

849  {
850  return (interleaved_bins_on_gpu_ ? executor_->warpSize() : 1);
851 }

+ Here is the caller graph for this function:

int8_t QueryMemoryDescriptor::groupColWidth ( const size_t  key_idx) const
inline

Definition at line 189 of file QueryMemoryDescriptor.h.

References CHECK_LT, and group_col_widths_.

Referenced by ResultSetStorage::copyKeyColWise(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().

189  {
190  CHECK_LT(key_idx, group_col_widths_.size());
191  return group_col_widths_[key_idx];
192  }
#define CHECK_LT(x, y)
Definition: Logger.h:303
std::vector< int8_t > group_col_widths_

+ Here is the caller graph for this function:

const auto QueryMemoryDescriptor::groupColWidthsBegin ( ) const
inline

Definition at line 196 of file QueryMemoryDescriptor.h.

References group_col_widths_.

196 { return group_col_widths_.begin(); }
std::vector< int8_t > group_col_widths_
const auto QueryMemoryDescriptor::groupColWidthsEnd ( ) const
inline

Definition at line 197 of file QueryMemoryDescriptor.h.

References group_col_widths_.

197 { return group_col_widths_.end(); }
std::vector< int8_t > group_col_widths_
bool QueryMemoryDescriptor::hasInterleavedBinsOnGpu ( ) const
inline

Definition at line 183 of file QueryMemoryDescriptor.h.

References interleaved_bins_on_gpu_.

bool QueryMemoryDescriptor::hasNulls ( ) const
inline

Definition at line 268 of file QueryMemoryDescriptor.h.

References has_nulls_.

Referenced by GroupByAndAggregate::codegenGroupBy().

268 { return has_nulls_; }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::hasVarlenOutput ( ) const
inline

Definition at line 358 of file QueryMemoryDescriptor.h.

References col_slot_context_, and ColSlotContext::hasVarlenOutput().

Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), GroupByAndAggregate::codegenVarlenOutputBuffer(), QueryMemoryInitializer::compactProjectionBuffersCpu(), QueryMemoryInitializer::compactProjectionBuffersGpu(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryExecutionContext::getRowSet(), query_group_by_template(), and QueryMemoryInitializer::QueryMemoryInitializer().

bool hasVarlenOutput() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptor::init ( const Executor executor,
const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const ColRangeInfo col_range_info,
const KeylessInfo keyless_info,
const bool  allow_multifrag,
const ExecutorDeviceType  device_type,
const int8_t  crt_min_byte_width,
const bool  sort_on_gpu_hint,
const size_t  shard_count,
const size_t  max_groups_buffer_entry_count,
RenderInfo render_info,
const ApproxQuantileDescriptors approx_quantile_descriptors,
const CountDistinctDescriptors  count_distinct_descriptors,
const bool  must_use_baseline_sort,
const bool  output_columnar_hint,
const bool  streaming_top_n_hint,
const bool  threads_can_reuse_group_by_buffers 
)
static

Definition at line 240 of file QueryMemoryDescriptor.cpp.

References anonymous_namespace{QueryMemoryDescriptor.cpp}::get_col_byte_widths(), RelAlgExecutionUnit::groupby_exprs, and RelAlgExecutionUnit::target_exprs.

Referenced by GroupByAndAggregate::initQueryMemoryDescriptorImpl().

258  {
259  auto group_col_widths = get_col_byte_widths(ra_exe_unit.groupby_exprs);
260  const bool is_group_by{!group_col_widths.empty()};
261 
262  auto col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, {});
263 
264  const auto min_slot_size = QueryMemoryDescriptor::pick_target_compact_width(
265  ra_exe_unit, query_infos, crt_min_byte_width);
266 
267  col_slot_context.setAllSlotsPaddedSize(min_slot_size);
268  col_slot_context.validate();
269 
270  if (!is_group_by) {
271  CHECK(!must_use_baseline_sort);
272 
273  return std::make_unique<QueryMemoryDescriptor>(
274  executor,
275  ra_exe_unit,
276  query_infos,
277  allow_multifrag,
278  false,
279  false,
280  -1,
281  ColRangeInfo{ra_exe_unit.estimator ? QueryDescriptionType::Estimator
282  : QueryDescriptionType::NonGroupedAggregate,
283  0,
284  0,
285  0,
286  false},
287  col_slot_context,
288  std::vector<int8_t>{},
289  /*group_col_compact_width=*/0,
290  std::vector<int64_t>{},
291  /*entry_count=*/1,
292  approx_quantile_descriptors,
293  count_distinct_descriptors,
294  false,
295  output_columnar_hint,
296  render_info && render_info->isInSitu(),
297  must_use_baseline_sort,
298  /*use_streaming_top_n=*/false,
299  threads_can_reuse_group_by_buffers);
300  }
301 
302  size_t entry_count = 1;
303  auto actual_col_range_info = col_range_info;
304  bool interleaved_bins_on_gpu = false;
305  bool keyless_hash = false;
306  bool streaming_top_n = false;
307  int8_t group_col_compact_width = 0;
308  int32_t idx_target_as_key = -1;
309  auto output_columnar = output_columnar_hint;
310  std::vector<int64_t> target_groupby_indices;
311 
312  switch (col_range_info.hash_type_) {
313  case QueryDescriptionType::GroupByPerfectHash: {
314  if (render_info) {
315  // TODO(croot): this can be removed now thanks to the more centralized
316  // NonInsituQueryClassifier code, but keeping it just in case
317  render_info->setNonInSitu();
318  }
319  // keyless hash: whether or not group columns are stored at the beginning of the
320  // output buffer
321  keyless_hash =
322  (!sort_on_gpu_hint ||
324  col_range_info.max, col_range_info.min, col_range_info.bucket)) &&
325  !col_range_info.bucket && !must_use_baseline_sort && keyless_info.keyless;
326 
327  // if keyless, then this target index indicates whether an entry is empty or not
328  // (acts as a key)
329  idx_target_as_key = keyless_info.target_index;
330 
331  if (group_col_widths.size() > 1) {
332  // col range info max contains the expected cardinality of the output
333  entry_count = static_cast<size_t>(actual_col_range_info.max);
334  actual_col_range_info.bucket = 0;
335  } else {
336  // single column perfect hash
337  entry_count = std::max(
338  GroupByAndAggregate::getBucketedCardinality(col_range_info), int64_t(1));
339  const size_t interleaved_max_threshold{512};
340 
341  if (must_use_baseline_sort) {
342  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
343  ra_exe_unit.target_exprs);
344  col_slot_context =
345  ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
346  }
347 
348  bool has_varlen_sample_agg = false;
349  for (const auto& target_expr : ra_exe_unit.target_exprs) {
350  if (target_expr->get_contains_agg()) {
351  const auto agg_expr = dynamic_cast<Analyzer::AggExpr*>(target_expr);
352  CHECK(agg_expr);
353  if (agg_expr->get_aggtype() == kSAMPLE &&
354  agg_expr->get_type_info().is_varlen()) {
355  has_varlen_sample_agg = true;
356  break;
357  }
358  }
359  }
360 
361  interleaved_bins_on_gpu = keyless_hash && !has_varlen_sample_agg &&
362  (entry_count <= interleaved_max_threshold) &&
363  (device_type == ExecutorDeviceType::GPU) &&
365  count_distinct_descriptors) &&
366  !output_columnar;
367  }
368  break;
369  }
370  case QueryDescriptionType::GroupByBaselineHash: {
371  if (render_info) {
372  // TODO(croot): this can be removed now thanks to the more centralized
373  // NonInsituQueryClassifier code, but keeping it just in case
374  render_info->setNonInSitu();
375  }
376  entry_count = shard_count
377  ? (max_groups_buffer_entry_count + shard_count - 1) / shard_count
378  : max_groups_buffer_entry_count;
379  target_groupby_indices = target_expr_group_by_indices(ra_exe_unit.groupby_exprs,
380  ra_exe_unit.target_exprs);
381  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
382 
383  group_col_compact_width =
384  output_columnar ? 8
385  : pick_baseline_key_width(ra_exe_unit, query_infos, executor);
386 
387  actual_col_range_info =
389  break;
390  }
391  case QueryDescriptionType::Projection: {
392  CHECK(!must_use_baseline_sort);
393 
394  if (streaming_top_n_hint && use_streaming_top_n(ra_exe_unit, output_columnar)) {
395  streaming_top_n = true;
396  entry_count =
397  ra_exe_unit.sort_info.offset + ra_exe_unit.sort_info.limit.value_or(0);
398  } else {
399  if (ra_exe_unit.use_bump_allocator) {
400  output_columnar = false;
401  entry_count = 0;
402  } else {
403  entry_count = ra_exe_unit.scan_limit
404  ? static_cast<size_t>(ra_exe_unit.scan_limit)
405  : max_groups_buffer_entry_count;
406  }
407  }
408 
409  target_groupby_indices = executor->plan_state_->allow_lazy_fetch_
410  ? target_expr_proj_indices(ra_exe_unit)
411  : std::vector<int64_t>{};
412 
413  col_slot_context = ColSlotContext(ra_exe_unit.target_exprs, target_groupby_indices);
414  break;
415  }
416  default:
417  UNREACHABLE() << "Unknown query type";
418  }
419 
420  return std::make_unique<QueryMemoryDescriptor>(executor,
421  ra_exe_unit,
422  query_infos,
423  allow_multifrag,
424  keyless_hash,
425  interleaved_bins_on_gpu,
426  idx_target_as_key,
427  actual_col_range_info,
428  col_slot_context,
429  group_col_widths,
430  group_col_compact_width,
431  target_groupby_indices,
432  entry_count,
433  approx_quantile_descriptors,
434  count_distinct_descriptors,
435  sort_on_gpu_hint,
436  output_columnar,
437  render_info && render_info->isInSitu(),
438  must_use_baseline_sort,
439  streaming_top_n,
440  threads_can_reuse_group_by_buffers);
441 }
GroupByPerfectHash
Definition: enums.h:58
std::vector< Analyzer::Expr * > target_exprs
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
static int64_t getBucketedCardinality(const ColRangeInfo &col_range_info)
std::vector< int64_t > target_expr_proj_indices(const RelAlgExecutionUnit &ra_exe_unit)
const bool keyless
NonGroupedAggregate
Definition: enums.h:58
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list)
QueryDescriptionType hash_type_
#define UNREACHABLE()
Definition: Logger.h:338
Projection
Definition: enums.h:58
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
static int8_t pick_target_compact_width(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)
const int32_t target_index
std::vector< int64_t > target_expr_group_by_indices(const std::list< std::shared_ptr< Analyzer::Expr >> &groupby_exprs, const std::vector< Analyzer::Expr * > &target_exprs)
void setNonInSitu()
Definition: RenderInfo.cpp:50
GroupByBaselineHash
Definition: enums.h:58
#define CHECK(condition)
Definition: Logger.h:291
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
int8_t pick_baseline_key_width(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const Executor *executor)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::interleavedBins ( const ExecutorDeviceType  device_type) const
bool QueryMemoryDescriptor::isGroupBy ( ) const
inline

Definition at line 200 of file QueryMemoryDescriptor.h.

References group_col_widths_.

Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), init_agg_val_vec(), QueryMemoryInitializer::initColumnsPerRow(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().

200 { return !group_col_widths_.empty(); }
std::vector< int8_t > group_col_widths_

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isLogicalSizedColumnsAllowed ( ) const

Definition at line 1116 of file QueryMemoryDescriptor.cpp.

References g_cluster, output_columnar_, heavyai::Projection, query_desc_type_, and heavyai::TableFunction.

Referenced by TargetExprCodegen::codegenAggregate(), TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions(), TargetExprCodegenBuilder::codegenSlotEmptyKey(), init_agg_val_vec(), ResultSet::makeTargetValue(), QueryMemoryDescriptor(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceOneSlotSingleValue(), and setOutputColumnar().

1116  {
1117  // In distributed mode, result sets are serialized using rowwise iterators, so we use
1118  // consistent slot widths for now
1119  return output_columnar_ && !g_cluster &&
1120  (query_desc_type_ == QueryDescriptionType::Projection ||
1121  query_desc_type_ == QueryDescriptionType::TableFunction);
1122 }
Projection
Definition: enums.h:58
TableFunction
Definition: enums.h:58
QueryDescriptionType query_desc_type_
bool g_cluster

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isSingleColumnGroupByWithPerfectHash ( ) const
inline

Definition at line 175 of file QueryMemoryDescriptor.h.

References getGroupbyColCount(), getQueryDescriptionType(), and heavyai::GroupByPerfectHash.

Referenced by GroupByAndAggregate::codegenGroupBy(), and ResultSet::getTargetValueFromBufferRowwise().

175  {
176  return getQueryDescriptionType() == QueryDescriptionType::GroupByPerfectHash &&
177  getGroupbyColCount() == 1;
178  }
GroupByPerfectHash
Definition: enums.h:58
size_t getGroupbyColCount() const
QueryDescriptionType getQueryDescriptionType() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::isWarpSyncRequired ( const ExecutorDeviceType  device_type) const

Definition at line 1173 of file QueryMemoryDescriptor.cpp.

References executor_, and GPU.

Referenced by query_group_by_template().

1174  {
1175  if (device_type == ExecutorDeviceType::GPU) {
1176  return executor_->cudaMgr()->isArchVoltaOrGreaterForAll();
1177  }
1178  return false;
1179 }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::lazyInitGroups ( const ExecutorDeviceType  device_type) const

Definition at line 1163 of file QueryMemoryDescriptor.cpp.

References count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, and render_output_.

Referenced by create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), QueryMemoryInitializer::QueryMemoryInitializer(), and toString().

1163  {
1164  return device_type == ExecutorDeviceType::GPU && !render_output_ &&
1165  countDescriptorsLogicallyEmpty(count_distinct_descriptors_);
1166 }
CountDistinctDescriptors count_distinct_descriptors_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static bool QueryMemoryDescriptor::many_entries ( const int64_t  max_val,
const int64_t  min_val,
const int64_t  bucket 
)
inlinestatic

Definition at line 147 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory().

149  {
150  return max_val - min_val > 10000 * std::max(bucket, int64_t(1));
151  }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::mustUseBaselineSort ( ) const
inline

Definition at line 292 of file QueryMemoryDescriptor.h.

References must_use_baseline_sort_.

Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::operator== ( const QueryMemoryDescriptor other) const

Definition at line 631 of file QueryMemoryDescriptor.cpp.

References bucket_, col_slot_context_, count_distinct_descriptors_, force_4byte_float_, group_col_compact_width_, group_col_widths_, has_nulls_, idx_target_as_key_, interleaved_bins_on_gpu_, keyless_hash_, max_val_, min_val_, output_columnar_, query_desc_type_, sort_on_gpu_, target_groupby_indices_, and threads_can_reuse_group_by_buffers_.

631  {
632  // Note that this method does not check ptr reference members (e.g. executor_) or
633  // entry_count_
634  if (query_desc_type_ != other.query_desc_type_) {
635  return false;
636  }
637  if (keyless_hash_ != other.keyless_hash_) {
638  return false;
639  }
641  return false;
642  }
643  if (idx_target_as_key_ != other.idx_target_as_key_) {
644  return false;
645  }
646  if (force_4byte_float_ != other.force_4byte_float_) {
647  return false;
648  }
649  if (group_col_widths_ != other.group_col_widths_) {
650  return false;
651  }
653  return false;
654  }
656  return false;
657  }
658  if (min_val_ != other.min_val_) {
659  return false;
660  }
661  if (max_val_ != other.max_val_) {
662  return false;
663  }
664  if (bucket_ != other.bucket_) {
665  return false;
666  }
667  if (has_nulls_ != other.has_nulls_) {
668  return false;
669  }
670  if (count_distinct_descriptors_.size() != other.count_distinct_descriptors_.size()) {
671  return false;
672  } else {
673  // Count distinct descriptors can legitimately differ in device only.
674  for (size_t i = 0; i < count_distinct_descriptors_.size(); ++i) {
675  auto ref_count_distinct_desc = other.count_distinct_descriptors_[i];
676  auto count_distinct_desc = count_distinct_descriptors_[i];
677  count_distinct_desc.device_type = ref_count_distinct_desc.device_type;
678  if (ref_count_distinct_desc != count_distinct_desc) {
679  return false;
680  }
681  }
682  }
683  if (sort_on_gpu_ != other.sort_on_gpu_) {
684  return false;
685  }
686  if (output_columnar_ != other.output_columnar_) {
687  return false;
688  }
689  if (col_slot_context_ != other.col_slot_context_) {
690  return false;
691  }
693  return false;
694  }
695  return true;
696 }
std::vector< int64_t > target_groupby_indices_
CountDistinctDescriptors count_distinct_descriptors_
QueryDescriptionType query_desc_type_
std::vector< int8_t > group_col_widths_
int8_t QueryMemoryDescriptor::pick_target_compact_width ( const RelAlgExecutionUnit ra_exe_unit,
const std::vector< InputTableInfo > &  query_infos,
const int8_t  crt_min_byte_width 
)
static

Definition at line 735 of file QueryMemoryDescriptor.cpp.

References CHECK, CHECK_EQ, g_bigint_count, anonymous_namespace{QueryMemoryDescriptor.cpp}::get_col_byte_widths(), Analyzer::UOper::get_operand(), Analyzer::Expr::get_type_info(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::input_col_descs, anonymous_namespace{QueryMemoryDescriptor.cpp}::is_int_and_no_bigger_than(), kCOUNT, kENCODING_DICT, kUNNEST, and RelAlgExecutionUnit::target_exprs.

738  {
739  if (g_bigint_count) {
740  return sizeof(int64_t);
741  }
742  int8_t compact_width{0};
743  auto col_it = ra_exe_unit.input_col_descs.begin();
744  auto const end = ra_exe_unit.input_col_descs.end();
745  int unnest_array_col_id{std::numeric_limits<int>::min()};
746  for (const auto& groupby_expr : ra_exe_unit.groupby_exprs) {
747  const auto uoper = dynamic_cast<Analyzer::UOper*>(groupby_expr.get());
748  if (uoper && uoper->get_optype() == kUNNEST) {
749  const auto& arg_ti = uoper->get_operand()->get_type_info();
750  CHECK(arg_ti.is_array());
751  const auto& elem_ti = arg_ti.get_elem_type();
752  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
753  unnest_array_col_id = (*col_it)->getColId();
754  } else {
755  compact_width = crt_min_byte_width;
756  break;
757  }
758  }
759  if (col_it != end) {
760  ++col_it;
761  }
762  }
763  if (!compact_width &&
764  (ra_exe_unit.groupby_exprs.size() != 1 || !ra_exe_unit.groupby_exprs.front())) {
765  compact_width = crt_min_byte_width;
766  }
767  if (!compact_width) {
768  col_it = ra_exe_unit.input_col_descs.begin();
769  std::advance(col_it, ra_exe_unit.groupby_exprs.size());
770  for (const auto target : ra_exe_unit.target_exprs) {
771  const auto& ti = target->get_type_info();
772  const auto agg = dynamic_cast<const Analyzer::AggExpr*>(target);
773  if (agg && agg->get_arg()) {
774  compact_width = crt_min_byte_width;
775  break;
776  }
777 
778  if (agg) {
779  CHECK_EQ(kCOUNT, agg->get_aggtype());
780  CHECK(!agg->get_is_distinct());
781  if (col_it != end) {
782  ++col_it;
783  }
784  continue;
785  }
786 
787  if (is_int_and_no_bigger_than(ti, 4) ||
788  (ti.is_string() && ti.get_compression() == kENCODING_DICT)) {
789  if (col_it != end) {
790  ++col_it;
791  }
792  continue;
793  }
794 
795  const auto uoper = dynamic_cast<Analyzer::UOper*>(target);
796  if (uoper && uoper->get_optype() == kUNNEST &&
797  (*col_it)->getColId() == unnest_array_col_id) {
798  const auto arg_ti = uoper->get_operand()->get_type_info();
799  CHECK(arg_ti.is_array());
800  const auto& elem_ti = arg_ti.get_elem_type();
801  if (elem_ti.is_string() && elem_ti.get_compression() == kENCODING_DICT) {
802  if (col_it != end) {
803  ++col_it;
804  }
805  continue;
806  }
807  }
808 
809  compact_width = crt_min_byte_width;
810  break;
811  }
812  }
813  if (!compact_width) {
814  size_t total_tuples{0};
815  for (const auto& qi : query_infos) {
816  total_tuples += qi.info.getNumTuples();
817  }
818  return total_tuples <= static_cast<size_t>(std::numeric_limits<uint32_t>::max()) ||
819  unnest_array_col_id != std::numeric_limits<int>::min()
820  ? 4
821  : crt_min_byte_width;
822  } else {
823  // TODO(miyu): relax this condition to allow more cases just w/o padding
824  for (auto wid : get_col_byte_widths(ra_exe_unit.target_exprs)) {
825  compact_width = std::max(compact_width, wid);
826  }
827  return compact_width;
828  }
829 }
std::vector< Analyzer::Expr * > target_exprs
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::vector< int8_t > get_col_byte_widths(const T &col_expr_list)
const std::list< std::shared_ptr< Analyzer::Expr > > groupby_exprs
bool g_bigint_count
const SQLTypeInfo & get_type_info() const
Definition: Analyzer.h:79
const Expr * get_operand() const
Definition: Analyzer.h:384
Definition: sqldefs.h:81
bool is_int_and_no_bigger_than(const SQLTypeInfo &ti, const size_t byte_width)
#define CHECK(condition)
Definition: Logger.h:291
std::list< std::shared_ptr< const InputColDescriptor > > input_col_descs

+ Here is the call graph for this function:

std::string QueryMemoryDescriptor::queryDescTypeToString ( ) const

Definition at line 1247 of file QueryMemoryDescriptor.cpp.

References heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, heavyai::NonGroupedAggregate, heavyai::Projection, query_desc_type_, heavyai::TableFunction, and UNREACHABLE.

Referenced by reductionKey().

1247  {
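1248  switch (query_desc_type_) {
1249  case QueryDescriptionType::GroupByPerfectHash:
1250  return "Perfect Hash";
1251  case QueryDescriptionType::GroupByBaselineHash:
1252  return "Baseline Hash";
1253  case QueryDescriptionType::Projection:
1254  return "Projection";
1255  case QueryDescriptionType::TableFunction:
1256  return "Table Function";
1257  case QueryDescriptionType::NonGroupedAggregate:
1258  return "Non-grouped Aggregate";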
1259  case QueryDescriptionType::Estimator:
1260  return "Estimator";
1261  default:
1262  UNREACHABLE();
1263  }
1264  return "";
1265 }
GroupByPerfectHash
Definition: enums.h:58
NonGroupedAggregate
Definition: enums.h:58
#define UNREACHABLE()
Definition: Logger.h:338
Projection
Definition: enums.h:58
TableFunction
Definition: enums.h:58
QueryDescriptionType query_desc_type_
GroupByBaselineHash
Definition: enums.h:58

+ Here is the caller graph for this function:

std::string QueryMemoryDescriptor::reductionKey ( ) const

Definition at line 1293 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, getEffectiveKeyWidth(), getGroupbyColCount(), getTargetGroupbyIndex(), getTargetIdxForKey(), join(), keyless_hash_, queryDescTypeToString(), targetGroupbyIndicesSize(), to_string(), ColSlotContext::toString(), and toString().

Referenced by ResultSetReductionJIT::cacheKey(), and toString().

1293  {
1294  std::string str;
1295  str += "Query Memory Descriptor State\n";
1296  str += "\tQuery Type: " + queryDescTypeToString() + "\n";
1297  str +=
1298  "\tKeyless Hash: " + ::toString(keyless_hash_) +
1299  (keyless_hash_ ? ", target index for key: " + std::to_string(getTargetIdxForKey())
1300  : "") +
1301  "\n";
1302  str += "\tEffective key width: " + std::to_string(getEffectiveKeyWidth()) + "\n";
1303  str += "\tNumber of group columns: " + std::to_string(getGroupbyColCount()) + "\n";
1304  const auto group_indices_size = targetGroupbyIndicesSize();
1305  if (group_indices_size) {
1306  std::vector<std::string> group_indices_strings;
1307  for (size_t target_idx = 0; target_idx < group_indices_size; ++target_idx) {
1308  group_indices_strings.push_back(std::to_string(getTargetGroupbyIndex(target_idx)));
1309  }
1310  str += "\tTarget group by indices: " +
1311  boost::algorithm::join(group_indices_strings, ",") + "\n";
1312  }
1313  str += "\t" + col_slot_context_.toString();
1314  return str;
1315 }
int64_t getTargetGroupbyIndex(const size_t target_idx) const
std::string toString() const
std::string join(T const &container, std::string const &delim)
size_t getEffectiveKeyWidth() const
std::string to_string(char const *&&v)
size_t getGroupbyColCount() const
size_t targetGroupbyIndicesSize() const
std::string toString() const
std::string queryDescTypeToString() const
int32_t getTargetIdxForKey() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::resetGroupColWidths ( const std::vector< int8_t > &  new_group_col_widths)
inlineprotected

Definition at line 384 of file QueryMemoryDescriptor.h.

References group_col_widths_.

384  {
385  group_col_widths_ = new_group_col_widths;
386  }
std::vector< int8_t > group_col_widths_
void QueryMemoryDescriptor::setAllTargetGroupbyIndices ( std::vector< int64_t >  group_by_indices)
inline

Definition at line 248 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

248  {
249  target_groupby_indices_ = group_by_indices;
250  }
std::vector< int64_t > target_groupby_indices_
void QueryMemoryDescriptor::setAvailableCpuThreads ( size_t  num_available_threads) const
inline

Definition at line 374 of file QueryMemoryDescriptor.h.

References num_available_threads_.

Referenced by ExecutionKernel::runImpl().

374  {
375  num_available_threads_ = num_available_threads;
376  }

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::setEntryCount ( const size_t  val)
inline

Definition at line 262 of file QueryMemoryDescriptor.h.

References entry_count_.

Referenced by Executor::executePlanWithGroupBy(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::updateEntryCount(), and ResultSet::updateStorageEntryCount().

262 { entry_count_ = val; }

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::setForceFourByteFloat ( const bool  val)
inline

Definition at line 305 of file QueryMemoryDescriptor.h.

References force_4byte_float_.

void QueryMemoryDescriptor::setGroupColCompactWidth ( const int8_t  val)
inline

Definition at line 202 of file QueryMemoryDescriptor.h.

References group_col_compact_width_.

void QueryMemoryDescriptor::setHasInterleavedBinsOnGpu ( const bool  val)
inline

Definition at line 184 of file QueryMemoryDescriptor.h.

References interleaved_bins_on_gpu_.

void QueryMemoryDescriptor::setHasKeylessHash ( const bool  val)
inline

Definition at line 181 of file QueryMemoryDescriptor.h.

References keyless_hash_.

void QueryMemoryDescriptor::setOutputColumnar ( const bool  val)

Definition at line 1105 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, isLogicalSizedColumnsAllowed(), output_columnar_, and ColSlotContext::setAllSlotsPaddedSizeToLogicalSize().

1105  {
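1106  output_columnar_ = val;
1107  if (isLogicalSizedColumnsAllowed()) {
1108  col_slot_context_.setAllSlotsPaddedSizeToLogicalSize();
1109  }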
1110 }
bool isLogicalSizedColumnsAllowed() const
void setAllSlotsPaddedSizeToLogicalSize()

+ Here is the call graph for this function:

void QueryMemoryDescriptor::setPaddedSlotWidthBytes ( const size_t  slot_idx,
const int8_t  bytes 
)

Definition at line 1193 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::setPaddedSlotWidthBytes().

Referenced by TargetExprCodegenBuilder::operator()().

1194  {
1195  col_slot_context_.setPaddedSlotWidthBytes(slot_idx, bytes);
1196 }
void setPaddedSlotWidthBytes(const size_t slot_idx, const int8_t bytes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void QueryMemoryDescriptor::setQueryDescriptionType ( const QueryDescriptionType  val)
inline

Definition at line 174 of file QueryMemoryDescriptor.h.

References query_desc_type_.

174 { query_desc_type_ = val; }
QueryDescriptionType query_desc_type_
void QueryMemoryDescriptor::setTargetIdxForKey ( const int32_t  val)
inline

Definition at line 187 of file QueryMemoryDescriptor.h.

References idx_target_as_key_.

void QueryMemoryDescriptor::setThreadsCanReuseGroupByBuffers ( const bool  val)
inline

Definition at line 298 of file QueryMemoryDescriptor.h.

References threads_can_reuse_group_by_buffers_.

298  {
299  threads_can_reuse_group_by_buffers_ = val;
300  }
bool QueryMemoryDescriptor::slotIsVarlenOutput ( const size_t  slot_idx) const
inline

Definition at line 368 of file QueryMemoryDescriptor.h.

References col_slot_context_, and ColSlotContext::slotIsVarlen().

Referenced by advance_target_ptr_row_wise(), and ResultSet::makeGeoTargetValue().

368  {
369  return col_slot_context_.slotIsVarlen(slot_idx);
370  }
bool slotIsVarlen(const size_t slot_idx) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::sortOnGpu ( ) const
inline

Definition at line 282 of file QueryMemoryDescriptor.h.

References sort_on_gpu_.

Referenced by alignPaddedSlots(), QueryExecutionContext::launchGpuCode(), ExecutionKernel::runImpl(), and use_speculative_top_n().

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::targetGroupbyIndicesSize ( ) const
inline
size_t QueryMemoryDescriptor::targetGroupbyNegativeIndicesSize ( ) const
inline

Definition at line 253 of file QueryMemoryDescriptor.h.

References target_groupby_indices_.

253  {
254  return std::count_if(
255  target_groupby_indices_.begin(),
256  target_groupby_indices_.end(),
257  [](const int64_t& target_group_by_index) { return target_group_by_index < 0; });
258  }
std::vector< int64_t > target_groupby_indices_
bool QueryMemoryDescriptor::threadsCanReuseGroupByBuffers ( ) const
inline

Definition at line 294 of file QueryMemoryDescriptor.h.

References threads_can_reuse_group_by_buffers_.

Referenced by Executor::launchKernelsViaResourceMgr(), QueryMemoryInitializer::QueryMemoryInitializer(), and Executor::reduceMultiDeviceResults().

294  {
295  return threads_can_reuse_group_by_buffers_;
296  }

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::threadsShareMemory ( ) const
std::string QueryMemoryDescriptor::toString ( ) const

Definition at line 1267 of file QueryMemoryDescriptor.cpp.

References allow_multifrag_, blocksShareMemory(), bucket_, entry_count_, executor_, g_enable_lazy_fetch, GPU, interleaved_bins_on_gpu_, lazyInitGroups(), max_val_, min_val_, must_use_baseline_sort_, output_columnar_, reductionKey(), render_output_, sort_on_gpu_, threadsShareMemory(), to_string(), use_streaming_top_n_, and usesGetGroupValueFast().

Referenced by Executor::createKernels(), and reductionKey().

1267  {
1268  auto str = reductionKey();
1269  str += "\tAllow Multifrag: " + ::toString(allow_multifrag_) + "\n";
1270  str += "\tInterleaved Bins on GPU: " + ::toString(interleaved_bins_on_gpu_) + "\n";
1271  str += "\tBlocks Share Memory: " + ::toString(blocksShareMemory()) + "\n";
1272  str += "\tThreads Share Memory: " + ::toString(threadsShareMemory()) + "\n";
1273  str += "\tUses Fast Group Values: " + ::toString(usesGetGroupValueFast()) + "\n";
1274  str +=
1275  "\tLazy Init Groups (GPU): " + ::toString(lazyInitGroups(ExecutorDeviceType::GPU)) +
1276  "\n";
1277  str += "\tEntry Count: " + std::to_string(entry_count_) + "\n";
1278  str += "\tMin Val (perfect hash only): " + std::to_string(min_val_) + "\n";
1279  str += "\tMax Val (perfect hash only): " + std::to_string(max_val_) + "\n";
1280  str += "\tBucket Val (perfect hash only): " + std::to_string(bucket_) + "\n";
1281  str += "\tSort on GPU: " + ::toString(sort_on_gpu_) + "\n";
1282  str += "\tUse Streaming Top N: " + ::toString(use_streaming_top_n_) + "\n";
1283  str += "\tOutput Columnar: " + ::toString(output_columnar_) + "\n";
1284  auto const allow_lazy_fetch = executor_->plan_state_
1285  ? executor_->plan_state_->allow_lazy_fetch_
1286  : g_enable_lazy_fetch;
1287  str += "\tAllow Lazy Fetch: " + ::toString(allow_lazy_fetch) + "\n";
1288  str += "\tRender Output: " + ::toString(render_output_) + "\n";
1289  str += "\tUse Baseline Sort: " + ::toString(must_use_baseline_sort_) + "\n";
1290  return str;
1291 }
std::string toString() const
bool g_enable_lazy_fetch
Definition: Execute.cpp:136
std::string to_string(char const *&&v)
bool lazyInitGroups(const ExecutorDeviceType) const
std::string reductionKey() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

static TResultSetBufferDescriptor QueryMemoryDescriptor::toThrift ( const QueryMemoryDescriptor )
static
int8_t QueryMemoryDescriptor::updateActualMinByteWidth ( const int8_t  actual_min_byte_width) const

Definition at line 1219 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::getMinPaddedByteSize().

1220  {
1221  return col_slot_context_.getMinPaddedByteSize(actual_min_byte_width);
1222 }
int8_t getMinPaddedByteSize(const int8_t actual_min_byte_width) const

+ Here is the call graph for this function:

void QueryMemoryDescriptor::useConsistentSlotWidthSize ( const int8_t  slot_width_size)

Definition at line 1210 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, and ColSlotContext::setAllSlotsSize().

1210  {
1211  col_slot_context_.setAllSlotsSize(slot_width_size);
1212 }
void setAllSlotsSize(const int8_t slot_width_size)

+ Here is the call graph for this function:

bool QueryMemoryDescriptor::usesGetGroupValueFast ( ) const

Definition at line 1135 of file QueryMemoryDescriptor.cpp.

References getGroupbyColCount(), heavyai::GroupByPerfectHash, and query_desc_type_.

Referenced by canOutputColumnar(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), and toString().

1135  {
1136  return (query_desc_type_ == QueryDescriptionType::GroupByPerfectHash &&
1137  getGroupbyColCount() == 1);
1138 }
GroupByPerfectHash
Definition: enums.h:58
size_t getGroupbyColCount() const
QueryDescriptionType query_desc_type_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

bool QueryMemoryDescriptor::useStreamingTopN ( ) const
inline
std::optional< size_t > QueryMemoryDescriptor::varlenOutputBufferElemSize ( ) const

Definition at line 1339 of file QueryMemoryDescriptor.cpp.

References col_slot_context_, ColSlotContext::getSlotCount(), and ColSlotContext::varlenOutputElementSize().

Referenced by TargetExprCodegen::codegenAggregate(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().

1339  {
1340  int64_t buffer_element_size{0};
1341  for (size_t i = 0; i < col_slot_context_.getSlotCount(); i++) {
1342  try {
1343  const auto slot_element_size = col_slot_context_.varlenOutputElementSize(i);
1344  if (slot_element_size < 0) {
1345  return std::nullopt;
1346  }
1347  buffer_element_size += slot_element_size;
1348  } catch (...) {
1349  continue;
1350  }
1351  }
1352  return buffer_element_size;
1353 }
int64_t varlenOutputElementSize(const size_t slot_idx) const
size_t getSlotCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

size_t QueryMemoryDescriptor::varlenOutputRowSizeToSlot ( const size_t  slot_idx) const

Definition at line 1355 of file QueryMemoryDescriptor.cpp.

References CHECK_LT, col_slot_context_, ColSlotContext::getSlotCount(), and ColSlotContext::varlenOutputElementSize().

Referenced by TargetExprCodegen::codegenAggregate().

1355  {
1356  int64_t buffer_element_size{0};
1357  CHECK_LT(slot_idx, col_slot_context_.getSlotCount());
1358  for (size_t i = 0; i < slot_idx; i++) {
1359  try {
1360  const auto slot_element_size = col_slot_context_.varlenOutputElementSize(i);
1361  if (slot_element_size < 0) {
1362  continue;
1363  }
1364  buffer_element_size += slot_element_size;
1365  } catch (...) {
1366  continue;
1367  }
1368  }
1369  return buffer_element_size;
1370 }
int64_t varlenOutputElementSize(const size_t slot_idx) const
#define CHECK_LT(x, y)
Definition: Logger.h:303
size_t getSlotCount() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

Friends And Related Function Documentation

friend class QueryExecutionContext
friend

Definition at line 432 of file QueryMemoryDescriptor.h.

Referenced by getQueryExecutionContext().

friend class ResultSet
friend

Definition at line 431 of file QueryMemoryDescriptor.h.

Member Data Documentation

bool QueryMemoryDescriptor::allow_multifrag_
private

Definition at line 390 of file QueryMemoryDescriptor.h.

Referenced by toString().

ApproxQuantileDescriptors QueryMemoryDescriptor::approx_quantile_descriptors_
private

Definition at line 409 of file QueryMemoryDescriptor.h.

Referenced by getApproxQuantileDescriptors().

int64_t QueryMemoryDescriptor::bucket_
private

Definition at line 407 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getBucket(), operator==(), and toString().

const Executor* QueryMemoryDescriptor::executor_
private
bool QueryMemoryDescriptor::force_4byte_float_
private

Definition at line 417 of file QueryMemoryDescriptor.h.

Referenced by forceFourByteFloat(), operator==(), and setForceFourByteFloat().

int8_t QueryMemoryDescriptor::group_col_compact_width_
private
bool QueryMemoryDescriptor::has_nulls_
private

Definition at line 408 of file QueryMemoryDescriptor.h.

Referenced by hasNulls(), and operator==().

int32_t QueryMemoryDescriptor::idx_target_as_key_
private

Definition at line 394 of file QueryMemoryDescriptor.h.

Referenced by getTargetIdxForKey(), operator==(), and setTargetIdxForKey().

bool QueryMemoryDescriptor::interleaved_bins_on_gpu_
private
bool QueryMemoryDescriptor::keyless_hash_
private
int64_t QueryMemoryDescriptor::max_val_
private

Definition at line 406 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getMaxVal(), operator==(), and toString().

int64_t QueryMemoryDescriptor::min_val_
private

Definition at line 404 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), getMinVal(), operator==(), and toString().

bool QueryMemoryDescriptor::must_use_baseline_sort_
private

Definition at line 414 of file QueryMemoryDescriptor.h.

Referenced by mustUseBaselineSort(), and toString().

size_t QueryMemoryDescriptor::num_available_threads_ {1}
mutable private

Definition at line 425 of file QueryMemoryDescriptor.h.

Referenced by getAvailableCpuThreads(), and setAvailableCpuThreads().

bool QueryMemoryDescriptor::render_output_
private

Definition at line 413 of file QueryMemoryDescriptor.h.

Referenced by blocksShareMemory(), lazyInitGroups(), and toString().

bool QueryMemoryDescriptor::sort_on_gpu_
private

Definition at line 411 of file QueryMemoryDescriptor.h.

Referenced by operator==(), QueryMemoryDescriptor(), sortOnGpu(), and toString().

std::vector<int64_t> QueryMemoryDescriptor::target_groupby_indices_
private
bool QueryMemoryDescriptor::threads_can_reuse_group_by_buffers_
private
bool QueryMemoryDescriptor::use_streaming_top_n_
private

The documentation for this class was generated from the following files: