OmniSciDB
a5dc49c757
|
#include <QueryMemoryDescriptor.h>
Public Member Functions | |
QueryMemoryDescriptor () | |
QueryMemoryDescriptor (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const bool allow_multifrag, const bool keyless_hash, const bool interleaved_bins_on_gpu, const int32_t idx_target_as_key, const ColRangeInfo &col_range_info, const ColSlotContext &col_slot_context, const std::vector< int8_t > &group_col_widths, const int8_t group_col_compact_width, const std::vector< int64_t > &target_groupby_indices, const size_t entry_count, const ApproxQuantileDescriptors &, const CountDistinctDescriptors, const bool sort_on_gpu_hint, const bool output_columnar, const bool render_output, const bool must_use_baseline_sort, const bool use_streaming_top_n, const bool threads_can_reuse_group_by_buffers) | |
QueryMemoryDescriptor (const Executor *executor, const size_t entry_count, const QueryDescriptionType query_desc_type) | |
QueryMemoryDescriptor (const QueryDescriptionType query_desc_type, const int64_t min_val, const int64_t max_val, const bool has_nulls, const std::vector< int8_t > &group_col_widths) | |
QueryMemoryDescriptor (const TResultSetBufferDescriptor &thrift_query_memory_descriptor) | |
bool | operator== (const QueryMemoryDescriptor &other) const |
std::unique_ptr < QueryExecutionContext > | getQueryExecutionContext (const RelAlgExecutionUnit &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const shared::TableKey &outer_table_key, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner >, const bool output_columnar, const bool sort_on_gpu, const size_t thread_idx, RenderInfo *) const |
bool | countDistinctDescriptorsLogicallyEmpty () const |
const Executor * | getExecutor () const |
QueryDescriptionType | getQueryDescriptionType () const |
void | setQueryDescriptionType (const QueryDescriptionType val) |
bool | isSingleColumnGroupByWithPerfectHash () const |
bool | hasKeylessHash () const |
void | setHasKeylessHash (const bool val) |
bool | hasInterleavedBinsOnGpu () const |
void | setHasInterleavedBinsOnGpu (const bool val) |
int32_t | getTargetIdxForKey () const |
void | setTargetIdxForKey (const int32_t val) |
int8_t | groupColWidth (const size_t key_idx) const |
size_t | getPrependedGroupColOffInBytes (const size_t group_idx) const |
size_t | getPrependedGroupBufferSizeInBytes () const |
const auto | groupColWidthsBegin () const |
const auto | groupColWidthsEnd () const |
void | clearGroupColWidths () |
bool | isGroupBy () const |
void | setGroupColCompactWidth (const int8_t val) |
size_t | getColCount () const |
size_t | getSlotCount () const |
const int8_t | getPaddedSlotWidthBytes (const size_t slot_idx) const |
const int8_t | getLogicalSlotWidthBytes (const size_t slot_idx) const |
void | setPaddedSlotWidthBytes (const size_t slot_idx, const int8_t bytes) |
const int8_t | getSlotIndexForSingleSlotCol (const size_t col_idx) const |
size_t | getPaddedColWidthForRange (const size_t offset, const size_t range) const |
void | useConsistentSlotWidthSize (const int8_t slot_width_size) |
size_t | getRowWidth () const |
int8_t | updateActualMinByteWidth (const int8_t actual_min_byte_width) const |
void | addColSlotInfo (const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col) |
void | addColSlotInfoFlatBuffer (const int64_t flatbuffer_size) |
int64_t | getFlatBufferSize (const size_t slot_idx) const |
bool | checkSlotUsesFlatBufferFormat (const size_t slot_idx) const |
int64_t | getPaddedSlotBufferSize (const size_t slot_idx) const |
void | clearSlotInfo () |
void | alignPaddedSlots () |
int64_t | getTargetGroupbyIndex (const size_t target_idx) const |
void | setAllTargetGroupbyIndices (std::vector< int64_t > group_by_indices) |
size_t | targetGroupbyIndicesSize () const |
size_t | targetGroupbyNegativeIndicesSize () const |
void | clearTargetGroupbyIndices () |
size_t | getEntryCount () const |
void | setEntryCount (const size_t val) |
int64_t | getMinVal () const |
int64_t | getMaxVal () const |
int64_t | getBucket () const |
bool | hasNulls () const |
const ApproxQuantileDescriptors & | getApproxQuantileDescriptors () const |
const CountDistinctDescriptor & | getCountDistinctDescriptor (const size_t idx) const |
size_t | getCountDistinctDescriptorsSize () const |
bool | sortOnGpu () const |
bool | canOutputColumnar () const |
bool | didOutputColumnar () const |
void | setOutputColumnar (const bool val) |
bool | useStreamingTopN () const |
bool | isLogicalSizedColumnsAllowed () const |
bool | mustUseBaselineSort () const |
bool | threadsCanReuseGroupByBuffers () const |
void | setThreadsCanReuseGroupByBuffers (const bool val) |
bool | forceFourByteFloat () const |
void | setForceFourByteFloat (const bool val) |
size_t | getGroupbyColCount () const |
size_t | getKeyCount () const |
size_t | getBufferColSlotCount () const |
size_t | getBufferSizeBytes (const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const |
size_t | getBufferSizeBytes (const ExecutorDeviceType device_type) const |
size_t | getBufferSizeBytes (const ExecutorDeviceType device_type, const size_t override_entry_count) const |
const ColSlotContext & | getColSlotContext () const |
bool | usesGetGroupValueFast () const |
bool | blocksShareMemory () const |
bool | threadsShareMemory () const |
bool | lazyInitGroups (const ExecutorDeviceType) const |
bool | interleavedBins (const ExecutorDeviceType) const |
size_t | getColOffInBytes (const size_t col_idx) const |
size_t | getColOffInBytesInNextBin (const size_t col_idx) const |
size_t | getNextColOffInBytes (const int8_t *col_ptr, const size_t bin, const size_t col_idx) const |
size_t | getNextColOffInBytesRowOnly (const int8_t *col_ptr, const size_t col_idx) const |
size_t | getColOnlyOffInBytes (const size_t col_idx) const |
size_t | getRowSize () const |
size_t | getColsSize () const |
size_t | getWarpCount () const |
size_t | getCompactByteWidth () const |
size_t | getEffectiveKeyWidth () const |
bool | isWarpSyncRequired (const ExecutorDeviceType) const |
std::string | queryDescTypeToString () const |
std::string | toString () const |
std::string | reductionKey () const |
bool | hasVarlenOutput () const |
std::optional< size_t > | varlenOutputBufferElemSize () const |
size_t | varlenOutputRowSizeToSlot (const size_t slot_idx) const |
bool | slotIsVarlenOutput (const size_t slot_idx) const |
size_t | getAvailableCpuThreads () const |
void | setAvailableCpuThreads (size_t num_available_threads) const |
std::optional< size_t > | getMaxPerDeviceCardinality (const RelAlgExecutionUnit &ra_exe_unit) const |
bool | canUsePerDeviceCardinality (const RelAlgExecutionUnit &ra_exe_unit) const |
Static Public Member Functions | |
static TResultSetBufferDescriptor | toThrift (const QueryMemoryDescriptor &) |
static std::unique_ptr < QueryMemoryDescriptor > | init (const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const ApproxQuantileDescriptors &, const CountDistinctDescriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint, const bool threads_can_reuse_group_by_buffers) |
static bool | many_entries (const int64_t max_val, const int64_t min_val, const int64_t bucket) |
static bool | countDescriptorsLogicallyEmpty (const CountDistinctDescriptors &count_distinct_descriptors) |
static int8_t | pick_target_compact_width (const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width) |
Protected Member Functions | |
void | resetGroupColWidths (const std::vector< int8_t > &new_group_col_widths) |
Private Member Functions | |
size_t | getTotalBytesOfColumnarBuffers () const |
size_t | getTotalBytesOfColumnarBuffers (const size_t num_entries_per_column) const |
size_t | getTotalBytesOfColumnarProjections (const size_t projection_count) const |
Private Attributes | |
const Executor * | executor_ |
bool | allow_multifrag_ |
QueryDescriptionType | query_desc_type_ |
bool | keyless_hash_ |
bool | interleaved_bins_on_gpu_ |
int32_t | idx_target_as_key_ |
std::vector< int8_t > | group_col_widths_ |
int8_t | group_col_compact_width_ |
std::vector< int64_t > | target_groupby_indices_ |
size_t | entry_count_ |
int64_t | min_val_ |
int64_t | max_val_ |
int64_t | bucket_ |
bool | has_nulls_ |
ApproxQuantileDescriptors | approx_quantile_descriptors_ |
CountDistinctDescriptors | count_distinct_descriptors_ |
bool | sort_on_gpu_ |
bool | output_columnar_ |
bool | render_output_ |
bool | must_use_baseline_sort_ |
bool | use_streaming_top_n_ |
bool | threads_can_reuse_group_by_buffers_ |
bool | force_4byte_float_ |
ColSlotContext | col_slot_context_ |
size_t | num_available_threads_ {1} |
Friends | |
class | ResultSet |
class | QueryExecutionContext |
Definition at line 68 of file QueryMemoryDescriptor.h.
QueryMemoryDescriptor::QueryMemoryDescriptor | ( | ) |
Definition at line 554 of file QueryMemoryDescriptor.cpp.
QueryMemoryDescriptor::QueryMemoryDescriptor | ( | const Executor * | executor, |
const RelAlgExecutionUnit & | ra_exe_unit, | ||
const std::vector< InputTableInfo > & | query_infos, | ||
const bool | allow_multifrag, | ||
const bool | keyless_hash, | ||
const bool | interleaved_bins_on_gpu, | ||
const int32_t | idx_target_as_key, | ||
const ColRangeInfo & | col_range_info, | ||
const ColSlotContext & | col_slot_context, | ||
const std::vector< int8_t > & | group_col_widths, | ||
const int8_t | group_col_compact_width, | ||
const std::vector< int64_t > & | target_groupby_indices, | ||
const size_t | entry_count, | ||
const ApproxQuantileDescriptors & | approx_quantile_descriptors, | ||
const CountDistinctDescriptors | count_distinct_descriptors, | ||
const bool | sort_on_gpu_hint, | ||
const bool | output_columnar, | ||
const bool | render_output, | ||
const bool | must_use_baseline_sort, | ||
const bool | use_streaming_top_n, | ||
const bool | threads_can_reuse_group_by_buffers | ||
) |
Definition at line 453 of file QueryMemoryDescriptor.cpp.
References canOutputColumnar(), CHECK, col_slot_context_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, streaming_top_n::get_heap_size(), getEntryCount(), getRowSize(), heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, isLogicalSizedColumnsAllowed(), keyless_hash_, heavyai::NonGroupedAggregate, output_columnar_, heavyai::Projection, query_desc_type_, ColSlotContext::setAllSlotsPaddedSizeToLogicalSize(), ColSlotContext::setAllUnsetSlotsPaddedSize(), sort_on_gpu_, heavyai::TableFunction, RelAlgExecutionUnit::target_exprs, RelAlgExecutionUnit::use_bump_allocator, use_streaming_top_n_, and ColSlotContext::validate().
QueryMemoryDescriptor::QueryMemoryDescriptor | ( | const Executor * | executor, |
const size_t | entry_count, | ||
const QueryDescriptionType | query_desc_type | ||
) |
Definition at line 575 of file QueryMemoryDescriptor.cpp.
References output_columnar_, and heavyai::TableFunction.
QueryMemoryDescriptor::QueryMemoryDescriptor | ( | const QueryDescriptionType | query_desc_type, |
const int64_t | min_val, | ||
const int64_t | max_val, | ||
const bool | has_nulls, | ||
const std::vector< int8_t > & | group_col_widths | ||
) |
Definition at line 604 of file QueryMemoryDescriptor.cpp.
QueryMemoryDescriptor::QueryMemoryDescriptor | ( | const TResultSetBufferDescriptor & | thrift_query_memory_descriptor | ) |
void QueryMemoryDescriptor::addColSlotInfo | ( | const std::vector< std::tuple< int8_t, int8_t >> & | slots_for_col | ) |
Definition at line 1224 of file QueryMemoryDescriptor.cpp.
References ColSlotContext::addColumn(), and col_slot_context_.
Referenced by TableFunctionManager::allocate_output_buffers(), ResultSetLogicalValuesBuilder::create(), and TableFunctionExecutionContext::launchGpuCode().
void QueryMemoryDescriptor::addColSlotInfoFlatBuffer | ( | const int64_t | flatbuffer_size | ) |
Definition at line 1229 of file QueryMemoryDescriptor.cpp.
References ColSlotContext::addColumnFlatBuffer(), and col_slot_context_.
Referenced by TableFunctionManager::allocate_output_buffers().
void QueryMemoryDescriptor::alignPaddedSlots | ( | ) |
Definition at line 1237 of file QueryMemoryDescriptor.cpp.
References ColSlotContext::alignPaddedSlots(), col_slot_context_, and sortOnGpu().
bool QueryMemoryDescriptor::blocksShareMemory | ( | ) | const |
Definition at line 1144 of file QueryMemoryDescriptor.cpp.
References bucket_, count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), executor_, g_cluster, getGroupbyColCount(), heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, many_entries(), max_val_, min_val_, heavyai::Projection, query_desc_type_, render_output_, and heavyai::TableFunction.
Referenced by canOutputColumnar(), ResultSetReductionJIT::codegen(), QueryMemoryInitializer::computeNumberOfBuffers(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), and toString().
bool QueryMemoryDescriptor::canOutputColumnar | ( | ) | const |
Definition at line 1241 of file QueryMemoryDescriptor.cpp.
References blocksShareMemory(), count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, interleavedBins(), threadsShareMemory(), and usesGetGroupValueFast().
Referenced by QueryMemoryDescriptor().
bool QueryMemoryDescriptor::canUsePerDeviceCardinality | ( | const RelAlgExecutionUnit & | ra_exe_unit | ) | const |
Definition at line 1383 of file QueryMemoryDescriptor.cpp.
References anonymous_namespace{QueryMemoryDescriptor.cpp}::any_of(), RelAlgExecutionUnit::join_quals, LEFT, heavyai::Projection, query_desc_type_, and RelAlgExecutionUnit::target_exprs_union.
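bool QueryMemoryDescriptor::checkSlotUsesFlatBufferFormat | ( | const size_t | slot_idx | ) | const |
inline |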
Definition at line 234 of file QueryMemoryDescriptor.h.
References ColSlotContext::checkSlotUsesFlatBufferFormat(), and col_slot_context_.
Referenced by ResultSet::checkSlotUsesFlatBufferFormat(), getPaddedSlotBufferSize(), and target_exprs_to_infos().
void QueryMemoryDescriptor::clearGroupColWidths | ( | ) |
inline |
Definition at line 198 of file QueryMemoryDescriptor.h.
References group_col_widths_.
void QueryMemoryDescriptor::clearSlotInfo | ( | ) |
Definition at line 1233 of file QueryMemoryDescriptor.cpp.
References ColSlotContext::clear(), and col_slot_context_.
void QueryMemoryDescriptor::clearTargetGroupbyIndices | ( | ) |
inline |
Definition at line 259 of file QueryMemoryDescriptor.h.
References target_groupby_indices_.
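static bool QueryMemoryDescriptor::countDescriptorsLogicallyEmpty | ( | const CountDistinctDescriptors & | count_distinct_descriptors | ) |
inline static |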
Definition at line 153 of file QueryMemoryDescriptor.h.
References Invalid.
Referenced by blocksShareMemory(), canOutputColumnar(), countDistinctDescriptorsLogicallyEmpty(), lazyInitGroups(), and QueryMemoryDescriptor().
bool QueryMemoryDescriptor::countDistinctDescriptorsLogicallyEmpty | ( | ) | const |
inline |
Definition at line 162 of file QueryMemoryDescriptor.h.
References count_distinct_descriptors_, and countDescriptorsLogicallyEmpty().
Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem(), and anonymous_namespace{QueryMemoryInitializer.cpp}::collect_target_expr_metadata().
bool QueryMemoryDescriptor::didOutputColumnar | ( | ) | const |
inline |
Definition at line 285 of file QueryMemoryDescriptor.h.
References output_columnar_.
Referenced by ResultSetStorage::binSearchRowCount(), TargetExprCodegen::codegen(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), GroupByAndAggregate::codegenWindowRowPointer(), copy_projection_buffer_from_gpu_columnar(), ResultSetStorage::copyKeyColWise(), ResultSet::createComparator(), ResultSet::didOutputColumnar(), ResultSet::eachCellInColumn(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), ResultSet::fixupQueryMemoryDescriptor(), get_cols_ptr(), ResultSet::getTargetValueFromBufferColwise(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), ResultSetStorage::initializeBaselineValueSlots(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSet::makeGeoTargetValue(), ResultSetStorage::moveOneEntryToBuffer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().
bool QueryMemoryDescriptor::forceFourByteFloat | ( | ) | const |
inline |
Definition at line 304 of file QueryMemoryDescriptor.h.
References force_4byte_float_.
Referenced by ResultSet::makeTargetValue().
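const ApproxQuantileDescriptors& QueryMemoryDescriptor::getApproxQuantileDescriptors | ( | ) | const |
inline |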
Definition at line 270 of file QueryMemoryDescriptor.h.
References approx_quantile_descriptors_.
Referenced by QueryMemoryInitializer::allocateTDigestsBuffer(), QueryMemoryInitializer::initColumnsPerRow(), and QueryMemoryInitializer::QueryMemoryInitializer().
size_t QueryMemoryDescriptor::getAvailableCpuThreads | ( | ) | const |
inline |
Definition at line 372 of file QueryMemoryDescriptor.h.
References num_available_threads_.
Referenced by QueryMemoryInitializer::initRowGroups().
int64_t QueryMemoryDescriptor::getBucket | ( | ) | const |
inline |
Definition at line 266 of file QueryMemoryDescriptor.h.
References bucket_.
Referenced by GroupByAndAggregate::codegenGroupBy(), and GroupByAndAggregate::codegenSingleColumnPerfectHash().
size_t QueryMemoryDescriptor::getBufferColSlotCount | ( | ) | const |
Definition at line 1124 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, ColSlotContext::getSlotCount(), and target_groupby_indices_.
Referenced by anonymous_namespace{ResultSetIteration.cpp}::advance_col_buff_to_slot(), QueryMemoryInitializer::copyFromTableFunctionGpuBuffers(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), and QueryMemoryInitializer::setupTableFunctionGpuBuffers().
size_t QueryMemoryDescriptor::getBufferSizeBytes | ( | const RelAlgExecutionUnit & | ra_exe_unit, |
const unsigned | thread_count, | ||
const ExecutorDeviceType | device_type | ||
) | const |
Definition at line 1047 of file QueryMemoryDescriptor.cpp.
References entry_count_, streaming_top_n::get_heap_size(), getRowSize(), SortInfo::limit, anonymous_namespace{Utm.h}::n, SortInfo::offset, RelAlgExecutionUnit::sort_info, and use_streaming_top_n_.
Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getBufferSizeBytes(), Executor::launchKernelsViaResourceMgr(), and QueryMemoryInitializer::QueryMemoryInitializer().
size_t QueryMemoryDescriptor::getBufferSizeBytes | ( | const ExecutorDeviceType | device_type | ) | const |
Definition at line 1100 of file QueryMemoryDescriptor.cpp.
References entry_count_, and getBufferSizeBytes().
size_t QueryMemoryDescriptor::getBufferSizeBytes | ( | const ExecutorDeviceType | device_type, |
const size_t | entry_count | ||
) | const |
Returns the total amount of output buffer memory required for each device (CPU/GPU).
Columnar: if projection, it returns index buffer + columnar buffer (all non-lazy columns); if table function, only the columnar buffer; if group by, it returns the amount required for each group column (assumes 64-bit per group) + columnar buffer (all involved agg columns).
Row-wise: returns required memory per row multiplied by number of entries.
Definition at line 1071 of file QueryMemoryDescriptor.cpp.
References align_to_int64(), CHECK_GE, executor_, getColsSize(), getRowSize(), getTotalBytesOfColumnarBuffers(), group_col_widths_, interleavedBins(), keyless_hash_, output_columnar_, heavyai::Projection, query_desc_type_, and heavyai::TableFunction.
size_t QueryMemoryDescriptor::getColCount | ( | ) | const |
Definition at line 1181 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, and ColSlotContext::getColCount().
size_t QueryMemoryDescriptor::getColOffInBytes | ( | const size_t | col_idx | ) | const |
Definition at line 905 of file QueryMemoryDescriptor.cpp.
References align_to_int64(), CHECK, CHECK_EQ, CHECK_GT, entry_count_, getColOnlyOffInBytes(), getEffectiveKeyWidth(), getFlatBufferSize(), getPaddedSlotWidthBytes(), getPrependedGroupBufferSizeInBytes(), getWarpCount(), group_col_widths_, heavyai::GroupByPerfectHash, keyless_hash_, output_columnar_, query_desc_type_, and heavyai::TableFunction.
Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenOutputSlot(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), get_cols_ptr(), QueryExecutionContext::groupBufferToDeinterleavedResults(), QueryMemoryInitializer::initRowGroups(), inplace_sort_gpu(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().
size_t QueryMemoryDescriptor::getColOffInBytesInNextBin | ( | const size_t | col_idx | ) | const |
Definition at line 985 of file QueryMemoryDescriptor.cpp.
References CHECK_EQ, getPaddedSlotWidthBytes(), getRowSize(), getWarpCount(), group_col_widths_, and output_columnar_.
Referenced by QueryExecutionContext::groupBufferToDeinterleavedResults().
size_t QueryMemoryDescriptor::getColOnlyOffInBytes | ( | const size_t | col_idx | ) | const |
Definition at line 892 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, and ColSlotContext::getColOnlyOffInBytes().
Referenced by TargetExprCodegen::codegen(), GroupByAndAggregate::codegenAggColumnPtr(), TargetExprCodegen::codegenAggregate(), getColOffInBytes(), and ResultSetStorage::reduceSingleRow().
const ColSlotContext& QueryMemoryDescriptor::getColSlotContext | ( | ) | const |
inline |
Definition at line 319 of file QueryMemoryDescriptor.h.
References col_slot_context_.
Referenced by QueryMemoryInitializer::copyFromTableFunctionGpuBuffers(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), and QueryMemoryInitializer::setupTableFunctionGpuBuffers().
size_t QueryMemoryDescriptor::getColsSize | ( | ) | const |
Definition at line 831 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, and ColSlotContext::getAllSlotsAlignedPaddedSize().
Referenced by QueryExecutionContext::copyInitAggValsToDevice(), getBufferSizeBytes(), getRowSize(), QueryExecutionContext::launchCpuCode(), and QueryExecutionContext::sizeofInitAggVals().
size_t QueryMemoryDescriptor::getCompactByteWidth | ( | ) | const |
Definition at line 853 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, and ColSlotContext::getCompactByteWidth().
Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), and init_agg_val_vec().
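const CountDistinctDescriptor& QueryMemoryDescriptor::getCountDistinctDescriptor | ( | const size_t | idx | ) | const |
inline |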
Definition at line 274 of file QueryMemoryDescriptor.h.
References CHECK_LT, and count_distinct_descriptors_.
Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), QueryMemoryInitializer::allocateCountDistinctGpuMem(), QueryMemoryInitializer::calculateCountDistinctBufferSize(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_count_distinct_expr_metadata(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), GroupByAndAggregate::codegenCountDistinct(), count_distinct_set_union_jit_rt(), anonymous_namespace{Execute.cpp}::fill_entries_for_empty_input(), ResultSet::makeTargetValue(), ResultSetStorage::reduceOneCountDistinctSlot(), and ResultSetStorage::reduceSingleRow().
size_t QueryMemoryDescriptor::getCountDistinctDescriptorsSize | ( | ) | const |
inline |
Definition at line 278 of file QueryMemoryDescriptor.h.
References count_distinct_descriptors_.
Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), ResultSetReductionJIT::reduceOneApproxQuantileSlot(), ResultSetStorage::reduceOneApproxQuantileSlot(), ResultSetReductionJIT::reduceOneCountDistinctSlot(), ResultSetStorage::reduceOneCountDistinctSlot(), and ResultSetReductionJIT::reduceOneModeSlot().
size_t QueryMemoryDescriptor::getEffectiveKeyWidth | ( | ) | const |
inline |
Definition at line 347 of file QueryMemoryDescriptor.h.
References group_col_compact_width_.
Referenced by ResultSetStorage::binSearchRowCount(), GroupByAndAggregate::codegenEstimator(), GroupByAndAggregate::codegenGroupBy(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), ResultSetStorage::fillOneEntryRowWise(), get_key_bytes_rowwise(), getColOffInBytes(), getRowSize(), ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetStorage::initializeRowWise(), QueryMemoryInitializer::initRowGroups(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::moveEntriesToBuffer(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), and reductionKey().
size_t QueryMemoryDescriptor::getEntryCount | ( | ) | const |
inline |
Definition at line 261 of file QueryMemoryDescriptor.h.
References entry_count_.
Referenced by QueryMemoryInitializer::allocateCountDistinctGpuMem(), QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), ResultSetStorage::binSearchRowCount(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_total_bitmap_memory(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegenMultiColumnBaselineHash(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GpuSharedMemCodeBuilder::codegenReduction(), GroupByAndAggregate::codegenWindowRowPointer(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), QueryMemoryInitializer::compactProjectionBuffersCpu(), QueryMemoryInitializer::compactProjectionBuffersGpu(), copy_group_by_buffers_from_gpu(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), Executor::createKernels(), ResultSet::entryCount(), Executor::executePlanWithGroupBy(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), anonymous_namespace{ResultSetReduction.cpp}::get_matching_group_value_reduction(), ResultSetStorage::getEntryCount(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSet::getTargetValueFromBufferColwise(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initGroupByBuffer(), ResultSetStorage::initializeBaselineValueSlots(), ResultSetStorage::initializeColWise(), ResultSetStorage::initializeRowWise(), inplace_sort_gpu(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), ResultSetStorage::moveOneEntryToBuffer(), QueryMemoryDescriptor(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduce(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetStorage::reduceOneSlotBaseline(), and ResultSetStorage::rewriteAggregateBufferOffsets().
const Executor* QueryMemoryDescriptor::getExecutor | ( | ) | const |
inline |
Definition at line 171 of file QueryMemoryDescriptor.h.
References executor_.
Referenced by anonymous_namespace{Execute.cpp}::build_row_for_empty_input(), ResultSetReductionJIT::codegen(), anonymous_namespace{Execute.cpp}::fill_entries_for_empty_input(), ResultSet::getExecutor(), ResultSet::getVarlenOrderEntry(), ResultSet::makeGeoTargetValue(), and ResultSet::makeVarlenTargetValue().
int64_t QueryMemoryDescriptor::getFlatBufferSize | ( | const size_t | slot_idx | ) | const |
inline |
Definition at line 231 of file QueryMemoryDescriptor.h.
References col_slot_context_, and ColSlotContext::getFlatBufferSize().
Referenced by getColOffInBytes(), and getPaddedSlotBufferSize().
size_t QueryMemoryDescriptor::getGroupbyColCount | ( | ) | const |
inline |
Definition at line 308 of file QueryMemoryDescriptor.h.
References group_col_widths_.
Referenced by blocksShareMemory(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenGroupBy(), ResultSetStorage::copyKeyColWise(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), anonymous_namespace{ResultSetReduction.cpp}::fill_slots(), ResultSetStorage::fillOneEntryColWise(), ResultSetStorage::fillOneEntryRowWise(), get_key_bytes_rowwise(), getKeyCount(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), QueryMemoryInitializer::initColumnarGroups(), ResultSetStorage::initializeColWise(), ResultSetStorage::initializeRowWise(), QueryMemoryInitializer::initRowGroups(), ResultSetStorage::isEmptyEntryColumnar(), isSingleColumnGroupByWithPerfectHash(), ResultSetStorage::moveEntriesToBuffer(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetStorage::reduceOneEntrySlotsBaseline(), reductionKey(), and usesGetGroupValueFast().
size_t QueryMemoryDescriptor::getKeyCount | ( | ) | const |
inline |
Definition at line 309 of file QueryMemoryDescriptor.h.
References getGroupbyColCount(), and keyless_hash_.
Referenced by anonymous_namespace{Execute.cpp}::permute_storage_columnar().
const int8_t QueryMemoryDescriptor::getLogicalSlotWidthBytes | ( | const size_t | slot_idx | ) | const |
Definition at line 1198 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::logical_size.
Referenced by QueryMemoryInitializer::allocateTDigestsBuffer(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_count_distinct_expr_metadata(), TargetExprCodegen::codegenAggregate(), ResultSet::getTargetValueFromBufferRowwise(), and QueryMemoryInitializer::initializeQuantileParams().
std::optional< size_t > QueryMemoryDescriptor::getMaxPerDeviceCardinality | ( | const RelAlgExecutionUnit & | ra_exe_unit | ) | const |
Definition at line 1372 of file QueryMemoryDescriptor.cpp.
References anonymous_namespace{Utm.h}::a, and RelAlgExecutionUnit::per_device_cardinality.
int64_t QueryMemoryDescriptor::getMaxVal | ( | ) | const |
inline |
Definition at line 265 of file QueryMemoryDescriptor.h.
References max_val_.
Referenced by GroupByAndAggregate::codegenGroupBy().
int64_t QueryMemoryDescriptor::getMinVal | ( | ) | const |
inline |
Definition at line 264 of file QueryMemoryDescriptor.h.
References min_val_.
Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().
size_t QueryMemoryDescriptor::getNextColOffInBytes | ( | const int8_t * | col_ptr, |
const size_t | bin, | ||
const size_t | col_idx | ||
) | const |
Definition at line 996 of file QueryMemoryDescriptor.cpp.
References align_to_int64(), CHECK, CHECK_EQ, entry_count_, getPaddedSlotWidthBytes(), getSlotCount(), getWarpCount(), group_col_widths_, and output_columnar_.
size_t QueryMemoryDescriptor::getNextColOffInBytesRowOnly | ( | const int8_t * | col_ptr, |
const size_t | col_idx | ||
) | const |
Definition at line 1030 of file QueryMemoryDescriptor.cpp.
References align_to_int64(), getPaddedSlotWidthBytes(), and getSlotCount().
Referenced by QueryMemoryInitializer::initColumnsPerRow().
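size_t QueryMemoryDescriptor::getPaddedColWidthForRange | ( | const size_t | offset, |
const size_t | range | ||
) | const |
inline |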
Definition at line 214 of file QueryMemoryDescriptor.h.
References getPaddedSlotWidthBytes().
Referenced by result_set::get_byteoff_of_slot(), and ResultSet::makeGeoTargetValue().
int64_t QueryMemoryDescriptor::getPaddedSlotBufferSize | ( | const size_t | slot_idx | ) | const |
Definition at line 944 of file QueryMemoryDescriptor.cpp.
References align_to_int64(), checkSlotUsesFlatBufferFormat(), entry_count_, getFlatBufferSize(), and getPaddedSlotWidthBytes().
Referenced by advance_to_next_columnar_target_buff().
const int8_t QueryMemoryDescriptor::getPaddedSlotWidthBytes | ( | const size_t | slot_idx | ) | const |
Definition at line 1189 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, ColSlotContext::getSlotInfo(), and SlotSize::padded_size.
Referenced by advance_target_ptr_row_wise(), TargetExprCodegen::codegen(), anonymous_namespace{GpuSharedMemoryUtils.cpp}::codegen_smem_dest_slot_ptr(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenOutputSlot(), compact_init_vals(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), ResultSet::copyColumnIntoBuffer(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), ResultSet::eachCellInColumn(), Executor::executePlanWithoutGroupBy(), result_set::get_width_for_slot(), getColOffInBytes(), getColOffInBytesInNextBin(), getNextColOffInBytes(), getNextColOffInBytesRowOnly(), getPaddedColWidthForRange(), getPaddedSlotBufferSize(), ResultSet::getPaddedSlotWidthBytes(), ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initColumnsPerRow(), inplace_sort_gpu(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSet::makeGeoTargetValue(), TargetExprCodegenBuilder::operator()(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetReductionJIT::reduceOneAggregateSlot(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceSingleRow(), and ResultSetStorage::rewriteAggregateBufferOffsets().
size_t QueryMemoryDescriptor::getPrependedGroupBufferSizeInBytes | ( | ) | const |
Definition at line 974 of file QueryMemoryDescriptor.cpp.
References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.
Referenced by getColOffInBytes().
size_t QueryMemoryDescriptor::getPrependedGroupColOffInBytes | ( | const size_t | group_idx | ) | const |
Definition at line 956 of file QueryMemoryDescriptor.cpp.
References align_to_int64(), CHECK, getEntryCount(), getGroupbyColCount(), groupColWidth(), and output_columnar_.
Referenced by ResultSetStorage::copyKeyColWise(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().
|
inline |
Definition at line 173 of file QueryMemoryDescriptor.h.
References query_desc_type_.
Referenced by ResultSetStorage::binSearchRowCount(), ResultSetReductionJIT::codegen(), GroupByAndAggregate::codegen(), GpuReductionHelperJIT::codegen(), GroupByAndAggregate::codegenAggCalls(), GroupByAndAggregate::codegenAggColumnPtr(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenOutputSlot(), Executor::collectAllDeviceResults(), copy_projection_buffer_from_gpu_columnar(), Executor::createKernels(), ResultSet::getQueryDescriptionType(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), anonymous_namespace{TargetExprBuilder.cpp}::is_columnar_projection(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), isSingleColumnGroupByWithPerfectHash(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), TargetExprCodegenBuilder::operator()(), ResultSetStorage::reduce(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), ResultSetReductionJIT::reduceOneEntryNoCollisionsIdx(), ExecutionKernel::run(), ExecutionKernel::runImpl(), target_exprs_to_infos(), and ResultSet::updateStorageEntryCount().
std::unique_ptr< QueryExecutionContext > QueryMemoryDescriptor::getQueryExecutionContext | ( | const RelAlgExecutionUnit & | ra_exe_unit, |
const Executor * | executor, | ||
const ExecutorDeviceType | device_type, | ||
const ExecutorDispatchMode | dispatch_mode, | ||
const int | device_id, | ||
const shared::TableKey & | outer_table_key, | ||
const int64_t | num_rows, | ||
const std::vector< std::vector< const int8_t * >> & | col_buffers, | ||
const std::vector< std::vector< uint64_t >> & | frag_offsets, | ||
std::shared_ptr< RowSetMemoryOwner > | row_set_mem_owner, | ||
const bool | output_columnar, | ||
const bool | sort_on_gpu, | ||
const size_t | thread_idx, | ||
RenderInfo * | render_info | ||
) | const |
Definition at line 698 of file QueryMemoryDescriptor.cpp.
References DEBUG_TIMER, and QueryExecutionContext.
Referenced by ExecutionKernel::runImpl().
size_t QueryMemoryDescriptor::getRowSize | ( | ) | const |
Definition at line 835 of file QueryMemoryDescriptor.cpp.
References align_to_int64(), CHECK, getColsSize(), getEffectiveKeyWidth(), group_col_widths_, heavyai::GroupByPerfectHash, keyless_hash_, output_columnar_, and query_desc_type_.
Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), ResultSetLogicalValuesBuilder::build(), GroupByAndAggregate::codegenGroupBy(), GroupByAndAggregate::codegenOutputSlot(), GroupByAndAggregate::codegenWindowRowPointer(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getBufferSizeBytes(), getColOffInBytesInNextBin(), QueryMemoryInitializer::initRowGroups(), anonymous_namespace{Execute.cpp}::permute_storage_row_wise(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), QueryMemoryDescriptor(), QueryMemoryInitializer::QueryMemoryInitializer(), and ResultSetStorage::reduceSingleRow().
size_t QueryMemoryDescriptor::getRowWidth | ( | ) | const |
Definition at line 1214 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, and ColSlotContext::getAllSlotsPaddedSize().
Referenced by get_row_bytes().
size_t QueryMemoryDescriptor::getSlotCount | ( | ) | const |
Definition at line 1185 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, and ColSlotContext::getSlotCount().
Referenced by QueryMemoryInitializer::allocateModeBuffer(), QueryMemoryInitializer::allocateTDigestsBuffer(), QueryMemoryInitializer::calculateCountDistinctBufferSize(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_count_distinct_expr_metadata(), compact_init_vals(), anonymous_namespace{QueryMemoryInitializer.cpp}::compact_projection_buffer_for_cpu_columnar(), copy_projection_buffer_from_gpu_columnar(), ResultSet::copyColumnIntoBuffer(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getNextColOffInBytes(), getNextColOffInBytesRowOnly(), QueryExecutionContext::groupBufferToDeinterleavedResults(), init_agg_val_vec(), QueryMemoryInitializer::initColumnarGroups(), QueryMemoryInitializer::initColumnsPerRow(), QueryMemoryInitializer::initializeModeIndexSet(), QueryMemoryInitializer::initializeQuantileParams(), inplace_sort_gpu(), anonymous_namespace{Execute.cpp}::permute_storage_columnar(), and ResultSetStorage::reduceSingleRow().
const int8_t QueryMemoryDescriptor::getSlotIndexForSingleSlotCol | ( | const size_t | col_idx | ) | const |
Definition at line 1203 of file QueryMemoryDescriptor.cpp.
References CHECK_EQ, col_slot_context_, and ColSlotContext::getSlotsForCol().
Referenced by QueryMemoryInitializer::allocateCountDistinctBuffers(), QueryMemoryInitializer::allocateModeBuffer(), QueryMemoryInitializer::allocateTDigestsBuffer(), QueryMemoryInitializer::calculateCountDistinctBufferSize(), anonymous_namespace{QueryMemoryInitializer.cpp}::check_count_distinct_expr_metadata(), QueryMemoryInitializer::initializeModeIndexSet(), and QueryMemoryInitializer::initializeQuantileParams().
|
inline |
Definition at line 243 of file QueryMemoryDescriptor.h.
References CHECK_LT, and target_groupby_indices_.
Referenced by ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetReductionJIT::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetReductionJIT::reduceOneSlot(), ResultSetStorage::reduceOneSlot(), and reductionKey().
|
inline |
Definition at line 186 of file QueryMemoryDescriptor.h.
References idx_target_as_key_.
Referenced by ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), ResultSetStorage::reduceSingleRow(), and reductionKey().
|
private |
Returns the maximum total number of bytes (including any required padding) needed to store the results of all non-lazy columns in the columnar case.
Definition at line 862 of file QueryMemoryDescriptor.cpp.
References CHECK, col_slot_context_, entry_count_, ColSlotContext::getTotalBytesOfColumnarBuffers(), and output_columnar_.
Referenced by getBufferSizeBytes(), and getTotalBytesOfColumnarProjections().
|
private |
Helper function that returns the total number of bytes (including any required padding) needed to store the results of all non-lazy columns in the columnar case.
Definition at line 871 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, and ColSlotContext::getTotalBytesOfColumnarBuffers().
|
private |
Returns the effective total number of bytes from columnar projections, which includes: 1) the total number of bytes used to store all non-lazy columns, and 2) the total number of bytes used to store row indices (for lazy fetches, etc.).
NOTE: this function does not return the size of the buffers dedicated to the results; it returns the memory required to copy all valid results into a new, compact buffer (with no holes in it).
Definition at line 885 of file QueryMemoryDescriptor.cpp.
References getTotalBytesOfColumnarBuffers().
size_t QueryMemoryDescriptor::getWarpCount | ( | ) | const |
Definition at line 849 of file QueryMemoryDescriptor.cpp.
References executor_, and interleaved_bins_on_gpu_.
Referenced by getColOffInBytes(), getColOffInBytesInNextBin(), and getNextColOffInBytes().
|
inline |
Definition at line 189 of file QueryMemoryDescriptor.h.
References CHECK_LT, and group_col_widths_.
Referenced by ResultSetStorage::copyKeyColWise(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), ResultSetStorage::isEmptyEntryColumnar(), and anonymous_namespace{Execute.cpp}::permute_storage_columnar().
|
inline |
Definition at line 196 of file QueryMemoryDescriptor.h.
References group_col_widths_.
|
inline |
Definition at line 197 of file QueryMemoryDescriptor.h.
References group_col_widths_.
|
inline |
Definition at line 183 of file QueryMemoryDescriptor.h.
References interleaved_bins_on_gpu_.
|
inline |
Definition at line 180 of file QueryMemoryDescriptor.h.
References keyless_hash_.
Referenced by GroupByAndAggregate::codegenMultiColumnPerfectHash(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), get_key_bytes_rowwise(), ResultSet::getTargetValueFromBufferRowwise(), GpuReductionHelperJIT::GpuReductionHelperJIT(), GpuSharedMemCodeBuilder::GpuSharedMemCodeBuilder(), QueryMemoryInitializer::initColumnarGroups(), ResultSetStorage::initializeColWise(), ResultSetStorage::initializeRowWise(), QueryMemoryInitializer::initRowGroups(), inplace_sort_gpu(), ResultSetReductionJIT::isEmpty(), ResultSetStorage::isEmptyEntry(), ResultSetStorage::isEmptyEntryColumnar(), QueryExecutionContext::launchGpuCode(), ResultSetStorage::moveEntriesToBuffer(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), QueryMemoryInitializer::QueryMemoryInitializer(), ResultSetStorage::reduceEntriesNoCollisionsColWise(), ResultSetStorage::reduceOneEntryBaseline(), ResultSetReductionJIT::reduceOneEntryBaselineIdx(), and ResultSetStorage::reduceSingleRow().
|
inline |
Definition at line 268 of file QueryMemoryDescriptor.h.
References has_nulls_.
Referenced by GroupByAndAggregate::codegenGroupBy().
|
inline |
Definition at line 358 of file QueryMemoryDescriptor.h.
References col_slot_context_, and ColSlotContext::hasVarlenOutput().
Referenced by QueryMemoryInitializer::applyStreamingTopNOffsetCpu(), QueryMemoryInitializer::applyStreamingTopNOffsetGpu(), GroupByAndAggregate::codegenVarlenOutputBuffer(), QueryMemoryInitializer::compactProjectionBuffersCpu(), QueryMemoryInitializer::compactProjectionBuffersGpu(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryExecutionContext::getRowSet(), query_group_by_template(), and QueryMemoryInitializer::QueryMemoryInitializer().
|
static |
Definition at line 240 of file QueryMemoryDescriptor.cpp.
References anonymous_namespace{QueryMemoryDescriptor.cpp}::get_col_byte_widths(), RelAlgExecutionUnit::groupby_exprs, and RelAlgExecutionUnit::target_exprs.
Referenced by GroupByAndAggregate::initQueryMemoryDescriptorImpl().
bool QueryMemoryDescriptor::interleavedBins | ( | const ExecutorDeviceType | device_type | ) | const |
Definition at line 1168 of file QueryMemoryDescriptor.cpp.
References GPU, and interleaved_bins_on_gpu_.
Referenced by canOutputColumnar(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), getBufferSizeBytes(), QueryExecutionContext::groupBufferToResults(), QueryMemoryInitializer::initGroupByBuffer(), and QueryMemoryInitializer::QueryMemoryInitializer().
|
inline |
Definition at line 200 of file QueryMemoryDescriptor.h.
References group_col_widths_.
Referenced by anonymous_namespace{TargetExprBuilder.cpp}::get_initial_agg_val(), init_agg_val_vec(), QueryMemoryInitializer::initColumnsPerRow(), QueryExecutionContext::launchCpuCode(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().
bool QueryMemoryDescriptor::isLogicalSizedColumnsAllowed | ( | ) | const |
Definition at line 1116 of file QueryMemoryDescriptor.cpp.
References g_cluster, output_columnar_, heavyai::Projection, query_desc_type_, and heavyai::TableFunction.
Referenced by TargetExprCodegen::codegenAggregate(), TargetExprCodegenBuilder::codegenMultiSlotSampleExpressions(), TargetExprCodegenBuilder::codegenSlotEmptyKey(), init_agg_val_vec(), ResultSet::makeTargetValue(), QueryMemoryDescriptor(), ResultSetStorage::reduceOneSlot(), ResultSetStorage::reduceOneSlotSingleValue(), and setOutputColumnar().
|
inline |
Definition at line 175 of file QueryMemoryDescriptor.h.
References getGroupbyColCount(), getQueryDescriptionType(), and heavyai::GroupByPerfectHash.
Referenced by GroupByAndAggregate::codegenGroupBy(), and ResultSet::getTargetValueFromBufferRowwise().
bool QueryMemoryDescriptor::isWarpSyncRequired | ( | const ExecutorDeviceType | device_type | ) | const |
Definition at line 1173 of file QueryMemoryDescriptor.cpp.
References executor_, and GPU.
Referenced by query_group_by_template().
bool QueryMemoryDescriptor::lazyInitGroups | ( | const ExecutorDeviceType | device_type | ) | const |
Definition at line 1163 of file QueryMemoryDescriptor.cpp.
References count_distinct_descriptors_, countDescriptorsLogicallyEmpty(), GPU, and render_output_.
Referenced by create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryMemoryInitializer::prepareTopNHeapsDevBuffer(), QueryMemoryInitializer::QueryMemoryInitializer(), and toString().
|
inlinestatic |
Definition at line 147 of file QueryMemoryDescriptor.h.
Referenced by blocksShareMemory().
|
inline |
Definition at line 292 of file QueryMemoryDescriptor.h.
References must_use_baseline_sort_.
Referenced by GroupByAndAggregate::codegenSingleColumnPerfectHash().
bool QueryMemoryDescriptor::operator== | ( | const QueryMemoryDescriptor & | other | ) | const |
Definition at line 631 of file QueryMemoryDescriptor.cpp.
References bucket_, col_slot_context_, count_distinct_descriptors_, force_4byte_float_, group_col_compact_width_, group_col_widths_, has_nulls_, idx_target_as_key_, interleaved_bins_on_gpu_, keyless_hash_, max_val_, min_val_, output_columnar_, query_desc_type_, sort_on_gpu_, target_groupby_indices_, and threads_can_reuse_group_by_buffers_.
|
static |
Definition at line 735 of file QueryMemoryDescriptor.cpp.
References CHECK, CHECK_EQ, g_bigint_count, anonymous_namespace{QueryMemoryDescriptor.cpp}::get_col_byte_widths(), Analyzer::UOper::get_operand(), Analyzer::Expr::get_type_info(), RelAlgExecutionUnit::groupby_exprs, RelAlgExecutionUnit::input_col_descs, anonymous_namespace{QueryMemoryDescriptor.cpp}::is_int_and_no_bigger_than(), kCOUNT, kENCODING_DICT, kUNNEST, and RelAlgExecutionUnit::target_exprs.
std::string QueryMemoryDescriptor::queryDescTypeToString | ( | ) | const |
Definition at line 1247 of file QueryMemoryDescriptor.cpp.
References heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, heavyai::NonGroupedAggregate, heavyai::Projection, query_desc_type_, heavyai::TableFunction, and UNREACHABLE.
Referenced by reductionKey().
std::string QueryMemoryDescriptor::reductionKey | ( | ) | const |
Definition at line 1293 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, getEffectiveKeyWidth(), getGroupbyColCount(), getTargetGroupbyIndex(), getTargetIdxForKey(), join(), keyless_hash_, queryDescTypeToString(), targetGroupbyIndicesSize(), to_string(), ColSlotContext::toString(), and toString().
Referenced by ResultSetReductionJIT::cacheKey(), and toString().
|
inlineprotected |
Definition at line 384 of file QueryMemoryDescriptor.h.
References group_col_widths_.
|
inline |
Definition at line 248 of file QueryMemoryDescriptor.h.
References target_groupby_indices_.
|
inline |
Definition at line 374 of file QueryMemoryDescriptor.h.
References num_available_threads_.
Referenced by ExecutionKernel::runImpl().
|
inline |
Definition at line 262 of file QueryMemoryDescriptor.h.
References entry_count_.
Referenced by Executor::executePlanWithGroupBy(), Executor::reduceMultiDeviceResultSets(), ResultSetStorage::updateEntryCount(), and ResultSet::updateStorageEntryCount().
|
inline |
Definition at line 305 of file QueryMemoryDescriptor.h.
References force_4byte_float_.
|
inline |
Definition at line 202 of file QueryMemoryDescriptor.h.
References group_col_compact_width_.
|
inline |
Definition at line 184 of file QueryMemoryDescriptor.h.
References interleaved_bins_on_gpu_.
|
inline |
void QueryMemoryDescriptor::setOutputColumnar | ( | const bool | val | ) |
Definition at line 1105 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, isLogicalSizedColumnsAllowed(), output_columnar_, and ColSlotContext::setAllSlotsPaddedSizeToLogicalSize().
void QueryMemoryDescriptor::setPaddedSlotWidthBytes | ( | const size_t | slot_idx, |
const int8_t | bytes | ||
) |
Definition at line 1193 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, and ColSlotContext::setPaddedSlotWidthBytes().
Referenced by TargetExprCodegenBuilder::operator()().
|
inline |
Definition at line 174 of file QueryMemoryDescriptor.h.
References query_desc_type_.
|
inline |
Definition at line 187 of file QueryMemoryDescriptor.h.
References idx_target_as_key_.
|
inline |
Definition at line 298 of file QueryMemoryDescriptor.h.
References threads_can_reuse_group_by_buffers_.
|
inline |
Definition at line 368 of file QueryMemoryDescriptor.h.
References col_slot_context_, and ColSlotContext::slotIsVarlen().
Referenced by advance_target_ptr_row_wise(), and ResultSet::makeGeoTargetValue().
|
inline |
Definition at line 282 of file QueryMemoryDescriptor.h.
References sort_on_gpu_.
Referenced by alignPaddedSlots(), QueryExecutionContext::launchGpuCode(), ExecutionKernel::runImpl(), and use_speculative_top_n().
|
inline |
Definition at line 252 of file QueryMemoryDescriptor.h.
References target_groupby_indices_.
Referenced by ResultSet::getTargetValueFromBufferColwise(), ResultSet::getTargetValueFromBufferRowwise(), ResultSetReductionJIT::reduceOneEntryBaseline(), ResultSetStorage::reduceOneEntrySlotsBaseline(), ResultSetReductionJIT::reduceOneEntryTargetsNoCollisions(), ResultSetReductionJIT::reduceOneSlot(), ResultSetStorage::reduceOneSlot(), and reductionKey().
|
inline |
Definition at line 253 of file QueryMemoryDescriptor.h.
References target_groupby_indices_.
|
inline |
Definition at line 294 of file QueryMemoryDescriptor.h.
References threads_can_reuse_group_by_buffers_.
Referenced by Executor::launchKernelsViaResourceMgr(), QueryMemoryInitializer::QueryMemoryInitializer(), and Executor::reduceMultiDeviceResults().
bool QueryMemoryDescriptor::threadsShareMemory | ( | ) | const |
Definition at line 1140 of file QueryMemoryDescriptor.cpp.
References heavyai::NonGroupedAggregate, and query_desc_type_.
Referenced by canOutputColumnar(), anonymous_namespace{GpuMemUtils.cpp}::coalesced_size(), TargetExprCodegen::codegen(), TargetExprCodegen::codegenAggregate(), GroupByAndAggregate::codegenGroupBy(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryExecutionContext::getRowSet(), QueryMemoryInitializer::QueryMemoryInitializer(), and toString().
std::string QueryMemoryDescriptor::toString | ( | ) | const |
Definition at line 1267 of file QueryMemoryDescriptor.cpp.
References allow_multifrag_, blocksShareMemory(), bucket_, entry_count_, executor_, g_enable_lazy_fetch, GPU, interleaved_bins_on_gpu_, lazyInitGroups(), max_val_, min_val_, must_use_baseline_sort_, output_columnar_, reductionKey(), render_output_, sort_on_gpu_, threadsShareMemory(), to_string(), use_streaming_top_n_, and usesGetGroupValueFast().
Referenced by Executor::createKernels(), and reductionKey().
|
static |
int8_t QueryMemoryDescriptor::updateActualMinByteWidth | ( | const int8_t | actual_min_byte_width | ) | const |
Definition at line 1219 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, and ColSlotContext::getMinPaddedByteSize().
void QueryMemoryDescriptor::useConsistentSlotWidthSize | ( | const int8_t | slot_width_size | ) |
Definition at line 1210 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, and ColSlotContext::setAllSlotsSize().
bool QueryMemoryDescriptor::usesGetGroupValueFast | ( | ) | const |
Definition at line 1135 of file QueryMemoryDescriptor.cpp.
References getGroupbyColCount(), heavyai::GroupByPerfectHash, and query_desc_type_.
Referenced by canOutputColumnar(), GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenSingleColumnPerfectHash(), and toString().
|
inline |
Definition at line 288 of file QueryMemoryDescriptor.h.
References use_streaming_top_n_.
Referenced by GroupByAndAggregate::codegen(), GroupByAndAggregate::codegenOutputSlot(), QueryMemoryInitializer::copyGroupByBuffersFromGpu(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryMemoryInitializer::initGroupByBuffer(), QueryExecutionContext::launchCpuCode(), and QueryExecutionContext::launchGpuCode().
std::optional< size_t > QueryMemoryDescriptor::varlenOutputBufferElemSize | ( | ) | const |
Definition at line 1339 of file QueryMemoryDescriptor.cpp.
References col_slot_context_, ColSlotContext::getSlotCount(), and ColSlotContext::varlenOutputElementSize().
Referenced by TargetExprCodegen::codegenAggregate(), create_dev_group_by_buffers(), QueryMemoryInitializer::createAndInitializeGroupByBufferGpu(), QueryExecutionContext::launchGpuCode(), and QueryMemoryInitializer::QueryMemoryInitializer().
size_t QueryMemoryDescriptor::varlenOutputRowSizeToSlot | ( | const size_t | slot_idx | ) | const |
Definition at line 1355 of file QueryMemoryDescriptor.cpp.
References CHECK_LT, col_slot_context_, ColSlotContext::getSlotCount(), and ColSlotContext::varlenOutputElementSize().
Referenced by TargetExprCodegen::codegenAggregate().
|
friend |
Definition at line 432 of file QueryMemoryDescriptor.h.
Referenced by getQueryExecutionContext().
|
friend |
Definition at line 431 of file QueryMemoryDescriptor.h.
|
private |
Definition at line 390 of file QueryMemoryDescriptor.h.
Referenced by toString().
|
private |
Definition at line 409 of file QueryMemoryDescriptor.h.
Referenced by getApproxQuantileDescriptors().
|
private |
Definition at line 407 of file QueryMemoryDescriptor.h.
Referenced by blocksShareMemory(), getBucket(), operator==(), and toString().
|
private |
Definition at line 419 of file QueryMemoryDescriptor.h.
Referenced by addColSlotInfo(), addColSlotInfoFlatBuffer(), alignPaddedSlots(), checkSlotUsesFlatBufferFormat(), clearSlotInfo(), getBufferColSlotCount(), getColCount(), getColOnlyOffInBytes(), getColSlotContext(), getColsSize(), getCompactByteWidth(), getFlatBufferSize(), getLogicalSlotWidthBytes(), getPaddedSlotWidthBytes(), getRowWidth(), getSlotCount(), getSlotIndexForSingleSlotCol(), getTotalBytesOfColumnarBuffers(), hasVarlenOutput(), operator==(), QueryMemoryDescriptor(), reductionKey(), setOutputColumnar(), setPaddedSlotWidthBytes(), slotIsVarlenOutput(), updateActualMinByteWidth(), useConsistentSlotWidthSize(), varlenOutputBufferElemSize(), and varlenOutputRowSizeToSlot().
|
private |
Definition at line 410 of file QueryMemoryDescriptor.h.
Referenced by blocksShareMemory(), canOutputColumnar(), countDistinctDescriptorsLogicallyEmpty(), getCountDistinctDescriptor(), getCountDistinctDescriptorsSize(), ResultSet::getTargetValueFromBufferRowwise(), lazyInitGroups(), operator==(), and QueryMemoryDescriptor().
|
private |
Definition at line 403 of file QueryMemoryDescriptor.h.
Referenced by getBufferSizeBytes(), getColOffInBytes(), getEntryCount(), getNextColOffInBytes(), getPaddedSlotBufferSize(), getTotalBytesOfColumnarBuffers(), setEntryCount(), and toString().
|
private |
Definition at line 389 of file QueryMemoryDescriptor.h.
Referenced by blocksShareMemory(), getBufferSizeBytes(), getExecutor(), getWarpCount(), isWarpSyncRequired(), QueryMemoryDescriptor(), and toString().
|
private |
Definition at line 417 of file QueryMemoryDescriptor.h.
Referenced by forceFourByteFloat(), operator==(), and setForceFourByteFloat().
|
private |
Definition at line 399 of file QueryMemoryDescriptor.h.
Referenced by getEffectiveKeyWidth(), operator==(), and setGroupColCompactWidth().
|
private |
Definition at line 398 of file QueryMemoryDescriptor.h.
Referenced by clearGroupColWidths(), getBufferSizeBytes(), getColOffInBytes(), getColOffInBytesInNextBin(), getGroupbyColCount(), getNextColOffInBytes(), getRowSize(), groupColWidth(), groupColWidthsBegin(), groupColWidthsEnd(), isGroupBy(), operator==(), and resetGroupColWidths().
|
private |
Definition at line 408 of file QueryMemoryDescriptor.h.
Referenced by hasNulls(), and operator==().
|
private |
Definition at line 394 of file QueryMemoryDescriptor.h.
Referenced by getTargetIdxForKey(), operator==(), and setTargetIdxForKey().
|
private |
Definition at line 393 of file QueryMemoryDescriptor.h.
Referenced by getWarpCount(), hasInterleavedBinsOnGpu(), interleavedBins(), operator==(), setHasInterleavedBinsOnGpu(), and toString().
|
private |
Definition at line 392 of file QueryMemoryDescriptor.h.
Referenced by getBufferSizeBytes(), getColOffInBytes(), getKeyCount(), getRowSize(), hasKeylessHash(), operator==(), QueryMemoryDescriptor(), reductionKey(), and setHasKeylessHash().
|
private |
Definition at line 406 of file QueryMemoryDescriptor.h.
Referenced by blocksShareMemory(), getMaxVal(), operator==(), and toString().
|
private |
Definition at line 404 of file QueryMemoryDescriptor.h.
Referenced by blocksShareMemory(), getMinVal(), operator==(), and toString().
|
private |
Definition at line 414 of file QueryMemoryDescriptor.h.
Referenced by mustUseBaselineSort(), and toString().
|
mutableprivate |
Definition at line 425 of file QueryMemoryDescriptor.h.
Referenced by getAvailableCpuThreads(), and setAvailableCpuThreads().
|
private |
Definition at line 412 of file QueryMemoryDescriptor.h.
Referenced by didOutputColumnar(), getBufferSizeBytes(), getColOffInBytes(), getColOffInBytesInNextBin(), getNextColOffInBytes(), getPrependedGroupBufferSizeInBytes(), getPrependedGroupColOffInBytes(), getRowSize(), getTotalBytesOfColumnarBuffers(), isLogicalSizedColumnsAllowed(), operator==(), QueryMemoryDescriptor(), setOutputColumnar(), and toString().
|
private |
Definition at line 391 of file QueryMemoryDescriptor.h.
Referenced by blocksShareMemory(), canUsePerDeviceCardinality(), getBufferSizeBytes(), getColOffInBytes(), getQueryDescriptionType(), getRowSize(), isLogicalSizedColumnsAllowed(), operator==(), queryDescTypeToString(), QueryMemoryDescriptor(), setQueryDescriptionType(), threadsShareMemory(), and usesGetGroupValueFast().
|
private |
Definition at line 413 of file QueryMemoryDescriptor.h.
Referenced by blocksShareMemory(), lazyInitGroups(), and toString().
|
private |
Definition at line 411 of file QueryMemoryDescriptor.h.
Referenced by operator==(), QueryMemoryDescriptor(), sortOnGpu(), and toString().
|
private |
Definition at line 402 of file QueryMemoryDescriptor.h.
Referenced by clearTargetGroupbyIndices(), getBufferColSlotCount(), getTargetGroupbyIndex(), operator==(), setAllTargetGroupbyIndices(), targetGroupbyIndicesSize(), and targetGroupbyNegativeIndicesSize().
|
private |
Definition at line 416 of file QueryMemoryDescriptor.h.
Referenced by operator==(), setThreadsCanReuseGroupByBuffers(), and threadsCanReuseGroupByBuffers().
|
private |
Definition at line 415 of file QueryMemoryDescriptor.h.
Referenced by getBufferSizeBytes(), QueryMemoryDescriptor(), toString(), and useStreamingTopN().