#ifndef QUERYENGINE_QUERYMEMORYDESCRIPTOR_H
#define QUERYENGINE_QUERYMEMORYDESCRIPTOR_H

#include <boost/optional.hpp>
#include <unordered_map>

class TResultSetBufferDescriptor;

using heavyai::QueryDescriptionType;
StreamingTopNOOM(const size_t heap_size_bytes)
    : std::runtime_error("Unable to use streaming top N due to required heap size of " +
                         std::to_string(heap_size_bytes) +
                         " bytes exceeding maximum slab size.") {}
QueryMemoryDescriptor(
    // ...
    const std::vector<InputTableInfo>& query_infos,
    const bool allow_multifrag,
    const bool keyless_hash,
    const bool interleaved_bins_on_gpu,
    const int32_t idx_target_as_key,
    // ...
    const std::vector<int8_t>& group_col_widths,
    const int8_t group_col_compact_width,
    const std::vector<int64_t>& target_groupby_indices,
    const size_t entry_count,
    // ...
    const bool sort_on_gpu_hint,
    const bool output_columnar,
    const bool render_output,
    const bool must_use_baseline_sort,
    // ...
    const bool threads_can_reuse_group_by_buffers);
QueryMemoryDescriptor(const Executor* executor,
                      const size_t entry_count,
                      const QueryDescriptionType query_desc_type);
QueryMemoryDescriptor(const QueryDescriptionType query_desc_type,
                      const int64_t min_val,
                      const int64_t max_val,
                      const bool has_nulls,
                      const std::vector<int8_t>& group_col_widths);
static std::unique_ptr<QueryMemoryDescriptor> init(
    const Executor* executor,
    const RelAlgExecutionUnit& ra_exe_unit,
    const std::vector<InputTableInfo>& query_infos,
    const ColRangeInfo& col_range_info,
    const KeylessInfo& keyless_info,
    const bool allow_multifrag,
    const ExecutorDeviceType device_type,
    const int8_t crt_min_byte_width,
    const bool sort_on_gpu_hint,
    const size_t shard_count,
    const size_t max_groups_buffer_entry_count,
    RenderInfo* render_info,
    const ApproxQuantileDescriptors&,
    const CountDistinctDescriptors,
    const bool must_use_baseline_sort,
    const bool output_columnar_hint,
    const bool streaming_top_n_hint,
    const bool threads_can_reuse_group_by_buffers);
std::unique_ptr<QueryExecutionContext> getQueryExecutionContext(
    const RelAlgExecutionUnit&, const Executor* executor,
    const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode,
    const int device_id, const shared::TableKey& outer_table_key,
    const int64_t num_rows,
    const std::vector<std::vector<const int8_t*>>& col_buffers,
    const std::vector<std::vector<uint64_t>>& frag_offsets,
    std::shared_ptr<RowSetMemoryOwner>,
    const bool output_columnar, const bool sort_on_gpu,
    const size_t thread_idx, RenderInfo*) const;
static bool many_entries(const int64_t max_val,
                         const int64_t min_val,
                         const int64_t bucket) {
  return max_val - min_val > 10000 * std::max(bucket, int64_t(1));
}
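// Worked example: with max_val = 50'000, min_val = 0 and bucket = 1 the span 50'000 exceeds
// 10'000 * 1, so many_entries() returns true; with bucket = 10 the threshold grows to
// 100'000 and the same span no longer qualifies.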
static bool countDescriptorsLogicallyEmpty(
    const CountDistinctDescriptors& count_distinct_descriptors) {
  return std::all_of(count_distinct_descriptors.begin(),
                     count_distinct_descriptors.end(),
                     [](const CountDistinctDescriptor& desc) { return desc.impl_type_ == CountDistinctImplType::Invalid; });
}
static int8_t pick_target_compact_width(const RelAlgExecutionUnit& ra_exe_unit,
                                        const std::vector<InputTableInfo>& query_infos,
                                        const int8_t crt_min_byte_width);
// Body of getPaddedColWidthForRange(offset, range): sum the padded slot widths over [offset, offset + range).
for (size_t i = offset; i < offset + range; i++) {
  ret += static_cast<size_t>(getPaddedSlotWidthBytes(i));
}
void addColSlotInfo(const std::vector<std::tuple<int8_t, int8_t>>& slots_for_col);
size_t targetGroupbyNegativeIndicesSize() const {
  return std::count_if(
      target_groupby_indices_.begin(),
      target_groupby_indices_.end(),
      [](const int64_t& target_group_by_index) { return target_group_by_index < 0; });
}
size_t getBufferSizeBytes(const RelAlgExecutionUnit& ra_exe_unit,
                          const unsigned thread_count,
                          const ExecutorDeviceType device_type) const;
size_t getBufferSizeBytes(const ExecutorDeviceType device_type,
                          const size_t override_entry_count) const;
// In set_notnull(TargetInfo& target, const bool not_null): the target's compact type is
// copied, its nullability updated, and the result written back via set_compact_type().
new_type.set_notnull(not_null);
std::vector<TargetInfo> target_exprs_to_infos(const std::vector<Analyzer::Expr*>& targets,
                                              const QueryMemoryDescriptor& query_mem_desc);
#endif  // QUERYENGINE_QUERYMEMORYDESCRIPTOR_H
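A minimal usage sketch, assuming the five-argument constructor excerpted above takes the
description type first and that QueryDescriptionType::GroupByPerfectHash is available; the
range and widths are illustrative only:

  // Describe a one-column perfect-hash group-by over the key range [0, 100].
  std::vector<int8_t> group_col_widths{8};
  QueryMemoryDescriptor query_mem_desc(QueryDescriptionType::GroupByPerfectHash,
                                       /*min_val=*/0,
                                       /*max_val=*/100,
                                       /*has_nulls=*/false,
                                       group_col_widths);
  // The accessors listed below expose what was passed in.
  const auto n_group_cols = query_mem_desc.getGroupbyColCount();  // 1
  const auto lo = query_mem_desc.getMinVal();                     // 0
  const auto hi = query_mem_desc.getMaxVal();                     // 100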
size_t varlenOutputRowSizeToSlot(const size_t slot_idx) const
void set_compact_type(TargetInfo &target, const SQLTypeInfo &new_type)
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
void addColSlotInfoFlatBuffer(const int64_t flatbuffer_size)
ColSlotContext col_slot_context_
size_t getSlotCount() const
bool canUsePerDeviceCardinality(const RelAlgExecutionUnit &ra_exe_unit) const
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
bool slotIsVarlenOutput(const size_t slot_idx) const
bool countDistinctDescriptorsLogicallyEmpty() const
size_t getEntryCount() const
size_t getKeyCount() const
const Executor * executor_
bool slotIsVarlen(const size_t slot_idx) const
size_t getTotalBytesOfColumnarProjections(const size_t projection_count) const
void setEntryCount(const size_t val)
size_t getAvailableCpuThreads() const
int32_t idx_target_as_key_
int64_t getTargetGroupbyIndex(const size_t target_idx) const
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
std::string toString() const
bool isLogicalSizedColumnsAllowed() const
bool mustUseBaselineSort() const
bool hasVarlenOutput() const
void setHasKeylessHash(const bool val)
void setGroupColCompactWidth(const int8_t val)
void setThreadsCanReuseGroupByBuffers(const bool val)
static std::unique_ptr< QueryMemoryDescriptor > init(const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const ApproxQuantileDescriptors &, const CountDistinctDescriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint, const bool threads_can_reuse_group_by_buffers)
void setOutputColumnar(const bool val)
bool hasKeylessHash() const
size_t getColCount() const
bool threads_can_reuse_group_by_buffers_
const ApproxQuantileDescriptors & getApproxQuantileDescriptors() const
size_t getNextColOffInBytes(const int8_t *col_ptr, const size_t bin, const size_t col_idx) const
size_t getEffectiveKeyWidth() const
bool use_streaming_top_n(const RelAlgExecutionUnit &ra_exe_unit, const bool output_columnar)
std::vector< ApproxQuantileDescriptor > ApproxQuantileDescriptors
void setQueryDescriptionType(const QueryDescriptionType val)
size_t targetGroupbyNegativeIndicesSize() const
bool hasInterleavedBinsOnGpu() const
size_t getWarpCount() const
const auto groupColWidthsBegin() const
std::optional< size_t > getMaxPerDeviceCardinality(const RelAlgExecutionUnit &ra_exe_unit) const
size_t getRowSize() const
void useConsistentSlotWidthSize(const int8_t slot_width_size)
size_t getColOnlyOffInBytes(const size_t col_idx) const
int64_t getMaxVal() const
const SQLTypeInfo get_compact_type(const TargetInfo &target)
bool blocksShareMemory() const
bool forceFourByteFloat() const
const auto groupColWidthsEnd() const
int8_t groupColWidth(const size_t key_idx) const
void setTargetIdxForKey(const int32_t val)
size_t getCompactByteWidth() const
bool useStreamingTopN() const
size_t num_available_threads_
std::vector< CountDistinctDescriptor > CountDistinctDescriptors
Provides column info and slot info for the output buffer and some metadata helpers.
size_t getGroupbyColCount() const
bool lazyInitGroups(const ExecutorDeviceType) const
size_t targetGroupbyIndicesSize() const
bool threadsCanReuseGroupByBuffers() const
size_t getPrependedGroupBufferSizeInBytes() const
size_t getTotalBytesOfColumnarBuffers() const
std::vector< int64_t > target_groupby_indices_
static int8_t pick_target_compact_width(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &query_infos, const int8_t crt_min_byte_width)
CountDistinctDescriptors count_distinct_descriptors_
bool interleaved_bins_on_gpu_
size_t getPaddedColWidthForRange(const size_t offset, const size_t range) const
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
int64_t getPaddedSlotBufferSize(const size_t slot_idx) const
static TResultSetBufferDescriptor toThrift(const QueryMemoryDescriptor &)
size_t getCountDistinctDescriptorsSize() const
QueryDescriptionType getQueryDescriptionType() const
int64_t getBucket() const
const CountDistinctDescriptor & getCountDistinctDescriptor(const size_t idx) const
std::optional< size_t > varlenOutputBufferElemSize() const
bool isSingleColumnGroupByWithPerfectHash() const
size_t getNextColOffInBytesRowOnly(const int8_t *col_ptr, const size_t col_idx) const
bool checkSlotUsesFlatBufferFormat(const size_t slot_idx) const
StreamingTopNOOM(const size_t heap_size_bytes)
QueryDescriptionType query_desc_type_
void setAvailableCpuThreads(size_t num_available_threads) const
Functions used to work with (approximate) count distinct sets.
int8_t group_col_compact_width_
int8_t updateActualMinByteWidth(const int8_t actual_min_byte_width) const
QueryEngine enum classes with minimal #include files.
bool operator==(const QueryMemoryDescriptor &other) const
void setForceFourByteFloat(const bool val)
size_t getColsSize() const
bool canOutputColumnar() const
bool didOutputColumnar() const
bool usesGetGroupValueFast() const
bool isWarpSyncRequired(const ExecutorDeviceType) const
bool interleavedBins(const ExecutorDeviceType) const
const ColSlotContext & getColSlotContext() const
bool threadsShareMemory() const
std::vector< int8_t > group_col_widths_
size_t getRowWidth() const
int64_t getMinVal() const
void setAllTargetGroupbyIndices(std::vector< int64_t > group_by_indices)
void setPaddedSlotWidthBytes(const size_t slot_idx, const int8_t bytes)
void resetGroupColWidths(const std::vector< int8_t > &new_group_col_widths)
void clearTargetGroupbyIndices()
std::vector< TargetInfo > target_exprs_to_infos(const std::vector< Analyzer::Expr * > &targets, const QueryMemoryDescriptor &query_mem_desc)
bool must_use_baseline_sort_
std::string queryDescTypeToString() const
void clearGroupColWidths()
void addColSlotInfo(const std::vector< std::tuple< int8_t, int8_t >> &slots_for_col)
void setHasInterleavedBinsOnGpu(const bool val)
bool use_streaming_top_n_
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
int64_t getFlatBufferSize(const size_t slot_idx) const
const int8_t getSlotIndexForSingleSlotCol(const size_t col_idx) const
size_t getBufferColSlotCount() const
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
size_t getColOffInBytes(const size_t col_idx) const
ApproxQuantileDescriptors approx_quantile_descriptors_
size_t getColOffInBytesInNextBin(const size_t col_idx) const
std::unique_ptr< QueryExecutionContext > getQueryExecutionContext(const RelAlgExecutionUnit &, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const shared::TableKey &outer_table_key, const int64_t num_rows, const std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< std::vector< uint64_t >> &frag_offsets, std::shared_ptr< RowSetMemoryOwner >, const bool output_columnar, const bool sort_on_gpu, const size_t thread_idx, RenderInfo *) const
std::string reductionKey() const
const Executor * getExecutor() const
void set_notnull(TargetInfo &target, const bool not_null)
int32_t getTargetIdxForKey() const
size_t getPrependedGroupColOffInBytes(const size_t group_idx) const
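The layout accessors above are enough to walk an output buffer without re-deriving how it was
built. A minimal sketch, assuming a row-wise (non-columnar) descriptor; query_mem_desc is a
hypothetical, already-initialized instance:

  // Sum the padded widths of all output slots; for row-wise layouts this, together with the
  // group-by key bytes, is what getRowSize() accounts for.
  size_t padded_slot_bytes = 0;
  for (size_t slot_idx = 0; slot_idx < query_mem_desc.getSlotCount(); ++slot_idx) {
    padded_slot_bytes += static_cast<size_t>(query_mem_desc.getPaddedSlotWidthBytes(slot_idx));
  }
  const size_t row_bytes = query_mem_desc.getRowSize();
  // Rough total for the row-wise case: one bin per entry; varlen output buffers not included.
  const size_t approx_buffer_bytes = row_bytes * query_mem_desc.getEntryCount();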