40 #include <tbb/parallel_for.h>
55 invalidateCachedRowCount();
60 invalidateCachedRowCount();
67 const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
68 const unsigned block_size,
69 const unsigned grid_size)
71 , device_type_(device_type)
74 , query_mem_desc_(query_mem_desc)
75 , crt_row_buff_idx_(0)
79 , row_set_mem_owner_(row_set_mem_owner)
80 , block_size_(block_size)
81 , grid_size_(grid_size)
83 , separate_varlen_storage_valid_(false)
84 , just_explain_(false)
85 , for_validation_only_(false)
91 , can_use_speculative_top_n_sort(std::nullopt) {}
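// Constructor for result sets that carry per-fragment column buffers, fragment offsets, and lazy-fetch metadata alongside the query memory descriptor.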
94 const std::vector<ColumnLazyFetchInfo>& lazy_fetch_info,
95 const std::vector<std::vector<const int8_t*>>& col_buffers,
96 const std::vector<std::vector<int64_t>>& frag_offsets,
97 const std::vector<int64_t>& consistent_frag_sizes,
100 const int thread_idx,
102 const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
103 const unsigned block_size,
104 const unsigned grid_size)
106 , device_type_(device_type)
107 , device_id_(device_id)
109 , query_mem_desc_(query_mem_desc)
110 , crt_row_buff_idx_(0)
114 , row_set_mem_owner_(row_set_mem_owner)
115 , block_size_(block_size)
116 , grid_size_(grid_size)
117 , lazy_fetch_info_(lazy_fetch_info)
118 , col_buffers_{col_buffers}
119 , frag_offsets_{frag_offsets}
120 , consistent_frag_sizes_{consistent_frag_sizes}
122 , separate_varlen_storage_valid_(false)
123 , just_explain_(false)
124 , for_validation_only_(false)
126 , geo_return_type_(GeoReturnType::WktString)
128 , query_exec_time_(0)
130 , can_use_speculative_top_n_sort(std::nullopt) {}
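// Estimator constructor: on GPU it allocates a device estimator buffer and zeroes it; on CPU it calloc's a host-side buffer sized by the estimator.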
136 : device_type_(device_type)
137 , device_id_(device_id)
140 , crt_row_buff_idx_(0)
141 , estimator_(estimator)
143 , separate_varlen_storage_valid_(false)
144 , just_explain_(false)
145 , for_validation_only_(false)
147 , geo_return_type_(GeoReturnType::WktString)
149 , query_exec_time_(0)
151 , can_use_speculative_top_n_sort(std::nullopt) {
154 data_mgr_, estimator_->getBufferSize(), device_id_);
155 data_mgr->getCudaMgr()->zeroDeviceMem(device_estimator_buffer_->getMemoryPtr(),
156 estimator_->getBufferSize(),
160 host_estimator_buffer_ =
161 static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
170 , separate_varlen_storage_valid_(false)
171 , explanation_(explanation)
172 , just_explain_(true)
173 , for_validation_only_(false)
177 , query_exec_time_(0)
179 , can_use_speculative_top_n_sort(std::nullopt) {}
182 int64_t render_time_ms,
183 const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner)
188 , row_set_mem_owner_(row_set_mem_owner)
190 , separate_varlen_storage_valid_(false)
191 , just_explain_(true)
192 , for_validation_only_(false)
194 , geo_return_type_(GeoReturnType::WktString)
196 , query_exec_time_(0)
198 , can_use_speculative_top_n_sort(std::nullopt) {}
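// ~ResultSet(): frees row buffers owned by the primary and appended storage, the host estimator buffer, and the device estimator buffer.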
202 if (!storage_->buff_is_provided_) {
203 CHECK(storage_->getUnderlyingBuffer());
204 free(storage_->getUnderlyingBuffer());
207 for (auto& storage : appended_storage_) {
208 if (storage && !storage->buff_is_provided_) {
209 free(storage->getUnderlyingBuffer());
212 if (host_estimator_buffer_) {
214 free(host_estimator_buffer_);
216 if (device_estimator_buffer_) {
218 data_mgr_->free(device_estimator_buffer_);
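// summaryToString(): builds a human-readable report of the result set layout, column/row/entry counts, columnar properties, permutation size, limit, and offset.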
223 std::ostringstream oss;
224 oss << "Result Set Info" << std::endl;
225 oss << "\tLayout: " << query_mem_desc_.queryDescTypeToString() << std::endl;
226 oss << "\tColumns: " << colCount() << std::endl;
227 oss << "\tRows: " << rowCount() << std::endl;
228 oss << "\tEntry count: " << entryCount() << std::endl;
229 const std::string is_empty = isEmpty() ? "True" : "False";
230 oss << "\tIs empty: " << is_empty << std::endl;
231 const std::string did_output_columnar = didOutputColumnar() ? "True" : "False";
232 oss << "\tColumnar: " << did_output_columnar << std::endl;
233 oss << "\tLazy-fetched columns: " << getNumColumnsLazyFetched() << std::endl;
234 const std::string is_direct_columnar_conversion_possible =
235 isDirectColumnarConversionPossible() ? "True" : "False";
236 oss << "\tDirect columnar conversion possible: "
237 << is_direct_columnar_conversion_possible << std::endl;
239 size_t num_columns_zero_copy_columnarizable{0};
240 for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
241 if (isZeroCopyColumnarConversionPossible(target_idx)) {
242 num_columns_zero_copy_columnarizable++;
245 oss << "\tZero-copy columnar conversion columns: "
246 << num_columns_zero_copy_columnarizable << std::endl;
248 oss << "\tPermutation size: " << permutation_.size() << std::endl;
249 oss << "\tLimit: " << keep_first_ << std::endl;
250 oss << "\tOffset: " << drop_first_ << std::endl;
260 CHECK(row_set_mem_owner_);
261 auto buff = row_set_mem_owner_->allocate(
262 query_mem_desc_.getBufferSizeBytes(device_type_), 0);
265 return storage_.get();
270 const std::vector<int64_t>& target_init_vals,
271 std::shared_ptr<VarlenOutputInfo> varlen_output_info) const {
274 storage_.reset(new ResultSetStorage(targets_, query_mem_desc_, buff, true));
276 storage_->target_init_vals_ = target_init_vals;
277 if (varlen_output_info) {
278 storage_->varlen_output_info_ = varlen_output_info;
280 return storage_.get();
284 const std::vector<int64_t>& target_init_vals) const {
286 CHECK(row_set_mem_owner_);
287 auto buff = row_set_mem_owner_->allocate(
288 query_mem_desc_.getBufferSizeBytes(device_type_), 0);
291 storage_->target_init_vals_ = target_init_vals;
292 return storage_.get();
296 if (crt_row_buff_idx_ == 0) {
297 throw std::runtime_error("current row buffer iteration index is undefined");
299 return crt_row_buff_idx_ - 1;
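// append(): merges another ResultSet into this one by moving its storage, summing entry counts, and concatenating chunks, column buffers, fragment offsets, varlen storage, and literal buffers.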
304 invalidateCachedRowCount();
305 if (!that.storage_) {
308 appended_storage_.push_back(std::move(that.storage_));
309 query_mem_desc_.setEntryCount(
310 query_mem_desc_.getEntryCount() +
311 appended_storage_.back()->query_mem_desc_.getEntryCount());
312 chunks_.insert(chunks_.end(), that.chunks_.begin(), that.chunks_.end());
314 col_buffers_.end(), that.col_buffers_.begin(), that.col_buffers_.end());
315 frag_offsets_.insert(
316 frag_offsets_.end(), that.frag_offsets_.begin(), that.frag_offsets_.end());
317 consistent_frag_sizes_.insert(consistent_frag_sizes_.end(),
318 that.consistent_frag_sizes_.begin(),
319 that.consistent_frag_sizes_.end());
321 chunk_iters_.end(), that.chunk_iters_.begin(), that.chunk_iters_.end());
322 if (separate_varlen_storage_valid_) {
323 CHECK(that.separate_varlen_storage_valid_);
324 serialized_varlen_buffer_.insert(serialized_varlen_buffer_.end(),
325 that.serialized_varlen_buffer_.begin(),
326 that.serialized_varlen_buffer_.end());
328 for (auto& buff : that.literal_buffers_) {
329 literal_buffers_.push_back(std::move(buff));
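// copy(): builds a new ResultSet with the same targets, deep-copies each storage buffer via the allocate_and_copy_storage lambda, then replicates chunks, buffers, permutation, and bookkeeping fields.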
339 auto executor = getExecutor();
341 ResultSetPtr copied_rs = std::make_shared<ResultSet>(targets_,
345 executor->blockSize(),
346 executor->gridSize());
348 auto allocate_and_copy_storage =
349 [&](const ResultSetStorage* prev_storage) -> std::unique_ptr<ResultSetStorage> {
350 const auto& prev_qmd = prev_storage->query_mem_desc_;
351 const auto storage_size = prev_qmd.getBufferSizeBytes(device_type_);
352 auto buff = row_set_mem_owner_->allocate(storage_size, 0);
353 std::unique_ptr<ResultSetStorage> new_storage;
355 prev_storage->targets_, prev_qmd, buff, true));
356 new_storage->target_init_vals_ = prev_storage->target_init_vals_;
357 if (prev_storage->varlen_output_info_) {
358 new_storage->varlen_output_info_ = prev_storage->varlen_output_info_;
360 memcpy(new_storage->buff_, prev_storage->buff_, storage_size);
361 new_storage->query_mem_desc_ = prev_qmd;
365 copied_rs->storage_ = allocate_and_copy_storage(storage_.get());
366 if (!appended_storage_.empty()) {
367 for (const auto& storage : appended_storage_) {
368 copied_rs->appended_storage_.push_back(allocate_and_copy_storage(storage.get()));
371 std::copy(chunks_.begin(), chunks_.end(), std::back_inserter(copied_rs->chunks_));
374 std::back_inserter(copied_rs->chunk_iters_));
377 std::back_inserter(copied_rs->col_buffers_));
380 std::back_inserter(copied_rs->frag_offsets_));
381 std::copy(consistent_frag_sizes_.begin(),
382 consistent_frag_sizes_.end(),
383 std::back_inserter(copied_rs->consistent_frag_sizes_));
384 if (separate_varlen_storage_valid_) {
385 std::copy(serialized_varlen_buffer_.begin(),
386 serialized_varlen_buffer_.end(),
387 std::back_inserter(copied_rs->serialized_varlen_buffer_));
390 literal_buffers_.end(),
391 std::back_inserter(copied_rs->literal_buffers_));
393 lazy_fetch_info_.end(),
394 std::back_inserter(copied_rs->lazy_fetch_info_));
396 copied_rs->permutation_ = permutation_;
397 copied_rs->drop_first_ = drop_first_;
398 copied_rs->keep_first_ = keep_first_;
399 copied_rs->separate_varlen_storage_valid_ = separate_varlen_storage_valid_;
400 copied_rs->query_exec_time_ = query_exec_time_;
401 copied_rs->input_table_keys_ = input_table_keys_;
402 copied_rs->target_meta_info_ = target_meta_info_;
403 copied_rs->geo_return_type_ = geo_return_type_;
404 copied_rs->query_plan_ = query_plan_;
405 if (can_use_speculative_top_n_sort) {
406 copied_rs->can_use_speculative_top_n_sort = can_use_speculative_top_n_sort;
413 return storage_.get();
417 return just_explain_ ? 1 : targets_.size();
426 : targets_[col_idx].sql_type;
431 constexpr bool with_generation = true;
433 ? row_set_mem_owner_->getOrAddStringDictProxy(dict_key, with_generation)
434 : row_set_mem_owner_->getStringDictProxy(dict_key);
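// CellCallback rewrites dictionary-encoded string ids in place using id_map_; translateDictEncodedColumns remaps targets whose dictionary key differs from the storage's target type.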
445 using StringId = int32_t;
446 StringId* const string_id_ptr =
447 const_cast<StringId*>(reinterpret_cast<StringId const*>(cell_ptr));
449 *string_id_ptr = id_map_[*string_id_ptr];
460 size_t const start_idx) {
462 CHECK_EQ(targets.size(), storage_->targets_.size());
464 for (size_t target_idx = start_idx; target_idx < targets.size(); ++target_idx) {
465 auto const& type_lhs = targets[target_idx].sql_type;
466 if (type_lhs.is_dict_encoded_string()) {
468 const_cast<SQLTypeInfo&>(storage_->targets_[target_idx].sql_type);
469 CHECK(type_rhs.is_dict_encoded_string());
470 if (type_lhs.getStringDictKey() != type_rhs.getStringDictKey()) {
471 auto* const sdp_lhs = getStringDictionaryProxy(type_lhs.getStringDictKey());
473 auto const* const sdp_rhs =
474 getStringDictionaryProxy(type_rhs.getStringDictKey());
476 state.cur_target_idx_ = target_idx;
480 type_rhs.set_comp_param(type_lhs.get_comp_param());
481 type_rhs.setStringDictKey(type_lhs.getStringDictKey());
494 CHECK_LT(target_idx, lazy_fetch_info_.size());
495 auto& col_lazy_fetch = lazy_fetch_info_[target_idx];
496 CHECK(col_lazy_fetch.is_lazily_fetched);
497 int const target_size = storage_->targets_[target_idx].sql_type.get_size();
498 CHECK_LT(0, target_size) << storage_->targets_[target_idx].toString();
499 size_t const nrows = storage_->binSearchRowCount();
506 : query_mem_desc_.getEffectiveKeyWidth();
509 size_t const next_target_idx = j + 1;
512 auto const& next_agg_info = storage_->targets_[next_target_idx];
517 : query_mem_desc_.getEffectiveKeyWidth();
519 for (size_t i = 0; i < nrows; ++i) {
523 auto& frag_col_buffers = getColumnFrag(0, target_idx, pos);
524 CHECK_LT(size_t(col_lazy_fetch.local_col_id), frag_col_buffers.size());
525 int8_t const* const col_frag = frag_col_buffers[col_lazy_fetch.local_col_id];
526 func(col_frag + pos * target_size);
529 size_t const key_bytes_with_padding =
531 for (size_t i = 0; i < nrows; ++i) {
532 int8_t const* const keys_ptr = row_ptr_rowwise(storage_->buff_, storage_qmd, i);
533 int8_t const* const rowwise_target_ptr = keys_ptr + key_bytes_with_padding;
534 int64_t pos = *reinterpret_cast<int64_t const*>(rowwise_target_ptr);
535 auto& frag_col_buffers = getColumnFrag(0, target_idx, pos);
536 CHECK_LT(size_t(col_lazy_fetch.local_col_id), frag_col_buffers.size());
537 int8_t const* const col_frag = frag_col_buffers[col_lazy_fetch.local_col_id];
538 func(col_frag + pos * target_size);
546 if (total_row_count < offset) {
550 size_t total_truncated_row_count = total_row_count - offset;
553 return std::min(total_truncated_row_count, limit);
556 return total_truncated_row_count;
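// rowCountImpl(): returns the permutation size when one exists, falls back to binSearchRowCount() where the layout allows it, counts in parallel above a fixed entry-count threshold, and otherwise iterates rows under a lock.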
568 if (!permutation_.empty()) {
576 CHECK(permutation_.empty());
578 return binSearchRowCount();
581 constexpr size_t auto_parallel_row_count_threshold{20000UL};
582 if (force_parallel || entryCount() >= auto_parallel_row_count_threshold) {
583 return parallelRowCount();
585 std::lock_guard<std::mutex> lock(row_iteration_mutex_);
589 auto crt_row = getNextRowUnlocked(false, false);
590 if (crt_row.empty()) {
602 const int64_t cached_row_count = cached_row_count_;
605 return cached_row_count;
607 setCachedRowCount(rowCountImpl(force_parallel));
608 return cached_row_count_;
616 const int64_t signed_row_count = static_cast<int64_t>(row_count);
617 const int64_t old_cached_row_count = cached_row_count_.exchange(signed_row_count);
619 old_cached_row_count == signed_row_count);
627 size_t row_count = storage_->binSearchRowCount();
628 for (auto& s : appended_storage_) {
629 row_count += s->binSearchRowCount();
636 using namespace threading;
637 auto execute_parallel_row_count =
639 const blocked_range<size_t>& r, size_t row_count) {
641 for (size_t i = r.begin(); i < r.end(); ++i) {
642 if (!isRowAtEmpty(i)) {
648 const auto row_count = parallel_reduce(blocked_range<size_t>(0, entryCount()),
650 execute_parallel_row_count,
671 return rowCount() == size_t(0);
675 return (!storage_ && !estimator_ && !just_explain_) || cached_row_count_ == 0;
680 return storage_->query_mem_desc_;
689 return storage_->target_init_vals_;
694 CHECK(device_estimator_buffer_);
695 return device_estimator_buffer_->getMemoryPtr();
699 return host_estimator_buffer_;
704 CHECK(!host_estimator_buffer_);
705 CHECK_EQ(size_t(0), estimator_->getBufferSize() % sizeof(int64_t));
706 host_estimator_buffer_ =
707 static_cast<int8_t*>(checked_calloc(estimator_->getBufferSize(), 1));
708 CHECK(device_estimator_buffer_);
709 auto device_buffer_ptr = device_estimator_buffer_->getMemoryPtr();
710 auto allocator = std::make_unique<CudaAllocator>(
712 allocator->copyFromDevice(
713 host_estimator_buffer_, device_buffer_ptr, estimator_->getBufferSize());
717 timings_.executor_queue_time = queue_time;
721 timings_.kernel_queue_time = kernel_queue_time;
725 timings_.compilation_queue_time += compilation_queue_time;
729 return timings_.executor_queue_time + timings_.kernel_queue_time +
730 timings_.compilation_queue_time;
734 return timings_.render_time;
738 crt_row_buff_idx_ = 0;
743 return keep_first_ + drop_first_;
747 return just_explain_;
751 for_validation_only_ = true;
755 return for_validation_only_;
769 query_mem_desc_copy.resetGroupColWidths(
770 std::vector<int8_t>(query_mem_desc_copy.getGroupbyColCount(), 8));
772 return query_mem_desc_copy;
774 query_mem_desc_copy.alignPaddedSlots();
775 return query_mem_desc_copy;
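// sort(): invalidates the cached row count, then picks a strategy: baseline sort when applicable, GPU radix sort with a CPU fallback on out-of-memory, parallelTop() for large top-n sorts, or a full permutation built with initPermutationBuffer() and trimmed by topPermutation().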
781 const Executor* executor) {
787 invalidateCachedRowCount();
788 CHECK(!targets_.empty());
790 if (canUseFastBaselineSort(order_entries, top_n)) {
791 baselineSort(order_entries, top_n, device_type, executor);
795 if (query_mem_desc_.sortOnGpu()) {
797 radixSortOnGpu(order_entries);
799 LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
800 radixSortOnCpu(order_entries);
801 } catch (const std::bad_alloc&) {
802 LOG(WARNING) << "Out of GPU memory during sort, finish on CPU";
803 radixSortOnCpu(order_entries);
808 if (query_mem_desc_.getEntryCount() > std::numeric_limits<uint32_t>::max()) {
812 CHECK(permutation_.empty());
818 parallelTop(order_entries, top_n, executor);
823 permutation_.resize(query_mem_desc_.getEntryCount());
826 pv = initPermutationBuffer(pv, 0, permutation_.size());
830 pv = topPermutation(pv, top_n, createComparator(order_entries, pv, executor, false));
831 if (pv.size() < permutation_.size()) {
832 permutation_.resize(pv.size());
833 permutation_.shrink_to_fit();
842 const Executor* executor) {
863 const auto storage_lookup_result = findStorage(i);
864 const auto lhs_storage = storage_lookup_result.storage_ptr;
865 const auto off = storage_lookup_result.fixedup_entry_idx;
867 if (!lhs_storage->isEmptyEntry(off)) {
880 const Executor* executor) {
885 permutation_.resize(query_mem_desc_.getEntryCount());
886 std::vector<PermutationView> permutation_views(nthreads);
888 for (auto interval : makeIntervals<PermutationIdx>(0, permutation_.size(), nthreads)) {
889 top_sort_threads.run([this,
897 PermutationView pv(permutation_.data() + interval.begin, 0, interval.size());
898 pv = initPermutationBuffer(pv, interval.begin, interval.end);
899 const auto compare = createComparator(order_entries, pv, executor, true);
900 permutation_views[interval.index] = topPermutation(pv, top_n, compare);
903 top_sort_threads.wait();
912 auto end = permutation_.begin() + permutation_views.front().size();
913 for (size_t i = 1; i < nthreads; ++i) {
914 std::copy(permutation_views[i].begin(), permutation_views[i].end(), end);
915 end += permutation_views[i].size();
920 const auto compare = createComparator(order_entries, pv, executor, false);
921 pv = topPermutation(pv, top_n, compare);
922 permutation_.resize(pv.size());
923 permutation_.shrink_to_fit();
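// getStorageIndex()/findStorage(): map a logical entry index onto the primary storage or one of the appended storages, returning the storage ordinal and the fixed-up local entry index.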
927 size_t fixedup_entry_idx = entry_idx;
928 auto entry_count = storage_->query_mem_desc_.getEntryCount();
929 const bool is_rowwise_layout = !storage_->query_mem_desc_.didOutputColumnar();
930 if (fixedup_entry_idx < entry_count) {
931 return {0, fixedup_entry_idx};
933 fixedup_entry_idx -= entry_count;
934 for (size_t i = 0; i < appended_storage_.size(); ++i) {
935 const auto& desc = appended_storage_[i]->query_mem_desc_;
936 CHECK_NE(is_rowwise_layout, desc.didOutputColumnar());
937 entry_count = desc.getEntryCount();
938 if (fixedup_entry_idx < entry_count) {
939 return {i + 1, fixedup_entry_idx};
941 fixedup_entry_idx -= entry_count;
943 UNREACHABLE() << "entry_idx = " << entry_idx << ", query_mem_desc_.getEntryCount() = "
944 << query_mem_desc_.getEntryCount();
952 auto [stg_idx, fixedup_entry_idx] = getStorageIndex(entry_idx);
953 return {stg_idx ? appended_storage_[stg_idx - 1].get() : storage_.get(),
958 template <typename BUFFER_ITERATOR_TYPE>
960 BUFFER_ITERATOR_TYPE>::materializeCountDistinctColumns() {
961 for (const auto& order_entry : order_entries_) {
963 count_distinct_materialized_buffers_.emplace_back(
964 materializeCountDistinctColumn(order_entry));
974 : targets_(targets), agg_kind_(agg_kind) {}
976 return targets_[order_entry.tle_no - 1].agg_kind == agg_kind_;
981 template <typename BUFFER_ITERATOR_TYPE>
983 BUFFER_ITERATOR_TYPE>::materializeApproxQuantileColumns() const {
985 for (const auto& order_entry : order_entries_) {
986 if (result_set_->targets_[order_entry.tle_no - 1].agg_kind == kAPPROX_QUANTILE) {
987 approx_quantile_materialized_buffers.emplace_back(
988 materializeApproxQuantileColumn(order_entry));
991 return approx_quantile_materialized_buffers;
994 template <typename BUFFER_ITERATOR_TYPE>
998 IsAggKind const is_mode(result_set_->targets_, kMODE);
999 mode_buffers.reserve(
1000 std::count_if(order_entries_.begin(), order_entries_.end(), is_mode));
1001 for (auto const& order_entry : order_entries_) {
1002 if (is_mode(order_entry)) {
1003 mode_buffers.emplace_back(materializeModeColumn(order_entry));
1006 return mode_buffers;
1009 template <typename BUFFER_ITERATOR_TYPE>
1010 std::vector<int64_t>
1013 const size_t num_storage_entries = result_set_->query_mem_desc_.getEntryCount();
1014 std::vector<int64_t> count_distinct_materialized_buffer(num_storage_entries);
1016 result_set_->query_mem_desc_.getCountDistinctDescriptor(order_entry.tle_no - 1);
1017 const size_t num_non_empty_entries = permutation_.size();
1020 const size_t start, const size_t end) {
1022 for (size_t i = start; i < end; ++i) {
1024 const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
1025 const auto storage = storage_lookup_result.storage_ptr;
1026 const auto off = storage_lookup_result.fixedup_entry_idx;
1027 const auto value = buffer_itr_.getColumnInternal(
1028 storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
1029 count_distinct_materialized_buffer[permuted_idx] =
1035 if (single_threaded_) {
1036 work(0, num_non_empty_entries);
1039 for (auto interval : makeIntervals<size_t>(0, num_non_empty_entries, cpu_threads())) {
1040 thread_pool.run([=] { work(interval.begin, interval.end); });
1044 return count_distinct_materialized_buffer;
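// calculateQuantile() reads the quantile from a TDigest (mapping NaN to NULL_DOUBLE); materializeApproxQuantileColumn() fills a per-entry buffer of those values, in parallel unless single-threaded.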
1051 double const quantile = t_digest->quantile();
1052 return boost::math::isnan(quantile) ? NULL_DOUBLE : quantile;
1055 template <typename BUFFER_ITERATOR_TYPE>
1056 ResultSet::ApproxQuantileBuffers::value_type
1059 ResultSet::ApproxQuantileBuffers::value_type materialized_buffer(
1060 result_set_->query_mem_desc_.getEntryCount());
1061 const size_t size = permutation_.size();
1063 const size_t start, const size_t end) {
1065 for (size_t i = start; i < end; ++i) {
1067 const auto storage_lookup_result = result_set_->findStorage(permuted_idx);
1068 const auto storage = storage_lookup_result.storage_ptr;
1069 const auto off = storage_lookup_result.fixedup_entry_idx;
1070 const auto value = buffer_itr_.getColumnInternal(
1071 storage->buff_, off, order_entry.tle_no - 1, storage_lookup_result);
1072 materialized_buffer[permuted_idx] =
1073 value.i1 ? calculateQuantile(reinterpret_cast<quantile::TDigest*>(value.i1))
1077 if (single_threaded_) {
1081 for (auto interval : makeIntervals<size_t>(0, size, cpu_threads())) {
1082 thread_pool.run([=] { work(interval.begin, interval.end); });
1086 return materialized_buffer;
1092 if (auto const* const agg_mode = reinterpret_cast<AggMode const*>(i1)) {
1093 if (std::optional<int64_t> const mode = agg_mode->mode()) {
1103 template <typename BUFFER_ITERATOR_TYPE>
1112 for (size_t i = r.begin(); i != r.end(); ++i) {
1114 auto const storage_lookup_result = rsc_->result_set_->findStorage(permuted_idx);
1115 auto const storage = storage_lookup_result.storage_ptr;
1116 auto const off = storage_lookup_result.fixedup_entry_idx;
1117 auto const value = rsc_->buffer_itr_.getColumnInternal(
1118 storage->buff_, off, order_entry_.tle_no - 1, storage_lookup_result);
1124 template <typename BUFFER_ITERATOR_TYPE>
1125 ResultSet::ModeBuffers::value_type
1128 ResultSet::ModeBuffers::value_type materialized_buffer(
1129 result_set_->query_mem_desc_.getEntryCount());
1130 ModeScatter mode_scatter{
1132 if (single_threaded_) {
1137 return materialized_buffer;
1140 template <typename BUFFER_ITERATOR_TYPE>
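// ResultSetComparator::operator(): resolves both permutation indices to their storage entries and compares the ordered target values, handling nulls-first/last, dictionary-encoded strings, float/double slots, and the materialized count-distinct, approx-quantile, and mode buffers.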
1146 const auto lhs_storage_lookup_result = result_set_->findStorage(lhs);
1147 const auto rhs_storage_lookup_result = result_set_->findStorage(rhs);
1148 const auto lhs_storage = lhs_storage_lookup_result.storage_ptr;
1149 const auto rhs_storage = rhs_storage_lookup_result.storage_ptr;
1150 const auto fixedup_lhs = lhs_storage_lookup_result.fixedup_entry_idx;
1151 const auto fixedup_rhs = rhs_storage_lookup_result.fixedup_entry_idx;
1152 size_t materialized_count_distinct_buffer_idx{0};
1153 size_t materialized_approx_quantile_buffer_idx{0};
1154 size_t materialized_mode_buffer_idx{0};
1156 for (
const auto& order_entry : order_entries_) {
1159 const auto& lhs_agg_info = lhs_storage->targets_[order_entry.tle_no - 1];
1160 const auto& rhs_agg_info = rhs_storage->targets_[order_entry.tle_no - 1];
1170 if (lhs_entry_ti.get_type() == kFLOAT) {
1171 const auto is_col_lazy =
1172 !result_set_->lazy_fetch_info_.empty() &&
1173 result_set_->lazy_fetch_info_[order_entry.tle_no - 1].is_lazily_fetched;
1174 if (result_set_->query_mem_desc_.getPaddedSlotWidthBytes(order_entry.tle_no - 1) ==
1176 float_argument_input =
1177 result_set_->query_mem_desc_.didOutputColumnar() ? !is_col_lazy : true;
1182 CHECK_LT(materialized_count_distinct_buffer_idx,
1183 count_distinct_materialized_buffers_.size());
1185 const auto& count_distinct_materialized_buffer =
1186 count_distinct_materialized_buffers_[materialized_count_distinct_buffer_idx];
1187 const auto lhs_sz = count_distinct_materialized_buffer[lhs];
1188 const auto rhs_sz = count_distinct_materialized_buffer[rhs];
1189 ++materialized_count_distinct_buffer_idx;
1190 if (lhs_sz == rhs_sz) {
1193 return (lhs_sz < rhs_sz) != order_entry.is_desc;
1195 CHECK_LT(materialized_approx_quantile_buffer_idx,
1196 approx_quantile_materialized_buffers_.size());
1197 const auto& approx_quantile_materialized_buffer =
1198 approx_quantile_materialized_buffers_[materialized_approx_quantile_buffer_idx];
1199 const auto lhs_value = approx_quantile_materialized_buffer[lhs];
1200 const auto rhs_value = approx_quantile_materialized_buffer[rhs];
1201 ++materialized_approx_quantile_buffer_idx;
1202 if (lhs_value == rhs_value) {
1204 } else if (!lhs_entry_ti.get_notnull()) {
1206 return order_entry.nulls_first;
1208 return !order_entry.nulls_first;
1211 return (lhs_value < rhs_value) != order_entry.is_desc;
1213 CHECK_LT(materialized_mode_buffer_idx, mode_buffers_.size());
1214 auto const& mode_buffer = mode_buffers_[materialized_mode_buffer_idx++];
1215 int64_t const lhs_value = mode_buffer[lhs];
1216 int64_t const rhs_value = mode_buffer[rhs];
1217 if (lhs_value == rhs_value) {
1221 return order_entry.nulls_first;
1223 return !order_entry.nulls_first;
1225 return result_set_->isLessThan(lhs_entry_ti, lhs_value, rhs_value) !=
1226 order_entry.is_desc;
1230 const auto lhs_v = buffer_itr_.getColumnInternal(lhs_storage->buff_,
1232 order_entry.tle_no - 1,
1233 lhs_storage_lookup_result);
1234 const auto rhs_v = buffer_itr_.getColumnInternal(rhs_storage->buff_,
1236 order_entry.tle_no - 1,
1237 rhs_storage_lookup_result);
1239 if (UNLIKELY(isNull(lhs_entry_ti, lhs_v, float_argument_input) &&
1240 isNull(rhs_entry_ti, rhs_v, float_argument_input))) {
1243 if (UNLIKELY(isNull(lhs_entry_ti, lhs_v, float_argument_input) &&
1244 !isNull(rhs_entry_ti, rhs_v, float_argument_input))) {
1245 return order_entry.nulls_first;
1247 if (UNLIKELY(isNull(rhs_entry_ti, rhs_v, float_argument_input) &&
1248 !isNull(lhs_entry_ti, lhs_v, float_argument_input))) {
1249 return !order_entry.nulls_first;
1252 if (LIKELY(lhs_v.isInt())) {
1253 CHECK(rhs_v.isInt());
1254 if (UNLIKELY(lhs_entry_ti.is_string() &&
1256 CHECK_EQ(4, lhs_entry_ti.get_logical_size());
1258 const auto lhs_string_dict_proxy = executor_->getStringDictionaryProxy(
1259 lhs_entry_ti.getStringDictKey(), result_set_->row_set_mem_owner_, false);
1260 const auto rhs_string_dict_proxy = executor_->getStringDictionaryProxy(
1261 rhs_entry_ti.getStringDictKey(), result_set_->row_set_mem_owner_, false);
1262 const auto lhs_str = lhs_string_dict_proxy->getString(lhs_v.i1);
1263 const auto rhs_str = rhs_string_dict_proxy->getString(rhs_v.i1);
1264 if (lhs_str == rhs_str) {
1267 return (lhs_str < rhs_str) != order_entry.is_desc;
1270 if (lhs_v.i1 == rhs_v.i1) {
1273 if (lhs_entry_ti.is_fp()) {
1274 if (float_argument_input) {
1275 const auto lhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&lhs_v.i1));
1276 const auto rhs_dval = *reinterpret_cast<const float*>(may_alias_ptr(&rhs_v.i1));
1277 return (lhs_dval < rhs_dval) != order_entry.is_desc;
1279 const auto lhs_dval =
1280 *reinterpret_cast<const double*>(may_alias_ptr(&lhs_v.i1));
1281 const auto rhs_dval =
1282 *reinterpret_cast<const double*>(may_alias_ptr(&rhs_v.i1));
1283 return (lhs_dval < rhs_dval) != order_entry.is_desc;
1286 return (lhs_v.i1 < rhs_v.i1) != order_entry.is_desc;
1288 if (lhs_v.isPair()) {
1289 CHECK(rhs_v.isPair());
1291 pair_to_double({lhs_v.i1, lhs_v.i2}, lhs_entry_ti, float_argument_input);
1293 pair_to_double({rhs_v.i1, rhs_v.i2}, rhs_entry_ti, float_argument_input);
1297 return (lhs < rhs) != order_entry.is_desc;
1299 CHECK(lhs_v.isStr() && rhs_v.isStr());
1300 const auto lhs = lhs_v.strVal();
1301 const auto rhs = rhs_v.strVal();
1305 return (lhs < rhs) != order_entry.is_desc;
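// topPermutation(): partially sorts the permutation with the supplied comparator so the first n entries are the top results.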
1319 if (n < permutation.size()) {
1321 permutation.begin(), permutation.begin() + n, permutation.end(), compare);
1330 const std::list<Analyzer::OrderEntry>& order_entries) const {
1333 const int device_id{0};
1334 auto allocator = std::make_unique<CudaAllocator>(
1338 std::vector<int64_t*> group_by_buffers(block_size_);
1339 group_by_buffers[0] = reinterpret_cast<int64_t*>(storage_->getUnderlyingBuffer());
1340 auto dev_group_by_buffers =
1355 order_entries, query_mem_desc_, dev_group_by_buffers, data_mgr, device_id);
1360 dev_group_by_buffers.data,
1370 const std::list<Analyzer::OrderEntry>& order_entries) const {
1372 CHECK(!query_mem_desc_.hasKeylessHash());
1373 std::vector<int64_t> tmp_buff(query_mem_desc_.getEntryCount());
1374 std::vector<int32_t> idx_buff(query_mem_desc_.getEntryCount());
1375 CHECK_EQ(size_t(1), order_entries.size());
1376 auto buffer_ptr = storage_->getUnderlyingBuffer();
1377 for (const auto& order_entry : order_entries) {
1378 const auto target_idx = order_entry.tle_no - 1;
1379 const auto sortkey_val_buff = reinterpret_cast<int64_t*>(
1380 buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
1381 const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
1384 query_mem_desc_.getEntryCount(),
1385 order_entry.is_desc,
1389 query_mem_desc_.getEntryCount(),
1392 for (size_t target_idx = 0; target_idx < query_mem_desc_.getSlotCount();
1394 if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
1397 const auto chosen_bytes = query_mem_desc_.getPaddedSlotWidthBytes(target_idx);
1398 const auto satellite_val_buff = reinterpret_cast<int64_t*>(
1399 buffer_ptr + query_mem_desc_.getColOffInBytes(target_idx));
1402 query_mem_desc_.getEntryCount(),
1416 row_set_mem_owner_->getOrAddStringDictProxy(dict_key, true);
1418 return sdp->getDictionary()->copyStrings();
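// getUniqueStringsForDictEncodedTargetCol(): scans the column (skipping all other targets), collects the distinct non-null string ids, and returns them together with their strings from the dictionary proxy.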
1421 const std::pair<std::vector<int32_t>, std::vector<std::string>>
1423 const auto col_type_info = getColType(col_idx);
1424 std::unordered_set<int32_t> unique_string_ids_set;
1425 const size_t num_entries = entryCount();
1426 std::vector<bool> targets_to_skip(colCount(), true);
1427 targets_to_skip[col_idx] = false;
1428 CHECK(col_type_info.is_dict_encoded_type());
1430 col_type_info.is_array() ? col_type_info.get_elem_type() : col_type_info);
1432 for (size_t row_idx = 0; row_idx < num_entries; ++row_idx) {
1433 const auto result_row = getRowAtNoTranslations(row_idx, targets_to_skip);
1434 if (!result_row.empty()) {
1435 if (const auto scalar_col_val =
1436 boost::get<ScalarTargetValue>(&result_row[col_idx])) {
1437 const int32_t string_id =
1438 static_cast<int32_t>(boost::get<int64_t>(*scalar_col_val));
1439 if (string_id != null_val) {
1440 unique_string_ids_set.emplace(string_id);
1442 } else if (const auto array_col_val =
1443 boost::get<ArrayTargetValue>(&result_row[col_idx])) {
1444 if (*array_col_val) {
1446 const int32_t string_id = static_cast<int32_t>(boost::get<int64_t>(scalar));
1447 if (string_id != null_val) {
1448 unique_string_ids_set.emplace(string_id);
1456 const size_t num_unique_strings = unique_string_ids_set.size();
1457 std::vector<int32_t> unique_string_ids(num_unique_strings);
1458 size_t string_idx{0};
1459 for (const auto unique_string_id : unique_string_ids_set) {
1460 unique_string_ids[string_idx++] = unique_string_id;
1463 const auto sdp = row_set_mem_owner_->getOrAddStringDictProxy(
1464 col_type_info.getStringDictKey(), true);
1467 return std::make_pair(unique_string_ids, sdp->getStrings(unique_string_ids));
1480 } else if (query_mem_desc_.didOutputColumnar()) {
1481 return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
1483 query_mem_desc_.getQueryDescriptionType() ==
1485 query_mem_desc_.getQueryDescriptionType() ==
1487 query_mem_desc_.getQueryDescriptionType() ==
1490 CHECK(!(query_mem_desc_.getQueryDescriptionType() ==
1492 return permutation_.empty() && (query_mem_desc_.getQueryDescriptionType() ==
1494 query_mem_desc_.getQueryDescriptionType() ==
1500 return query_mem_desc_.didOutputColumnar() &&
1502 query_mem_desc_.getQueryDescriptionType() ==
1504 appended_storage_.empty() && storage_ &&
1505 (lazy_fetch_info_.empty() || !lazy_fetch_info_[column_idx].is_lazily_fetched);
1509 CHECK(isZeroCopyColumnarConversionPossible(column_idx));
1510 return storage_->getUnderlyingBuffer() + query_mem_desc_.getColOffInBytes(column_idx);
1514 const auto col_context = query_mem_desc_.getColSlotContext();
1515 const auto idx = col_context.getSlotsForCol(column_idx).front();
1516 return query_mem_desc_.getPaddedSlotBufferSize(idx);
1517 if (checkSlotUsesFlatBufferFormat(idx)) {
1518 return query_mem_desc_.getFlatBufferSize(idx);
1520 const size_t padded_slot_width = static_cast<size_t>(getPaddedSlotWidthBytes(idx));
1521 return padded_slot_width * entryCount();
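// getSingleSlotTargetBitmap(): marks which targets occupy a single slot, excluding AVG aggregates and variable-length types; getSupportedSingleSlotTargetBitmap() further excludes targets such as APPROX_QUANTILE, MODE, and float SAMPLE aggregates.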
1526 std::vector<bool> target_bitmap(targets_.size(), true);
1527 size_t num_single_slot_targets = 0;
1528 for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
1529 const auto& sql_type = targets_[target_idx].sql_type;
1530 if (targets_[target_idx].is_agg && targets_[target_idx].agg_kind == kAVG) {
1531 target_bitmap[target_idx] = false;
1532 } else if (sql_type.is_varlen()) {
1533 target_bitmap[target_idx] = false;
1535 num_single_slot_targets++;
1538 return std::make_tuple(std::move(target_bitmap), num_single_slot_targets);
1551 CHECK(isDirectColumnarConversionPossible());
1552 auto [single_slot_targets, num_single_slot_targets] = getSingleSlotTargetBitmap();
1554 for (size_t target_idx = 0; target_idx < single_slot_targets.size(); target_idx++) {
1555 const auto& target = targets_[target_idx];
1556 if (single_slot_targets[target_idx] &&
1558 shared::is_any<kAPPROX_QUANTILE, kMODE>(target.agg_kind) ||
1559 (target.is_agg && target.agg_kind == kSAMPLE && target.sql_type == kFLOAT))) {
1560 single_slot_targets[target_idx] = false;
1561 num_single_slot_targets--;
1564 CHECK_GE(num_single_slot_targets, size_t(0));
1565 return std::make_tuple(std::move(single_slot_targets), num_single_slot_targets);
1570 std::vector<size_t> slot_indices(targets_.size(), 0);
1571 size_t slot_index = 0;
1572 for (size_t target_idx = 0; target_idx < targets_.size(); target_idx++) {
1573 slot_indices[target_idx] = slot_index;
1574 slot_index = advance_slot(slot_index, targets_[target_idx], false);
1576 return slot_indices;
1582 return !rows.isTruncated();
1594 std::vector<TargetInfo> const& targets) {
1595 auto const itr = std::find_if(targets.begin(), targets.end(), IsDictEncodedStr{});
1596 return itr == targets.end() ? std::nullopt
1597 : std::make_optional<size_t>(itr - targets.begin());