28 #include "../Shared/thread_count.h"
32 std::unique_ptr<CudaMgr_Namespace::CudaMgr> g_cuda_mgr;
43 g_cuda_mgr->setContext(device_id);
49 const std::list<Analyzer::OrderEntry>& order_entries,
52 CHECK_EQ(
size_t(1), order_entries.size());
54 const auto& oe = order_entries.front();
57 size_t logical_slot_idx = 0;
58 size_t physical_slot_off = 0;
59 for (
size_t i = 0; i < static_cast<size_t>(oe.tle_no - 1); ++i) {
69 CHECK(target_groupby_indices_sz == 0 ||
70 static_cast<size_t>(oe.tle_no) <= target_groupby_indices_sz);
71 const int64_t target_groupby_index{
72 target_groupby_indices_sz == 0
80 target_groupby_index};
82 auto groupby_buffer =
storage_->getUnderlyingBuffer();
84 const auto step =
static_cast<size_t>(
89 std::vector<std::future<void>> top_futures;
90 std::vector<Permutation> strided_permutations(step);
91 for (
size_t start = 0; start < step; ++start) {
94 [&strided_permutations,
105 set_cuda_context(data_mgr, start);
107 strided_permutations[start] = (key_bytewidth == 4)
128 for (
auto& top_future : top_futures) {
131 for (
auto& top_future : top_futures) {
134 permutation_.reserve(strided_permutations.size() * top_n);
135 for (
const auto& strided_permutation : strided_permutations) {
137 permutation_.end(), strided_permutation.begin(), strided_permutation.end());
150 device_type, 0, data_mgr, groupby_buffer, pod_oe, layout, top_n, 0, 1)
152 device_type, 0, data_mgr, groupby_buffer, pod_oe, layout, top_n, 0, 1);
157 const std::list<Analyzer::OrderEntry>& order_entries,
158 const size_t top_n) {
163 const auto& order_entry = order_entries.front();
166 const auto& target_info =
targets_[order_entry.tle_no - 1];
183 return g_cuda_mgr ? g_cuda_mgr->getDeviceCount() : 0;
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
VectorView< PermutationIdx > PermutationView
size_t getEntryCount() const
int64_t getTargetGroupbyIndex(const size_t target_idx) const
void setContext(const int device_num) const
QueryMemoryDescriptor query_mem_desc_
bool hasKeylessHash() const
std::unique_ptr< ResultSetStorage > storage_
size_t get_slot_off_quad(const QueryMemoryDescriptor &query_mem_desc)
size_t getEffectiveKeyWidth() const
Data_Namespace::DataMgr & getDataMgr() const
const std::vector< TargetInfo > targets_
future< Result > async(Fn &&fn, Args &&...args)
static SysCatalog & instance()
size_t advance_slot(const size_t j, const TargetInfo &target_info, const bool separate_varlen_storage)
int getDeviceCount() const
size_t targetGroupbyIndicesSize() const
bool canUseFastBaselineSort(const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n)
bool is_distinct_target(const TargetInfo &target_info)
Comparator createComparator(const std::list< Analyzer::OrderEntry > &order_entries, const PermutationView permutation, const Executor *executor, const bool single_threaded)
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
QueryDescriptionType getQueryDescriptionType() const
bool isSingleColumnGroupByWithPerfectHash() const
size_t get_row_bytes(const QueryMemoryDescriptor &query_mem_desc)
bool didOutputColumnar() const
static PermutationView topPermutation(PermutationView, const size_t n, const Comparator &)
Basic constructors and methods of the row set interface.
void doBaselineSort(const ExecutorDeviceType device_type, const std::list< Analyzer::OrderEntry > &order_entries, const size_t top_n, const Executor *executor)
bool separate_varlen_storage_valid_
Data_Namespace::DataMgr * getDataManager() const
template std::vector< uint32_t > baseline_sort< int32_t >(const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
template std::vector< uint32_t > baseline_sort< int64_t >(const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)