OmniSciDB
a5dc49c757
|
#include "BufferEntryUtils.h"
#include "GpuMemUtils.h"
#include "ResultSetBufferAccessors.h"
#include "SortUtils.cuh"
#include "StreamingTopN.h"
#include "TopKSort.h"
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/functional.h>
#include <thrust/partition.h>
#include <thrust/sort.h>
#include <cuda.h>
#include <iostream>
Go to the source code of this file.
Classes
struct is_taken_entry< K, I >
struct is_null_order_entry< K, I >
struct KeyFetcher< K, I >
struct KeyReseter< K >
struct RowFetcher< I >
Macros
#define checkCudaErrors(err) CHECK_EQ(err, CUDA_SUCCESS)
Functions
CUstream getQueryEngineCudaStreamForDevice (int device_num)
template<typename ForwardIterator >
ForwardIterator partition_by_null (ForwardIterator first, ForwardIterator last, const int64_t null_val, const bool nulls_first, const int8_t *rows_ptr, const GroupByBufferLayoutInfo &layout)
template<class K , class I >
void collect_order_entry_column (thrust::device_ptr< K > &d_oe_col_buffer, const int8_t *d_src_buffer, const thrust::device_ptr< I > &d_idx_first, const size_t idx_count, const size_t oe_offset, const size_t oe_stride, ThrustAllocator &allocator, const int device_id)
template<class K , class I >
void sort_indices_by_key (thrust::device_ptr< I > d_idx_first, const size_t idx_count, const thrust::device_ptr< K > &d_key_buffer, const bool desc, ThrustAllocator &allocator, const int device_id)
template<class I = int32_t>
void do_radix_sort (thrust::device_ptr< I > d_idx_first, const size_t idx_count, const int8_t *d_src_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, ThrustAllocator &allocator, const int device_id)
template<typename DerivedPolicy >
void reset_keys_in_row_buffer (const thrust::detail::execution_policy_base< DerivedPolicy > &exec, int8_t *row_buffer, const size_t key_width, const size_t row_size, const size_t first, const size_t last)
std::vector< int8_t > pop_n_rows_from_merged_heaps_gpu (Data_Namespace::DataMgr *data_mgr, const int64_t *dev_heaps, const size_t heaps_size, const size_t n, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t group_key_bytes, const size_t thread_count, const int device_id)
#define checkCudaErrors(err) CHECK_EQ(err, CUDA_SUCCESS)
Definition at line 33 of file TopKSort.cu.
template<class K , class I >
void collect_order_entry_column (thrust::device_ptr< K > & d_oe_col_buffer,
                                 const int8_t * d_src_buffer,
                                 const thrust::device_ptr< I > & d_idx_first,
                                 const size_t idx_count,
                                 const size_t oe_offset,
                                 const size_t oe_stride,
                                 ThrustAllocator & allocator,
                                 const int device_id)
Definition at line 137 of file TopKSort.cu.
References checkCudaErrors, and getQueryEngineCudaStreamForDevice().
Referenced by do_radix_sort().
template<class I = int32_t>
void do_radix_sort (thrust::device_ptr< I > d_idx_first,
                    const size_t idx_count,
                    const int8_t * d_src_buffer,
                    const PodOrderEntry & oe,
                    const GroupByBufferLayoutInfo & layout,
                    ThrustAllocator & allocator,
                    const int device_id)
Definition at line 180 of file TopKSort.cu.
References CHECK, GroupByBufferLayoutInfo::col_bytes, GroupByBufferLayoutInfo::col_off, collect_order_entry_column(), PodOrderEntry::is_desc, GroupByBufferLayoutInfo::oe_target_info, GroupByBufferLayoutInfo::row_bytes, sort_indices_by_key(), and TargetInfo::sql_type.
Referenced by pop_n_rows_from_merged_heaps_gpu().
CUstream getQueryEngineCudaStreamForDevice (int device_num)
Definition at line 7 of file QueryEngine.cpp.
References QueryEngine::getInstance().
Referenced by RangeJoinHashTable::approximateTupleCount(), BoundingBoxIntersectJoinHashTable::approximateTupleCount(), BaselineJoinHashTable::approximateTupleCount(), collect_order_entry_column(), anonymous_namespace{BoundingBoxIntersectJoinHashTable.cpp}::compute_bucket_sizes(), copy_projection_buffer_from_gpu_columnar(), copy_to_nvidia_gpu(), BaselineJoinHashTable::copyCpuHashTableToGpu(), PerfectJoinHashTable::copyCpuHashTableToGpu(), QueryMemoryInitializer::copyFromTableFunctionGpuBuffers(), anonymous_namespace{ResultSetSortImpl.cu}::do_radix_sort(), TableFunctionExecutionContext::execute(), anonymous_namespace{ResultSetIteration.cpp}::fetch_data_from_gpu(), ResultSet::getVarlenOrderEntry(), BaselineJoinHashTable::initHashTableForDevice(), BaselineJoinHashTableBuilder::initHashTableOnGpu(), InValuesBitmap::InValuesBitmap(), TableFunctionExecutionContext::launchGpuCode(), ResultSet::makeVarlenTargetValue(), pop_n_rows_from_merged_heaps_gpu(), QueryExecutionContext::QueryExecutionContext(), ResultSet::radixSortOnGpu(), PerfectJoinHashTable::reify(), RangeJoinHashTable::reifyWithLayout(), BoundingBoxIntersectJoinHashTable::reifyWithLayout(), BaselineJoinHashTable::reifyWithLayout(), ExecutionKernel::runImpl(), sort_indices_by_key(), ResultSet::syncEstimatorBuffer(), PerfectJoinHashTable::toSet(), BaselineJoinHashTable::toSet(), BoundingBoxIntersectJoinHashTable::toSet(), PerfectJoinHashTable::toString(), BaselineJoinHashTable::toString(), and BoundingBoxIntersectJoinHashTable::toString().
template<typename ForwardIterator >
ForwardIterator partition_by_null (ForwardIterator first,
                                   ForwardIterator last,
                                   const int64_t null_val,
                                   const bool nulls_first,
                                   const int8_t * rows_ptr,
                                   const GroupByBufferLayoutInfo & layout)
Definition at line 71 of file TopKSort.cu.
References GroupByBufferLayoutInfo::col_bytes, GroupByBufferLayoutInfo::col_off, and GroupByBufferLayoutInfo::row_bytes.
Referenced by pop_n_rows_from_merged_heaps_gpu().
std::vector<int8_t> pop_n_rows_from_merged_heaps_gpu (Data_Namespace::DataMgr * data_mgr,
                                                      const int64_t * dev_heaps,
                                                      const size_t heaps_size,
                                                      const size_t n,
                                                      const PodOrderEntry & oe,
                                                      const GroupByBufferLayoutInfo & layout,
                                                      const size_t group_key_bytes,
                                                      const size_t thread_count,
                                                      const int device_id)
Definition at line 303 of file TopKSort.cu.
References CHECK_EQ, checkCudaErrors, GroupByBufferLayoutInfo::col_bytes, gpu_enabled::copy(), do_radix_sort(), streaming_top_n::get_heap_size(), streaming_top_n::get_rows_offset_of_heaps(), getQueryEngineCudaStreamForDevice(), anonymous_namespace{Utm.h}::n, null_val_bit_pattern(), PodOrderEntry::nulls_first, GroupByBufferLayoutInfo::oe_target_info, partition_by_null(), reset_keys_in_row_buffer(), GroupByBufferLayoutInfo::row_bytes, generate_TableFunctionsFactory_init::separator, and TargetInfo::sql_type.
template<typename DerivedPolicy >
void reset_keys_in_row_buffer (const thrust::detail::execution_policy_base< DerivedPolicy > & exec,
                               int8_t * row_buffer,
                               const size_t key_width,
                               const size_t row_size,
                               const size_t first,
                               const size_t last)
Definition at line 276 of file TopKSort.cu.
References CHECK, EMPTY_KEY_32, and EMPTY_KEY_64.
Referenced by pop_n_rows_from_merged_heaps_gpu().
template<class K , class I >
void sort_indices_by_key (thrust::device_ptr< I > d_idx_first,
                          const size_t idx_count,
                          const thrust::device_ptr< K > & d_key_buffer,
                          const bool desc,
                          ThrustAllocator & allocator,
                          const int device_id)
Definition at line 157 of file TopKSort.cu.
References checkCudaErrors, and getQueryEngineCudaStreamForDevice().
Referenced by do_radix_sort().