OmniSciDB  a5dc49c757
ResultSetSortImpl.cu File Reference
#include <cuda.h>
#include "BufferCompaction.h"
#include "GpuMemUtils.h"
#include "GpuRtConstants.h"
#include "ResultSetBufferAccessors.h"
#include "ResultSetSortImpl.h"
#include "SortUtils.cuh"
#include <thrust/copy.h>
#include <thrust/execution_policy.h>
#include <thrust/host_vector.h>
#include <thrust/sort.h>
#include "BufferEntryUtils.h"

Namespaces

 anonymous_namespace{ResultSetSortImpl.cu}
 

Macros

#define checkCudaErrors(err)   CHECK_EQ(err, CUDA_SUCCESS)
 
#define FORCE_CPU_VERSION
 

Functions

CUstream getQueryEngineCudaStreamForDevice (int device_num)
 
template<class K , class V , class I >
std::vector< uint32_t > anonymous_namespace{ResultSetSortImpl.cu}::do_radix_sort (const ExecutorDeviceType device_type, const int device_id, ThrustAllocator &thrust_allocator, const int8_t *groupby_buffer, V dev_oe_col_buffer_begin, V dev_oe_col_buffer_end, I dev_idx_buff_begin, const size_t dev_idx_buff_size, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n)
 
void anonymous_namespace{ResultSetSortImpl.cu}::add_nulls (std::vector< uint32_t > &idx_buff, const std::vector< uint32_t > &null_idx_buff, const PodOrderEntry &oe)
 
template<typename T >
thrust::device_ptr< T > anonymous_namespace{ResultSetSortImpl.cu}::get_device_copy_ptr (const thrust::host_vector< T > &host_vec, ThrustAllocator &thrust_allocator)
 
template<class K >
std::vector< uint32_t > anonymous_namespace{ResultSetSortImpl.cu}::baseline_sort_fp (const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const thrust::host_vector< int64_t > &oe_col_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
 
template<class K >
std::vector< uint32_t > anonymous_namespace{ResultSetSortImpl.cu}::baseline_sort_int (const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const thrust::host_vector< int64_t > &oe_col_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
 
template<class K >
thrust::host_vector< int64_t > anonymous_namespace{ResultSetSortImpl.cu}::collect_order_entry_column (const int8_t *groupby_buffer, const GroupByBufferLayoutInfo &layout, const size_t start, const size_t step)
 
template<class K >
std::vector< uint32_t > baseline_sort (const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
 
template std::vector< uint32_t > baseline_sort< int32_t > (const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
 
template std::vector< uint32_t > baseline_sort< int64_t > (const ExecutorDeviceType device_type, const int device_id, Data_Namespace::DataMgr *data_mgr, const int8_t *groupby_buffer, const PodOrderEntry &oe, const GroupByBufferLayoutInfo &layout, const size_t top_n, const size_t start, const size_t step)
 

Macro Definition Documentation

#define checkCudaErrors(err) CHECK_EQ(err, CUDA_SUCCESS)

Definition at line 16 of file ResultSetSortImpl.cu.
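The macro turns any non-successful CUDA driver status into a fatal CHECK_EQ failure. A minimal sketch of how such a wrapper is typically applied; the alloc_device_buffer helper is hypothetical, not part of this file:

#include <cstddef>
#include <cuda.h>

// Hypothetical helper, for illustration only: any driver-API status other
// than CUDA_SUCCESS trips the CHECK_EQ assertion with the failing error code.
CUdeviceptr alloc_device_buffer(const size_t num_bytes) {
  CUdeviceptr dev_buf;
  checkCudaErrors(cuMemAlloc(&dev_buf, num_bytes));
  return dev_buf;
}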

#define FORCE_CPU_VERSION

Definition at line 18 of file ResultSetSortImpl.cu.
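FORCE_CPU_VERSION is defined before pulling in shared headers such as BufferEntryUtils.h. A pattern consistent with that include order, shown here as an assumption rather than the verified contents of those headers, is to let the macro strip device qualifiers so the same helpers also compile for the CPU fallback path:

// Assumed pattern, for illustration only; not the verified contents of
// BufferEntryUtils.h. With FORCE_CPU_VERSION defined before the include,
// shared helpers compile as host-only functions.
#ifdef FORCE_CPU_VERSION
#define DEVICE
#else
#define DEVICE __device__
#endif

// Illustrative declaration: compiles host-only under FORCE_CPU_VERSION.
DEVICE bool is_empty_entry(const size_t entry_idx, const int8_t* groupby_buffer);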

Function Documentation

template<class K >
std::vector<uint32_t> baseline_sort(const ExecutorDeviceType device_type,
                                    const int device_id,
                                    Data_Namespace::DataMgr* data_mgr,
                                    const int8_t* groupby_buffer,
                                    const PodOrderEntry& oe,
                                    const GroupByBufferLayoutInfo& layout,
                                    const size_t top_n,
                                    const size_t start,
                                    const size_t step)

Definition at line 353 of file ResultSetSortImpl.cu.

References CHECK, CPU, get_compact_type(), anonymous_namespace{ResultSetSortImpl.cu}::get_device_copy_ptr(), GPU, PodOrderEntry::is_desc, kAVG, and PodOrderEntry::nulls_first.

{
  auto oe_col_buffer = collect_order_entry_column<K>(groupby_buffer, layout, start, step);
  const auto& entry_ti = get_compact_type(layout.oe_target_info);
  CHECK(entry_ti.is_number());
  if (entry_ti.is_fp() || layout.oe_target_info.agg_kind == kAVG) {
    return baseline_sort_fp<K>(device_type,
                               device_id,
                               data_mgr,
                               groupby_buffer,
                               oe_col_buffer,
                               oe,
                               layout,
                               top_n,
                               start,
                               step);
  }
  // Because of how we represent nulls for integral types, they'd be at the
  // wrong position in these two cases. Separate them into a different buffer.
  if ((oe.is_desc && oe.nulls_first) || (!oe.is_desc && !oe.nulls_first)) {
    return baseline_sort_int<K>(device_type,
                                device_id,
                                data_mgr,
                                groupby_buffer,
                                oe_col_buffer,
                                oe,
                                layout,
                                top_n,
                                start,
                                step);
  }
  ThrustAllocator thrust_allocator(data_mgr, device_id);
  // Fastest path, no need to separate nulls away since they'll end up at the
  // right place as a side effect of how we're representing nulls.
  if (device_type == ExecutorDeviceType::GPU) {
    if (oe_col_buffer.empty()) {
      return {};
    }
    const auto dev_idx_buff =
        get_device_ptr<uint32_t>(oe_col_buffer.size(), thrust_allocator);
    thrust::sequence(dev_idx_buff, dev_idx_buff + oe_col_buffer.size(), start, step);
    const auto dev_oe_col_buffer = get_device_copy_ptr(oe_col_buffer, thrust_allocator);
    return do_radix_sort<K>(device_type,
                            device_id,
                            thrust_allocator,
                            groupby_buffer,
                            dev_oe_col_buffer,
                            dev_oe_col_buffer + oe_col_buffer.size(),
                            dev_idx_buff,
                            oe_col_buffer.size(),
                            oe,
                            layout,
                            top_n);
  }
  CHECK(device_type == ExecutorDeviceType::CPU);
  thrust::host_vector<uint32_t> host_idx_buff(oe_col_buffer.size());
  thrust::sequence(host_idx_buff.begin(), host_idx_buff.end(), start, step);
  return do_radix_sort<K>(device_type,
                          device_id,
                          thrust_allocator,
                          groupby_buffer,
                          oe_col_buffer.begin(),
                          oe_col_buffer.end(),
                          host_idx_buff.begin(),
                          host_idx_buff.size(),
                          oe,
                          layout,
                          top_n);
}
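The GPU branch above illustrates the sort-by-index pattern: build a strided index sequence, then have do_radix_sort reorder the indices by the order-by column. A minimal self-contained sketch of that pattern follows; plain thrust device vectors stand in for the ThrustAllocator-backed buffers, and an ascending thrust::sort_by_key stands in for do_radix_sort, so this is an illustration of the technique rather than this file's actual code path.

#include <thrust/device_vector.h>
#include <thrust/host_vector.h>
#include <thrust/sequence.h>
#include <thrust/sort.h>
#include <cstdint>
#include <cstdio>

int main() {
  // Stand-in for the order-by column collected from the group-by buffer.
  const int64_t host_keys[] = {42, 7, 19, 3};
  thrust::device_vector<int64_t> keys(host_keys, host_keys + 4);

  // Index sequence starting at `start` with stride `step`, as in baseline_sort.
  const size_t start = 0, step = 1;
  thrust::device_vector<uint32_t> idx(keys.size());
  thrust::sequence(idx.begin(), idx.end(), start, step);

  // Sort the row indices by the key column; a descending order would pass
  // thrust::greater<int64_t>() as the comparator.
  thrust::sort_by_key(keys.begin(), keys.end(), idx.begin());

  thrust::host_vector<uint32_t> result = idx;
  for (const auto i : result) {
    printf("%u ", i);  // expect: 3 1 2 0
  }
  printf("\n");
  return 0;
}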


template std::vector<uint32_t> baseline_sort<int32_t>(const ExecutorDeviceType device_type,
                                                      const int device_id,
                                                      Data_Namespace::DataMgr* data_mgr,
                                                      const int8_t* groupby_buffer,
                                                      const PodOrderEntry& oe,
                                                      const GroupByBufferLayoutInfo& layout,
                                                      const size_t top_n,
                                                      const size_t start,
                                                      const size_t step)

template std::vector<uint32_t> baseline_sort<int64_t>(const ExecutorDeviceType device_type,
                                                      const int device_id,
                                                      Data_Namespace::DataMgr* data_mgr,
                                                      const int8_t* groupby_buffer,
                                                      const PodOrderEntry& oe,
                                                      const GroupByBufferLayoutInfo& layout,
                                                      const size_t top_n,
                                                      const size_t start,
                                                      const size_t step)
CUstream getQueryEngineCudaStreamForDevice(int device_num)

Definition at line 7 of file QueryEngine.cpp.

{  // NOTE: CUstream is cudaStream_t
  return QueryEngine::getInstance()->getCudaStreamForDevice(device_num);
}
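Callers that already hold the per-device stream can hand it straight to thrust instead of sorting on the default stream. A hedged sketch of that usage; the sort_on_query_engine_stream helper and the choice of thrust::cuda::par.on are illustrative, not taken from this file:

#include <thrust/device_vector.h>
#include <thrust/execution_policy.h>
#include <thrust/sort.h>
#include <cstdint>
#include <cuda.h>

// Illustrative only: run a thrust sort on the per-device stream owned by
// QueryEngine rather than the default stream. A driver-API CUstream is
// interchangeable with the runtime's cudaStream_t, as the NOTE above says.
void sort_on_query_engine_stream(thrust::device_vector<int64_t>& vals,
                                 const int device_num) {
  CUstream stream = getQueryEngineCudaStreamForDevice(device_num);
  thrust::sort(thrust::cuda::par.on(stream), vals.begin(), vals.end());
}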