OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
InPlaceSort.cpp File Reference
#include "InPlaceSort.h"
#include "InPlaceSortImpl.h"
#include <Analyzer/Analyzer.h>
#include "DataMgr/Allocators/ThrustAllocator.h"
#include "Descriptors/QueryMemoryDescriptor.h"
#include "Logger/Logger.h"
#include <cstdint>
+ Include dependency graph for InPlaceSort.cpp:

Go to the source code of this file.

Namespaces

 anonymous_namespace{InPlaceSort.cpp}
 

Functions

void sort_groups_cpu (int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes)
 
void apply_permutation_cpu (int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, int64_t *tmp_buff, const uint32_t chosen_bytes)
 
void anonymous_namespace{InPlaceSort.cpp}::sort_groups_gpu (int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
 
void anonymous_namespace{InPlaceSort.cpp}::apply_permutation_gpu (int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
 
void inplace_sort_gpu (const std::list< Analyzer::OrderEntry > &order_entries, const QueryMemoryDescriptor &query_mem_desc, const GpuGroupByBuffers &group_by_buffers, Data_Namespace::DataMgr *data_mgr, const int device_id)
 

Function Documentation

void apply_permutation_cpu ( int64_t *  val_buff,
int32_t *  idx_buff,
const uint64_t  entry_count,
int64_t *  tmp_buff,
const uint32_t  chosen_bytes 
)

Definition at line 46 of file InPlaceSort.cpp.

References apply_permutation_on_cpu() and CHECK.

Referenced by ResultSet::radixSortOnCpu().

50  {
51 #ifdef HAVE_CUDA
52  switch (chosen_bytes) {
53  case 1:
54  case 2:
55  case 4:
56  case 8:
57  apply_permutation_on_cpu(val_buff, idx_buff, entry_count, tmp_buff, chosen_bytes);
58  break;
59  default:
60  CHECK(false);
61  }
62 #endif
63 }
#define CHECK(condition)
Definition: Logger.h:291
void apply_permutation_on_cpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, int64_t *tmp_buff, const uint32_t chosen_bytes)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void inplace_sort_gpu ( const std::list< Analyzer::OrderEntry > &  order_entries,
const QueryMemoryDescriptor &  query_mem_desc,
const GpuGroupByBuffers &  group_by_buffers,
Data_Namespace::DataMgr *  data_mgr,
const int  device_id 
)

Definition at line 111 of file InPlaceSort.cpp.

References align_to_int64(), anonymous_namespace{InPlaceSort.cpp}::apply_permutation_gpu(), CHECK_EQ, GpuGroupByBuffers::data, QueryMemoryDescriptor::getColOffInBytes(), QueryMemoryDescriptor::getEntryCount(), QueryMemoryDescriptor::getPaddedSlotWidthBytes(), QueryMemoryDescriptor::getSlotCount(), QueryMemoryDescriptor::hasKeylessHash(), and anonymous_namespace{InPlaceSort.cpp}::sort_groups_gpu().

Referenced by QueryExecutionContext::launchGpuCode() and ResultSet::radixSortOnGpu().

115  {
116  ThrustAllocator alloc(data_mgr, device_id);
117  CHECK_EQ(size_t(1), order_entries.size());
118  const auto idx_buff = group_by_buffers.data -
119  align_to_int64(query_mem_desc.getEntryCount() * sizeof(int32_t));
120  for (const auto& order_entry : order_entries) {
121  const auto target_idx = order_entry.tle_no - 1;
122  const auto val_buff =
123  group_by_buffers.data + query_mem_desc.getColOffInBytes(target_idx);
124  const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(target_idx);
125  sort_groups_gpu(reinterpret_cast<int64_t*>(val_buff),
126  reinterpret_cast<int32_t*>(idx_buff),
127  query_mem_desc.getEntryCount(),
128  order_entry.is_desc,
129  chosen_bytes,
130  alloc,
131  device_id);
132  if (!query_mem_desc.hasKeylessHash()) {
133  apply_permutation_gpu(reinterpret_cast<int64_t*>(group_by_buffers.data),
134  reinterpret_cast<int32_t*>(idx_buff),
135  query_mem_desc.getEntryCount(),
136  sizeof(int64_t),
137  alloc,
138  device_id);
139  }
140  for (size_t target_idx = 0; target_idx < query_mem_desc.getSlotCount();
141  ++target_idx) {
142  if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
143  continue;
144  }
145  const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(target_idx);
146  const auto val_buff =
147  group_by_buffers.data + query_mem_desc.getColOffInBytes(target_idx);
148  apply_permutation_gpu(reinterpret_cast<int64_t*>(val_buff),
149  reinterpret_cast<int32_t*>(idx_buff),
150  query_mem_desc.getEntryCount(),
151  chosen_bytes,
152  alloc,
153  device_id);
154  }
155  }
156 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void sort_groups_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
Definition: InPlaceSort.cpp:67
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
void apply_permutation_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
Definition: InPlaceSort.cpp:88
size_t getColOffInBytes(const size_t col_idx) const
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void sort_groups_cpu ( int64_t *  val_buff,
int32_t *  idx_buff,
const uint64_t  entry_count,
const bool  desc,
const uint32_t  chosen_bytes 
)

Definition at line 27 of file InPlaceSort.cpp.

References CHECK and sort_on_cpu().

Referenced by ResultSet::radixSortOnCpu().

31  {
32 #ifdef HAVE_CUDA
33  switch (chosen_bytes) {
34  case 1:
35  case 2:
36  case 4:
37  case 8:
38  sort_on_cpu(val_buff, idx_buff, entry_count, desc, chosen_bytes);
39  break;
40  default:
41  CHECK(false);
42  }
43 #endif
44 }
void sort_on_cpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes)
#define CHECK(condition)
Definition: Logger.h:291

+ Here is the call graph for this function:

+ Here is the caller graph for this function: