OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
InPlaceSort.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "InPlaceSort.h"
18 #include "InPlaceSortImpl.h"
19 
20 #include <Analyzer/Analyzer.h>
23 #include "Logger/Logger.h"
24 
25 #include <cstdint>
26 
/**
 * Thin dispatcher that forwards an in-place sort request to sort_on_cpu().
 *
 * Only value widths of 1, 2, 4 or 8 bytes are accepted; any other
 * chosen_bytes trips CHECK(false). The whole body sits behind HAVE_CUDA, so in
 * a build without that macro this function does nothing at all.
 *
 * @param val_buff      value buffer handed to sort_on_cpu()
 * @param idx_buff      index buffer handed to sort_on_cpu()
 * @param entry_count   number of entries, forwarded unchanged
 * @param desc          forwarded unchanged (presumably selects descending
 *                      order -- confirm against sort_on_cpu())
 * @param chosen_bytes  width in bytes of each value; must be 1, 2, 4 or 8
 */
void sort_groups_cpu(int64_t* val_buff,
                     int32_t* idx_buff,
                     const uint64_t entry_count,
                     const bool desc,
                     const uint32_t chosen_bytes) {
#ifdef HAVE_CUDA
  const bool width_is_supported = chosen_bytes == 1 || chosen_bytes == 2 ||
                                  chosen_bytes == 4 || chosen_bytes == 8;
  if (!width_is_supported) {
    // Unsupported width: programming error on the caller's side.
    CHECK(false);
    return;
  }
  sort_on_cpu(val_buff, idx_buff, entry_count, desc, chosen_bytes);
#endif
}
45 
/**
 * Thin dispatcher that forwards a permutation request to
 * apply_permutation_on_cpu().
 *
 * Only value widths of 1, 2, 4 or 8 bytes are accepted; any other
 * chosen_bytes trips CHECK(false). The whole body sits behind HAVE_CUDA, so in
 * a build without that macro this function does nothing at all.
 *
 * @param val_buff      value buffer handed to apply_permutation_on_cpu()
 * @param idx_buff      index buffer handed to apply_permutation_on_cpu()
 * @param entry_count   number of entries, forwarded unchanged
 * @param tmp_buff      forwarded unchanged (presumably scratch space --
 *                      confirm against apply_permutation_on_cpu())
 * @param chosen_bytes  width in bytes of each value; must be 1, 2, 4 or 8
 */
void apply_permutation_cpu(int64_t* val_buff,
                           int32_t* idx_buff,
                           const uint64_t entry_count,
                           int64_t* tmp_buff,
                           const uint32_t chosen_bytes) {
#ifdef HAVE_CUDA
  const bool width_is_supported = chosen_bytes == 1 || chosen_bytes == 2 ||
                                  chosen_bytes == 4 || chosen_bytes == 8;
  if (!width_is_supported) {
    // Unsupported width: programming error on the caller's side.
    CHECK(false);
    return;
  }
  apply_permutation_on_cpu(val_buff, idx_buff, entry_count, tmp_buff, chosen_bytes);
#endif
}
64 
65 namespace {
66 
67 void sort_groups_gpu(int64_t* val_buff,
68  int32_t* idx_buff,
69  const uint64_t entry_count,
70  const bool desc,
71  const uint32_t chosen_bytes,
72  ThrustAllocator& alloc,
73  const int device_id) {
74 #ifdef HAVE_CUDA
75  switch (chosen_bytes) {
76  case 1:
77  case 2:
78  case 4:
79  case 8:
80  sort_on_gpu(val_buff, idx_buff, entry_count, desc, chosen_bytes, alloc, device_id);
81  break;
82  default:
83  CHECK(false);
84  }
85 #endif
86 }
87 
88 void apply_permutation_gpu(int64_t* val_buff,
89  int32_t* idx_buff,
90  const uint64_t entry_count,
91  const uint32_t chosen_bytes,
92  ThrustAllocator& alloc,
93  const int device_id) {
94 #ifdef HAVE_CUDA
95  switch (chosen_bytes) {
96  case 1:
97  case 2:
98  case 4:
99  case 8:
101  val_buff, idx_buff, entry_count, chosen_bytes, alloc, device_id);
102  break;
103  default:
104  CHECK(false);
105  }
106 #endif
107 }
108 
109 } // namespace
110 
111 void inplace_sort_gpu(const std::list<Analyzer::OrderEntry>& order_entries,
113  const GpuGroupByBuffers& group_by_buffers,
114  Data_Namespace::DataMgr* data_mgr,
115  const int device_id) {
116  ThrustAllocator alloc(data_mgr, device_id);
117  CHECK_EQ(size_t(1), order_entries.size());
118  const auto idx_buff = group_by_buffers.data -
119  align_to_int64(query_mem_desc.getEntryCount() * sizeof(int32_t));
120  for (const auto& order_entry : order_entries) {
121  const auto target_idx = order_entry.tle_no - 1;
122  const auto val_buff =
123  group_by_buffers.data + query_mem_desc.getColOffInBytes(target_idx);
124  const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(target_idx);
125  sort_groups_gpu(reinterpret_cast<int64_t*>(val_buff),
126  reinterpret_cast<int32_t*>(idx_buff),
127  query_mem_desc.getEntryCount(),
128  order_entry.is_desc,
129  chosen_bytes,
130  alloc,
131  device_id);
132  if (!query_mem_desc.hasKeylessHash()) {
133  apply_permutation_gpu(reinterpret_cast<int64_t*>(group_by_buffers.data),
134  reinterpret_cast<int32_t*>(idx_buff),
135  query_mem_desc.getEntryCount(),
136  sizeof(int64_t),
137  alloc,
138  device_id);
139  }
140  for (size_t target_idx = 0; target_idx < query_mem_desc.getSlotCount();
141  ++target_idx) {
142  if (static_cast<int>(target_idx) == order_entry.tle_no - 1) {
143  continue;
144  }
145  const auto chosen_bytes = query_mem_desc.getPaddedSlotWidthBytes(target_idx);
146  const auto val_buff =
147  group_by_buffers.data + query_mem_desc.getColOffInBytes(target_idx);
148  apply_permutation_gpu(reinterpret_cast<int64_t*>(val_buff),
149  reinterpret_cast<int32_t*>(idx_buff),
150  query_mem_desc.getEntryCount(),
151  chosen_bytes,
152  alloc,
153  device_id);
154  }
155  }
156 }
Defines data structures for the semantic analysis phase of query processing.
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void sort_groups_cpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes)
Definition: InPlaceSort.cpp:27
void sort_groups_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
Definition: InPlaceSort.cpp:67
void sort_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
void sort_on_cpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const bool desc, const uint32_t chosen_bytes)
void inplace_sort_gpu(const std::list< Analyzer::OrderEntry > &order_entries, const QueryMemoryDescriptor &query_mem_desc, const GpuGroupByBuffers &group_by_buffers, Data_Namespace::DataMgr *data_mgr, const int device_id)
void apply_permutation_cpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, int64_t *tmp_buff, const uint32_t chosen_bytes)
Definition: InPlaceSort.cpp:46
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
Descriptor for the result set buffer layout.
void apply_permutation_on_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
#define CHECK(condition)
Definition: Logger.h:291
void apply_permutation_gpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, const uint32_t chosen_bytes, ThrustAllocator &alloc, const int device_id)
Definition: InPlaceSort.cpp:88
void apply_permutation_on_cpu(int64_t *val_buff, int32_t *idx_buff, const uint64_t entry_count, int64_t *tmp_buff, const uint32_t chosen_bytes)
size_t getColOffInBytes(const size_t col_idx) const
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)