#error This code is not intended to be compiled with a CUDA C++ compiler
#define DEF_ARITH_NULLABLE(type, null_type, opname, opsym)                 \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable( \
      const type lhs, const type rhs, const null_type null_val) {          \
    if (lhs != null_val && rhs != null_val) {                              \
      return lhs opsym rhs;                                                \
    }                                                                      \
    return null_val;                                                       \
  }
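// The DEF_ARITH_NULLABLE* macros stamp out null-propagating scalar arithmetic for the
// generated query code. As an illustrative example (not itself part of this file),
// expanding DEF_ARITH_NULLABLE(int32_t, int64_t, add, +) yields roughly:
//
//   extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t add_int32_t_nullable(
//       const int32_t lhs, const int32_t rhs, const int64_t null_val) {
//     if (lhs != null_val && rhs != null_val) {
//       return lhs + rhs;
//     }
//     return null_val;
//   }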
#define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym)                 \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable_lhs( \
      const type lhs, const type rhs, const null_type null_val) {              \
    if (lhs != null_val) {                                                     \
      return lhs opsym rhs;                                                    \
    }                                                                          \
    return null_val;                                                           \
  }
#define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym)                 \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type opname##_##type##_nullable_rhs( \
      const type lhs, const type rhs, const null_type null_val) {              \
    if (rhs != null_val) {                                                     \
      return lhs opsym rhs;                                                    \
    }                                                                          \
    return null_val;                                                           \
  }
#define DEF_CMP_NULLABLE(type, null_type, opname, opsym)                     \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable( \
      const type lhs,                                                        \
      const type rhs,                                                        \
      const null_type null_val,                                              \
      const int8_t null_bool_val) {                                          \
    if (lhs != null_val && rhs != null_val) {                                \
      return lhs opsym rhs;                                                  \
    }                                                                        \
    return null_bool_val;                                                    \
  }
#define DEF_CMP_NULLABLE_LHS(type, null_type, opname, opsym)                     \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable_lhs( \
      const type lhs,                                                            \
      const type rhs,                                                            \
      const null_type null_val,                                                  \
      const int8_t null_bool_val) {                                              \
    if (lhs != null_val) {                                                       \
      return lhs opsym rhs;                                                      \
    }                                                                            \
    return null_bool_val;                                                        \
  }
#define DEF_CMP_NULLABLE_RHS(type, null_type, opname, opsym)                     \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t opname##_##type##_nullable_rhs( \
      const type lhs,                                                            \
      const type rhs,                                                            \
      const null_type null_val,                                                  \
      const int8_t null_bool_val) {                                              \
    if (rhs != null_val) {                                                       \
      return lhs opsym rhs;                                                      \
    }                                                                            \
    return null_bool_val;                                                        \
  }
#define DEF_SAFE_DIV_NULLABLE(type, null_type, opname)            \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type safe_div_##type(   \
      const type lhs, const type rhs, const null_type null_val) { \
    if (lhs != null_val && rhs != null_val && rhs != 0) {         \
      return lhs / rhs;                                           \
    }                                                             \
    return null_val;                                              \
  }
#define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type) \
  DEF_ARITH_NULLABLE(type, null_type, add, +)        \
  DEF_ARITH_NULLABLE(type, null_type, sub, -)        \
  DEF_ARITH_NULLABLE(type, null_type, mul, *)        \
  DEF_ARITH_NULLABLE(type, null_type, div, /)        \
  DEF_SAFE_DIV_NULLABLE(type, null_type, safe_div)   \
  DEF_ARITH_NULLABLE_LHS(type, null_type, add, +)    \
  DEF_ARITH_NULLABLE_LHS(type, null_type, sub, -)    \
  DEF_ARITH_NULLABLE_LHS(type, null_type, mul, *)    \
  DEF_ARITH_NULLABLE_LHS(type, null_type, div, /)    \
  DEF_ARITH_NULLABLE_RHS(type, null_type, add, +)    \
  DEF_ARITH_NULLABLE_RHS(type, null_type, sub, -)    \
  DEF_ARITH_NULLABLE_RHS(type, null_type, mul, *)    \
  DEF_ARITH_NULLABLE_RHS(type, null_type, div, /)    \
  DEF_CMP_NULLABLE(type, null_type, eq, ==)          \
  DEF_CMP_NULLABLE(type, null_type, ne, !=)          \
  DEF_CMP_NULLABLE(type, null_type, lt, <)           \
  DEF_CMP_NULLABLE(type, null_type, gt, >)           \
  DEF_CMP_NULLABLE(type, null_type, le, <=)          \
  DEF_CMP_NULLABLE(type, null_type, ge, >=)          \
  DEF_CMP_NULLABLE_LHS(type, null_type, eq, ==)      \
  DEF_CMP_NULLABLE_LHS(type, null_type, ne, !=)      \
  DEF_CMP_NULLABLE_LHS(type, null_type, lt, <)       \
  DEF_CMP_NULLABLE_LHS(type, null_type, gt, >)       \
  DEF_CMP_NULLABLE_LHS(type, null_type, le, <=)      \
  DEF_CMP_NULLABLE_LHS(type, null_type, ge, >=)      \
  DEF_CMP_NULLABLE_RHS(type, null_type, eq, ==)      \
  DEF_CMP_NULLABLE_RHS(type, null_type, ne, !=)      \
  DEF_CMP_NULLABLE_RHS(type, null_type, lt, <)       \
  DEF_CMP_NULLABLE_RHS(type, null_type, gt, >)       \
  DEF_CMP_NULLABLE_RHS(type, null_type, le, <=)      \
  DEF_CMP_NULLABLE_RHS(type, null_type, ge, >=)
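// DEF_BINARY_NULLABLE_ALL_OPS bundles the arithmetic, safe-division, and comparison
// variants above into a single instantiation point per (value type, null sentinel type)
// pair. For example (illustrative), DEF_BINARY_NULLABLE_ALL_OPS(int64_t, int64_t) emits
// add_int64_t_nullable, safe_div_int64_t, eq_int64_t_nullable_lhs, and so on.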
#undef DEF_BINARY_NULLABLE_ALL_OPS
#undef DEF_SAFE_DIV_NULLABLE
#undef DEF_CMP_NULLABLE_RHS
#undef DEF_CMP_NULLABLE_LHS
#undef DEF_CMP_NULLABLE
#undef DEF_ARITH_NULLABLE_RHS
#undef DEF_ARITH_NULLABLE_LHS
#undef DEF_ARITH_NULLABLE
#define DEF_MAP_STRING_TO_DATUM(value_type, value_name)                        \
  extern "C" ALWAYS_INLINE DEVICE value_type map_string_to_datum_##value_name( \
      const int32_t string_id,                                                 \
      const int64_t translation_map_handle,                                    \
      const int32_t min_source_id) {                                           \
    const Datum* translation_map =                                             \
        reinterpret_cast<const Datum*>(translation_map_handle);                \
    const Datum& out_datum = translation_map[string_id - min_source_id];       \
    return out_datum.value_name##val;                                          \
  }
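// Maps a dictionary-encoded string id to a typed Datum value by indexing a per-query
// translation table. For example (illustrative), DEF_MAP_STRING_TO_DATUM(float, float)
// emits map_string_to_datum_float(), which returns out_datum.floatval.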
#undef DEF_MAP_STRING_TO_DATUM
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t scale_decimal_up(
    const int64_t operand,
    const uint64_t scale,
    const int64_t operand_null_val,
    const int64_t result_null_val) {
  return operand != operand_null_val ? operand * scale : result_null_val;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t scale_decimal_down_nullable(
    const int64_t operand,
    const int64_t scale,
    const int64_t null_val) {
  // Rounded scale-down of a decimal value; a null operand stays null.
  if (operand == null_val) {
    return null_val;
  }
  int64_t tmp = scale >> 1;
  tmp = operand >= 0 ? operand + tmp : operand - tmp;
  return tmp / scale;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t scale_decimal_down_not_nullable(
    const int64_t operand,
    const int64_t scale,
    const int64_t null_val) {
  int64_t tmp = scale >> 1;
  tmp = operand >= 0 ? operand + tmp : operand - tmp;
  return tmp / scale;
}
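// Both scale-down helpers round half away from zero: adding (or subtracting) scale/2
// before the integer division turns truncation into rounding, e.g. with scale = 100,
// (149 + 50) / 100 -> 1 while (151 + 50) / 100 -> 2.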
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t floor_div_lhs(const int64_t dividend,
                                                              const int64_t divisor) {
  return (dividend < 0 ? dividend - (divisor - 1) : dividend) / divisor;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t floor_div_nullable_lhs(
    const int64_t dividend,
    const int64_t divisor,
    const int64_t null_val) {
  return dividend == null_val ? null_val : floor_div_lhs(dividend, divisor);
}
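// Floor division for a positive divisor: C++ integer division truncates toward zero, so
// -7 / 2 == -3; shifting a negative dividend down by (divisor - 1) first gives
// (-7 - 1) / 2 == -4, which is floor(-3.5).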
#define DEF_UMINUS_NULLABLE(type, null_type)                             \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE type uminus_##type##_nullable( \
      const type operand, const null_type null_val) {                    \
    return operand == null_val ? null_val : -operand;                    \
  }

#undef DEF_UMINUS_NULLABLE
#define DEF_CAST_NULLABLE(from_type, to_type)                                    \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type                                \
      cast_##from_type##_to_##to_type##_nullable(const from_type operand,        \
                                                 const from_type from_null_val,  \
                                                 const to_type to_null_val) {    \
    return operand == from_null_val ? to_null_val : operand;                     \
  }

#define DEF_CAST_SCALED_NULLABLE(from_type, to_type)                                   \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type                                      \
      cast_##from_type##_to_##to_type##_scaled_nullable(const from_type operand,       \
                                                        const from_type from_null_val, \
                                                        const to_type to_null_val,     \
                                                        const to_type divider) {       \
    return operand == from_null_val ? to_null_val : operand / divider;                 \
  }

#define DEF_CAST_NULLABLE_BIDIR(type1, type2) \
  DEF_CAST_NULLABLE(type1, type2)             \
  DEF_CAST_NULLABLE(type2, type1)
#define DEF_ROUND_NULLABLE(from_type, to_type)                                   \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE to_type                                \
      cast_##from_type##_to_##to_type##_nullable(const from_type operand,        \
                                                 const from_type from_null_val,  \
                                                 const to_type to_null_val) {    \
    return operand == from_null_val                                              \
               ? to_null_val                                                     \
               : static_cast<to_type>(operand + (operand < from_type(0)          \
                                                     ? from_type(-0.5)           \
                                                     : from_type(0.5)));         \
  }
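// Rounds a floating-point source to an integral target by adding +/-0.5 before the
// truncating cast, so e.g. 2.5 -> 3 and -2.5 -> -3 (half away from zero), while the
// source null sentinel is forwarded to the target null sentinel.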
#undef DEF_ROUND_NULLABLE
#undef DEF_CAST_NULLABLE_BIDIR
#undef DEF_CAST_SCALED_NULLABLE
#undef DEF_CAST_NULLABLE
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_not(const int8_t operand,
                                                           const int8_t null_val) {
  return operand == null_val ? operand : (operand ? 0 : 1);
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_and(const int8_t lhs,
                                                           const int8_t rhs,
                                                           const int8_t null_val) {
  if (lhs == null_val) {
    return rhs == 0 ? rhs : null_val;
  }
  if (rhs == null_val) {
    return lhs == 0 ? lhs : null_val;
  }
  return (lhs && rhs) ? 1 : 0;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_or(const int8_t lhs,
                                                          const int8_t rhs,
                                                          const int8_t null_val) {
  if (lhs == null_val) {
    return rhs == 0 ? null_val : rhs;
  }
  if (rhs == null_val) {
    return lhs == 0 ? null_val : lhs;
  }
  return (lhs || rhs) ? 1 : 0;
}
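// logical_and / logical_or implement SQL three-valued logic on int8 operands:
// FALSE AND NULL -> FALSE, TRUE AND NULL -> NULL, TRUE OR NULL -> TRUE,
// FALSE OR NULL -> NULL, which is exactly the short-circuit cases handled above.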
extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_count_distinct_bitmap(
    int64_t* agg,
    const int64_t val,
    const int64_t min_val,
    const int64_t bucket_size) {
  uint64_t bitmap_idx = val - min_val;
  if (1 < bucket_size) {
    bitmap_idx /= static_cast<uint64_t>(bucket_size);
  }
  reinterpret_cast<int8_t*>(*agg)[bitmap_idx >> 3] |= (1 << (bitmap_idx & 7));
}
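// The destination bitmap is addressed bit-by-bit: (bitmap_idx >> 3) selects the byte and
// (bitmap_idx & 7) the bit within it, so each distinct (optionally bucketed) value offset
// from min_val sets exactly one bit no matter how many times it appears.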
#define GPU_RT_STUB NEVER_INLINE
#else
#define GPU_RT_STUB NEVER_INLINE __attribute__((optnone))
extern "C" RUNTIME_EXPORT NEVER_INLINE void agg_approximate_count_distinct(
    int64_t* agg,
    const int64_t key,
    const uint32_t b) {
  const uint64_t hash = MurmurHash64A(&key, sizeof(key), 0);
  const uint32_t index = hash >> (64 - b);
  const uint8_t rank = get_rank(hash << b, 64 - b);
  uint8_t* M = reinterpret_cast<uint8_t*>(*agg);
  M[index] = std::max(M[index], rank);
}
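// HyperLogLog register update: the top b bits of the 64-bit hash pick one of 2^b
// registers, the remaining bits supply the leading-zero rank, and each register keeps
// the maximum rank it has observed.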
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t bit_is_set(const int8_t* bitset,
                                                          const int64_t val,
                                                          const int64_t min_val,
                                                          const int64_t max_val,
                                                          const int64_t null_val,
                                                          const int8_t null_bool_val) {
  if (val == null_val) {
    return null_bool_val;
  }
  if (val < min_val || val > max_val) {
    return 0;
  }
  const uint64_t bitmap_idx = val - min_val;
  return bitset[bitmap_idx >> 3] & (1 << (bitmap_idx & 7)) ? 1 : 0;
}
    const int64_t target_value,
    const int64_t* col_buf) {
  int64_t h = entry_cnt - 1;
    int64_t mid = l + (h - l) / 2;
    if (target_value < col_buf[mid]) {
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t get_valid_buf_start_pos(
    const int64_t null_start_pos, const int64_t null_end_pos) {
  return null_start_pos == 0 ? null_end_pos + 1 : 0;
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t get_valid_buf_end_pos(
    const int64_t num_elems,
    const int64_t null_start_pos,
    const int64_t null_end_pos) {
  return null_end_pos == num_elems ? null_start_pos : num_elems;
}
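// Window-function partitions are sorted with their null rows grouped at one end; these
// two helpers locate where the run of non-null rows begins and ends, depending on
// whether the nulls were sorted to the front or the back of the partition.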
template <typename T, typename Comparator>
inline int64_t compute_current_row_idx_in_frame(const int64_t num_elems,
                                                const int64_t cur_row_idx,
                                                const T* col_buf,
                                                const int32_t* partition_rowid_buf,
                                                const int64_t* ordered_index_buf,
                                                const T null_val,
                                                const bool nulls_first,
                                                const int64_t null_start_pos,
                                                const int64_t null_end_pos,
                                                Comparator cmp) {
  const auto target_value = col_buf[cur_row_idx];
  if (target_value == null_val) {
    for (int64_t target_offset = null_start_pos; target_offset < null_end_pos;
         target_offset++) {
      const auto candidate_offset = partition_rowid_buf[ordered_index_buf[target_offset]];
      if (candidate_offset == cur_row_idx) {
        return target_offset;
      }
    }
  }
  auto const modified_null_end_pos = nulls_first ? null_end_pos - 1 : null_end_pos;
  int64_t l = nulls_first ? modified_null_end_pos + 1 : 0;
  int64_t h = nulls_first ? num_elems - 1 : modified_null_end_pos;
  while (l < h) {
    int64_t mid = l + (h - l) / 2;
    auto const target_row_idx = partition_rowid_buf[ordered_index_buf[mid]];
    auto const cur_value = col_buf[target_row_idx];
    if (cmp(target_value, cur_value)) {
      h = mid;
    } else {
      l = mid + 1;
    }
  }
  int64_t target_offset = l;
  int64_t candidate_row_idx = partition_rowid_buf[ordered_index_buf[target_offset]];
  while (col_buf[candidate_row_idx] == target_value && target_offset < num_elems) {
    if (candidate_row_idx == cur_row_idx) {
      return target_offset;
    }
    candidate_row_idx = partition_rowid_buf[ordered_index_buf[++target_offset]];
  }
  return -1;
}
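// Locates cur_row_idx's position within its sorted window partition: null rows are found
// by a linear scan of the null range, non-null rows by a lower-bound binary search
// followed by a walk over ties (rows whose ordering value equals the target's).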
#define DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(value_type, oper_name)                    \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t                                      \
      compute_##value_type##_##oper_name##_current_row_idx_in_frame(                   \
          const int64_t num_elems,                                                     \
          const int64_t cur_row_idx,                                                   \
          const value_type* col_buf,                                                   \
          const int32_t* partition_rowid_buf,                                          \
          const int64_t* ordered_index_buf,                                            \
          const value_type null_val,                                                   \
          const bool nulls_first,                                                      \
          const int64_t null_start_pos,                                                \
          const int64_t null_end_pos) {                                                \
    return compute_current_row_idx_in_frame<value_type>(num_elems,                     \
                                                        cur_row_idx,                   \
                                                        col_buf,                       \
                                                        partition_rowid_buf,           \
                                                        ordered_index_buf,             \
                                                        null_val,                      \
                                                        nulls_first,                   \
                                                        null_start_pos,                \
                                                        null_end_pos,                  \
                                                        std::oper_name<value_type>{}); \
  }

#define DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES(oper_name) \
  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int8_t, oper_name)         \
  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int16_t, oper_name)        \
  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int32_t, oper_name)        \
  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(int64_t, oper_name)        \
  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(float, oper_name)          \
  DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME(double, oper_name)
#undef DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES
#undef DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME
template <typename TARGET_VAL_TYPE, typename COL_TYPE, typename NULL_TYPE>
inline int64_t compute_lower_bound_from_ordered_partition_index(
    const int64_t num_elems,
    const TARGET_VAL_TYPE target_val,
    const COL_TYPE* col_buf,
    const int32_t* partition_rowid_buf,
    const int64_t* ordered_index_buf,
    const NULL_TYPE null_val,
    const bool nulls_first,
    const int64_t null_start_offset,
    const int64_t null_end_offset) {
  if (target_val == null_val) {
    return null_start_offset;
  }
  auto const modified_null_end_pos = nulls_first ? null_end_offset - 1 : null_end_offset;
  int64_t l = nulls_first ? modified_null_end_pos + 1 : 0;
  int64_t h = nulls_first ? num_elems - 1 : modified_null_end_pos;
  while (l < h) {
    int64_t mid = l + (h - l) / 2;
    if (target_val <= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) {
      h = mid;
    } else {
      l = mid + 1;
    }
  }
  return l;
}
#define DEF_RANGE_MODE_FRAME_LOWER_BOUND(                                                      \
    target_val_type, col_type, null_type, opname, opsym)                                       \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t                                              \
      range_mode_##target_val_type##_##col_type##_##null_type##_##opname##_frame_lower_bound(  \
          const int64_t num_elems,                                                             \
          const target_val_type target_value,                                                  \
          const col_type* col_buf,                                                             \
          const int32_t* partition_rowid_buf,                                                  \
          const int64_t* ordered_index_buf,                                                    \
          const int64_t frame_bound_val,                                                       \
          const null_type null_val,                                                            \
          const bool nulls_first,                                                              \
          const int64_t null_start_pos,                                                        \
          const int64_t null_end_pos) {                                                        \
    if (target_value == null_val) {                                                            \
      return null_start_pos;                                                                   \
    }                                                                                          \
    target_val_type new_val = target_value opsym frame_bound_val;                              \
    return compute_lower_bound_from_ordered_partition_index<target_val_type,                   \
                                                            col_type,                          \
                                                            null_type>(num_elems,              \
                                                                       new_val,                \
                                                                       col_buf,                \
                                                                       partition_rowid_buf,    \
                                                                       ordered_index_buf,      \
                                                                       null_val,               \
                                                                       nulls_first,            \
                                                                       null_start_pos,         \
                                                                       null_end_pos);          \
  }

#undef DEF_RANGE_MODE_FRAME_LOWER_BOUND
template <typename TARGET_VAL_TYPE, typename COL_TYPE, typename NULL_TYPE>
inline int64_t compute_upper_bound_from_ordered_partition_index(
    const int64_t num_elems,
    const TARGET_VAL_TYPE target_val,
    const COL_TYPE* col_buf,
    const int32_t* partition_rowid_buf,
    const int64_t* ordered_index_buf,
    const NULL_TYPE null_val,
    const bool nulls_first,
    const int64_t null_start_offset,
    const int64_t null_end_offset) {
  if (target_val == null_val) {
    return null_end_offset;
  }
  auto const modified_null_end_pos = nulls_first ? null_end_offset - 1 : null_end_offset;
  int64_t l = nulls_first ? modified_null_end_pos + 1 : 0;
  int64_t h = nulls_first ? num_elems - 1 : modified_null_end_pos;
  while (l < h) {
    int64_t mid = l + (h - l) / 2;
    if (target_val >= col_buf[partition_rowid_buf[ordered_index_buf[mid]]]) {
      l = mid + 1;
    } else {
      h = mid;
    }
  }
  return l;
}
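// Together, the lower/upper bound searches translate a RANGE-mode frame boundary value
// into row offsets inside the ordered partition: the lower bound is the first row whose
// ordering value is >= the boundary, the upper bound is one past the last row <= it.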
#define DEF_RANGE_MODE_FRAME_UPPER_BOUND(                                                      \
    target_val_type, col_type, null_type, opname, opsym)                                       \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t                                              \
      range_mode_##target_val_type##_##col_type##_##null_type##_##opname##_frame_upper_bound(  \
          const int64_t num_elems,                                                             \
          const target_val_type target_value,                                                  \
          const col_type* col_buf,                                                             \
          const int32_t* partition_rowid_buf,                                                  \
          const int64_t* ordered_index_buf,                                                    \
          const int64_t frame_bound_val,                                                       \
          const null_type null_val,                                                            \
          const bool nulls_first,                                                              \
          const int64_t null_start_pos,                                                        \
          const int64_t null_end_pos) {                                                        \
    if (target_value == null_val) {                                                            \
      return null_end_pos;                                                                     \
    }                                                                                          \
    target_val_type new_val = target_value opsym frame_bound_val;                              \
    return compute_upper_bound_from_ordered_partition_index<target_val_type,                   \
                                                            col_type,                          \
                                                            null_type>(num_elems,              \
                                                                       new_val,                \
                                                                       col_buf,                \
                                                                       partition_rowid_buf,    \
                                                                       ordered_index_buf,      \
                                                                       null_val,               \
                                                                       nulls_first,            \
                                                                       null_start_pos,         \
                                                                       null_end_pos);          \
  }

#undef DEF_RANGE_MODE_FRAME_UPPER_BOUND
template <typename COL_TYPE, typename LOGICAL_TYPE>
inline LOGICAL_TYPE get_value_in_window_frame(const int64_t target_row_idx_in_frame,
                                              const int64_t frame_start_offset,
                                              const int64_t frame_end_offset,
                                              const COL_TYPE* col_buf,
                                              const int32_t* partition_rowid_buf,
                                              const int64_t* ordered_index_buf,
                                              const LOGICAL_TYPE logical_null_val,
                                              const LOGICAL_TYPE col_null_val) {
  if (target_row_idx_in_frame < frame_start_offset ||
      target_row_idx_in_frame > frame_end_offset) {
    return logical_null_val;
  }
  const auto target_offset =
      partition_rowid_buf[ordered_index_buf[target_row_idx_in_frame]];
  LOGICAL_TYPE target_val = col_buf[target_offset];
  if (target_val == col_null_val) {
    return logical_null_val;
  }
  return target_val;
}

#define DEF_GET_VALUE_IN_FRAME(col_type, logical_type)                                \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE logical_type                                \
      get_##col_type##_value_##logical_type##_type_in_frame(                          \
          const int64_t target_row_idx_in_frame,                                      \
          const int64_t frame_start_offset,                                           \
          const int64_t frame_end_offset,                                             \
          const col_type* col_buf,                                                    \
          const int32_t* partition_rowid_buf,                                         \
          const int64_t* ordered_index_buf,                                           \
          const logical_type logical_null_val,                                        \
          const logical_type col_null_val) {                                          \
    return get_value_in_window_frame<col_type, logical_type>(target_row_idx_in_frame, \
                                                             frame_start_offset,      \
                                                             frame_end_offset,        \
                                                             col_buf,                 \
                                                             partition_rowid_buf,     \
                                                             ordered_index_buf,       \
                                                             logical_null_val,        \
                                                             col_null_val);           \
  }

#undef DEF_GET_VALUE_IN_FRAME
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t encode_date(int64_t decoded_val,
                                                            int64_t null_val,
                                                            int64_t multiplier) {
  return decoded_val == null_val ? decoded_val : decoded_val * multiplier;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_row_mode_start_index_sub(
    int64_t candidate_index,
    int64_t current_partition_start_offset,
    int64_t frame_bound) {
  int64_t index = candidate_index - current_partition_start_offset - frame_bound;
  return index < 0 ? 0 : index;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_row_mode_start_index_add(
    int64_t candidate_index,
    int64_t current_partition_start_offset,
    int64_t frame_bound,
    int64_t num_current_partition_elem) {
  int64_t index = candidate_index - current_partition_start_offset + frame_bound;
  return index >= num_current_partition_elem ? num_current_partition_elem : index;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_row_mode_end_index_sub(
    int64_t candidate_index,
    int64_t current_partition_start_offset,
    int64_t frame_bound) {
  int64_t index = candidate_index - current_partition_start_offset - frame_bound;
  return index < 0 ? 0 : index + 1;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_row_mode_end_index_add(
    int64_t candidate_index,
    int64_t current_partition_start_offset,
    int64_t frame_bound,
    int64_t num_current_partition_elem) {
  int64_t index = candidate_index - current_partition_start_offset + frame_bound;
  return index >= num_current_partition_elem ? num_current_partition_elem : index + 1;
}
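// ROWS-mode frames are pure offset arithmetic. For example, for "ROWS BETWEEN 2
// PRECEDING AND 1 FOLLOWING" on the 5th row of a partition that starts at offset 0, the
// start index is max(0, 4 - 2) = 2 and the exclusive end index is min(num_rows, 4 + 1 + 1).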
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_integer_aggregation_tree(
    int64_t** aggregation_trees,
    size_t partition_idx) {
  return aggregation_trees[partition_idx];
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE double* get_double_aggregation_tree(
    int64_t** aggregation_trees,
    size_t partition_idx) {
  double** casted_aggregation_trees = reinterpret_cast<double**>(aggregation_trees);
  return casted_aggregation_trees[partition_idx];
}
  return casted_aggregation_trees[partition_idx];
  return casted_aggregation_trees[partition_idx];
  for (size_t i = 0; i < level; i++) {
    offset += pow(tree_fanout, i);
  }
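// The leaves of a fanout-f segment tree sitting at depth d start after
// 1 + f + f^2 + ... + f^(d-1) internal nodes, which is what this running sum of
// pow(tree_fanout, i) computes.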
template <AggFuncType AGG_FUNC_TYPE, typename AGG_TYPE>
inline AGG_TYPE agg_func(AGG_TYPE const lhs, AGG_TYPE const rhs) {
  if constexpr (AGG_FUNC_TYPE == AggFuncType::MIN) {
    return std::min(lhs, rhs);
  } else if constexpr (AGG_FUNC_TYPE == AggFuncType::MAX) {
    return std::max(lhs, rhs);
  } else {
    return lhs + rhs;
  }
}
template <AggFuncType AGG_FUNC_TYPE, typename AGG_TYPE>
inline AGG_TYPE compute_window_func_via_aggregation_tree(
    AGG_TYPE* aggregation_tree_for_partition,
    size_t query_range_start_idx,
    size_t query_range_end_idx,
    size_t leaf_level,
    size_t tree_fanout,
    AGG_TYPE init_val,
    AGG_TYPE invalid_val,
    AGG_TYPE null_val) {
  size_t begin = leaf_start_idx + query_range_start_idx;
  size_t end = leaf_start_idx + query_range_end_idx;
  AGG_TYPE res = init_val;
  bool all_nulls = true;
  for (int level = leaf_level; level >= 0; level--) {
    size_t parentBegin = begin / tree_fanout;
    size_t parentEnd = (end - 1) / tree_fanout;
    if (parentBegin == parentEnd) {
      for (size_t pos = begin; pos < end; pos++) {
        if (aggregation_tree_for_partition[pos] != null_val) {
          all_nulls = false;
          res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
        }
      }
      return all_nulls ? null_val : res;
    } else if (parentBegin > parentEnd) {
      return invalid_val;
    }
    size_t group_begin = (parentBegin * tree_fanout) + 1;
    if (begin != group_begin) {
      size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1;
      for (size_t pos = begin; pos < limit; pos++) {
        if (aggregation_tree_for_partition[pos] != null_val) {
          all_nulls = false;
          res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
        }
      }
      parentBegin++;
    }
    size_t group_end = (parentEnd * tree_fanout) + 1;
    if (end != group_end) {
      for (size_t pos = group_end; pos < end; pos++) {
        if (aggregation_tree_for_partition[pos] != null_val) {
          all_nulls = false;
          res = agg_func<AGG_FUNC_TYPE>(res, aggregation_tree_for_partition[pos]);
        }
      }
    }
    begin = parentBegin;
    end = parentEnd;
  }
  return invalid_val;
}
#define DEF_SEARCH_AGGREGATION_TREE(agg_value_type)                                        \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_value_type                                   \
      search_##agg_value_type##_aggregation_tree(                                          \
          agg_value_type* aggregated_tree_for_partition,                                   \
          size_t query_range_start_idx,                                                    \
          size_t query_range_end_idx,                                                      \
          size_t leaf_level,                                                               \
          size_t tree_fanout,                                                              \
          bool decimal_type,                                                               \
          size_t scale,                                                                    \
          agg_value_type invalid_val,                                                      \
          agg_value_type null_val,                                                         \
          int32_t agg_type) {                                                              \
    if (!aggregated_tree_for_partition || query_range_start_idx > query_range_end_idx) {   \
      return invalid_val;                                                                  \
    }                                                                                      \
    switch (static_cast<AggFuncType>(agg_type)) {                                          \
      case AggFuncType::MIN:                                                               \
        return compute_window_func_via_aggregation_tree<AggFuncType::MIN>(                 \
            aggregated_tree_for_partition,                                                 \
            query_range_start_idx,                                                         \
            query_range_end_idx,                                                           \
            leaf_level,                                                                    \
            tree_fanout,                                                                   \
            std::numeric_limits<agg_value_type>::max(),                                    \
            invalid_val,                                                                   \
            null_val);                                                                     \
      case AggFuncType::MAX:                                                               \
        return compute_window_func_via_aggregation_tree<AggFuncType::MAX>(                 \
            aggregated_tree_for_partition,                                                 \
            query_range_start_idx,                                                         \
            query_range_end_idx,                                                           \
            leaf_level,                                                                    \
            tree_fanout,                                                                   \
            std::numeric_limits<agg_value_type>::lowest(),                                 \
            invalid_val,                                                                   \
            null_val);                                                                     \
      default:                                                                             \
        return compute_window_func_via_aggregation_tree<AggFuncType::SUM>(                 \
            aggregated_tree_for_partition,                                                 \
            query_range_start_idx,                                                         \
            query_range_end_idx,                                                           \
            leaf_level,                                                                    \
            tree_fanout,                                                                   \
            static_cast<agg_value_type>(0),                                                \
            invalid_val,                                                                   \
            null_val);                                                                     \
    }                                                                                      \
  }
#undef DEF_SEARCH_AGGREGATION_TREE
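// The generated search_<type>_aggregation_tree entry points answer MIN/MAX/SUM window
// aggregates over an arbitrary frame in O(fanout * depth) tree lookups instead of
// rescanning the whole frame; invalid_val signals an empty or malformed range.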
template <typename AGG_VALUE_TYPE>
inline void compute_derived_aggregates(
    SumAndCountPair<AGG_VALUE_TYPE>* aggregation_tree_for_partition,
    SumAndCountPair<AGG_VALUE_TYPE>& res,
    size_t query_range_start_idx,
    size_t query_range_end_idx,
    size_t leaf_level,
    size_t tree_fanout,
    AGG_VALUE_TYPE invalid_val,
    AGG_VALUE_TYPE null_val) {
  size_t begin = leaf_start_idx + query_range_start_idx;
  size_t end = leaf_start_idx + query_range_end_idx;
  bool all_nulls = true;
  for (int level = leaf_level; level >= 0; level--) {
    size_t parentBegin = begin / tree_fanout;
    size_t parentEnd = (end - 1) / tree_fanout;
    if (parentBegin == parentEnd) {
      for (size_t pos = begin; pos < end; pos++) {
        if (aggregation_tree_for_partition[pos].sum != null_val) {
          all_nulls = false;
          res.sum += aggregation_tree_for_partition[pos].sum;
          res.count += aggregation_tree_for_partition[pos].count;
        }
      }
      if (all_nulls) {
        res.sum = null_val;
      }
      return;
    } else if (parentBegin > parentEnd) {
      return;
    }
    size_t group_begin = (parentBegin * tree_fanout) + 1;
    if (begin != group_begin) {
      size_t limit = (parentBegin * tree_fanout) + tree_fanout + 1;
      for (size_t pos = begin; pos < limit; pos++) {
        if (aggregation_tree_for_partition[pos].sum != null_val) {
          all_nulls = false;
          res.sum += aggregation_tree_for_partition[pos].sum;
          res.count += aggregation_tree_for_partition[pos].count;
        }
      }
      parentBegin++;
    }
    size_t group_end = (parentEnd * tree_fanout) + 1;
    if (end != group_end) {
      for (size_t pos = group_end; pos < end; pos++) {
        if (aggregation_tree_for_partition[pos].sum != null_val) {
          all_nulls = false;
          res.sum += aggregation_tree_for_partition[pos].sum;
          res.count += aggregation_tree_for_partition[pos].count;
        }
      }
    }
    begin = parentBegin;
    end = parentEnd;
  }
}
#define DEF_SEARCH_DERIVED_AGGREGATION_TREE(agg_value_type)                               \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE double                                          \
      search_##agg_value_type##_derived_aggregation_tree(                                 \
          SumAndCountPair<agg_value_type>* aggregated_tree_for_partition,                 \
          size_t query_range_start_idx,                                                   \
          size_t query_range_end_idx,                                                     \
          size_t leaf_level,                                                              \
          size_t tree_fanout,                                                             \
          bool decimal_type,                                                              \
          size_t scale,                                                                   \
          agg_value_type invalid_val,                                                     \
          agg_value_type null_val,                                                        \
          int32_t agg_type) {                                                             \
    if (!aggregated_tree_for_partition || query_range_start_idx > query_range_end_idx) {  \
      return invalid_val;                                                                 \
    }                                                                                     \
    SumAndCountPair<agg_value_type> res{0, 0};                                            \
    compute_derived_aggregates<agg_value_type>(aggregated_tree_for_partition,             \
                                               res,                                       \
                                               query_range_start_idx,                     \
                                               query_range_end_idx,                       \
                                               leaf_level,                                \
                                               tree_fanout,                               \
                                               invalid_val,                               \
                                               null_val);                                 \
    if (res.sum == null_val) {                                                            \
      return null_val;                                                                    \
    } else if (res.count > 0) {                                                           \
      if (decimal_type) {                                                                 \
        return (static_cast<double>(res.sum) / pow(10, scale)) / res.count;               \
      }                                                                                   \
      return (static_cast<double>(res.sum)) / res.count;                                  \
    }                                                                                     \
    return invalid_val;                                                                   \
  }
#undef DEF_SEARCH_DERIVED_AGGREGATION_TREE
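// The derived tree stores (sum, count) pairs so that AVG over a window frame can be
// finalized as sum / count at query time; decimal inputs are rescaled by 10^scale before
// the division.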
#define DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG(agg_type, null_type)            \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE agg_type                             \
      handle_null_val_##agg_type##_##null_type##_window_framing_agg(           \
          agg_type res, null_type agg_null_val, agg_type input_col_null_val) { \
    if (res == agg_null_val) {                                                 \
      return input_col_null_val;                                               \
    }                                                                          \
    return res;                                                                \
  }

#undef DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG
template <typename T>
T fill_missing_value(int64_t const cur_idx,
                     T const null_val,
                     T* const col_buf,
                     int64_t const num_elems_in_partition,
                     int32_t* const partition_rowid_buf,
                     int64_t* const ordered_index_buf,
                     bool const is_forward_fill) {
  T const cur_val = col_buf[partition_rowid_buf[ordered_index_buf[cur_idx]]];
  if (cur_val == null_val) {
    if (is_forward_fill) {
      for (int64_t cand_idx = cur_idx - 1; cand_idx >= 0; --cand_idx) {
        T const candidate_val = col_buf[partition_rowid_buf[ordered_index_buf[cand_idx]]];
        if (candidate_val != null_val) {
          return candidate_val;
        }
      }
    } else {
      for (int64_t cand_idx = cur_idx + 1; cand_idx < num_elems_in_partition;
           ++cand_idx) {
        T const candidate_val = col_buf[partition_rowid_buf[ordered_index_buf[cand_idx]]];
        if (candidate_val != null_val) {
          return candidate_val;
        }
      }
    }
  }
  return cur_val;
}
#define DEF_FILL_MISSING_VALUE(col_type)                                             \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE col_type fill_##col_type##_missing_value(  \
      int64_t const cur_row_idx_in_frame,                                            \
      col_type const null_val,                                                       \
      col_type* const col_buf,                                                       \
      int64_t const num_elems_in_partition,                                          \
      int32_t* const partition_rowid_buf,                                            \
      int64_t* const ordered_index_buf,                                              \
      bool const is_forward_fill) {                                                  \
    return fill_missing_value<col_type>(cur_row_idx_in_frame,                        \
                                        null_val,                                    \
                                        col_buf,                                     \
                                        num_elems_in_partition,                      \
                                        partition_rowid_buf,                         \
                                        ordered_index_buf,                           \
                                        is_forward_fill);                            \
  }
#undef DEF_FILL_MISSING_VALUE
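// Backs gap-filling window expressions (forward/backward fill): when the current row's
// value is null, the nearest non-null value in ordering direction (previous rows for a
// forward fill, following rows otherwise) is substituted.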
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum(int64_t* agg, const int64_t val) {
  const auto old = *agg;
  *agg += val;
  return old;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_if(int64_t* agg,
                                                           const int64_t val,
                                                           const int8_t cond) {
  return cond ? agg_sum(agg, val) : *agg;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max(int64_t* agg, const int64_t val) {
  *agg = std::max(*agg, val);
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min(int64_t* agg, const int64_t val) {
  *agg = std::min(*agg, val);
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int8_t* agg_id_varlen(int8_t* varlen_buffer,
                                                              const int64_t offset,
                                                              const int8_t* value,
                                                              const int64_t size_bytes) {
  for (auto i = 0; i < size_bytes; i++) {
    varlen_buffer[offset + i] = value[i];
  }
  return &varlen_buffer[offset];
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id(
    int64_t* agg, const int64_t val, const int64_t null_val) {
  if (val == null_val) {
    return 0;
  }
  if (*agg == val) {
    return 0;
  } else if (*agg == null_val) {
    *agg = val;
    return 0;
  } else {
    // multiple distinct values seen by a single-value aggregate
    return 15;
  }
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_count_distinct_bitmap_skip_val(
    int64_t* agg,
    const int64_t val,
    const int64_t min_val,
    const int64_t bucket_size,
    const int64_t skip_val) {
  if (val != skip_val) {
    agg_count_distinct_bitmap(agg, val, min_val, bucket_size);
  }
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_if_int32(uint32_t* agg,
                                                                    const int32_t cond) {
  return cond ? (*agg)++ : *agg;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_int32(int32_t* agg,
                                                              const int32_t val) {
  const auto old = *agg;
  *agg += val;
  return old;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_if_int32(int32_t* agg,
                                                                 const int32_t val,
                                                                 const int8_t cond) {
  return cond ? agg_sum_int32(agg, val) : *agg;
}
#define DEF_AGG_MAX_INT(n)                                                            \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_int##n(int##n##_t* agg,        \
                                                              const int##n##_t val) { \
    *agg = std::max(*agg, val);                                                       \
  }

#undef DEF_AGG_MAX_INT

#define DEF_AGG_MIN_INT(n)                                                            \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_int##n(int##n##_t* agg,        \
                                                              const int##n##_t val) { \
    *agg = std::min(*agg, val);                                                       \
  }

#undef DEF_AGG_MIN_INT
#define DEF_AGG_ID_INT(n)                                                            \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_int##n(int##n##_t* agg,        \
                                                             const int##n##_t val) { \
    *agg = val;                                                                      \
  }
#define DEF_CHECKED_SINGLE_AGG_ID_INT(n)                                         \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id_int##n(  \
      int##n##_t* agg, const int##n##_t val, const int##n##_t null_val) {        \
    if (val == null_val) {                                                       \
      return 0;                                                                  \
    }                                                                            \
    if (*agg == val) {                                                           \
      return 0;                                                                  \
    } else if (*agg == null_val) {                                               \
      *agg = val;                                                                \
      return 0;                                                                  \
    } else {                                                                     \
      return 15;                                                                 \
    }                                                                            \
  }

#undef DEF_AGG_ID_INT
#undef DEF_CHECKED_SINGLE_AGG_ID_INT
#define DEF_WRITE_PROJECTION_INT(n)                                     \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void write_projection_int##n( \
      int8_t* slot_ptr, const int##n##_t val, const int64_t init_val) { \
    if (val != init_val) {                                              \
      *reinterpret_cast<int##n##_t*>(slot_ptr) = val;                   \
    }                                                                   \
  }

#undef DEF_WRITE_PROJECTION_INT
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t* agg,
                                                                 const int64_t val,
                                                                 const int64_t skip_val) {
  const auto old = *agg;
  if (val != skip_val) {
    if (old != skip_val) {
      *agg += val;
    } else {
      *agg = val;
    }
  }
  return old;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_int32_skip_val(
    int32_t* agg, const int32_t val, const int32_t skip_val) {
  const auto old = *agg;
  if (val != skip_val) {
    if (old != skip_val) {
      *agg += val;
    } else {
      *agg = val;
    }
  }
  return old;
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_if_skip_val(int64_t* agg,
                                                                    const int64_t val,
                                                                    const int64_t skip_val,
                                                                    const int8_t cond) {
  return cond ? agg_sum_skip_val(agg, val, skip_val) : *agg;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_if_int32_skip_val(
    int32_t* agg, const int32_t val, const int32_t skip_val, const int8_t cond) {
  return cond ? agg_sum_int32_skip_val(agg, val, skip_val) : *agg;
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_if(uint64_t* agg,
                                                              const int64_t cond) {
  return cond ? (*agg)++ : *agg;
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_skip_val(
    uint64_t* agg, const int64_t val, const int64_t skip_val) {
  if (val != skip_val) {
    return agg_count(agg, val);
  }
  return *agg;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_if_skip_val(
    uint64_t* agg, const int64_t cond, const int64_t skip_val) {
  if (cond != skip_val) {
    return agg_count_if(agg, cond);
  }
  return *agg;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_int32_skip_val(
    uint32_t* agg, const int32_t val, const int32_t skip_val) {
  if (val != skip_val) {
    return agg_count_int32(agg, val);
  }
  return *agg;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_if_int32_skip_val(
    uint32_t* agg, const int32_t cond, const int32_t skip_val) {
  if (cond != skip_val) {
    return agg_count_if_int32(agg, cond);
  }
  return *agg;
}
#define DEF_SKIP_AGG_ADD(base_agg_func)                                  \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
      DATA_T* agg, const DATA_T val, const DATA_T skip_val) {            \
    if (val != skip_val) {                                               \
      base_agg_func(agg, val);                                           \
    }                                                                    \
  }

#define DEF_SKIP_AGG(base_agg_func)                                      \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val( \
      DATA_T* agg, const DATA_T val, const DATA_T skip_val) {            \
    if (val != skip_val) {                                               \
      const DATA_T old_agg = *agg;                                       \
      if (old_agg != skip_val) {                                         \
        base_agg_func(agg, val);                                         \
      } else {                                                           \
        *agg = val;                                                      \
      }                                                                  \
    }                                                                    \
  }

#define DATA_T int64_t
#define DATA_T int32_t
#define DATA_T int16_t
#define DATA_T int8_t

#undef DEF_SKIP_AGG_ADD
extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_double(int64_t* agg, const double val) {
  const auto r = *reinterpret_cast<const double*>(agg) + val;
  *agg = *reinterpret_cast<const int64_t*>(may_alias_ptr(&r));
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_if_double(int64_t* agg,
                                                               const double val,
                                                               const int8_t cond) {
  if (cond) {
    agg_sum_double(agg, val);
  }
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_double(int64_t* agg, const double val) {
  const auto r = std::max(*reinterpret_cast<const double*>(agg), val);
  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_double(int64_t* agg, const double val) {
  const auto r = std::min(*reinterpret_cast<const double*>(agg), val);
  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&r)));
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_double(int64_t* agg, const double val) {
  *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id_double(
    int64_t* agg, const double val, const double null_val) {
  if (val == null_val) {
    return 0;
  }
  if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)))) {
    return 0;
  } else if (*agg == *(reinterpret_cast<const int64_t*>(may_alias_ptr(&null_val)))) {
    *agg = *(reinterpret_cast<const int64_t*>(may_alias_ptr(&val)));
    return 0;
  } else {
    return 15;
  }
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_float(int32_t* agg, const float val) {
  const auto r = *reinterpret_cast<const float*>(agg) + val;
  *agg = *reinterpret_cast<const int32_t*>(may_alias_ptr(&r));
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_if_float(int32_t* agg,
                                                              const float val,
                                                              const int8_t cond) {
  if (cond) {
    agg_sum_float(agg, val);
  }
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_max_float(int32_t* agg, const float val) {
  const auto r = std::max(*reinterpret_cast<const float*>(agg), val);
  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_min_float(int32_t* agg, const float val) {
  const auto r = std::min(*reinterpret_cast<const float*>(agg), val);
  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&r)));
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE void agg_id_float(int32_t* agg, const float val) {
  *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id_float(
    int32_t* agg, const float val, const float null_val) {
  if (val == null_val) {
    return 0;
  }
  if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)))) {
    return 0;
  } else if (*agg == *(reinterpret_cast<const int32_t*>(may_alias_ptr(&null_val)))) {
    *agg = *(reinterpret_cast<const int32_t*>(may_alias_ptr(&val)));
    return 0;
  } else {
    return 15;
  }
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_double_skip_val(
    uint64_t* agg, const double val, const double skip_val) {
  if (val != skip_val) {
    return agg_count_double(agg, val);
  }
  return *agg;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_float_skip_val(
    uint32_t* agg, const float val, const float skip_val) {
  if (val != skip_val) {
    return agg_count_float(agg, val);
  }
  return *agg;
}
#define DEF_SKIP_AGG(base_agg_func)                                                \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void base_agg_func##_skip_val(           \
      ADDR_T* agg, const DATA_T val, const DATA_T skip_val) {                      \
    if (val != skip_val) {                                                         \
      const ADDR_T old_agg = *agg;                                                 \
      if (old_agg != *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&skip_val))) { \
        base_agg_func(agg, val);                                                   \
      } else {                                                                     \
        *agg = *reinterpret_cast<const ADDR_T*>(may_alias_ptr(&val));              \
      }                                                                            \
    }                                                                              \
  }

#define DEF_SKIP_IF_AGG(skip_agg_func, base_agg_func)                             \
  extern "C" RUNTIME_EXPORT ALWAYS_INLINE void skip_agg_func##_skip_val(           \
      ADDR_T* agg, const DATA_T val, const DATA_T skip_val, const int8_t cond) {   \
    if (cond) {                                                                    \
      base_agg_func##_skip_val(agg, val, skip_val);                                \
    }                                                                              \
  }

#define DATA_T double
#define ADDR_T int64_t
#define DATA_T float
#define ADDR_T int32_t

#undef DEF_SKIP_IF_AGG
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t decimal_floor(const int64_t x,
                                                              const int64_t scale) {
  if (x >= 0) {
    return x / scale * scale;
  }
  if (!(x % scale)) {
    return x;
  }
  return x / scale * scale - scale;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t decimal_ceil(const int64_t x,
                                                             const int64_t scale) {
  return -decimal_floor(-x, scale);
}
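// Example: with scale = 100, decimal_floor maps -151 to -200, whereas plain integer
// division would truncate toward zero and give -100; decimal_ceil rounds the other way.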
#define DEF_SHARED_AGG_RET_STUBS(base_agg_func)                                      \
  extern "C" GPU_RT_STUB uint64_t base_agg_func##_shared(uint64_t* agg,              \
                                                         const int64_t val) {        \
    return 0;                                                                        \
  }                                                                                  \
                                                                                     \
  extern "C" GPU_RT_STUB uint64_t base_agg_func##_skip_val_shared(                   \
      uint64_t* agg, const int64_t val, const int64_t skip_val) {                    \
    return 0;                                                                        \
  }                                                                                  \
  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_shared(uint32_t* agg,        \
                                                               const int32_t val) {  \
    return 0;                                                                        \
  }                                                                                  \
                                                                                     \
  extern "C" GPU_RT_STUB uint32_t base_agg_func##_int32_skip_val_shared(             \
      uint32_t* agg, const int32_t val, const int32_t skip_val) {                    \
    return 0;                                                                        \
  }                                                                                  \
                                                                                     \
  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_shared(uint64_t* agg,       \
                                                                const double val) {  \
    return 0;                                                                        \
  }                                                                                  \
                                                                                     \
  extern "C" GPU_RT_STUB uint64_t base_agg_func##_double_skip_val_shared(            \
      uint64_t* agg, const double val, const double skip_val) {                      \
    return 0;                                                                        \
  }                                                                                  \
  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_shared(uint32_t* agg,        \
                                                               const float val) {    \
    return 0;                                                                        \
  }                                                                                  \
                                                                                     \
  extern "C" GPU_RT_STUB uint32_t base_agg_func##_float_skip_val_shared(             \
      uint32_t* agg, const float val, const float skip_val) {                        \
    return 0;                                                                        \
  }

#define DEF_SHARED_AGG_STUBS(base_agg_func)                                              \
  extern "C" GPU_RT_STUB void base_agg_func##_shared(int64_t* agg, const int64_t val) {} \
                                                                                         \
  extern "C" GPU_RT_STUB void base_agg_func##_skip_val_shared(                           \
      int64_t* agg, const int64_t val, const int64_t skip_val) {}                        \
  extern "C" GPU_RT_STUB void base_agg_func##_int32_shared(int32_t* agg,                 \
                                                           const int32_t val) {}         \
  extern "C" GPU_RT_STUB void base_agg_func##_int16_shared(int16_t* agg,                 \
                                                           const int16_t val) {}         \
  extern "C" GPU_RT_STUB void base_agg_func##_int8_shared(int8_t* agg,                   \
                                                          const int8_t val) {}           \
                                                                                         \
  extern "C" GPU_RT_STUB void base_agg_func##_int32_skip_val_shared(                     \
      int32_t* agg, const int32_t val, const int32_t skip_val) {}                        \
                                                                                         \
  extern "C" GPU_RT_STUB void base_agg_func##_double_shared(int64_t* agg,                \
                                                            const double val) {}         \
                                                                                         \
  extern "C" GPU_RT_STUB void base_agg_func##_double_skip_val_shared(                    \
      int64_t* agg, const double val, const double skip_val) {}                          \
  extern "C" GPU_RT_STUB void base_agg_func##_float_shared(int32_t* agg,                 \
                                                           const float val) {}           \
                                                                                         \
  extern "C" GPU_RT_STUB void base_agg_func##_float_skip_val_shared(                     \
      int32_t* agg, const float val, const float skip_val) {}
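// The *_shared stubs only exist so that CPU-compiled query code links against the same
// symbol set as the GPU path; they are never executed on the host, hence the empty (or
// zero-returning) bodies behind GPU_RT_STUB.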
    const int64_t offset,
    const int8_t* value,
    const int64_t size_bytes) {
    const int64_t null_val) {
    const int32_t null_val) {
    const int16_t null_val) {
    const int8_t null_val) {
    const double null_val) {
    const float null_val) {
    const int16_t skip_val) {}
    const int8_t skip_val) {}
    const int16_t skip_val) {}
    const int8_t skip_val) {}
    const int8_t cond) {
    const int64_t skip_val) {
    const int64_t skip_val,
    const int8_t cond) {
    const int32_t skip_val) {
    const double skip_val) {}
    const float skip_val) {}
    const int8_t cond) {
    const int32_t skip_val,
    const int8_t cond) {
    const int8_t cond) {}
    const double skip_val,
    const int8_t cond) {}
    const int8_t cond) {}
    const float skip_val,
    const int8_t cond) {}
    int64_t* output_buffer,
    const int32_t num_agg_cols){};
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t pos_start_impl(
    const int32_t* row_index_resume) {
  return row_index_resume ? *row_index_resume : 0;
}
    int32_t* error_codes) {
extern "C" RUNTIME_EXPORT ALWAYS_INLINE const int64_t* init_shared_mem_nop(
    const int64_t* groups_buffer,
    const int32_t groups_buffer_size) {
  return groups_buffer;
}
    const int32_t groups_buffer_size) {
    int64_t* groups_buffer,
    const int64_t* init_vals,
    const uint32_t groups_buffer_entry_count,
    const uint32_t key_qw_count,
    const uint32_t agg_col_count,
    const int8_t warp_size) {
  assert(groups_buffer);
    int64_t* groups_buffer,
    const int64_t* init_vals,
    const uint32_t groups_buffer_entry_count,
    const uint32_t key_qw_count,
    const uint32_t agg_col_count,
    const bool blocks_share_memory,
    const int32_t frag_idx) {
  assert(groups_buffer);
    int64_t* groups_buffer,
    const int64_t* init_vals,
    const uint32_t groups_buffer_entry_count,
    const uint32_t key_qw_count,
    const uint32_t agg_col_count,
    const int8_t warp_size) {
  assert(groups_buffer);
template <typename T>
int64_t* get_matching_group_value(int64_t* groups_buffer,
                                  const uint32_t h,
                                  const T* key,
                                  const uint32_t key_count,
                                  const uint32_t row_size_quad) {
  auto off = h * row_size_quad;
  auto row_ptr = reinterpret_cast<T*>(groups_buffer + off);
  if (*row_ptr == get_empty_key<T>()) {
    memcpy(row_ptr, key, key_count * sizeof(T));
    auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
    return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
  }
  if (memcmp(row_ptr, key, key_count * sizeof(T)) == 0) {
    auto row_ptr_i8 = reinterpret_cast<int8_t*>(row_ptr + key_count);
    return reinterpret_cast<int64_t*>(align_to_int64(row_ptr_i8));
  }
  return nullptr;
}
    int64_t* groups_buffer,
    const uint32_t key_count,
    const uint32_t key_width,
    const uint32_t row_size_quad) {
  switch (key_width) {
          reinterpret_cast<const int32_t*>(key),

template <typename T>
    const uint32_t entry_count,
    const uint32_t key_count) {
  auto key_buffer = reinterpret_cast<T*>(groups_buffer);
  if (key_buffer[off] == get_empty_key<T>()) {
    for (size_t i = 0; i < key_count; ++i) {
      key_buffer[off] = key[i];
  for (size_t i = 0; i < key_count; ++i) {
    if (key_buffer[off] != key[i]) {

    const uint32_t entry_count,
    const uint32_t key_count,
    const uint32_t key_width) {
  switch (key_width) {
          reinterpret_cast<const int32_t*>(key),
          groups_buffer, entry_count, h, key, key_count);
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value_columnar(
    int64_t* groups_buffer,
    const uint32_t h,
    const int64_t* key,
    const uint32_t key_qw_count,
    const size_t entry_count) {
  auto off = h;
  if (groups_buffer[off] == EMPTY_KEY_64) {
    for (size_t i = 0; i < key_qw_count; ++i) {
      groups_buffer[off] = key[i];
      off += entry_count;
    }
    return &groups_buffer[off];
  }
  off = h;
  for (size_t i = 0; i < key_qw_count; ++i) {
    if (groups_buffer[off] != key[i]) {
      return nullptr;
    }
    off += entry_count;
  }
  return &groups_buffer[off];
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_matching_group_value_perfect_hash(
    int64_t* groups_buffer,
    const uint32_t hashed_index,
    const int64_t* key,
    const uint32_t key_count,
    const uint32_t row_size_quad) {
  uint32_t off = hashed_index * row_size_quad;
  if (groups_buffer[off] == EMPTY_KEY_64) {
    for (uint32_t i = 0; i < key_count; ++i) {
      groups_buffer[off + i] = key[i];
    }
  }
  return groups_buffer + off + key_count;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t*
get_matching_group_value_perfect_hash_keyless(int64_t* groups_buffer,
                                              const uint32_t hashed_index,
                                              const uint32_t row_size_quad) {
  return groups_buffer + row_size_quad * hashed_index;
}

    const uint32_t hashed_index,
    const int64_t* key,
    const uint32_t key_count,
    const uint32_t entry_count) {
  for (uint32_t i = 0; i < key_count; i++) {
    groups_buffer[i * entry_count + hashed_index] = key[i];
  }

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t* get_group_value_fast_keyless(
    int64_t* groups_buffer,
    const int64_t key,
    const int64_t min_key,
    const int64_t /* bucket */,
    const uint32_t row_size_quad) {
  return groups_buffer + row_size_quad * (key - min_key);
}

    int64_t* groups_buffer,
    const int64_t key,
    const int64_t min_key,
    const uint32_t row_size_quad,
    const uint8_t thread_warp_idx,
    const uint8_t warp_size) {
  return groups_buffer + row_size_quad * (warp_size * (key - min_key) + thread_warp_idx);
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE StringView string_pack(const int8_t* ptr,
                                                               const int32_t len) {
  return {reinterpret_cast<char const*>(ptr), static_cast<uint64_t>(len)};
}

#include "../Utils/StringLike.cpp"
extern "C" ALWAYS_INLINE DEVICE int32_t map_string_dict_id(
    const int32_t string_id,
    const int64_t translation_map_handle,
    const int32_t min_source_id) {
  const int32_t* translation_map =
      reinterpret_cast<const int32_t*>(translation_map_handle);
  return translation_map[string_id - min_source_id];
}
extern "C" ALWAYS_INLINE DEVICE double tree_model_reg_predict(
    const double* regressor_inputs,
    const int64_t decision_tree_table_handle,
    const int64_t decision_tree_offsets_handle,
    const int32_t num_regressors,
    const int32_t num_trees,
    const bool compute_avg,
    const double null_value) {
  for (int32_t regressor_idx = 0; regressor_idx < num_regressors; ++regressor_idx) {
    if (regressor_inputs[regressor_idx] == null_value) {
      return null_value;
    }
  }
  const DecisionTreeEntry* decision_tree_table =
      reinterpret_cast<const DecisionTreeEntry*>(decision_tree_table_handle);
  const int64_t* decision_tree_offsets =
      reinterpret_cast<const int64_t*>(decision_tree_offsets_handle);
  double sum_tree_results{0};
  for (int32_t tree_idx = 0; tree_idx < num_trees; ++tree_idx) {
    int64_t row_idx = decision_tree_offsets[tree_idx];
    while (true) {
      const DecisionTreeEntry& current_entry = decision_tree_table[row_idx];
      if (!current_entry.isSplitNode()) {
        sum_tree_results += current_entry.value;
        break;
      }
      const auto regressor_input = regressor_inputs[current_entry.feature_index];
      row_idx = regressor_input <= current_entry.value
                    ? current_entry.left_child_row_idx
                    : current_entry.right_child_row_idx;
    }
  }
  return compute_avg ? sum_tree_results / num_trees : sum_tree_results;
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE bool sample_ratio(const double proportion,
                                                          const int64_t row_offset) {
  const int64_t threshold = 4294967296 * proportion;
  return (row_offset * 2654435761) % 4294967296 < threshold;
}
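// Multiplicative hashing: 2654435761 is the 32-bit Knuth constant (close to 2^32 / phi),
// so (row_offset * 2654435761) mod 2^32 scatters row offsets roughly uniformly over
// [0, 2^32), and comparing against proportion * 2^32 admits about that fraction of rows.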
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket(
    const double target_value,
    const double lower_bound,
    const double upper_bound,
    const double scale_factor,
    const int32_t partition_count) {
  if (target_value < lower_bound) {
    return 0;
  } else if (target_value >= upper_bound) {
    return partition_count + 1;
  }
  return ((target_value - lower_bound) * scale_factor) + 1;
}
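// Example: WIDTH_BUCKET(7, 0, 10, 5) uses scale_factor = 5 / (10 - 0) = 0.5, so 7 lands
// in bucket (7 - 0) * 0.5 + 1 = 4; values below 0 map to bucket 0 and values >= 10 to 6.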
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_reversed(
    const double target_value,
    const double lower_bound,
    const double upper_bound,
    const double scale_factor,
    const int32_t partition_count) {
  if (target_value > lower_bound) {
    return 0;
  } else if (target_value <= upper_bound) {
    return partition_count + 1;
  }
  return ((lower_bound - target_value) * scale_factor) + 1;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t width_bucket_nullable(
    const double target_value,
    const double lower_bound,
    const double upper_bound,
    const double scale_factor,
    const int32_t partition_count,
    const double null_val) {
  if (target_value == null_val) {
    return INT32_MIN;
  }
  return width_bucket(
      target_value, lower_bound, upper_bound, scale_factor, partition_count);
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE int32_t width_bucket_reversed_nullable(
    const double target_value,
    const double lower_bound,
    const double upper_bound,
    const double scale_factor,
    const int32_t partition_count,
    const double null_val) {
  if (target_value == null_val) {
    return INT32_MIN;
  }
  return width_bucket_reversed(
      target_value, lower_bound, upper_bound, scale_factor, partition_count);
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_no_oob_check(
    const double target_value,
    const double lower_bound,
    const double scale_factor) {
  int32_t calc = (target_value - lower_bound) * scale_factor;
  return calc + 1;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_reversed_no_oob_check(
    const double target_value,
    const double lower_bound,
    const double scale_factor) {
  int32_t calc = (lower_bound - target_value) * scale_factor;
  return calc + 1;
}
    const bool reversed,
    const int32_t partition_count) {
                                 partition_count / (lower_bound - upper_bound),
                      partition_count / (upper_bound - lower_bound),
    const bool reversed,
    const int32_t partition_count,
    const double null_val) {
  if (target_value == null_val) {
      target_value, reversed, lower_bound, upper_bound, partition_count);
    const bool reversed,
    const int32_t partition_count) {
      target_value, lower_bound, partition_count / (lower_bound - upper_bound));
      target_value, lower_bound, partition_count / (upper_bound - lower_bound));
extern "C" RUNTIME_EXPORT ALWAYS_INLINE int64_t row_number_window_func(
    const int64_t output_buff, const int64_t pos) {
  return reinterpret_cast<const int64_t*>(output_buff)[pos];
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE double percent_window_func(
    const int64_t output_buff,
    const int64_t pos) {
  return reinterpret_cast<const double*>(output_buff)[pos];
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_double(const int64_t* agg) {
  return *reinterpret_cast<const double*>(may_alias_ptr(agg));
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE float load_float(const int32_t* agg) {
  return *reinterpret_cast<const float*>(may_alias_ptr(agg));
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_int(const int64_t* sum,
                                                            const int64_t* count,
                                                            const double null_val) {
  return *count != 0 ? static_cast<double>(*sum) / *count : null_val;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_decimal(const int64_t* sum,
                                                                const int64_t* count,
                                                                const double null_val,
                                                                const uint32_t scale) {
  return *count != 0 ? (static_cast<double>(*sum) / pow(10, scale)) / *count : null_val;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_double(const int64_t* agg,
                                                               const int64_t* count,
                                                               const double null_val) {
  return *count != 0 ? *reinterpret_cast<const double*>(may_alias_ptr(agg)) / *count
                     : null_val;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE double load_avg_float(const int32_t* agg,
                                                              const int32_t* count,
                                                              const double null_val) {
  return *count != 0 ? *reinterpret_cast<const float*>(may_alias_ptr(agg)) / *count
                     : null_val;
}
extern "C" RUNTIME_EXPORT NEVER_INLINE void linear_probabilistic_count(
    uint8_t* bitmap,
    const uint32_t bitmap_bytes,
    const uint8_t* key_bytes,
    const uint32_t key_len) {
  const uint32_t bit_pos = MurmurHash3(key_bytes, key_len, 0) % (bitmap_bytes * 8);
  const uint32_t word_idx = bit_pos / 32;
  const uint32_t bit_idx = bit_pos % 32;
  reinterpret_cast<uint32_t*>(bitmap)[word_idx] |= 1 << bit_idx;
}
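// Linear (probabilistic) counting: each key hashes to a single bit, and the number of
// distinct keys is later estimated from the fraction of bits still zero, which is much
// cheaper than an exact COUNT(DISTINCT) bitmap when the key domain is large.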
extern "C" RUNTIME_EXPORT NEVER_INLINE void query_stub_hoisted_literals(
    int32_t* error_codes,
    int32_t* total_matched,
    int64_t** out,
    const uint32_t frag_idx,
    const uint32_t* row_index_resume,
    const int8_t** col_buffers,
    const int8_t* literals,
    const int64_t* num_rows,
    const uint64_t* frag_row_offsets,
    const int32_t* max_matched,
    const int64_t* init_agg_value,
    const int64_t* join_hash_tables,
    const int8_t* row_func_mgr) {
  assert(error_codes || total_matched || out || frag_idx || row_index_resume ||
         col_buffers || literals || num_rows || frag_row_offsets || max_matched ||
         init_agg_value || join_hash_tables || row_func_mgr);
}
extern "C" RUNTIME_EXPORT void multifrag_query_hoisted_literals(
    int32_t* error_codes,
    int32_t* total_matched,
    int64_t** out,
    const uint32_t* num_fragments_ptr,
    const uint32_t* num_tables_ptr,
    const uint32_t* row_index_resume,
    const int8_t*** col_buffers,
    const int8_t* literals,
    const int64_t* num_rows,
    const uint64_t* frag_row_offsets,
    const int32_t* max_matched,
    const int64_t* init_agg_value,
    const int64_t* join_hash_tables,
    const int8_t* row_func_mgr) {
  uint32_t const num_fragments = *num_fragments_ptr;
  uint32_t const num_tables = *num_tables_ptr;
  for (uint32_t frag_idx = 0; frag_idx < num_fragments; ++frag_idx) {
    query_stub_hoisted_literals(error_codes,
                                total_matched,
                                out,
                                frag_idx,
                                row_index_resume,
                                col_buffers ? col_buffers[frag_idx] : nullptr,
                                literals,
                                &num_rows[frag_idx * num_tables],
                                &frag_row_offsets[frag_idx * num_tables],
                                max_matched,
                                init_agg_value,
                                join_hash_tables,
                                row_func_mgr);
  }
}
extern "C" RUNTIME_EXPORT NEVER_INLINE void query_stub(int32_t* error_codes,
                                                       int32_t* total_matched,
                                                       int64_t** out,
                                                       const uint32_t frag_idx,
                                                       const uint32_t* row_index_resume,
                                                       const int8_t** col_buffers,
                                                       const int64_t* num_rows,
                                                       const uint64_t* frag_row_offsets,
                                                       const int32_t* max_matched,
                                                       const int64_t* init_agg_value,
                                                       const int64_t* join_hash_tables,
                                                       const int8_t* row_func_mgr) {
  assert(error_codes || total_matched || out || frag_idx || row_index_resume ||
         col_buffers || num_rows || frag_row_offsets || max_matched || init_agg_value ||
         join_hash_tables || row_func_mgr);
}
extern "C" RUNTIME_EXPORT void multifrag_query(int32_t* error_codes,
                                               int32_t* total_matched,
                                               int64_t** out,
                                               const uint32_t* num_fragments_ptr,
                                               const uint32_t* num_tables_ptr,
                                               const uint32_t* row_index_resume,
                                               const int8_t*** col_buffers,
                                               const int64_t* num_rows,
                                               const uint64_t* frag_row_offsets,
                                               const int32_t* max_matched,
                                               const int64_t* init_agg_value,
                                               const int64_t* join_hash_tables,
                                               const int8_t* row_func_mgr) {
  uint32_t const num_fragments = *num_fragments_ptr;
  uint32_t const num_tables = *num_tables_ptr;
  for (uint32_t frag_idx = 0; frag_idx < num_fragments; ++frag_idx) {
    query_stub(error_codes,
               total_matched,
               out,
               frag_idx,
               row_index_resume,
               col_buffers ? col_buffers[frag_idx] : nullptr,
               &num_rows[frag_idx * num_tables],
               &frag_row_offsets[frag_idx * num_tables],
               max_matched,
               init_agg_value,
               join_hash_tables,
               row_func_mgr);
  }
}
extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE bool point_int32_is_null(int32_t* point) {
  constexpr uint32_t null_array_compressed_32 = 0x80000000U;
  return point == nullptr || uint32_t(*point) == null_array_compressed_32;
}

extern "C" RUNTIME_EXPORT ALWAYS_INLINE DEVICE bool point_double_is_null(double* point) {
  constexpr double null_array_double = 2 * DBL_MIN;
  return point == nullptr || *point == null_array_double;
}
  if (command == static_cast<unsigned>(INT_CHECK)) {
  if (command == static_cast<unsigned>(INT_ABORT)) {
  if (command == static_cast<unsigned>(INT_RESET)) {
DEVICE auto upper_bound(ARGS &&...args)
__device__ void sync_warp_protected(int64_t thread_pos, int64_t row_count)
RUNTIME_EXPORT NEVER_INLINE void query_stub_hoisted_literals(int32_t *error_codes, int32_t *total_matched, int64_t **out, const uint32_t frag_idx, const uint32_t *row_index_resume, const int8_t **col_buffers, const int8_t *literals, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, const int64_t *init_agg_value, const int64_t *join_hash_tables, const int8_t *row_func_mgr)
RUNTIME_EXPORT void agg_min_int8(int8_t *agg, const int8_t val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t encode_date(int64_t decoded_val, int64_t null_val, int64_t multiplier)
RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_if_int32_skip_val(int32_t *agg, const int32_t val, const int32_t skip_val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_row_mode_start_index_sub(int64_t candidate_index, int64_t current_partition_start_offset, int64_t frame_bound)
#define DEF_UMINUS_NULLABLE(type, null_type)
GPU_RT_STUB int32_t checked_single_agg_id_int32_shared(int32_t *agg, const int32_t val, const int32_t null_val)
#define DEF_CHECKED_SINGLE_AGG_ID_INT(n)
RUNTIME_EXPORT ALWAYS_INLINE int8_t * agg_id_varlen(int8_t *varlen_buffer, const int64_t offset, const int8_t *value, const int64_t size_bytes)
RUNTIME_EXPORT void agg_max_int32(int32_t *agg, const int32_t val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t scale_decimal_down_not_nullable(const int64_t operand, const int64_t scale, const int64_t null_val)
RUNTIME_EXPORT void multifrag_query(int32_t *error_codes, int32_t *total_matched, int64_t **out, const uint32_t *num_fragments_ptr, const uint32_t *num_tables_ptr, const uint32_t *row_index_resume, const int8_t ***col_buffers, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, const int64_t *init_agg_value, const int64_t *join_hash_tables, const int8_t *row_func_mgr)
__device__ int64_t * get_matching_group_value_columnar(int64_t *groups_buffer, const uint32_t h, const int64_t *key, const uint32_t key_qw_count, const size_t entry_count)
RUNTIME_EXPORT ALWAYS_INLINE int64_t * get_matching_group_value_perfect_hash(int64_t *groups_buffer, const uint32_t hashed_index, const int64_t *key, const uint32_t key_count, const uint32_t row_size_quad)
int64_t left_child_row_idx
RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_if(uint64_t *agg, const int64_t cond)
__device__ void write_back_nop(int64_t *dest, int64_t *src, const int32_t sz)
RUNTIME_EXPORT void agg_min_int16(int16_t *agg, const int16_t val)
RUNTIME_EXPORT ALWAYS_INLINE StringView string_pack(const int8_t *ptr, const int32_t len)
__device__ void agg_sum_float_skip_val_shared(int32_t *agg, const float val, const float skip_val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t floor_div_nullable_lhs(const int64_t dividend, const int64_t divisor, const int64_t null_val)
#define DEF_COMPUTE_CURRENT_ROW_IDX_IN_FRAME_ALL_TYPES(oper_name)
RUNTIME_EXPORT void agg_max_int16(int16_t *agg, const int16_t val)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE bool point_int32_is_null(int32_t *point)
#define DEF_CAST_NULLABLE_BIDIR(type1, type2)
RUNTIME_EXPORT ALWAYS_INLINE int64_t * get_group_value_fast_keyless(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_int32_skip_val(uint32_t *agg, const int32_t val, const int32_t skip_val)
RUNTIME_EXPORT NEVER_INLINE void query_stub(int32_t *error_codes, int32_t *total_matched, int64_t **out, const uint32_t frag_idx, const uint32_t *row_index_resume, const int8_t **col_buffers, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, const int64_t *init_agg_value, const int64_t *join_hash_tables, const int8_t *row_func_mgr)
RUNTIME_EXPORT NEVER_INLINE void agg_approximate_count_distinct(int64_t *agg, const int64_t key, const uint32_t b)
FORCE_INLINE uint8_t get_rank(uint64_t x, uint32_t b)
__device__ int8_t thread_warp_idx(const int8_t warp_sz)
__global__ void init_group_by_buffer_gpu(int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_count, const uint32_t key_width, const uint32_t row_size_quad, const bool keyless, const int8_t warp_size)
#define DEF_CAST_NULLABLE(from_type, to_type)
RUNTIME_EXPORT ALWAYS_INLINE void agg_max_double(int64_t *agg, const double val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_if(int64_t *agg, const int64_t val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE void agg_max(int64_t *agg, const int64_t val)
RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_skip_val(uint64_t *agg, const int64_t val, const int64_t skip_val)
__device__ int64_t get_thread_index()
RUNTIME_EXPORT NEVER_INLINE DEVICE uint64_t MurmurHash64A(const void *key, int len, uint64_t seed)
__device__ void agg_sum_if_double_skip_val_shared(int64_t *agg, const double val, const double skip_val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE void agg_min_float(int32_t *agg, const float val)
__device__ int32_t pos_step_impl()
__device__ void write_back_non_grouped_agg(int64_t *input_buffer, int64_t *output_buffer, const int32_t agg_idx)
Calculate approximate median and general quantiles, based on "Computing Extremely Accurate Quantiles ...
RUNTIME_EXPORT ALWAYS_INLINE double load_avg_int(const int64_t *sum, const int64_t *count, const double null_val)
Structures and runtime functions of streaming top-k heap.
__device__ int32_t checked_single_agg_id_double_shared(int64_t *agg, const double val, const double null_val)
__device__ const int64_t * init_shared_mem_nop(const int64_t *groups_buffer, const int32_t groups_buffer_size)
RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id(int64_t *agg, const int64_t val, const int64_t null_val)
__device__ void agg_sum_if_float_shared(int32_t *agg, const float val, const int8_t cond)
#define DEF_ARITH_NULLABLE_RHS(type, null_type, opname, opsym)
#define DEF_AGG_MAX_INT(n)
Definitions for core Datum union type.
RUNTIME_EXPORT ALWAYS_INLINE int64_t * get_integer_aggregation_tree(int64_t **aggregation_trees, size_t partition_idx)
__device__ int32_t checked_single_agg_id_float_shared(int32_t *agg, const float val, const float null_val)
int64_t compute_upper_bound_from_ordered_partition_index(const int64_t num_elems, const TARGET_VAL_TYPE target_val, const COL_TYPE *col_buf, const int32_t *partition_rowid_buf, const int64_t *ordered_index_buf, const NULL_TYPE null_val, const bool nulls_first, const int64_t null_start_offset, const int64_t null_end_offset)
AGG_TYPE agg_func(AGG_TYPE const lhs, AGG_TYPE const rhs)
Macros and functions for groupby buffer compaction.
__device__ int64_t * get_matching_group_value(int64_t *groups_buffer, const uint32_t h, const T *key, const uint32_t key_count, const uint32_t row_size_quad)
RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_and(const int8_t lhs, const int8_t rhs, const int8_t null_val)
#define DEF_CAST_SCALED_NULLABLE(from_type, to_type)
RUNTIME_EXPORT ALWAYS_INLINE void agg_count_distinct_bitmap(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t bucket_size)
T fill_missing_value(int64_t const cur_idx, T const null_val, T *const col_buf, int64_t const num_elems_in_partition, int32_t *const partition_rowid_buf, int64_t *const ordered_index_buf, bool const is_forward_fill)
__device__ void agg_sum_if_double_shared(int64_t *agg, const double val, const int8_t cond)
__device__ int64_t agg_sum_shared(int64_t *agg, const int64_t val)
RUNTIME_EXPORT void agg_sum_if_float(int32_t *agg, const float val, const int8_t cond)
__device__ void agg_id_double_shared_slow(int64_t *agg, const double *val)
RUNTIME_EXPORT ALWAYS_INLINE void agg_count_distinct_bitmap_skip_val(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t bucket_size, const int64_t skip_val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t floor_div_lhs(const int64_t dividend, const int64_t divisor)
__device__ int32_t agg_sum_if_int32_shared(int32_t *agg, const int32_t val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t key_for_string_encoded(const int32_t str_id)
RUNTIME_EXPORT ALWAYS_INLINE int64_t * get_matching_group_value_perfect_hash_keyless(int64_t *groups_buffer, const uint32_t hashed_index, const uint32_t row_size_quad)
__device__ int8_t * agg_id_varlen_shared(int8_t *varlen_buffer, const int64_t offset, const int8_t *value, const int64_t size_bytes)
RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_if_int32_skip_val(uint32_t *agg, const int32_t cond, const int32_t skip_val)
int64_t compute_current_row_idx_in_frame(const int64_t num_elems, const int64_t cur_row_idx, const T *col_buf, const int32_t *partition_rowid_buf, const int64_t *ordered_index_buf, const T null_val, const bool nulls_first, const int64_t null_start_pos, const int64_t null_end_pos, Comparator cmp)
__device__ int64_t * declare_dynamic_shared_memory()
RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_row_mode_end_index_add(int64_t candidate_index, int64_t current_partition_start_offset, int64_t frame_bound, int64_t num_current_partition_elem)
ALWAYS_INLINE DEVICE int32_t map_string_dict_id(const int32_t string_id, const int64_t translation_map_handle, const int32_t min_source_id)
__device__ int32_t agg_sum_int32_shared(int32_t *agg, const int32_t val)
__device__ int64_t agg_sum_skip_val_shared(int64_t *agg, const int64_t val, const int64_t skip_val)
__device__ void agg_sum_float_shared(int32_t *agg, const float val)
__device__ int64_t agg_sum_if_shared(int64_t *agg, const int64_t val, const int8_t cond)
#define DEF_ROUND_NULLABLE(from_type, to_type)
ALWAYS_INLINE DEVICE double tree_model_reg_predict(const double *regressor_inputs, const int64_t decision_tree_table_handle, const int64_t decision_tree_offsets_handle, const int32_t num_regressors, const int32_t num_trees, const bool compute_avg, const double null_value)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket(const double target_value, const double lower_bound, const double upper_bound, const double scale_factor, const int32_t partition_count)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE bool point_double_is_null(double *point)
RUNTIME_EXPORT void multifrag_query_hoisted_literals(int32_t *error_codes, int32_t *total_matched, int64_t **out, const uint32_t *num_fragments_ptr, const uint32_t *num_tables_ptr, const uint32_t *row_index_resume, const int8_t ***col_buffers, const int8_t *literals, const int64_t *num_rows, const uint64_t *frag_row_offsets, const int32_t *max_matched, const int64_t *init_agg_value, const int64_t *join_hash_tables, const int8_t *row_func_mgr)
#define DEF_SKIP_AGG(base_agg_func)
__device__ int64_t get_block_index()
__device__ bool check_interrupt()
#define DEF_WRITE_PROJECTION_INT(n)
GPU_RT_STUB int32_t checked_single_agg_id_int8_shared(int8_t *agg, const int8_t val, const int8_t null_val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_row_mode_start_index_add(int64_t candidate_index, int64_t current_partition_start_offset, int64_t frame_bound, int64_t num_current_partition_elem)
__device__ int32_t agg_sum_int32_skip_val_shared(int32_t *agg, const int32_t val, const int32_t skip_val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t decimal_floor(const int64_t x, const int64_t scale)
#define DEF_SEARCH_DERIVED_AGGREGATION_TREE(agg_value_type)
__device__ void linear_probabilistic_count(uint8_t *bitmap, const uint32_t bitmap_bytes, const uint8_t *key_bytes, const uint32_t key_len)
RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_double(uint64_t *agg, const double val)
#define DEF_SHARED_AGG_RET_STUBS(base_agg_func)
__device__ void agg_sum_double_shared(int64_t *agg, const double val)
RUNTIME_EXPORT ALWAYS_INLINE void agg_min_double(int64_t *agg, const double val)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_no_oob_check(const double target_value, const double lower_bound, const double scale_factor)
RUNTIME_EXPORT ALWAYS_INLINE int64_t decimal_ceil(const int64_t x, const int64_t scale)
#define DEF_ARITH_NULLABLE_LHS(type, null_type, opname, opsym)
__device__ int64_t agg_sum_if_skip_val_shared(int64_t *agg, const int64_t val, const int64_t skip_val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE void agg_if_sum_float(int32_t *agg, const float val, const int8_t cond)
#define DEF_MAP_STRING_TO_DATUM(value_type, value_name)
RUNTIME_EXPORT ALWAYS_INLINE int64_t get_valid_buf_start_pos(const int64_t null_start_pos, const int64_t null_end_pos)
#define DEF_AGG_MIN_INT(n)
RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_double_skip_val(uint64_t *agg, const double val, const double skip_val)
RUNTIME_EXPORT ALWAYS_INLINE void agg_min(int64_t *agg, const int64_t val)
__device__ int32_t pos_start_impl(const int32_t *row_index_resume)
RUNTIME_EXPORT ALWAYS_INLINE int32_t width_bucket_nullable(const double target_value, const double lower_bound, const double upper_bound, const double scale_factor, const int32_t partition_count, const double null_val)
RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_not(const int8_t operand, const int8_t null_val)
RUNTIME_EXPORT ALWAYS_INLINE void agg_id_float(int32_t *agg, const float val)
RUNTIME_EXPORT ALWAYS_INLINE float load_float(const int32_t *agg)
__device__ int32_t runtime_interrupt_flag
RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_double(int64_t *agg, const double val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t row_number_window_func(const int64_t output_buff, const int64_t pos)
RUNTIME_EXPORT NEVER_INLINE void init_columnar_group_by_buffer_gpu(int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_qw_count, const uint32_t agg_col_count, const bool keyless, const bool blocks_share_memory, const int32_t frag_idx)
std::function< bool(const PermutationIdx, const PermutationIdx)> Comparator
__device__ void agg_approximate_count_distinct_gpu(int64_t *agg, const int64_t key, const uint32_t b, const int64_t base_dev_addr, const int64_t base_host_addr)
__device__ void sync_warp()
RUNTIME_EXPORT ALWAYS_INLINE int64_t scale_decimal_down_nullable(const int64_t operand, const int64_t scale, const int64_t null_val)
__device__ void agg_count_distinct_bitmap_skip_val_gpu(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t bucket_size, const int64_t skip_val, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)
RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_if_skip_val(int64_t *agg, const int64_t val, const int64_t skip_val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE void agg_id(int64_t *agg, const int64_t val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t scale_decimal_up(const int64_t operand, const uint64_t scale, const int64_t operand_null_val, const int64_t result_null_val)
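A small sketch of null-aware decimal up-scaling as assumed here: a non-null operand is multiplied by the (power-of-ten) scale, while a null operand maps to the result type's null sentinel. Illustrative only:

#include <cstdint>

int64_t scale_decimal_up_sketch(const int64_t operand,
                                const uint64_t scale,
                                const int64_t operand_null_val,
                                const int64_t result_null_val) {
  return operand != operand_null_val ? operand * static_cast<int64_t>(scale)
                                     : result_null_val;
}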
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t char_length(const char *str, const int32_t str_len)
RUNTIME_EXPORT ALWAYS_INLINE double * get_double_aggregation_tree(int64_t **aggregation_trees, size_t partition_idx)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_reversed(const double target_value, const double lower_bound, const double upper_bound, const double scale_factor, const int32_t partition_count)
RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_int32(int32_t *agg, const int32_t val)
RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id_float(int32_t *agg, const float val, const float null_val)
__device__ void agg_sum_double_skip_val_shared(int64_t *agg, const double val, const double skip_val)
int64_t right_child_row_idx
RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count_if_skip_val(uint64_t *agg, const int64_t cond, const int64_t skip_val)
LOGICAL_TYPE get_value_in_window_frame(const int64_t target_row_idx_in_frame, const int64_t frame_start_offset, const int64_t frame_end_offset, const COL_TYPE *col_buf, const int32_t *partition_rowid_buf, const int64_t *ordered_index_buf, const LOGICAL_TYPE logical_null_val, const LOGICAL_TYPE col_null_val)
__device__ void agg_max_int8_skip_val_shared(int8_t *agg, const int8_t val, const int8_t skip_val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t get_valid_buf_end_pos(const int64_t num_elems, const int64_t null_start_pos, const int64_t null_end_pos)
RUNTIME_EXPORT ALWAYS_INLINE double load_double(const int64_t *agg)
RUNTIME_EXPORT ALWAYS_INLINE void agg_id_double(int64_t *agg, const double val)
#define DEF_FILL_MISSING_VALUE(col_type)
RUNTIME_EXPORT ALWAYS_INLINE int8_t bit_is_set(const int8_t *bitset, const int64_t val, const int64_t min_val, const int64_t max_val, const int64_t null_val, const int8_t null_bool_val)
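A sketch of sentinel-aware bitmap membership, assuming one bit per value in the range [min_val, max_val] packed least-significant-bit first within bytes; the exact bit layout is an assumption for illustration:

#include <cstdint>

int8_t bit_is_set_sketch(const int8_t* bitset,
                         const int64_t val,
                         const int64_t min_val,
                         const int64_t max_val,
                         const int64_t null_val,
                         const int8_t null_bool_val) {
  if (val == null_val) {
    return null_bool_val;              // NULL input yields the NULL boolean value
  }
  if (val < min_val || val > max_val) {
    return 0;                          // outside the bitmap's domain
  }
  const uint64_t idx = static_cast<uint64_t>(val - min_val);
  return (bitset[idx >> 3] & (1 << (idx & 7))) ? 1 : 0;
}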
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_expr_no_oob_check(const double target_value, const bool reversed, const double lower_bound, const double upper_bound, const int32_t partition_count)
DEVICE auto lower_bound(ARGS &&...args)
RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_if_int32(uint32_t *agg, const int32_t cond)
__device__ void agg_max_int16_skip_val_shared(int16_t *agg, const int16_t val, const int16_t skip_val)
#define DEF_SEARCH_AGGREGATION_TREE(agg_value_type)
RUNTIME_EXPORT NEVER_INLINE DEVICE uint32_t MurmurHash3(const void *key, int len, const uint32_t seed)
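For reference, the public-domain MurmurHash3 x86 32-bit variant looks roughly like the following; whether the runtime uses exactly this variant and block-read strategy is an assumption here:

#include <cstdint>
#include <cstring>

uint32_t murmur3_x86_32_sketch(const void* key, const int len, const uint32_t seed) {
  const uint8_t* data = static_cast<const uint8_t*>(key);
  const int nblocks = len / 4;
  uint32_t h1 = seed;
  const uint32_t c1 = 0xcc9e2d51;
  const uint32_t c2 = 0x1b873593;
  // Body: mix four bytes at a time.
  for (int i = 0; i < nblocks; ++i) {
    uint32_t k1;
    std::memcpy(&k1, data + i * 4, sizeof(k1));
    k1 *= c1;
    k1 = (k1 << 15) | (k1 >> 17);
    k1 *= c2;
    h1 ^= k1;
    h1 = (h1 << 13) | (h1 >> 19);
    h1 = h1 * 5 + 0xe6546b64;
  }
  // Tail: up to three trailing bytes.
  const uint8_t* tail = data + nblocks * 4;
  uint32_t k1 = 0;
  switch (len & 3) {
    case 3: k1 ^= tail[2] << 16; [[fallthrough]];
    case 2: k1 ^= tail[1] << 8;  [[fallthrough]];
    case 1: k1 ^= tail[0];
            k1 *= c1; k1 = (k1 << 15) | (k1 >> 17); k1 *= c2; h1 ^= k1;
  }
  // Finalization: avalanche the remaining bits.
  h1 ^= static_cast<uint32_t>(len);
  h1 ^= h1 >> 16;
  h1 *= 0x85ebca6b;
  h1 ^= h1 >> 13;
  h1 *= 0xc2b2ae35;
  h1 ^= h1 >> 16;
  return h1;
}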
void compute_derived_aggregates(SumAndCountPair< AGG_VALUE_TYPE > *aggregation_tree_for_partition, SumAndCountPair< AGG_VALUE_TYPE > &res, size_t query_range_start_idx, size_t query_range_end_idx, size_t leaf_level, size_t tree_fanout, AGG_VALUE_TYPE invalid_val, AGG_VALUE_TYPE null_val)
#define DEF_RANGE_MODE_FRAME_UPPER_BOUND(target_val_type, col_type, null_type, opname, opsym)
RUNTIME_EXPORT void agg_max_int8(int8_t *agg, const int8_t val)
RUNTIME_EXPORT ALWAYS_INLINE int64_t * get_group_value_fast_keyless_semiprivate(int64_t *groups_buffer, const int64_t key, const int64_t min_key, const int64_t, const uint32_t row_size_quad, const uint8_t thread_warp_idx, const uint8_t warp_size)
RUNTIME_EXPORT ALWAYS_INLINE size_t getStartOffsetForSegmentTreeTraversal(size_t level, size_t tree_fanout)
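A sketch of the start offset of a given level in a complete tree with the given fanout, laid out level by level in one array: the offset is the node count of all shallower levels, 1 + f + ... + f^(level-1) = (f^level - 1) / (f - 1). Illustrative only:

#include <cstddef>

size_t start_offset_for_level_sketch(const size_t level, const size_t tree_fanout) {
  size_t offset = 0;
  size_t nodes_at_level = 1;           // the root level has a single node
  for (size_t i = 0; i < level; ++i) {
    offset += nodes_at_level;          // accumulate nodes of every shallower level
    nodes_at_level *= tree_fanout;
  }
  return offset;
}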
RUNTIME_EXPORT ALWAYS_INLINE void agg_max_float(int32_t *agg, const float val)
__device__ const int64_t * init_shared_mem(const int64_t *global_groups_buffer, const int32_t groups_buffer_size)
RUNTIME_EXPORT ALWAYS_INLINE int32_t checked_single_agg_id_double(int64_t *agg, const double val, const double null_val)
GPU_RT_STUB int32_t checked_single_agg_id_int16_shared(int16_t *agg, const int16_t val, const int16_t null_val)
RUNTIME_EXPORT ALWAYS_INLINE double load_avg_float(const int32_t *agg, const int32_t *count, const double null_val)
#define DEF_BINARY_NULLABLE_ALL_OPS(type, null_type)
RUNTIME_EXPORT ALWAYS_INLINE SumAndCountPair< double > * get_double_derived_aggregation_tree(int64_t **aggregation_trees, size_t partition_idx)
RUNTIME_EXPORT NEVER_INLINE void init_group_by_buffer_impl(int64_t *groups_buffer, const int64_t *init_vals, const uint32_t groups_buffer_entry_count, const uint32_t key_qw_count, const uint32_t agg_col_count, const bool keyless, const int8_t warp_size)
RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_int32(uint32_t *agg, const int32_t)
RUNTIME_EXPORT ALWAYS_INLINE void set_matching_group_value_perfect_hash_columnar(int64_t *groups_buffer, const uint32_t hashed_index, const int64_t *key, const uint32_t key_count, const uint32_t entry_count)
#define DEF_GET_VALUE_IN_FRAME(col_type, logical_type)
RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_row_mode_end_index_sub(int64_t candidate_index, int64_t current_partition_start_offset, int64_t frame_bound)
AGG_TYPE compute_window_func_via_aggregation_tree(AGG_TYPE *aggregation_tree_for_partition, size_t query_range_start_idx, size_t query_range_end_idx, size_t leaf_level, size_t tree_fanout, AGG_TYPE init_val, AGG_TYPE invalid_val, AGG_TYPE null_val)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_expr_nullable(const double target_value, const bool reversed, const double lower_bound, const double upper_bound, const int32_t partition_count, const double null_val)
#define DEF_ARITH_NULLABLE(type, null_type, opname, opsym)
RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_if_int32(int32_t *agg, const int32_t val, const int8_t cond)
__device__ void agg_min_int16_skip_val_shared(int16_t *agg, const int16_t val, const int16_t skip_val)
RUNTIME_EXPORT ALWAYS_INLINE int32_t width_bucket_reversed_nullable(const double target_value, const double lower_bound, const double upper_bound, const double scale_factor, const int32_t partition_count, const double null_val)
int64_t compute_lower_bound_from_ordered_partition_index(const int64_t num_elems, const TARGET_VAL_TYPE target_val, const COL_TYPE *col_buf, const int32_t *partition_rowid_buf, const int64_t *ordered_index_buf, const NULL_TYPE null_val, const bool nulls_first, const int64_t null_start_offset, const int64_t null_end_offset)
RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum_skip_val(int64_t *agg, const int64_t val, const int64_t skip_val)
__device__ void sync_threadblock()
RUNTIME_EXPORT ALWAYS_INLINE int64_t compute_int64_t_lower_bound(const int64_t entry_cnt, const int64_t target_value, const int64_t *col_buf)
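A sketch of the lower-bound search over an ordered int64 column buffer: a classic binary search returning the first index whose value is not less than the target, or entry_cnt when every value is smaller. Illustrative only:

#include <cstdint>

int64_t compute_lower_bound_sketch(const int64_t entry_cnt,
                                   const int64_t target_value,
                                   const int64_t* col_buf) {
  int64_t lo = 0;
  int64_t hi = entry_cnt;
  while (lo < hi) {
    const int64_t mid = lo + (hi - lo) / 2;
    if (col_buf[mid] < target_value) {
      lo = mid + 1;                    // target lies strictly to the right of mid
    } else {
      hi = mid;                        // mid is a candidate lower bound
    }
  }
  return lo;                           // first index with col_buf[idx] >= target_value
}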
__device__ void agg_min_int8_skip_val_shared(int8_t *agg, const int8_t val, const int8_t skip_val)
RUNTIME_EXPORT ALWAYS_INLINE int32_t agg_sum_int32_skip_val(int32_t *agg, const int32_t val, const int32_t skip_val)
RUNTIME_EXPORT ALWAYS_INLINE double load_avg_double(const int64_t *agg, const int64_t *count, const double null_val)
RUNTIME_EXPORT ALWAYS_INLINE uint64_t agg_count(uint64_t *agg, const int64_t)
RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_float(int32_t *agg, const float val)
RUNTIME_EXPORT ALWAYS_INLINE double load_avg_decimal(const int64_t *sum, const int64_t *count, const double null_val, const uint32_t scale)
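A sketch of reading back a decimal AVG from its sum/count slots, assuming scale counts fractional decimal digits and a zero count marks an empty (NULL) group. Illustrative only:

#include <cmath>
#include <cstdint>

double load_avg_decimal_sketch(const int64_t* sum,
                               const int64_t* count,
                               const double null_val,
                               const uint32_t scale) {
  if (*count == 0) {
    return null_val;                   // empty group produces the NULL sentinel
  }
  const double descaled_sum = static_cast<double>(*sum) / std::pow(10.0, scale);
  return descaled_sum / static_cast<double>(*count);
}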
RUNTIME_EXPORT bool check_interrupt_init(unsigned command)
RUNTIME_EXPORT ALWAYS_INLINE int32_t get_error_code(int32_t *error_codes)
__device__ void agg_count_distinct_bitmap_gpu(int64_t *agg, const int64_t val, const int64_t min_val, const int64_t bucket_size, const int64_t base_dev_addr, const int64_t base_host_addr, const uint64_t sub_bitmap_count, const uint64_t bitmap_bytes)
RUNTIME_EXPORT ALWAYS_INLINE int64_t agg_sum(int64_t *agg, const int64_t val)
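A sketch of sentinel-aware summation as assumed for agg_sum_skip_val: the accumulator starts at the skip (NULL) sentinel, values equal to the sentinel are ignored, and the previous accumulator contents are returned so the caller can tell a first write from an update. Illustrative only:

#include <cstdint>

int64_t agg_sum_skip_val_sketch(int64_t* agg, const int64_t val, const int64_t skip_val) {
  const int64_t old = *agg;
  if (val != skip_val) {
    // The first non-null value replaces the sentinel; later values accumulate.
    *agg = (old == skip_val) ? val : old + val;
  }
  return old;
}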
#define DEF_SHARED_AGG_STUBS(base_agg_func)
__device__ int32_t get_matching_group_value_columnar_slot(int64_t *groups_buffer, const uint32_t entry_count, const uint32_t h, const T *key, const uint32_t key_count)
__device__ int32_t checked_single_agg_id_shared(int64_t *agg, const int64_t val, const int64_t null_val)
#define DEF_HANDLE_NULL_FOR_WINDOW_FRAMING_AGG(agg_type, null_type)
#define DEF_AGG_ID_INT(n)
RUNTIME_EXPORT ALWAYS_INLINE void record_error_code(const int32_t err_code, int32_t *error_codes)
__device__ void agg_sum_if_float_skip_val_shared(int32_t *agg, const float val, const float skip_val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE bool sample_ratio(const double proportion, const int64_t row_offset)
FORCE_INLINE HOST DEVICE T align_to_int64(T addr)
RUNTIME_EXPORT ALWAYS_INLINE double percent_window_func(const int64_t output_buff, const int64_t pos)
RUNTIME_EXPORT ALWAYS_INLINE int8_t logical_or(const int8_t lhs, const int8_t rhs, const int8_t null_val)
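A sketch of SQL three-valued OR over int8 operands with a NULL sentinel: TRUE wins over NULL, and NULL wins over FALSE. Whether the exported function uses exactly this branching is an assumption:

#include <cstdint>

int8_t logical_or_sketch(const int8_t lhs, const int8_t rhs, const int8_t null_val) {
  if (lhs == null_val) {
    return rhs == null_val ? null_val : (rhs ? 1 : null_val);  // NULL OR x
  }
  if (rhs == null_val) {
    return lhs ? 1 : null_val;                                  // x OR NULL
  }
  return (lhs || rhs) ? 1 : 0;                                  // both operands known
}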
__device__ void force_sync()
#define DEF_SKIP_IF_AGG(skip_agg_func, base_agg_func)
#define DEF_RANGE_MODE_FRAME_LOWER_BOUND(target_val_type, col_type, null_type, opname, opsym)
RUNTIME_EXPORT void agg_min_int32(int32_t *agg, const int32_t val)
RUNTIME_EXPORT ALWAYS_INLINE void agg_sum_if_double(int64_t *agg, const double val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_float_skip_val(uint32_t *agg, const float val, const float skip_val)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t char_length_nullable(const char *str, const int32_t str_len, const int32_t int_null)
__device__ int32_t agg_sum_if_int32_skip_val_shared(int32_t *agg, const int32_t val, const int32_t skip_val, const int8_t cond)
RUNTIME_EXPORT ALWAYS_INLINE SumAndCountPair< int64_t > * get_integer_derived_aggregation_tree(int64_t **aggregation_trees, size_t partition_idx)
RUNTIME_EXPORT ALWAYS_INLINE uint32_t agg_count_float(uint32_t *agg, const float val)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_reversed_no_oob_check(const double target_value, const double lower_bound, const double scale_factor)
RUNTIME_EXPORT ALWAYS_INLINE DEVICE int32_t width_bucket_expr(const double target_value, const bool reversed, const double lower_bound, const double upper_bound, const int32_t partition_count)
__device__ int32_t group_buff_idx_impl()