26 #include <type_traits>
28 #include "arrow/api.h"
29 #include "arrow/ipc/api.h"
31 #include <arrow/gpu/cuda_api.h>
34 static_assert(ARROW_VERSION >= 16000,
"Apache Arrow v0.16.0 or above is required.");
37 using ValueArray = boost::variant<std::vector<bool>,
42 std::vector<arrow::Decimal128>,
45 std::vector<std::string>,
46 std::vector<std::vector<int8_t>>,
47 std::vector<std::vector<int16_t>>,
48 std::vector<std::vector<int32_t>>,
49 std::vector<std::vector<int64_t>>,
50 std::vector<std::vector<float>>,
51 std::vector<std::vector<double>>,
52 std::vector<std::vector<std::string>>>;
55 using Vec2 = std::vector<std::vector<T>>;
63 using pointer = std::vector<TargetValue>*;
71 return !(*
this == other);
111 const std::vector<TargetMetaInfo>& targets_meta,
115 const std::shared_ptr<ResultSet>& rows,
116 const std::vector<TargetMetaInfo>& targets_meta,
118 const size_t min_result_size_for_bulk_dictionary_fetch,
119 const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch);
126 bool translate_strings,
129 for (
size_t i = 0; i < from_index; i++) {
137 bool decimal_to_double)
const {
138 return rowIterator(0, translate_strings, decimal_to_double);
141 std::vector<std::string> getDictionaryStrings(
const size_t col_idx)
const;
143 std::vector<TargetValue>
getRowAt(
const size_t index)
const;
145 std::vector<TargetValue>
getNextRow(
const bool translate_strings,
146 const bool decimal_to_double)
const;
162 const size_t device_id,
163 std::shared_ptr<Data_Namespace::DataMgr>& data_mgr);
171 const size_t min_result_size_for_bulk_dictionary_fetch,
172 const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch);
174 template <
typename Type,
typename ArrayType>
176 const arrow::Array& column,
178 const size_t idx)
const;
188 std::vector<std::shared_ptr<arrow::Array>>
columns_;
208 const std::shared_ptr<ResultSet>& rows,
217 const std::shared_ptr<ResultSet>& rows,
219 const size_t min_result_size_for_bulk_dictionary_fetch,
220 const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch);
231 static constexpr
double
235 const std::shared_ptr<Data_Namespace::DataMgr> data_mgr,
237 const int32_t device_id,
238 const std::vector<std::string>& col_names,
239 const int32_t first_n,
255 const std::shared_ptr<ResultSet>& results,
256 const std::shared_ptr<Data_Namespace::DataMgr> data_mgr,
258 const int32_t device_id,
259 const std::vector<std::string>& col_names,
260 const int32_t first_n,
262 const size_t min_result_size_for_bulk_dictionary_fetch,
263 const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch)
272 min_result_size_for_bulk_dictionary_fetch)
274 max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch) {}
284 std::shared_ptr<arrow::Field>
field;
294 const std::vector<std::string>& col_names,
295 const int32_t first_n)
306 const std::shared_ptr<ResultSet>& results,
307 const std::vector<std::string>& col_names,
308 const int32_t first_n,
309 const size_t min_result_size_for_bulk_dictionary_fetch,
310 const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch)
315 min_result_size_for_bulk_dictionary_fetch)
317 max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch) {}
323 const std::shared_ptr<arrow::Schema>& schema)
const;
325 std::shared_ptr<arrow::Field>
makeField(
const std::string
name,
333 arrow::ipc::DictionaryFieldMapper* mapper)
const;
337 const size_t result_col_idx,
338 const std::shared_ptr<arrow::Field>&
field)
const;
342 const std::shared_ptr<std::vector<bool>>& is_valid)
const;
348 std::shared_ptr<Data_Namespace::DataMgr>
data_mgr_ =
nullptr;
359 template <
typename T>
361 return std::is_same<T, arrow::Date32Builder>::value ||
362 std::is_same<T, arrow::Date64Builder>::value;
ArrowStringRemapMode string_remap_mode
std::vector< std::vector< T >> Vec2
const size_t min_result_size_for_bulk_dictionary_fetch_
std::unique_ptr< arrow::ArrayBuilder > builder
std::shared_ptr< arrow::StringArray > string_array
ArrowResultSetRowIterator & operator++(void)
double decimal_to_double(const SQLTypeInfo &otype, int64_t oval)
void append(ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid) const
ArrowResult getArrowResult() const
std::vector< char > sm_handle
void initializeColumnBuilder(ColumnBuilder &column_builder, const SQLTypeInfo &col_type, const size_t result_col_idx, const std::shared_ptr< arrow::Field > &field) const
std::shared_ptr< ArrowResult > results_
std::shared_ptr< ResultSet > rows_
const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_
SQLTypeInfo getColType(const size_t col_idx) const
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
std::shared_ptr< arrow::Array > finishColumnBuilder(ColumnBuilder &column_builder) const
boost::variant< std::vector< bool >, std::vector< int8_t >, std::vector< int16_t >, std::vector< int32_t >, std::vector< int64_t >, std::vector< arrow::Decimal128 >, std::vector< float >, std::vector< double >, std::vector< std::string >, std::vector< std::vector< int8_t >>, std::vector< std::vector< int16_t >>, std::vector< std::vector< int32_t >>, std::vector< std::vector< int64_t >>, std::vector< std::vector< float >>, std::vector< std::vector< double >>, std::vector< std::vector< std::string >>> ValueArray
std::shared_ptr< arrow::Field > field
arrow::ipc::DictionaryMemo dictionary_memo_
High-level representation of SQL values.
ArrowResultSet(const std::shared_ptr< ResultSet > &rows, const std::vector< TargetMetaInfo > &targets_meta, const ExecutorDeviceType device_type=ExecutorDeviceType::CPU)
ArrowTransport transport_method_
const rapidjson::Value & field(const rapidjson::Value &obj, const char field[]) noexcept
std::shared_ptr< arrow::RecordBatch > getArrowBatch(const std::shared_ptr< arrow::Schema > &schema) const
std::vector< std::string > col_names_
std::shared_ptr< arrow::Buffer > records
std::vector< TargetValue > & reference
ArrowResultSetConverter(const std::shared_ptr< ResultSet > &results, const std::shared_ptr< Data_Namespace::DataMgr > data_mgr, const ExecutorDeviceType device_type, const int32_t device_id, const std::vector< std::string > &col_names, const int32_t first_n, const ArrowTransport transport_method, const size_t min_result_size_for_bulk_dictionary_fetch, const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch)
bool operator==(const ArrowResultSetRowIterator &other) const
ExecutorDeviceType device_type_
size_t entryCount() const
std::vector< char > df_handle
std::shared_ptr< arrow::Field > makeField(const std::string name, const SQLTypeInfo &target_type) const
ArrowResultSet(const std::shared_ptr< ResultSet > &rows, const ExecutorDeviceType device_type=ExecutorDeviceType::CPU)
std::vector< TargetValue > getNextRow(const bool translate_strings, const bool decimal_to_double) const
SerializedArrowOutput getSerializedArrowOutput(arrow::ipc::DictionaryFieldMapper *mapper) const
std::vector< TargetValue > * pointer
std::ptrdiff_t difference_type
ArrowResultSetConverter(const std::shared_ptr< ResultSet > &results, const std::vector< std::string > &col_names, const int32_t first_n)
static void deallocateArrowResultBuffer(const ArrowResult &result, const ExecutorDeviceType device_type, const size_t device_id, std::shared_ptr< Data_Namespace::DataMgr > &data_mgr)
std::input_iterator_tag iterator_category
std::unordered_map< StrId, ArrowStrId > string_remapping
std::shared_ptr< ResultSet > results_
std::unique_ptr< ArrowResultSet > result_set_arrow_loopback(const ExecutionResult &results)
ArrowResultSetRowIterator(const ArrowResultSet *rs)
std::vector< TargetValue > value_type
value_type operator*() const
std::string serialized_cuda_handle
std::vector< TargetMetaInfo > column_metainfo_
bool operator!=(const ArrowResultSetRowIterator &other) const
bool definitelyHasNoRows() const
ArrowResultSetRowIterator operator++(int)
ArrowResultSetConverter(const std::shared_ptr< ResultSet > &results, const std::vector< std::string > &col_names, const int32_t first_n, const size_t min_result_size_for_bulk_dictionary_fetch, const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch)
static constexpr size_t default_min_result_size_for_bulk_dictionary_fetch
static constexpr double default_max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch
void appendValue(std::vector< TargetValue > &row, const arrow::Array &column, const Type null_val, const size_t idx) const
Basic constructors and methods of the row set interface.
ArrowResultSetConverter(const std::shared_ptr< ResultSet > &results, const std::shared_ptr< Data_Namespace::DataMgr > data_mgr, const ExecutorDeviceType device_type, const int32_t device_id, const std::vector< std::string > &col_names, const int32_t first_n, const ArrowTransport transport_method)
void resultSetArrowLoopback(const ExecutorDeviceType device_type=ExecutorDeviceType::CPU)
std::vector< TargetValue > getRowAt(const size_t index) const
std::shared_ptr< arrow::RecordBatch > record_batch_
const ArrowResultSet * result_set_
std::shared_ptr< arrow::Buffer > schema
std::vector< TargetMetaInfo > targets_meta_
std::vector< char > df_buffer
constexpr auto scale_epoch_values()
std::vector< std::shared_ptr< arrow::Array > > columns_
std::shared_ptr< arrow::RecordBatch > convertToArrow() const