OmniSciDB
a5dc49c757
|
#include <ArrowResultSet.h>
Classes | |
struct | ColumnBuilder |
struct | SerializedArrowOutput |
Public Member Functions | |
ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::shared_ptr< Data_Namespace::DataMgr > data_mgr, const ExecutorDeviceType device_type, const int32_t device_id, const std::vector< std::string > &col_names, const int32_t first_n, const ArrowTransport transport_method) | |
ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::shared_ptr< Data_Namespace::DataMgr > data_mgr, const ExecutorDeviceType device_type, const int32_t device_id, const std::vector< std::string > &col_names, const int32_t first_n, const ArrowTransport transport_method, const size_t min_result_size_for_bulk_dictionary_fetch, const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch) | |
ArrowResult | getArrowResult () const |
ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::vector< std::string > &col_names, const int32_t first_n) | |
ArrowResultSetConverter (const std::shared_ptr< ResultSet > &results, const std::vector< std::string > &col_names, const int32_t first_n, const size_t min_result_size_for_bulk_dictionary_fetch, const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch) | |
std::shared_ptr < arrow::RecordBatch > | convertToArrow () const |
Static Public Attributes | |
static constexpr size_t | default_min_result_size_for_bulk_dictionary_fetch {10000UL} |
static constexpr double | default_max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch {0.1} |
Private Member Functions | |
std::shared_ptr < arrow::RecordBatch > | getArrowBatch (const std::shared_ptr< arrow::Schema > &schema) const |
std::shared_ptr< arrow::Field > | makeField (const std::string name, const SQLTypeInfo &target_type) const |
SerializedArrowOutput | getSerializedArrowOutput (arrow::ipc::DictionaryFieldMapper *mapper) const |
void | initializeColumnBuilder (ColumnBuilder &column_builder, const SQLTypeInfo &col_type, const size_t result_col_idx, const std::shared_ptr< arrow::Field > &field) const |
void | append (ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr< std::vector< bool >> &is_valid) const |
std::shared_ptr< arrow::Array > | finishColumnBuilder (ColumnBuilder &column_builder) const |
Private Attributes | |
std::shared_ptr< ResultSet > | results_ |
std::shared_ptr < Data_Namespace::DataMgr > | data_mgr_ = nullptr |
ExecutorDeviceType | device_type_ = ExecutorDeviceType::GPU |
int32_t | device_id_ = 0 |
std::vector< std::string > | col_names_ |
int32_t | top_n_ |
ArrowTransport | transport_method_ |
const size_t | min_result_size_for_bulk_dictionary_fetch_ |
const double | max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_ |
Friends | |
class | ArrowResultSet |
Definition at line 228 of file ArrowResultSet.h.
|
inline |
Definition at line 234 of file ArrowResultSet.h.
|
inline |
Definition at line 254 of file ArrowResultSet.h.
|
inline |
Definition at line 293 of file ArrowResultSet.h.
|
inline |
Definition at line 305 of file ArrowResultSet.h.
|
private |
Definition at line 1618 of file ArrowResultSetConverter.cpp.
References CHECK_EQ, ArrowResultSetConverter::ColumnBuilder::col_type, device_type_, GPU, SQLTypeInfo::is_dict_encoded_string(), kARRAY, kBIGINT, kBOOLEAN, kCHAR, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, kVARCHAR, and ArrowResultSetConverter::ColumnBuilder::physical_type.
Referenced by getArrowBatch().
std::shared_ptr< arrow::RecordBatch > ArrowResultSetConverter::convertToArrow | ( | ) | const |
Definition at line 715 of file ArrowResultSetConverter.cpp.
References CHECK, col_names_, DEBUG_TIMER, f(), getArrowBatch(), makeField(), results_, and VLOG.
Referenced by getArrowResult(), and getSerializedArrowOutput().
|
inline private |
Definition at line 1386 of file ArrowResultSetConverter.cpp.
References ARROW_THROW_NOT_OK, and ArrowResultSetConverter::ColumnBuilder::builder.
Referenced by getArrowBatch().
|
private |
Definition at line 733 of file ArrowResultSetConverter.cpp.
References append(), ARROW_RECORDBATCH_MAKE, threading_serial::async(), CHECK, CHECK_EQ, cpu_threads(), anonymous_namespace{ArrowResultSetConverter.cpp}::create_or_append_validity(), DEBUG_TIMER, device_type_, field(), finishColumnBuilder(), GPU, initializeColumnBuilder(), kBIGINT, kBOOLEAN, kDATE, kDECIMAL, kDOUBLE, kFLOAT, kINT, kSMALLINT, kTEXT, kTIME, kTIMESTAMP, kTINYINT, heavyai::Projection, run_benchmark_import::result, results_, heavyai::TableFunction, and top_n_.
Referenced by convertToArrow().
ArrowResult ArrowResultSetConverter::getArrowResult | ( | ) | const |
Serialize an Arrow result to IPC memory. Users are responsible for freeing all CPU IPC buffers using deallocateArrowResultBuffer. GPU buffers will become owned by the caller upon deserialization, and will be automatically freed when they go out of scope.
Definition at line 446 of file ArrowResultSetConverter.cpp.
References ARROW_ASSIGN_OR_THROW, ARROW_LOG, ARROW_THROW_NOT_OK, CHECK, CHECK_GE, convertToArrow(), CPU, DEBUG_TIMER, device_id_, device_type_, arrow::get_and_copy_to_shm(), anonymous_namespace{ArrowResultSetConverter.cpp}::get_shm_buffer(), GPU, SHARED_MEMORY, transport_method_, UNREACHABLE, and WIRE.
|
private |
Definition at line 693 of file ArrowResultSetConverter.cpp.
References ARROW_ASSIGN_OR_THROW, ARROW_THROW_NOT_OK, convertToArrow(), and DEBUG_TIMER.
|
private |
Definition at line 1252 of file ArrowResultSetConverter.cpp.
References ALL_STRINGS_REMAPPED, ARROW_THROW_NOT_OK, ArrowResultSetConverter::ColumnBuilder::builder, CHECK, CHECK_EQ, CHECK_GT, ArrowResultSetConverter::ColumnBuilder::col_type, DEBUG_TIMER, field(), ArrowResultSetConverter::ColumnBuilder::field, anonymous_namespace{ArrowResultSetConverter.cpp}::get_dict_index_type(), foreign_storage::get_physical_type(), SQLTypeInfo::getStringDictKey(), SQLTypeInfo::is_array(), SQLTypeInfo::is_dict_encoded_string(), SQLTypeInfo::is_dict_encoded_type(), max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_, min_result_size_for_bulk_dictionary_fetch_, ONLY_TRANSIENT_STRINGS_REMAPPED, ArrowResultSetConverter::ColumnBuilder::physical_type, results_, ArrowResultSetConverter::ColumnBuilder::string_array, ArrowResultSetConverter::ColumnBuilder::string_remap_mode, ArrowResultSetConverter::ColumnBuilder::string_remapping, StringDictionaryProxy::transientIndexToId(), and VLOG.
Referenced by getArrowBatch().
|
private |
Definition at line 1205 of file ArrowResultSetConverter.cpp.
References device_type_, field(), anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type(), and SQLTypeInfo::get_notnull().
Referenced by convertToArrow().
|
friend |
Definition at line 356 of file ArrowResultSet.h.
|
private |
Definition at line 351 of file ArrowResultSet.h.
Referenced by convertToArrow().
|
private |
Definition at line 348 of file ArrowResultSet.h.
|
static |
Definition at line 232 of file ArrowResultSet.h.
|
static |
Definition at line 230 of file ArrowResultSet.h.
Referenced by ArrowResultSet::resultSetArrowLoopback().
|
private |
Definition at line 350 of file ArrowResultSet.h.
Referenced by getArrowResult().
|
private |
Definition at line 349 of file ArrowResultSet.h.
Referenced by append(), getArrowBatch(), getArrowResult(), and makeField().
|
private |
Definition at line 355 of file ArrowResultSet.h.
Referenced by initializeColumnBuilder().
|
private |
Definition at line 354 of file ArrowResultSet.h.
Referenced by initializeColumnBuilder().
|
private |
Definition at line 347 of file ArrowResultSet.h.
Referenced by convertToArrow(), getArrowBatch(), and initializeColumnBuilder().
|
private |
Definition at line 352 of file ArrowResultSet.h.
Referenced by getArrowBatch().
|
private |
Definition at line 353 of file ArrowResultSet.h.
Referenced by getArrowResult().