OmniSciDB
a5dc49c757
|
#include <ColumnarResults.h>
Public Types | |
using | ReadFunction = std::function< int64_t(const ResultSet &, const size_t, const size_t, const size_t)> |
using | WriteFunction = std::function< void(const ResultSet &, const size_t, const size_t, const size_t, const size_t, const ReadFunction &)> |
Public Member Functions | |
ColumnarResults (const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const ResultSet &rows, const size_t num_columns, const std::vector< SQLTypeInfo > &target_types, const size_t executor_id, const size_t thread_idx, const bool is_parallel_execution_enforced=false) | |
ColumnarResults (const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const int8_t *one_col_buffer, const size_t num_rows, const SQLTypeInfo &target_type, const size_t executor_id, const size_t thread_idx) | |
const std::vector< int8_t * > & | getColumnBuffers () const |
const size_t | size () const |
const SQLTypeInfo & | getColumnType (const int col_id) const |
bool | isParallelConversion () const |
bool | isDirectColumnarConversionPossible () const |
Static Public Member Functions | |
static std::unique_ptr < ColumnarResults > | mergeResults (const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const std::vector< std::unique_ptr< ColumnarResults >> &sub_results) |
Protected Attributes | |
std::vector< int8_t * > | column_buffers_ |
size_t | num_rows_ |
Private Member Functions | |
ColumnarResults (const size_t num_rows, const std::vector< SQLTypeInfo > &target_types, const std::vector< size_t > &padded_target_sizes) | |
void | writeBackCell (const TargetValue &col_val, const size_t row_idx, const SQLTypeInfo &type_info, int8_t *column_buf, std::mutex *write_mutex=nullptr) |
void | materializeAllColumnsDirectly (const ResultSet &rows, const size_t num_columns) |
void | materializeAllColumnsThroughIteration (const ResultSet &rows, const size_t num_columns) |
void | materializeAllColumnsGroupBy (const ResultSet &rows, const size_t num_columns) |
void | materializeAllColumnsProjection (const ResultSet &rows, const size_t num_columns) |
void | materializeAllColumnsTableFunction (const ResultSet &rows, const size_t num_columns) |
void | copyAllNonLazyColumns (const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns) |
void | materializeAllLazyColumns (const std::vector< ColumnLazyFetchInfo > &lazy_fetch_info, const ResultSet &rows, const size_t num_columns) |
void | locateAndCountEntries (const ResultSet &rows, ColumnBitmap &bitmap, std::vector< size_t > &non_empty_per_thread, const size_t entry_count, const size_t num_threads, const size_t size_per_thread) const |
void | compactAndCopyEntries (const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread) |
void | compactAndCopyEntriesWithTargetSkipping (const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< bool > &targets_to_skip, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread) |
void | compactAndCopyEntriesWithoutTargetSkipping (const ResultSet &rows, const ColumnBitmap &bitmap, const std::vector< size_t > &non_empty_per_thread, const std::vector< size_t > &global_offsets, const std::vector< size_t > &slot_idx_per_target_idx, const size_t num_columns, const size_t entry_count, const size_t num_threads, const size_t size_per_thread) |
template<typename DATA_TYPE > | |
void | writeBackCellDirect (const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_function) |
std::vector< WriteFunction > | initWriteFunctions (const ResultSet &rows, const std::vector< bool > &targets_to_skip={}) |
template<QueryDescriptionType QUERY_TYPE, bool COLUMNAR_OUTPUT> | |
std::vector< ReadFunction > | initReadFunctions (const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={}) |
std::tuple< std::vector < WriteFunction >, std::vector < ReadFunction > > | initAllConversionFunctions (const ResultSet &rows, const std::vector< size_t > &slot_idx_per_target_idx, const std::vector< bool > &targets_to_skip={}) |
template<> | |
void | writeBackCellDirect (const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_from_function) |
template<> | |
void | writeBackCellDirect (const ResultSet &rows, const size_t input_buffer_entry_idx, const size_t output_buffer_entry_idx, const size_t target_idx, const size_t slot_idx, const ReadFunction &read_from_function) |
Private Attributes | |
const std::vector< SQLTypeInfo > | target_types_ |
bool | parallel_conversion_ |
bool | direct_columnar_conversion_ |
size_t | thread_idx_ |
std::shared_ptr< Executor > | executor_ |
std::vector< size_t > | padded_target_sizes_ |
Definition at line 61 of file ColumnarResults.h.
using ColumnarResults::ReadFunction = std::function<int64_t(const ResultSet&, const size_t, const size_t, const size_t)> |
Definition at line 98 of file ColumnarResults.h.
using ColumnarResults::WriteFunction = std::function<void(const ResultSet&, const size_t, const size_t, const size_t, const size_t, const ReadFunction&)> |
Definition at line 107 of file ColumnarResults.h.
ColumnarResults::ColumnarResults | ( | const std::shared_ptr< RowSetMemoryOwner > | row_set_mem_owner, |
const ResultSet & | rows, | ||
const size_t | num_columns, | ||
const std::vector< SQLTypeInfo > & | target_types, | ||
const size_t | executor_id, | ||
const size_t | thread_idx, | ||
const bool | is_parallel_execution_enforced = false |
||
) |
Definition at line 256 of file ColumnarResults.cpp.
References CHECK, CHECK_EQ, column_buffers_, anonymous_namespace{ColumnarResults.cpp}::computeTotalNofValuesForColumnArray(), anonymous_namespace{ColumnarResults.cpp}::computeTotalNofValuesForColumnGeoType(), anonymous_namespace{ColumnarResults.cpp}::computeTotalNofValuesForColumnTextEncodingNone(), DEBUG_TIMER, executor_, Executor::getExecutor(), getFlatBufferSize(), initializeFlatBuffer(), isDirectColumnarConversionPossible(), kARRAY, kENCODING_DICT, kENCODING_NONE, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kTEXT, materializeAllColumnsDirectly(), materializeAllColumnsThroughIteration(), num_rows_, padded_target_sizes_, report::rows, target_types_, thread_idx_, and UNREACHABLE.
Referenced by mergeResults().
ColumnarResults::ColumnarResults | ( | const std::shared_ptr< RowSetMemoryOwner > | row_set_mem_owner, |
const int8_t * | one_col_buffer, | ||
const size_t | num_rows, | ||
const SQLTypeInfo & | target_type, | ||
const size_t | executor_id, | ||
const size_t | thread_idx | ||
) |
Definition at line 389 of file ColumnarResults.cpp.
|
inlineprivate |
Definition at line 114 of file ColumnarResults.h.
|
private |
This function goes through all non-empty elements marked in the bitmap data structure and stores them back into the output column buffers. The output column buffers are compacted, without any holes in them.
TODO(Saman): if necessary, we can look into the distribution of non-empty entries and choose a different load-balanced strategy (assigning equal number of non-empties to each thread) as opposed to equal partitioning of the bitmap
Definition at line 1351 of file ColumnarResults.cpp.
References CHECK, CHECK_EQ, compactAndCopyEntriesWithoutTargetSkipping(), compactAndCopyEntriesWithTargetSkipping(), heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, isDirectColumnarConversionPossible(), and gpu_enabled::partial_sum().
Referenced by materializeAllColumnsGroupBy().
|
private |
This function takes a bitmap of non-empty entries within the result set's storage, then compacts and copies those contents back into the output column_buffers_. In this variation, all targets are assumed to be single-slot and thus can be directly columnarized.
Definition at line 1530 of file ColumnarResults.cpp.
References threading_serial::async(), CHECK, CHECK_EQ, executor_, g_enable_non_kernel_time_query_interrupt, heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, QueryExecutionError::hasErrorCode(), initAllConversionFunctions(), isDirectColumnarConversionPossible(), report::rows, and UNLIKELY.
Referenced by compactAndCopyEntries().
|
private |
This function takes a bitmap of non-empty entries within the result set's storage, then compacts and copies those contents back into the output column_buffers_. In this variation, multi-slot targets (e.g., AVG) are handled through the existing result set's iteration, but everything else is directly columnarized.
Definition at line 1406 of file ColumnarResults.cpp.
References threading_serial::async(), CHECK, CHECK_EQ, column_buffers_, executor_, g_enable_non_kernel_time_query_interrupt, ColumnBitmap::get(), heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, QueryExecutionError::hasErrorCode(), initAllConversionFunctions(), isDirectColumnarConversionPossible(), report::rows, target_types_, UNLIKELY, and writeBackCell().
Referenced by compactAndCopyEntries().
|
private |
Definition at line 1097 of file ColumnarResults.cpp.
References threading_serial::async(), CHECK, column_buffers_, isDirectColumnarConversionPossible(), heavyai::TableFunction, target_types_, and UNREACHABLE.
Referenced by materializeAllColumnsProjection(), and materializeAllColumnsTableFunction().
|
inline |
Definition at line 82 of file ColumnarResults.h.
References column_buffers_.
Referenced by ColumnFetcher::transferColumnIfNeeded().
|
inline |
Definition at line 86 of file ColumnarResults.h.
References CHECK_GE, CHECK_LT, and target_types_.
Referenced by ColumnFetcher::transferColumnIfNeeded().
|
private |
This function goes through all target types in the output, and chooses appropriate write and read functions per target. The goal is then to simply use these functions for each row and per target. Read functions are used to read each cell's data content (particular target in a row), and write functions are used to properly write back the cell's content into the output column buffers.
Definition at line 1917 of file ColumnarResults.cpp.
References CHECK, heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, initWriteFunctions(), and isDirectColumnarConversionPossible().
Referenced by compactAndCopyEntriesWithoutTargetSkipping(), and compactAndCopyEntriesWithTargetSkipping().
|
private |
Initializes a set of read functions to properly access the contents of the result set's storage buffer. Each particular read function is chosen based on the data type and data size used to store that target in the result set's storage buffer. These functions are then used for each row in the result set.
Definition at line 1819 of file ColumnarResults.cpp.
References CHECK, CHECK_EQ, heavyai::GroupByBaselineHash, anonymous_namespace{ColumnarResults.cpp}::invalid_read_func(), isDirectColumnarConversionPossible(), kDOUBLE, kFLOAT, target_types_, and UNREACHABLE.
|
private |
Initializes a set of write functions per target (i.e., column). The target types' logical sizes are used to select the correct write function per target. These functions are then used for every row in the result set.
Definition at line 1631 of file ColumnarResults.cpp.
References CHECK, heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, isDirectColumnarConversionPossible(), run_benchmark_import::result, target_types_, and UNREACHABLE.
Referenced by initAllConversionFunctions().
|
inline |
Definition at line 93 of file ColumnarResults.h.
References direct_columnar_conversion_.
Referenced by ColumnarResults(), compactAndCopyEntries(), compactAndCopyEntriesWithoutTargetSkipping(), compactAndCopyEntriesWithTargetSkipping(), copyAllNonLazyColumns(), initAllConversionFunctions(), initReadFunctions(), initWriteFunctions(), locateAndCountEntries(), materializeAllColumnsDirectly(), materializeAllColumnsGroupBy(), materializeAllColumnsProjection(), materializeAllColumnsTableFunction(), and materializeAllLazyColumns().
|
inline |
Definition at line 92 of file ColumnarResults.h.
References parallel_conversion_.
Referenced by materializeAllColumnsGroupBy(), and materializeAllColumnsThroughIteration().
|
private |
This function goes through all the keys in the result set and counts the total number of non-empty keys. It also stores the locations of the non-empty keys in a bitmap data structure for faster access later.
Definition at line 1278 of file ColumnarResults.cpp.
References threading_serial::async(), CHECK, CHECK_EQ, executor_, g_enable_non_kernel_time_query_interrupt, heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, QueryExecutionError::hasErrorCode(), isDirectColumnarConversionPossible(), report::rows, ColumnBitmap::set(), and UNLIKELY.
Referenced by materializeAllColumnsGroupBy().
|
private |
This function materializes all columns from the main storage and all appended storages and forms a single contiguous column for each output column. Each column is treated differently depending on whether or not it is lazily fetched.
NOTE: this function should only be used when the result set is columnar and completely compacted (e.g., in columnar projections).
Definition at line 1029 of file ColumnarResults.cpp.
References CHECK, heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, isDirectColumnarConversionPossible(), materializeAllColumnsGroupBy(), materializeAllColumnsProjection(), materializeAllColumnsTableFunction(), heavyai::Projection, heavyai::TableFunction, and UNREACHABLE.
Referenced by ColumnarResults().
|
private |
This function is to directly columnarize a result set for group by queries. Its main difference with the traditional alternative is that it directly reads non-empty entries from the result set, and then writes them into output column buffers, rather than using the result set's iterators.
Definition at line 1243 of file ColumnarResults.cpp.
References CHECK, compactAndCopyEntries(), cpu_threads(), heavyai::GroupByBaselineHash, heavyai::GroupByPerfectHash, isDirectColumnarConversionPossible(), isParallelConversion(), and locateAndCountEntries().
Referenced by materializeAllColumnsDirectly().
|
private |
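This function handles materialization for two types of columns in columnar projections: non-lazy columns (see copyAllNonLazyColumns()) and lazily fetched columns (see materializeAllLazyColumns()).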
Definition at line 1059 of file ColumnarResults.cpp.
References CHECK, copyAllNonLazyColumns(), isDirectColumnarConversionPossible(), materializeAllLazyColumns(), and heavyai::Projection.
Referenced by materializeAllColumnsDirectly().
|
private |
Definition at line 1075 of file ColumnarResults.cpp.
References CHECK, copyAllNonLazyColumns(), isDirectColumnarConversionPossible(), and heavyai::TableFunction.
Referenced by materializeAllColumnsDirectly().
|
private |
This function iterates through the result set (using the getRowAtNoTranslation and getNextRow family of functions) and writes back the results into output column buffers.
Definition at line 466 of file ColumnarResults.cpp.
References threading_serial::async(), column_buffers_, cpu_threads(), executor_, g_enable_non_kernel_time_query_interrupt, QueryExecutionError::hasErrorCode(), isParallelConversion(), makeIntervals(), num_rows_, report::rows, target_types_, UNLIKELY, and writeBackCell().
Referenced by ColumnarResults().
|
private |
For all lazy fetched columns, we should iterate through the column's content and properly materialize it.
This function is parallelized by dividing the total rows among all existing threads. Since there's no invalid element in the result set (e.g., columnar projections), the output buffer will have as many rows as there are in the result set, removing the need for atomically incrementing the output buffer position.
Definition at line 1153 of file ColumnarResults.cpp.
References threading_serial::async(), CHECK, CHECK_EQ, column_buffers_, cpu_threads(), executor_, g_enable_non_kernel_time_query_interrupt, QueryExecutionError::hasErrorCode(), isDirectColumnarConversionPossible(), makeIntervals(), report::rows, heavyai::TableFunction, target_types_, UNLIKELY, result_set::use_parallel_algorithms(), and writeBackCell().
Referenced by materializeAllColumnsProjection().
|
static |
Definition at line 418 of file ColumnarResults.cpp.
References gpu_enabled::accumulate(), CHECK_EQ, ColumnarResults(), logger::init(), padded_target_sizes_, run_benchmark_import::result, and target_types_.
Referenced by ColumnFetcher::getAllTableColumnFragments().
|
inline |
Definition at line 84 of file ColumnarResults.h.
References num_rows_.
Referenced by ColumnFetcher::transferColumnIfNeeded().
|
inlineprivate |
Definition at line 869 of file ColumnarResults.cpp.
References SQLTypeInfoLite::BIGINT, SQLTypeInfoLite::BOOLEAN, CHECK, SQLTypeInfoLite::DOUBLE, SQLTypeInfoLite::FLOAT, SQLTypeInfo::get_compression(), SQLTypeInfo::get_size(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), SQLTypeInfoLite::INT, SQLTypeInfo::is_array(), SQLTypeInfo::is_geometry(), SQLTypeInfo::is_text_encoding_none(), FlatBufferManager::isFlatBuffer(), kENCODING_NONE, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kPOINT, kPOLYGON, kTEXT, SQLTypeInfoLite::SMALLINT, SQLTypeInfoLite::subtype, SQLTypeInfoLite::TEXT, SQLTypeInfoLite::TINYINT, anonymous_namespace{ColumnarResults.cpp}::toBuffer(), SQLTypeInfo::toString(), UNREACHABLE, SQLTypeInfo::usesFlatBuffer(), writeBackCellGeoNestedArray(), writeBackCellGeoPoint(), and writeBackCellTextEncodingNone().
Referenced by compactAndCopyEntriesWithTargetSkipping(), materializeAllColumnsThroughIteration(), and materializeAllLazyColumns().
|
private |
A set of write functions to be used to directly write into final column_buffers_. The read_from_function is used to read from the input result set's storage. NOTE: currently only used for direct columnarizations.
Definition at line 980 of file ColumnarResults.cpp.
References column_buffers_, anonymous_namespace{ColumnarResults.cpp}::fixed_encoding_nullable_val(), and target_types_.
|
private |
Definition at line 994 of file ColumnarResults.cpp.
|
private |
Definition at line 1007 of file ColumnarResults.cpp.
|
protected |
Definition at line 110 of file ColumnarResults.h.
Referenced by ColumnarResults(), compactAndCopyEntriesWithTargetSkipping(), copyAllNonLazyColumns(), getColumnBuffers(), materializeAllColumnsThroughIteration(), materializeAllLazyColumns(), and writeBackCellDirect().
|
private |
Definition at line 205 of file ColumnarResults.h.
Referenced by isDirectColumnarConversionPossible().
|
private |
Definition at line 208 of file ColumnarResults.h.
Referenced by ColumnarResults(), compactAndCopyEntriesWithoutTargetSkipping(), compactAndCopyEntriesWithTargetSkipping(), locateAndCountEntries(), materializeAllColumnsThroughIteration(), and materializeAllLazyColumns().
|
protected |
Definition at line 111 of file ColumnarResults.h.
Referenced by ColumnarResults(), materializeAllColumnsThroughIteration(), and size().
|
private |
Definition at line 209 of file ColumnarResults.h.
Referenced by ColumnarResults(), and mergeResults().
|
private |
Definition at line 204 of file ColumnarResults.h.
Referenced by isParallelConversion().
|
private |
Definition at line 203 of file ColumnarResults.h.
Referenced by ColumnarResults(), compactAndCopyEntriesWithTargetSkipping(), copyAllNonLazyColumns(), getColumnType(), initReadFunctions(), initWriteFunctions(), materializeAllColumnsThroughIteration(), materializeAllLazyColumns(), mergeResults(), and writeBackCellDirect().
|
private |
Definition at line 207 of file ColumnarResults.h.
Referenced by ColumnarResults().