OmniSciDB
a5dc49c757
|
Typedefs | |
using | OptionsMap = std::map< std::string, std::string, std::less<>> |
using | SampleRows = std::vector< std::vector< std::string >> |
using | FirstLineByFilePath = std::map< std::string, std::string > |
using | FileRegions = std::vector< FileRegion > |
using | ChunkToBufferMap = std::map< ChunkKey, AbstractBuffer * > |
using | FilePathAndRowGroup = std::pair< std::string, int32_t > |
using | RejectedRowIndices = std::set< int64_t > |
using | InvalidRowGroupIndices = std::set< int64_t > |
template<typename T > | |
using | DateInSecondsBoundsValidator = BaseDateBoundsValidator< T, true > |
template<typename T > | |
using | DateInDaysBoundsValidator = BaseDateBoundsValidator< T, false > |
using | UniqueReaderPtr = std::unique_ptr< parquet::arrow::FileReader > |
using | ReaderPtr = parquet::arrow::FileReader * |
template<typename V , typename T , T conversion_denominator, typename NullType = V> | |
using | ParquetDateInSecondsFromTimestampEncoder = ParquetTimestampEncoder< V, T, conversion_denominator, NullType > |
using | S3ObjectComparator = std::function< bool(const Aws::S3::Model::Object &, const Aws::S3::Model::Object &)> |
Functions | |
size_t | get_num_threads (const ForeignTable &table) |
ParseFileRegionResult | parse_file_regions (const FileRegions &file_regions, const size_t start_index, const size_t end_index, FileReader &file_reader, ParseBufferRequest &parse_file_request, const std::map< int, Chunk_NS::Chunk > &column_id_to_chunk_map, const TextFileBufferParser &parser) |
size_t | num_rows_to_process (const size_t start_row_index, const size_t max_fragment_size, const size_t rows_remaining) |
std::vector< size_t > | partition_by_fragment (const size_t start_row_index, const size_t max_fragment_size, const size_t buffer_row_count) |
std::optional< ParseBufferRequest > | get_next_scan_request (MetadataScanMultiThreadingParams &multi_threading_params) |
void | add_file_region (std::map< int, FileRegions > &fragment_id_to_file_regions_map, int fragment_id, size_t first_row_index, const ParseBufferResult &result, const std::string &file_path) |
void | update_stats (Encoder *encoder, const SQLTypeInfo &column_type, DataBlockPtr data_block, const size_t row_count) |
void | cache_blocks (std::map< ChunkKey, Chunk_NS::Chunk > &cached_chunks, DataBlockPtr data_block, size_t row_count, ChunkKey &chunk_key, const ColumnDescriptor *column, bool is_first_block, bool disable_cache) |
void | append_data_block_to_chunk (const foreign_storage::IterativeFileScanParameters &file_scan_param, DataBlockPtr data_block, size_t row_count, const int column_id, const ColumnDescriptor *column, const size_t element_count_required) |
std::pair< std::map< int, DataBlockPtr >, std::map< int, DataBlockPtr > > | partition_data_blocks (const std::map< int, const ColumnDescriptor * > &column_by_id, const std::map< int, DataBlockPtr > &data_blocks) |
void | update_delete_buffer (const ParseBufferRequest &request, const ParseBufferResult &result, const foreign_storage::IterativeFileScanParameters &file_scan_param, const size_t start_position_in_fragment) |
void | populate_chunks_using_data_blocks (MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const foreign_storage::IterativeFileScanParameters &file_scan_param, const size_t expected_current_element_count) |
void | process_data_blocks (MetadataScanMultiThreadingParams &multi_threading_params, int fragment_id, const ParseBufferRequest &request, ParseBufferResult &result, std::map< int, const ColumnDescriptor * > &column_by_id, std::map< int, FileRegions > &fragment_id_to_file_regions_map) |
void | add_request_to_pool (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request) |
void | scan_metadata (MetadataScanMultiThreadingParams &multi_threading_params, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const TextFileBufferParser &parser) |
ParseBufferRequest | get_request_from_pool (MetadataScanMultiThreadingParams &multi_threading_params) |
bool | request_pool_non_empty (MetadataScanMultiThreadingParams &multi_threading_params) |
void | defer_scan_request (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request) |
void | dispatch_all_deferred_requests (MetadataScanMultiThreadingParams &multi_threading_params) |
void | dispatch_scan_request (MetadataScanMultiThreadingParams &multi_threading_params, ParseBufferRequest &request) |
void | populate_chunks (MetadataScanMultiThreadingParams &multi_threading_params, std::map< int, FileRegions > &fragment_id_to_file_regions_map, const TextFileBufferParser &parser, foreign_storage::IterativeFileScanParameters &file_scan_param) |
void | resize_buffer_if_needed (std::unique_ptr< char[]> &buffer, size_t &buffer_size, const size_t alloc_size) |
void | reset_multithreading_params (foreign_storage::MetadataScanMultiThreadingParams &multi_threading_params) |
void | dispatch_scan_requests (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call, int &iterative_scan_last_fragment_id) |
void | dispatch_scan_requests_with_exception_handling (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call, int &iterative_scan_last_fragment_id) |
void | dispatch_scan_requests_with_exception_handling (const foreign_storage::ForeignTable *table, const size_t &buffer_size, const std::string &file_path, FileReader &file_reader, const import_export::CopyParams &copy_params, MetadataScanMultiThreadingParams &multi_threading_params, size_t &first_row_index_in_buffer, size_t &current_file_offset, const TextFileBufferParser &parser, const foreign_storage::IterativeFileScanParameters *file_scan_param, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer &iterative_residual_buffer, const bool is_first_file_scan_call) |
void | set_value (rapidjson::Value &json_val, const FileRegion &file_region, rapidjson::Document::AllocatorType &allocator) |
void | get_value (const rapidjson::Value &json_val, FileRegion &file_region) |
std::optional< SQLTypes > | detect_geo_type (const SampleRows &sample_rows, size_t column_index) |
std::tuple< std::unique_ptr < foreign_storage::ForeignServer > , std::unique_ptr < foreign_storage::UserMapping > , std::unique_ptr < foreign_storage::ForeignTable > > | create_proxy_fsi_objects (const std::string &copy_from_source, const import_export::CopyParams &copy_params, const int db_id, const TableDescriptor *table, const int32_t user_id) |
Create proxy fsi objects for use outside FSI. More... | |
std::tuple< std::unique_ptr < foreign_storage::ForeignServer > , std::unique_ptr < foreign_storage::UserMapping > , std::unique_ptr < foreign_storage::ForeignTable > > | create_proxy_fsi_objects (const std::string &copy_from_source, const import_export::CopyParams &copy_params, const TableDescriptor *table) |
Create proxy fsi objects for use outside FSI. NOTE: parameters mirror function above. More... | |
void | validate_regex_parser_options (const import_export::CopyParams &copy_params) |
bool | is_valid_source_type (const import_export::CopyParams &copy_params) |
std::string | bool_to_option_value (const bool value) |
void | throw_unexpected_number_of_items (const size_t &num_expected, const size_t &num_loaded, const std::string &item_type) |
void | throw_removed_row_in_result_set_error (const std::string &select_statement) |
void | throw_removed_row_in_file_error (const std::string &file_path) |
void | throw_removed_file_error (const std::string &file_path) |
void | throw_number_of_columns_mismatch_error (size_t num_table_cols, size_t num_file_cols, const std::string &file_path) |
void | throw_file_access_error (const std::string &file_path, const std::string &message) |
void | throw_file_not_found_error (const std::string &file_path) |
void | throw_s3_compressed_mime_type (const std::string &file_path, const std::string &mime_type) |
void | throw_s3_compressed_extension (const std::string &file_path, const std::string &ext_type) |
bool | is_append_table_chunk_key (const ChunkKey &chunk_key) |
bool | set_comp (const ChunkKey &left, const ChunkKey &right) |
std::vector< ChunkKey > | get_column_key_vec (const ChunkKey &destination_chunk_key) |
std::set< ChunkKey > | get_column_key_set (const ChunkKey &destination_chunk_key) |
size_t | get_max_chunk_size (const ChunkKey &key) |
bool | contains_fragment_key (const std::set< ChunkKey > &key_set, const ChunkKey &target_key) |
bool | is_table_enabled_on_node (const ChunkKey &key) |
void | refresh_foreign_table_unlocked (Catalog_Namespace::Catalog &catalog, const ForeignTable &td, const bool evict_cached_entries) |
void | refresh_foreign_table (Catalog_Namespace::Catalog &catalog, const std::string &table_name, const bool evict_cached_entries) |
void | init_chunk_for_column (const ChunkKey &chunk_key, const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers, Chunk_NS::Chunk &chunk) |
std::shared_ptr< ChunkMetadata > | get_placeholder_metadata (const SQLTypeInfo &type, size_t num_elements) |
const foreign_storage::ForeignTable & | get_foreign_table_for_key (const ChunkKey &key) |
bool | is_system_table_chunk_key (const ChunkKey &chunk_key) |
bool | is_replicated_table_chunk_key (const ChunkKey &chunk_key) |
bool | is_shardable_key (const ChunkKey &key) |
bool | fragment_maps_to_leaf (const ChunkKey &key) |
bool | key_does_not_shard_to_leaf (const ChunkKey &key) |
template<typename T > | |
auto | partition_for_threads (const std::set< T > &items, size_t max_threads) |
template<typename T > | |
auto | partition_for_threads (const std::vector< T > &items, size_t max_threads) |
template<typename Container > | |
std::vector< std::future< void > > | create_futures_for_workers (const Container &items, size_t max_threads, std::function< void(const Container &)> lambda) |
template<typename T > | |
ArrayDatum | encode_as_array_datum (const std::vector< T > &data) |
std::string | get_db_name (int32_t db_id) |
std::string | get_table_name (int32_t db_id, int32_t table_id) |
void | set_node_name (std::map< std::string, import_export::TypedImportBuffer * > &import_buffers) |
void | set_value (rapidjson::Value &json_val, const RowGroupInterval &value, rapidjson::Document::AllocatorType &allocator) |
void | get_value (const rapidjson::Value &json_val, RowGroupInterval &value) |
template<typename D , typename T > | |
bool | check_bounds (const T &value) |
template<typename D > | |
std::string | datetime_to_string (const D &timestamp, const SQLTypeInfo &column_type) |
void | throw_parquet_metadata_out_of_bounds_error (const std::string &min_value, const std::string &max_value, const std::string &encountered_value) |
UniqueReaderPtr | open_parquet_table (const std::string &file_path, std::shared_ptr< arrow::fs::FileSystem > &file_system) |
std::pair< int, int > | get_parquet_table_size (const ReaderPtr &reader) |
const parquet::ColumnDescriptor * | get_column_descriptor (const parquet::arrow::FileReader *reader, const int logical_column_index) |
parquet::Type::type | get_physical_type (ReaderPtr &reader, const int logical_column_index) |
void | validate_equal_column_descriptor (const parquet::ColumnDescriptor *reference_descriptor, const parquet::ColumnDescriptor *new_descriptor, const std::string &reference_file_path, const std::string &new_file_path) |
std::unique_ptr< ColumnDescriptor > | get_sub_type_column_descriptor (const ColumnDescriptor *column) |
std::shared_ptr < parquet::Statistics > | validate_and_get_column_metadata_statistics (const parquet::ColumnChunkMetaData *column_metadata) |
std::vector < Aws::S3::Model::Object > | s3_objects_filter_sort_files (const std::vector< Aws::S3::Model::Object > &file_paths, const shared::FilePathOptions &options) |
template<typename V , std::enable_if_t< std::is_integral< V >::value, int > = 0> | |
V | get_null_value () |
template<typename D , std::enable_if_t< std::is_integral< D >::value, int > = 0> | |
std::pair< D, D > | get_min_max_bounds () |
void | populate_string_dictionary (int32_t table_id, int32_t col_id, int32_t db_id) |
void | validate_non_foreign_table_write (const TableDescriptor *table_descriptor) |
Variables | |
constexpr const char * | kDeletedValueIndicator {"<DELETED>"} |
using foreign_storage::ChunkToBufferMap = typedef std::map<ChunkKey, AbstractBuffer*> |
Definition at line 31 of file ForeignDataWrapper.h.
using foreign_storage::DateInDaysBoundsValidator = typedef BaseDateBoundsValidator<T, false> |
Definition at line 277 of file ParquetMetadataValidator.h.
using foreign_storage::DateInSecondsBoundsValidator = typedef BaseDateBoundsValidator<T, true> |
Definition at line 274 of file ParquetMetadataValidator.h.
using foreign_storage::FilePathAndRowGroup = typedef std::pair<std::string, int32_t> |
Definition at line 40 of file ParquetDataWrapper.h.
using foreign_storage::FileRegions = typedef std::vector<FileRegion> |
Definition at line 60 of file FileRegions.h.
using foreign_storage::FirstLineByFilePath = typedef std::map<std::string, std::string> |
Definition at line 37 of file FileReader.h.
using foreign_storage::InvalidRowGroupIndices = typedef std::set<int64_t> |
Definition at line 124 of file ParquetEncoder.h.
using foreign_storage::OptionsMap = typedef std::map<std::string, std::string, std::less<>> |
Definition at line 30 of file OptionsContainer.h.
using foreign_storage::ParquetDateInSecondsFromTimestampEncoder = typedef ParquetTimestampEncoder<V, T, conversion_denominator, NullType> |
Definition at line 89 of file ParquetTimestampEncoder.h.
using foreign_storage::ReaderPtr = typedef parquet::arrow::FileReader* |
Definition at line 33 of file ParquetShared.h.
using foreign_storage::RejectedRowIndices = typedef std::set<int64_t> |
Definition at line 28 of file ParquetEncoder.h.
using foreign_storage::S3ObjectComparator = typedef std::function<bool(const Aws::S3::Model::Object&, const Aws::S3::Model::Object&)> |
Definition at line 16 of file S3FilePathUtil.h.
using foreign_storage::SampleRows = typedef std::vector<std::vector<std::string>> |
Definition at line 26 of file DataPreview.h.
using foreign_storage::UniqueReaderPtr = typedef std::unique_ptr<parquet::arrow::FileReader> |
Definition at line 32 of file ParquetShared.h.
void foreign_storage::add_file_region | ( | std::map< int, FileRegions > & | fragment_id_to_file_regions_map, |
int | fragment_id, | ||
size_t | first_row_index, | ||
const ParseBufferResult & | result, | ||
const std::string & | file_path | ||
) |
Creates a new file region based on metadata from parsed file buffers and adds the new region to the fragment id to file regions map.
Definition at line 562 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::ParseBufferResult::row_count, and foreign_storage::ParseBufferResult::row_offsets.
Referenced by populate_chunks_using_data_blocks(), and process_data_blocks().
void foreign_storage::add_request_to_pool | ( | MetadataScanMultiThreadingParams & | multi_threading_params, |
ParseBufferRequest & | request | ||
) |
Adds the request object for a processed request back to the request pool for reuse in subsequent requests.
Definition at line 850 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::MetadataScanMultiThreadingParams::request_pool_condition, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.
Referenced by dispatch_scan_requests(), populate_chunks(), and scan_metadata().
void foreign_storage::append_data_block_to_chunk | ( | const foreign_storage::IterativeFileScanParameters & | file_scan_param, |
DataBlockPtr | data_block, | ||
size_t | row_count, | ||
const int | column_id, | ||
const ColumnDescriptor * | column, | ||
const size_t | element_count_required | ||
) |
Definition at line 649 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::IterativeFileScanParameters::column_id_to_chunk_map, shared::get_from_map(), foreign_storage::IterativeFileScanParameters::getChunkConditionalVariable(), and foreign_storage::IterativeFileScanParameters::getChunkMutex().
Referenced by populate_chunks_using_data_blocks().
std::string foreign_storage::bool_to_option_value | ( | const bool | value | ) |
Definition at line 131 of file ForeignDataWrapperFactory.cpp.
Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy().
void foreign_storage::cache_blocks | ( | std::map< ChunkKey, Chunk_NS::Chunk > & | cached_chunks, |
DataBlockPtr | data_block, | ||
size_t | row_count, | ||
ChunkKey & | chunk_key, | ||
const ColumnDescriptor * | column, | ||
bool | is_first_block, | ||
bool | disable_cache | ||
) |
Definition at line 608 of file AbstractTextFileDataWrapper.cpp.
References CHECK, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, ColumnDescriptor::columnType, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::get_cache_if_enabled(), Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), SQLTypeInfo::is_varlen_indeed(), and key_does_not_shard_to_leaf().
Referenced by process_data_blocks().
|
inline |
Definition at line 32 of file ParquetMetadataValidator.h.
bool foreign_storage::contains_fragment_key | ( | const std::set< ChunkKey > & | key_set, |
const ChunkKey & | target_key | ||
) |
std::vector<std::future<void> > foreign_storage::create_futures_for_workers | ( | const Container & | items, |
size_t | max_threads, | ||
std::function< void(const Container &)> | lambda | ||
) |
Definition at line 74 of file FsiChunkUtils.h.
References threading_serial::async(), and partition_for_threads().
Referenced by import_export::ForeignDataImporter::importGeneralS3(), foreign_storage::ParquetDataWrapper::populateChunkBuffers(), and foreign_storage::LazyParquetChunkLoader::previewFiles().
std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > foreign_storage::create_proxy_fsi_objects | ( | const std::string & | copy_from_source, |
const import_export::CopyParams & | copy_params, | ||
const int | db_id, | ||
const TableDescriptor * | table, | ||
const int32_t | user_id | ||
) |
Create proxy fsi objects for use outside FSI.
copy_from_source | - the source that will be copied |
copy_params | - CopyParams that specify parameters around use case |
db_id | - db id of database in use case |
table | - the table descriptor of the table in use case |
user_id | - the user id of user in use case |
Definition at line 61 of file ForeignDataWrapperFactory.cpp.
References CHECK, foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(), foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), and foreign_storage::ForeignDataWrapperFactory::createUserMappingProxyIfApplicable().
Referenced by create_proxy_fsi_objects(), and import_export::ForeignDataImporter::importGeneral().
std::tuple< std::unique_ptr< foreign_storage::ForeignServer >, std::unique_ptr< foreign_storage::UserMapping >, std::unique_ptr< foreign_storage::ForeignTable > > foreign_storage::create_proxy_fsi_objects | ( | const std::string & | copy_from_source, |
const import_export::CopyParams & | copy_params, | ||
const TableDescriptor * | table | ||
) |
Create proxy fsi objects for use outside FSI. NOTE: parameters mirror function above.
Definition at line 93 of file ForeignDataWrapperFactory.cpp.
References create_proxy_fsi_objects().
|
inline |
Definition at line 38 of file ParquetMetadataValidator.h.
References Datum::bigintval, CHECK, DatumToString(), SQLTypeInfo::is_date(), and SQLTypeInfo::is_timestamp().
Referenced by foreign_storage::TimestampBoundsValidator< T >::getMinMaxBoundsAsStrings(), foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::getMinMaxBoundsAsStrings(), foreign_storage::TimestampBoundsValidator< T >::validateValue(), and foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::validateValue().
void foreign_storage::defer_scan_request | ( | MetadataScanMultiThreadingParams & | multi_threading_params, |
ParseBufferRequest & | request | ||
) |
Definition at line 943 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, and foreign_storage::MetadataScanMultiThreadingParams::deferred_requests_mutex.
Referenced by populate_chunks().
std::optional< SQLTypes > foreign_storage::detect_geo_type | ( | const SampleRows & | sample_rows, |
size_t | column_index | ||
) |
Definition at line 22 of file DataPreview.cpp.
References CHECK_EQ, CHECK_LT, kLINESTRING, kMULTILINESTRING, kMULTIPOINT, kMULTIPOLYGON, kNULLT, kPOINT, kPOLYGON, and UNREACHABLE.
Referenced by foreign_storage::LazyParquetChunkLoader::previewFiles().
void foreign_storage::dispatch_all_deferred_requests | ( | MetadataScanMultiThreadingParams & | multi_threading_params | ) |
Definition at line 953 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, foreign_storage::MetadataScanMultiThreadingParams::deferred_requests_mutex, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.
Referenced by dispatch_scan_requests().
void foreign_storage::dispatch_scan_request | ( | MetadataScanMultiThreadingParams & | multi_threading_params, |
ParseBufferRequest & | request | ||
) |
Dispatches a new metadata scan request by adding the request to the pending requests queue to be consumed by a worker thread.
Definition at line 974 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.
Referenced by dispatch_scan_requests().
void foreign_storage::dispatch_scan_requests | ( | const foreign_storage::ForeignTable * | table, |
const size_t & | buffer_size, | ||
const std::string & | file_path, | ||
FileReader & | file_reader, | ||
const import_export::CopyParams & | copy_params, | ||
MetadataScanMultiThreadingParams & | multi_threading_params, | ||
size_t & | first_row_index_in_buffer, | ||
size_t & | current_file_offset, | ||
const TextFileBufferParser & | parser, | ||
const foreign_storage::IterativeFileScanParameters * | file_scan_param, | ||
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer & | iterative_residual_buffer, | ||
const bool | is_first_file_scan_call, | ||
int & | iterative_scan_last_fragment_id | ||
) |
Reads from a text file iteratively and dispatches metadata scan requests that are processed by worker threads.
Definition at line 1086 of file AbstractTextFileDataWrapper.cpp.
References add_request_to_pool(), foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::alloc_size, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, dispatch_all_deferred_requests(), dispatch_scan_request(), foreign_storage::TextFileBufferParser::findRowEndPosition(), foreign_storage::IterativeFileScanParameters::fragment_id, get_request_from_pool(), foreign_storage::FileReader::getCurrentFilePath(), foreign_storage::FileReader::isScanFinished(), import_export::CopyParams::line_delim, TableDescriptor::maxFragRows, foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::no_deferred_requests(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, foreign_storage::FileReader::read(), request_pool_non_empty(), foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_buffer_alloc_size, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_buffer_size, foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer::residual_data, and resize_buffer_if_needed().
Referenced by dispatch_scan_requests_with_exception_handling().
void foreign_storage::dispatch_scan_requests_with_exception_handling | ( | const foreign_storage::ForeignTable * | table, |
const size_t & | buffer_size, | ||
const std::string & | file_path, | ||
FileReader & | file_reader, | ||
const import_export::CopyParams & | copy_params, | ||
MetadataScanMultiThreadingParams & | multi_threading_params, | ||
size_t & | first_row_index_in_buffer, | ||
size_t & | current_file_offset, | ||
const TextFileBufferParser & | parser, | ||
const foreign_storage::IterativeFileScanParameters * | file_scan_param, | ||
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer & | iterative_residual_buffer, | ||
const bool | is_first_file_scan_call, | ||
int & | iterative_scan_last_fragment_id | ||
) |
Definition at line 1224 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::MetadataScanMultiThreadingParams::continue_processing, dispatch_scan_requests(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.
Referenced by dispatch_scan_requests_with_exception_handling(), foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan(), and foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().
void foreign_storage::dispatch_scan_requests_with_exception_handling | ( | const foreign_storage::ForeignTable * | table, |
const size_t & | buffer_size, | ||
const std::string & | file_path, | ||
FileReader & | file_reader, | ||
const import_export::CopyParams & | copy_params, | ||
MetadataScanMultiThreadingParams & | multi_threading_params, | ||
size_t & | first_row_index_in_buffer, | ||
size_t & | current_file_offset, | ||
const TextFileBufferParser & | parser, | ||
const foreign_storage::IterativeFileScanParameters * | file_scan_param, | ||
foreign_storage::AbstractTextFileDataWrapper::ResidualBuffer & | iterative_residual_buffer, | ||
const bool | is_first_file_scan_call | ||
) |
Definition at line 1264 of file AbstractTextFileDataWrapper.cpp.
References dispatch_scan_requests_with_exception_handling().
|
inline |
Definition at line 32 of file GeospatialEncoder.h.
References heavydb.dtypes::T.
Referenced by foreign_storage::GeospatialEncoder::processGeoElement(), and foreign_storage::GeospatialEncoder::processNullGeoElement().
bool foreign_storage::fragment_maps_to_leaf | ( | const ChunkKey & | key | ) |
Definition at line 128 of file FsiChunkUtils.cpp.
References CHECK, g_distributed_leaf_idx, g_distributed_num_leaves, get_fragment(), dist::is_aggregator(), and dist::is_distributed().
Referenced by anonymous_namespace{ForeignStorageMgr.cpp}::filter_metadata_by_leaf(), foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), key_does_not_shard_to_leaf(), and foreign_storage::CachingForeignStorageMgr::refreshTableInCache().
const parquet::ColumnDescriptor * foreign_storage::get_column_descriptor | ( | const parquet::arrow::FileReader * | reader, |
const int | logical_column_index | ||
) |
Definition at line 46 of file ParquetShared.cpp.
Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().
Referenced by foreign_storage::CachingForeignStorageMgr::fetchBuffer(), foreign_storage::CachingForeignStorageMgr::getOptionalKeysWithinSizeLimit(), foreign_storage::CachingForeignStorageMgr::getRequiredBuffersSize(), and foreign_storage::CachingForeignStorageMgr::refreshChunksInCacheByFragment().
std::string foreign_storage::get_db_name | ( | int32_t | db_id | ) |
Definition at line 31 of file InternalSystemDataWrapper.cpp.
References Catalog_Namespace::DBMetadata::dbName, Catalog_Namespace::SysCatalog::instance(), and kDeletedValueIndicator.
Referenced by foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_dashboards(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_permissions(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_tables(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::populate_import_buffers_for_catalog_users(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().
const foreign_storage::ForeignTable & foreign_storage::get_foreign_table_for_key | ( | const ChunkKey & | key | ) |
Definition at line 101 of file FsiChunkUtils.cpp.
References CHECK, get_table_prefix(), Catalog_Namespace::SysCatalog::getCatalog(), and Catalog_Namespace::SysCatalog::instance().
Referenced by is_append_table_chunk_key(), foreign_storage::anonymous_namespace{CachingForeignStorageMgr.cpp}::is_in_memory_system_table_chunk_key(), is_replicated_table_chunk_key(), and is_system_table_chunk_key().
size_t foreign_storage::get_max_chunk_size | ( | const ChunkKey & | key | ) |
Referenced by foreign_storage::CachingForeignStorageMgr::getBufferSize().
|
inline |
Definition at line 31 of file SharedMetadataValidator.h.
std::optional<ParseBufferRequest> foreign_storage::get_next_scan_request | ( | MetadataScanMultiThreadingParams & | multi_threading_params | ) |
Gets the next metadata scan request object from the pending requests queue. A null optional is returned if there are no further requests to be processed.
Definition at line 539 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::MetadataScanMultiThreadingParams::continue_processing, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests_condition, and foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex.
Referenced by populate_chunks(), and scan_metadata().
|
inline |
Definition at line 21 of file SharedMetadataValidator.h.
size_t foreign_storage::get_num_threads | ( | const ForeignTable & | table | ) |
Definition at line 17 of file AbstractFileStorageDataWrapper.cpp.
References foreign_storage::OptionsContainer::getOption(), import_export::num_import_threads(), and foreign_storage::AbstractFileStorageDataWrapper::THREADS_KEY.
Referenced by foreign_storage::LazyParquetChunkLoader::metadataScan(), foreign_storage::ParquetDataWrapper::populateChunkBuffers(), and foreign_storage::LazyParquetChunkLoader::previewFiles().
std::pair< int, int > foreign_storage::get_parquet_table_size | ( | const ReaderPtr & | reader | ) |
Definition at line 39 of file ParquetShared.cpp.
Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), foreign_storage::LazyParquetChunkLoader::loadRowGroups(), and foreign_storage::LazyParquetChunkLoader::metadataScan().
parquet::Type::type foreign_storage::get_physical_type | ( | ReaderPtr & | reader, |
const int | logical_column_index | ||
) |
Definition at line 52 of file ParquetShared.cpp.
Referenced by anonymous_namespace{ArrowResultSetConverter.cpp}::get_arrow_type(), and ArrowResultSetConverter::initializeColumnBuilder().
std::shared_ptr< ChunkMetadata > foreign_storage::get_placeholder_metadata | ( | const SQLTypeInfo & | type, |
size_t | num_elements | ||
) |
Definition at line 77 of file FsiChunkUtils.cpp.
References SQLTypeInfo::get_elem_type(), SQLTypeInfo::get_size(), Data_Namespace::AbstractBuffer::getEncoder(), Encoder::getMetadata(), Data_Namespace::AbstractBuffer::initEncoder(), SQLTypeInfo::is_array(), and SQLTypeInfo::is_varlen_indeed().
Referenced by foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::add_placeholder_metadata(), foreign_storage::InternalSystemDataWrapper::populateChunkMetadata(), and foreign_storage::AbstractTextFileDataWrapper::updateRolledOffChunks().
ParseBufferRequest foreign_storage::get_request_from_pool | ( | MetadataScanMultiThreadingParams & | multi_threading_params | ) |
Gets a request from the metadata scan request pool.
Definition at line 918 of file AbstractTextFileDataWrapper.cpp.
References CHECK, foreign_storage::MetadataScanMultiThreadingParams::request_pool, foreign_storage::MetadataScanMultiThreadingParams::request_pool_condition, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.
Referenced by dispatch_scan_requests().
std::unique_ptr< ColumnDescriptor > foreign_storage::get_sub_type_column_descriptor | ( | const ColumnDescriptor * | column | ) |
Definition at line 76 of file ParquetShared.cpp.
References ColumnDescriptor::columnId, ColumnDescriptor::columnName, ColumnDescriptor::columnType, ColumnDescriptor::db_id, SQLTypeInfo::get_elem_type(), SQLTypeInfo::set_size(), and ColumnDescriptor::tableId.
Referenced by foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::create_parquet_array_encoder(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_allowed_mapping().
std::string foreign_storage::get_table_name | ( | int32_t | db_id, |
int32_t | table_id | ||
) |
Definition at line 42 of file InternalSystemDataWrapper.cpp.
References CHECK, Catalog_Namespace::SysCatalog::getCatalog(), Catalog_Namespace::SysCatalog::instance(), and kDeletedValueIndicator.
Referenced by anonymous_namespace{Execute.cpp}::checkWorkUnitWatchdog(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().
void foreign_storage::get_value | ( | const rapidjson::Value & | json_val, |
FileRegion & | file_region | ||
) |
Definition at line 44 of file CsvShared.cpp.
References CHECK, foreign_storage::FileRegion::first_row_file_offset, foreign_storage::FileRegion::first_row_index, json_utils::get_value_from_object(), foreign_storage::FileRegion::region_size, and foreign_storage::FileRegion::row_count.
void foreign_storage::get_value | ( | const rapidjson::Value & | json_val, |
RowGroupInterval & | value | ||
) |
Definition at line 680 of file ParquetDataWrapper.cpp.
References CHECK, foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, json_utils::get_value_from_object(), and foreign_storage::RowGroupInterval::start_index.
void foreign_storage::init_chunk_for_column | ( | const ChunkKey & | chunk_key, |
const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> & | chunk_metadata_map, | ||
const std::map< ChunkKey, AbstractBuffer * > & | buffers, | ||
Chunk_NS::Chunk & | chunk | ||
) |
Definition at line 21 of file FsiChunkUtils.cpp.
References CHECK, CHECK_EQ, CHUNK_KEY_COLUMN_IDX, CHUNK_KEY_DB_IDX, CHUNK_KEY_TABLE_IDX, Catalog_Namespace::SysCatalog::getCatalog(), Chunk_NS::Chunk::initEncoder(), Catalog_Namespace::SysCatalog::instance(), Data_Namespace::AbstractBuffer::reserve(), Chunk_NS::Chunk::setBuffer(), Chunk_NS::Chunk::setColumnDesc(), Chunk_NS::Chunk::setIndexBuffer(), Chunk_NS::Chunk::setPinnable(), Data_Namespace::AbstractBuffer::size(), and UNREACHABLE.
Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMapForColumns().
bool foreign_storage::is_append_table_chunk_key | ( | const ChunkKey & | chunk_key | ) |
Definition at line 118 of file FsiChunkUtils.cpp.
References get_foreign_table_for_key(), and foreign_storage::ForeignTable::isAppendMode().
Referenced by foreign_storage::CachingForeignStorageMgr::refreshTableInCache().
bool foreign_storage::is_replicated_table_chunk_key | ( | const ChunkKey & | chunk_key | ) |
Definition at line 114 of file FsiChunkUtils.cpp.
References get_foreign_table_for_key(), and table_is_replicated().
Referenced by is_shardable_key().
bool foreign_storage::is_shardable_key | ( | const ChunkKey & | key | ) |
Definition at line 122 of file FsiChunkUtils.cpp.
References dist::is_aggregator(), dist::is_distributed(), is_replicated_table_chunk_key(), and is_system_table_chunk_key().
Referenced by anonymous_namespace{ForeignStorageMgr.cpp}::filter_metadata_by_leaf(), foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecForKeyPrefix(), key_does_not_shard_to_leaf(), and foreign_storage::CachingForeignStorageMgr::refreshTableInCache().
bool foreign_storage::is_system_table_chunk_key | ( | const ChunkKey & | chunk_key | ) |
Definition at line 110 of file FsiChunkUtils.cpp.
References get_foreign_table_for_key(), and TableDescriptor::is_system_table.
Referenced by is_shardable_key().
bool foreign_storage::is_table_enabled_on_node | ( | const ChunkKey & | key | ) |
Referenced by foreign_storage::CachingForeignStorageMgr::getChunkMetadataVecFromDataWrapper().
bool foreign_storage::is_valid_source_type | ( | const import_export::CopyParams & | copy_params | ) |
Verify if source_type is valid.
Definition at line 122 of file ForeignDataWrapperFactory.cpp.
References import_export::kDelimitedFile, import_export::kParquetFile, import_export::kRegexParsedFile, and import_export::CopyParams::source_type.
Referenced by foreign_storage::ForeignDataWrapperFactory::createForeignServerProxy(), foreign_storage::ForeignDataWrapperFactory::createForeignTableProxy(), and import_export::ForeignDataImporter::importGeneral().
bool foreign_storage::key_does_not_shard_to_leaf | ( | const ChunkKey & | key | ) |
Definition at line 135 of file FsiChunkUtils.cpp.
References fragment_maps_to_leaf(), and is_shardable_key().
Referenced by cache_blocks(), populate_string_dictionary(), and anonymous_namespace{RelAlgExecutor.cpp}::set_parallelism_hints().
size_t foreign_storage::num_rows_to_process | ( | const size_t | start_row_index, |
const size_t | max_fragment_size, | ||
const size_t | rows_remaining | ||
) |
Number of rows to process given the current position defined by start_row_index, the max fragment size and the number of rows left to process.
Definition at line 493 of file AbstractTextFileDataWrapper.cpp.
Referenced by QueryExecutionContext::launchCpuCode(), and populate_chunks().
UniqueReaderPtr foreign_storage::open_parquet_table | ( | const std::string & | file_path, |
std::shared_ptr< arrow::fs::FileSystem > & | file_system | ||
) |
Definition at line 26 of file ParquetShared.cpp.
Referenced by foreign_storage::FileReaderMap::getOrInsert(), foreign_storage::FileReaderMap::insert(), and foreign_storage::LazyParquetChunkLoader::loadRowGroups().
ParseFileRegionResult foreign_storage::parse_file_regions | ( | const FileRegions & | file_regions, |
const size_t | start_index, | ||
const size_t | end_index, | ||
FileReader & | file_reader, | ||
ParseBufferRequest & | parse_file_request, | ||
const std::map< int, Chunk_NS::Chunk > & | column_id_to_chunk_map, | ||
const TextFileBufferParser & | parser | ||
) |
Parses a set of file regions given a handle to the file and range of indexes for the file regions to be parsed.
Definition at line 215 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer, foreign_storage::ParseBufferRequest::buffer_size, CHECK, CHECK_EQ, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, DEBUG_TIMER, foreign_storage::ParseBufferRequest::end_pos, foreign_storage::ParseBufferRequest::file_offset, foreign_storage::ParseFileRegionResult::file_offset, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::getTableName(), foreign_storage::TextFileBufferParser::parseBuffer(), foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::FileReader::readRegion(), foreign_storage::ParseBufferResult::rejected_rows, run_benchmark_import::result, foreign_storage::ParseBufferResult::row_count, and throw_unexpected_number_of_items().
Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunks().
std::vector<size_t> foreign_storage::partition_by_fragment | ( | const size_t | start_row_index, |
const size_t | max_fragment_size, | ||
const size_t | buffer_row_count | ||
) |
Given a start row index, maximum fragment size, and number of rows in a buffer, this function returns a vector indicating how the rows in the buffer should be partitioned in order to fill up available fragment slots while staying within the capacity of fragments.
Definition at line 506 of file AbstractTextFileDataWrapper.cpp.
References CHECK.
Referenced by scan_metadata().
std::pair<std::map<int, DataBlockPtr>, std::map<int, DataBlockPtr> > foreign_storage::partition_data_blocks | ( | const std::map< int, const ColumnDescriptor * > & | column_by_id, |
const std::map< int, DataBlockPtr > & | data_blocks | ||
) |
Partition data blocks such that dictionary-encoded columns are disjoint from other columns.
Definition at line 679 of file AbstractTextFileDataWrapper.cpp.
References shared::get_from_map().
Referenced by populate_chunks_using_data_blocks().
auto foreign_storage::partition_for_threads | ( | const std::set< T > & | items, |
size_t | max_threads | ||
) |
Definition at line 41 of file FsiChunkUtils.h.
Referenced by create_futures_for_workers(), and foreign_storage::LazyParquetChunkLoader::metadataScan().
auto foreign_storage::partition_for_threads | ( | const std::vector< T > & | items, |
size_t | max_threads | ||
) |
Definition at line 60 of file FsiChunkUtils.h.
void foreign_storage::populate_chunks | ( | MetadataScanMultiThreadingParams & | multi_threading_params, |
std::map< int, FileRegions > & | fragment_id_to_file_regions_map, | ||
const TextFileBufferParser & | parser, | ||
foreign_storage::IterativeFileScanParameters & | file_scan_param | ||
) |
Consumes and processes scan requests from a pending requests queue and populates chunks during an iterative file scan.
Definition at line 988 of file AbstractTextFileDataWrapper.cpp.
References add_request_to_pool(), foreign_storage::ParseBufferRequest::begin_pos, foreign_storage::ParseBufferRequest::buffer_row_count, CHECK_LE, foreign_storage::MetadataScanMultiThreadingParams::continue_processing, defer_scan_request(), foreign_storage::ParseBufferRequest::file_offset, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::IterativeFileScanParameters::fragment_id, get_next_scan_request(), foreign_storage::ParseBufferRequest::getColumns(), foreign_storage::ParseBufferRequest::getMaxFragRows(), foreign_storage::ParseBufferRequest::import_buffers, num_rows_to_process(), foreign_storage::TextFileBufferParser::parseBuffer(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, populate_chunks_using_data_blocks(), foreign_storage::ParseBufferRequest::process_row_count, foreign_storage::ParseBufferRequest::processed_row_count, run_benchmark_import::result, and update_delete_buffer().
Referenced by foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan().
void foreign_storage::populate_chunks_using_data_blocks | ( | MetadataScanMultiThreadingParams & | multi_threading_params, |
int | fragment_id, | ||
const ParseBufferRequest & | request, | ||
ParseBufferResult & | result, | ||
std::map< int, const ColumnDescriptor * > & | column_by_id, | ||
std::map< int, FileRegions > & | fragment_id_to_file_regions_map, | ||
const foreign_storage::IterativeFileScanParameters & | file_scan_param, | ||
const size_t | expected_current_element_count | ||
) |
Definition at line 717 of file AbstractTextFileDataWrapper.cpp.
References add_file_region(), append_data_block_to_chunk(), threading_serial::async(), CHECK_EQ, CHECK_GT, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers_mutex, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::IterativeFileScanParameters::fragment_id, foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::import_buffers, kENCODING_DICT, partition_data_blocks(), and foreign_storage::ParseBufferResult::row_count.
Referenced by populate_chunks().
void foreign_storage::populate_string_dictionary | ( | int32_t | table_id, |
int32_t | col_id, | ||
int32_t | db_id | ||
) |
Definition at line 237 of file Execute.cpp.
References CHECK, Data_Namespace::CPU_LEVEL, Catalog_Namespace::SysCatalog::getCatalog(), Chunk_NS::Chunk::getChunk(), Catalog_Namespace::SysCatalog::instance(), anonymous_namespace{Execute.cpp}::is_empty_table(), and key_does_not_shard_to_leaf().
Referenced by anonymous_namespace{RelAlgExecutor.cpp}::prepare_string_dictionaries().
void foreign_storage::process_data_blocks | ( | MetadataScanMultiThreadingParams & | multi_threading_params, |
int | fragment_id, | ||
const ParseBufferRequest & | request, | ||
ParseBufferResult & | result, | ||
std::map< int, const ColumnDescriptor * > & | column_by_id, | ||
std::map< int, FileRegions > & | fragment_id_to_file_regions_map | ||
) |
Updates metadata encapsulated in encoders for all table columns given new data blocks obtained from parsing a new set of rows in a file buffer. If a cache is available, also appends the data blocks to chunks in the cache.
Definition at line 798 of file AbstractTextFileDataWrapper.cpp.
References add_file_region(), cache_blocks(), foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers_mutex, foreign_storage::ParseBufferResult::column_id_to_data_blocks_map, foreign_storage::ParseBufferRequest::db_id, foreign_storage::MetadataScanMultiThreadingParams::disable_cache, foreign_storage::ParseBufferRequest::first_row_index, foreign_storage::ParseBufferRequest::getFilePath(), foreign_storage::ParseBufferRequest::getTableId(), foreign_storage::ParseBufferResult::row_count, and update_stats().
Referenced by scan_metadata().
void foreign_storage::refresh_foreign_table | ( | Catalog_Namespace::Catalog & | catalog, |
const std::string & | table_name, | ||
const bool | evict_cached_entries | ||
) |
Definition at line 103 of file ForeignTableRefresh.cpp.
References CHECK, shared::contains(), StorageType::FOREIGN_TABLE, dist::is_leaf_node(), Catalog_Namespace::kAggregatorOnlySystemTables, and refresh_foreign_table_unlocked().
Referenced by RefreshForeignTablesCommand::execute(), and foreign_storage::ForeignTableRefreshScheduler::start().
void foreign_storage::refresh_foreign_table_unlocked | ( | Catalog_Namespace::Catalog & | catalog, |
const ForeignTable & | td, | ||
const bool | evict_cached_entries | ||
) |
Definition at line 35 of file ForeignTableRefresh.cpp.
References CHECK, CHUNK_KEY_DB_IDX, CHUNK_KEY_FRAGMENT_IDX, CHUNK_KEY_TABLE_IDX, foreign_storage::anonymous_namespace{ForeignTableRefresh.cpp}::clear_cpu_and_gpu_cache(), Executor::clearExternalCaches(), Catalog_Namespace::DBMetadata::dbId, Catalog_Namespace::Catalog::getCurrentDB(), Catalog_Namespace::Catalog::getDatabaseId(), Catalog_Namespace::Catalog::getDataMgr(), PostEvictionRefreshException::getOriginalException(), logger::INFO, foreign_storage::ForeignTable::isAppendMode(), LOG, Catalog_Namespace::Catalog::removeFragmenterForTable(), TableDescriptor::tableId, TableDescriptor::tableName, and Catalog_Namespace::Catalog::updateForeignTableRefreshTimes().
Referenced by refresh_foreign_table().
bool foreign_storage::request_pool_non_empty | ( | MetadataScanMultiThreadingParams & | multi_threading_params | ) |
Definition at line 932 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::MetadataScanMultiThreadingParams::request_pool, and foreign_storage::MetadataScanMultiThreadingParams::request_pool_mutex.
Referenced by dispatch_scan_requests().
void foreign_storage::reset_multithreading_params | ( | foreign_storage::MetadataScanMultiThreadingParams & | multi_threading_params | ) |
Definition at line 1073 of file AbstractTextFileDataWrapper.cpp.
References foreign_storage::MetadataScanMultiThreadingParams::cached_chunks, foreign_storage::MetadataScanMultiThreadingParams::chunk_encoder_buffers, foreign_storage::MetadataScanMultiThreadingParams::deferred_requests, foreign_storage::MetadataScanMultiThreadingParams::pending_requests, and foreign_storage::MetadataScanMultiThreadingParams::request_pool.
Referenced by foreign_storage::AbstractTextFileDataWrapper::iterativeFileScan().
void foreign_storage::resize_buffer_if_needed | ( | std::unique_ptr< char[]> & | buffer, |
size_t & | buffer_size, | ||
const size_t | alloc_size | ||
) |
Optionally resizes the given buffer if the buffer size is less than the current buffer allocation size.
Definition at line 1063 of file AbstractTextFileDataWrapper.cpp.
References CHECK_LE.
Referenced by dispatch_scan_requests().
std::vector< Aws::S3::Model::Object > foreign_storage::s3_objects_filter_sort_files | ( | const std::vector< Aws::S3::Model::Object > & | file_paths, |
const shared::FilePathOptions & | options | ||
) |
Definition at line 22 of file S3FilePathUtil.cpp.
References shared::FilePathOptions::filter_regex, shared::PATHNAME_ORDER_TYPE, foreign_storage::anonymous_namespace{S3FilePathUtil.cpp}::s3_objects_regex_file_filter(), and shared::FilePathOptions::sort_by.
void foreign_storage::scan_metadata | ( | MetadataScanMultiThreadingParams & | multi_threading_params, |
std::map< int, FileRegions > & | fragment_id_to_file_regions_map, | ||
const TextFileBufferParser & | parser | ||
) |
Consumes and processes metadata scan requests from a pending requests queue and updates existing metadata objects based on newly scanned metadata.
Definition at line 863 of file AbstractTextFileDataWrapper.cpp.
References add_request_to_pool(), foreign_storage::MetadataScanMultiThreadingParams::continue_processing, get_next_scan_request(), foreign_storage::TextFileBufferParser::parseBuffer(), partition_by_fragment(), foreign_storage::MetadataScanMultiThreadingParams::pending_requests_mutex, process_data_blocks(), and run_benchmark_import::result.
Referenced by foreign_storage::AbstractTextFileDataWrapper::populateChunkMetadata().
void foreign_storage::set_node_name | ( | std::map< std::string, import_export::TypedImportBuffer * > & | import_buffers | ) |
Definition at line 55 of file InternalSystemDataWrapper.cpp.
References g_distributed_leaf_idx, dist::is_leaf_node(), and to_string().
Referenced by foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_details(), foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::populate_import_buffers_for_memory_summary(), and foreign_storage::anonymous_namespace{InternalStorageStatsDataWrapper.cpp}::populate_import_buffers_for_storage_details().
void foreign_storage::set_value | ( | rapidjson::Value & | json_val, |
const FileRegion & | file_region, | ||
rapidjson::Document::AllocatorType & | allocator | ||
) |
Definition at line 26 of file CsvShared.cpp.
References json_utils::add_value_to_object(), foreign_storage::FileRegion::first_row_file_offset, foreign_storage::FileRegion::first_row_index, foreign_storage::FileRegion::region_size, and foreign_storage::FileRegion::row_count.
void foreign_storage::set_value | ( | rapidjson::Value & | json_val, |
const RowGroupInterval & | value, | ||
rapidjson::Document::AllocatorType & | allocator | ||
) |
Definition at line 671 of file ParquetDataWrapper.cpp.
References json_utils::add_value_to_object(), foreign_storage::RowGroupInterval::end_index, foreign_storage::RowGroupInterval::file_path, and foreign_storage::RowGroupInterval::start_index.
|
inline |
Definition at line 89 of file ForeignStorageException.h.
Referenced by foreign_storage::ParquetImporter::getAllFilePaths().
|
inline |
Definition at line 95 of file ForeignStorageException.h.
Referenced by foreign_storage::ParquetImporter::getAllFilePaths().
|
inline |
Definition at line 80 of file ForeignStorageException.h.
References to_string().
Referenced by foreign_storage::RegexFileBufferParser::regexMatchColumns(), foreign_storage::anonymous_namespace{CsvFileBufferParser.cpp}::validate_expected_column_count(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_number_of_columns().
|
inline |
Definition at line 46 of file ParquetMetadataValidator.h.
Referenced by foreign_storage::TimestampBoundsValidator< T >::validateValue(), foreign_storage::IntegralFixedLengthBoundsValidator< T >::validateValue(), foreign_storage::BaseDateBoundsValidator< T, is_in_seconds >::validateValue(), and foreign_storage::FloatPointValidator< T >::validateValue().
|
inline |
Definition at line 73 of file ForeignStorageException.h.
Referenced by foreign_storage::LocalMultiFileReader::checkForMoreRows(), and foreign_storage::ParquetDataWrapper::fetchChunkMetadata().
|
inline |
Definition at line 66 of file ForeignStorageException.h.
Referenced by foreign_storage::SingleTextFileReader::checkForMoreRows(), and foreign_storage::ParquetDataWrapper::fetchChunkMetadata().
|
inline |
Definition at line 58 of file ForeignStorageException.h.
|
inline |
Definition at line 107 of file ForeignStorageException.h.
|
inline |
Definition at line 100 of file ForeignStorageException.h.
|
inline |
Definition at line 40 of file ForeignStorageException.h.
References to_string().
Referenced by parse_file_regions(), and foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::throw_unexpected_number_of_items().
void foreign_storage::update_delete_buffer | ( | const ParseBufferRequest & | request, |
const ParseBufferResult & | result, | ||
const foreign_storage::IterativeFileScanParameters & | file_scan_param, | ||
const size_t | start_position_in_fragment | ||
) |
Definition at line 695 of file AbstractTextFileDataWrapper.cpp.
References CHECK, foreign_storage::IterativeFileScanParameters::delete_buffer, foreign_storage::IterativeFileScanParameters::delete_buffer_mutex, foreign_storage::ParseBufferRequest::processed_row_count, foreign_storage::ParseBufferResult::rejected_rows, and foreign_storage::anonymous_namespace{AbstractTextFileDataWrapper.cpp}::resize_delete_buffer().
Referenced by populate_chunks().
void foreign_storage::update_stats | ( | Encoder * | encoder, |
const SQLTypeInfo & | column_type, | ||
DataBlockPtr | data_block, | ||
const size_t | row_count | ||
) |
Updates the statistics metadata encapsulated in the encoder given new data in a data block.
Definition at line 580 of file AbstractTextFileDataWrapper.cpp.
References DataBlockPtr::arraysPtr, SQLTypeInfo::is_array(), SQLTypeInfo::is_varlen(), DataBlockPtr::numbersPtr, DataBlockPtr::stringsPtr, and Encoder::updateStats().
Referenced by process_data_blocks(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumn(), Fragmenter_Namespace::InsertOrderFragmenter::updateColumnMetadata(), and StorageIOFacility::yieldUpdateCallback().
std::shared_ptr< parquet::Statistics > foreign_storage::validate_and_get_column_metadata_statistics | ( | const parquet::ColumnChunkMetaData * | column_metadata | ) |
Definition at line 86 of file ParquetShared.cpp.
References CHECK, and report::stats.
Referenced by foreign_storage::ParquetEncoder::getRowGroupMetadata(), foreign_storage::TypedParquetInPlaceEncoder< V, V >::getRowGroupMetadata(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_list_column_metadata_statistics().
void foreign_storage::validate_equal_column_descriptor | ( | const parquet::ColumnDescriptor * | reference_descriptor, |
const parquet::ColumnDescriptor * | new_descriptor, | ||
const std::string & | reference_file_path, | ||
const std::string & | new_file_path | ||
) |
Definition at line 60 of file ParquetShared.cpp.
Referenced by foreign_storage::LazyParquetChunkLoader::appendRowGroups(), and foreign_storage::anonymous_namespace{LazyParquetChunkLoader.cpp}::validate_equal_schema().
|
inline |
Definition at line 22 of file FsiUtils.h.
References StorageType::FOREIGN_TABLE, and TableDescriptor::storageType.
Referenced by Parser::InsertStmt::analyze(), Parser::TruncateTableStmt::execute(), Parser::InsertValuesStmt::execute(), Parser::InsertIntoTableAsSelectStmt::populateData(), and RelModify::RelModify().
void foreign_storage::validate_regex_parser_options | ( | const import_export::CopyParams & | copy_params | ) |
Definition at line 116 of file ForeignDataWrapperFactory.cpp.
References import_export::CopyParams::line_regex.
Referenced by anonymous_namespace{ForeignDataImporter.cpp}::validate_copy_params().
constexpr const char* foreign_storage::kDeletedValueIndicator {"<DELETED>"} |
Definition at line 26 of file InternalSystemDataWrapper.h.
Referenced by foreign_storage::anonymous_namespace{InternalMemoryStatsDataWrapper.cpp}::get_column_name(), get_db_name(), foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::get_table_ddl(), get_table_name(), and foreign_storage::anonymous_namespace{InternalCatalogDataWrapper.cpp}::get_user_name().