OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ArrowCsvForeignStorage Class Reference
+ Inheritance diagram for ArrowCsvForeignStorage:
+ Collaboration diagram for ArrowCsvForeignStorage:

Public Member Functions

 ArrowCsvForeignStorage ()
 
void prepareTable (const int db_id, const std::string &type, TableDescriptor &td, std::list< ColumnDescriptor > &cols) override
 
void registerTable (Catalog_Namespace::Catalog *catalog, std::pair< int, int > table_key, const std::string &type, const TableDescriptor &td, const std::list< ColumnDescriptor > &cols, Data_Namespace::AbstractBufferMgr *mgr) override
 
std::string getType () const override
 
- Public Member Functions inherited from ArrowForeignStorageBase
void append (const std::vector< ForeignStorageColumnBuffer > &column_buffers) override
 
void read (const ChunkKey &chunk_key, const SQLTypeInfo &sql_type, int8_t *dest, const size_t numBytes) override
 
int8_t * tryZeroCopy (const ChunkKey &chunk_key, const SQLTypeInfo &sql_type, const size_t numBytes) override
 
void dropTable (const int db_id, const int table_id) override
 
void parseArrowTable (Catalog_Namespace::Catalog *catalog, std::pair< int, int > table_key, const std::string &type, const TableDescriptor &td, const std::list< ColumnDescriptor > &cols, Data_Namespace::AbstractBufferMgr *mgr, const arrow::Table &table)
 
std::shared_ptr
< arrow::ChunkedArray > 
createDictionaryEncodedColumn (StringDictionary *dict, const ColumnDescriptor &c, std::shared_ptr< arrow::ChunkedArray > arr_col_chunked_array)
 
std::shared_ptr
< arrow::ChunkedArray > 
convertArrowDictionary (StringDictionary *dict, const ColumnDescriptor &c, std::shared_ptr< arrow::ChunkedArray > arr_col_chunked_array)
 
template<typename T , typename ChunkType >
std::shared_ptr
< arrow::ChunkedArray > 
createDecimalColumn (const ColumnDescriptor &c, std::shared_ptr< arrow::ChunkedArray > arr_col_chunked_array)
 
std::shared_ptr
< arrow::ChunkedArray > 
replaceNullValues (const SQLTypeInfo &columnType, std::shared_ptr< arrow::ChunkedArray > arr_col_chunked_array)
 
template<typename T >
std::shared_ptr
< arrow::ChunkedArray > 
replaceNullValuesImpl (std::shared_ptr< arrow::ChunkedArray > arr_col_chunked_array)
 
void getSizeAndOffset (const Frag &frag, const std::shared_ptr< arrow::Array > &chunk, size_t i, int &size, int &offset)
 
int64_t makeFragment (const Frag &frag, ArrowFragment &arrowFrag, const std::vector< std::shared_ptr< arrow::Array >> &chunks, bool is_varlen)
 
- Public Member Functions inherited from PersistentForeignStorageInterface
virtual ~PersistentForeignStorageInterface ()
 

Additional Inherited Members

- Public Attributes inherited from ArrowForeignStorageBase
std::map< std::array< int, 3 >
, std::vector< ArrowFragment > > 
m_columns
 

Detailed Description

Definition at line 897 of file ArrowForeignStorage.cpp.

Constructor & Destructor Documentation

ArrowCsvForeignStorage::ArrowCsvForeignStorage ( )
inline

Definition at line 899 of file ArrowForeignStorage.cpp.

899 {}

Member Function Documentation

std::string ArrowCsvForeignStorage::getType ( ) const
override virtual

Implements PersistentForeignStorageInterface.

Definition at line 1057 of file ArrowForeignStorage.cpp.

References logger::INFO, and LOG.

1057  {
1058  LOG(INFO) << "CSV backed temporary tables has been activated. Create table `with "
1059  "(storage_type='CSV:path/to/file.csv');`\n";
1060  return "CSV";
1061 }
#define LOG(tag)
Definition: Logger.h:285
void ArrowCsvForeignStorage::prepareTable ( const int  db_id,
const std::string &  type,
TableDescriptor &  td,
std::list< ColumnDescriptor > &  cols 
)
override virtual

Reimplemented from PersistentForeignStorageInterface.

Definition at line 915 of file ArrowForeignStorage.cpp.

References TableDescriptor::hasDeletedCol.

918  {
919  td.hasDeletedCol = false;
920 }
void ArrowCsvForeignStorage::registerTable ( Catalog_Namespace::Catalog *  catalog,
std::pair< int, int >  table_key,
const std::string &  type,
const TableDescriptor &  td,
const std::list< ColumnDescriptor > &  cols,
Data_Namespace::AbstractBufferMgr *  mgr 
)
override virtual

Implements PersistentForeignStorageInterface.

Definition at line 986 of file ArrowForeignStorage.cpp.

References ARROW_THROW_NOT_OK, CHECK, DataframeTableDescriptor::delimiter, measure< TimeT >::execution(), getArrowImportType(), DataframeTableDescriptor::hasHeader, ArrowForeignStorageBase::parseArrowTable(), DataframeTableDescriptor::skipRows, and VLOG.

991  {
992  const DataframeTableDescriptor* df_td =
993  dynamic_cast<const DataframeTableDescriptor*>(&td);
994  bool isDataframe = df_td ? true : false;
995  std::unique_ptr<DataframeTableDescriptor> df_td_owned;
996  if (!isDataframe) {
997  df_td_owned = std::make_unique<DataframeTableDescriptor>(td);
998  CHECK(df_td_owned);
999  df_td = df_td_owned.get();
1000  }
1001 
1002 #if defined(ENABLE_ARROW_4) || defined(_WIN32)
1003  auto io_context = arrow::io::default_io_context();
1004 #else
1005  auto io_context = arrow::default_memory_pool();
1006 #endif
1007  auto arrow_parse_options = arrow::csv::ParseOptions::Defaults();
1008  arrow_parse_options.quoting = false;
1009  arrow_parse_options.escaping = false;
1010  arrow_parse_options.newlines_in_values = false;
1011  arrow_parse_options.delimiter = *df_td->delimiter.c_str();
1012  auto arrow_read_options = arrow::csv::ReadOptions::Defaults();
1013  arrow_read_options.use_threads = true;
1014 
1015  arrow_read_options.block_size = 20 * 1024 * 1024;
1016  arrow_read_options.autogenerate_column_names = false;
1017  arrow_read_options.skip_rows =
1018  df_td->hasHeader ? (df_td->skipRows + 1) : df_td->skipRows;
1019 
1020  auto arrow_convert_options = arrow::csv::ConvertOptions::Defaults();
1021  arrow_convert_options.check_utf8 = false;
1022  arrow_convert_options.include_columns = arrow_read_options.column_names;
1023  arrow_convert_options.strings_can_be_null = true;
1024 
1025  for (auto& c : cols) {
1026  if (c.isSystemCol) {
1027  continue; // must be processed by base interface implementation
1028  }
1029  arrow_convert_options.column_types.emplace(c.columnName,
1030  getArrowImportType(c.columnType));
1031  arrow_read_options.column_names.push_back(c.columnName);
1032  }
1033 
1034  std::shared_ptr<arrow::io::ReadableFile> inp;
1035  auto file_result = arrow::io::ReadableFile::Open(info.c_str());
1036  ARROW_THROW_NOT_OK(file_result.status());
1037  inp = file_result.ValueOrDie();
1038 
1039  auto table_reader_result = arrow::csv::TableReader::Make(
1040  io_context, inp, arrow_read_options, arrow_parse_options, arrow_convert_options);
1041  ARROW_THROW_NOT_OK(table_reader_result.status());
1042  auto table_reader = table_reader_result.ValueOrDie();
1043 
1044  std::shared_ptr<arrow::Table> arrowTable;
1045  auto time = measure<>::execution([&]() {
1046  auto arrow_table_result = table_reader->Read();
1047  ARROW_THROW_NOT_OK(arrow_table_result.status());
1048  arrowTable = arrow_table_result.ValueOrDie();
1049  });
1050 
1051  VLOG(1) << "Read Arrow CSV file " << info << " in " << time << "ms";
1052 
1053  arrow::Table& table = *arrowTable.get();
1054  parseArrowTable(catalog, table_key, info, td, cols, mgr, table);
1055 }
#define ARROW_THROW_NOT_OK(s)
Definition: ArrowUtil.h:36
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
void parseArrowTable(Catalog_Namespace::Catalog *catalog, std::pair< int, int > table_key, const std::string &type, const TableDescriptor &td, const std::list< ColumnDescriptor > &cols, Data_Namespace::AbstractBufferMgr *mgr, const arrow::Table &table)
static std::shared_ptr< arrow::DataType > getArrowImportType(const SQLTypeInfo type)
#define CHECK(condition)
Definition: Logger.h:291
#define VLOG(n)
Definition: Logger.h:388
specifies the content in-memory of a row in the table metadata table

+ Here is the call graph for this function:


The documentation for this class was generated from the following file: