27 #include <unordered_map>
28 #include <unordered_set>
41 namespace Data_Namespace {
45 namespace Fragmenter_Namespace {
59 const std::vector<int> chunkKeyPrefix,
60 std::vector<Chunk_NS::Chunk>& chunkVec,
63 const int physicalTableId,
70 const bool uses_foreign_storage =
false);
107 const int fragment_id,
108 const std::shared_ptr<ChunkMetadata> metadata)
override;
111 std::unordered_map</*fragment_id*/ int, ChunkStats>& stats_map,
112 std::optional<Data_Namespace::MemoryLevel> memory_level)
override;
132 const int fragment_id,
133 const std::vector<uint64_t>& frag_offsets,
134 const std::vector<ScalarTargetValue>& rhs_values,
141 const int fragmentId,
142 const std::vector<TargetMetaInfo> sourceMetaInfo,
143 const std::vector<const ColumnDescriptor*> columnDescriptors,
145 const size_t indexOffFragmentOffsetColumn,
148 Executor* executor)
override;
153 const int fragment_id,
154 const std::vector<uint64_t>& frag_offsets,
162 std::shared_ptr<Chunk_NS::Chunk> chunk,
173 const int fragment_id,
174 const std::vector<uint64_t>& frag_offsets,
179 const std::shared_ptr<Chunk_NS::Chunk>& chunk)
override;
185 void dropColumns(
const std::vector<int>& columnIds)
override;
196 src_dst_column_pairs);
200 std::map<int, Chunk_NS::Chunk>
203 std::deque<std::unique_ptr<FragmentInfo>>
258 const std::shared_ptr<Chunk_NS::Chunk>& chunk,
259 const std::vector<uint64_t>& frag_offsets);
261 const std::shared_ptr<Chunk_NS::Chunk>& chunk,
262 const std::vector<uint64_t>& frag_offsets);
269 const std::optional<int> delete_column_id,
271 const size_t num_rows_to_insert,
272 size_t& num_rows_inserted,
273 size_t& num_rows_left,
274 std::vector<size_t>& valid_row_indices,
275 const size_t start_fragment);
void insertChunksIntoFragment(const InsertChunks &insert_chunks, const std::optional< int > delete_column_id, FragmentInfo *current_fragment, const size_t num_rows_to_insert, size_t &num_rows_inserted, size_t &num_rows_left, std::vector< size_t > &valid_row_indices, const size_t start_fragment)
void dropFragmentsToSizeNoInsertLock(const size_t max_rows)
void updateMetadata(const Catalog_Namespace::Catalog *catalog, const MetaDataKey &key, UpdelRoll &updel_roll) override
void insertChunksImpl(const InsertChunks &insert_chunk)
void dropColumns(const std::vector< int > &columnIds) override
FragmentInfo & getFragmentInfoFromId(const int fragment_id)
Data_Namespace::DataMgr * dataMgr_
Catalog_Namespace::Catalog * catalog_
class for a per-database catalog. also includes metadata for the current database and the current use...
TableInfo getFragmentsForQuery() override
returns (inside a QueryInfo object) all ids and row sizes of fragments
Data_Namespace::MemoryLevel defaultInsertLevel_
InsertOrderFragmenter & operator=(const InsertOrderFragmenter &)
std::pair< const TableDescriptor *, Fragmenter_Namespace::FragmentInfo * > MetaDataKey
std::deque< std::unique_ptr< FragmentInfo > > fragmentInfoVec_
void dropFragmentsToSize(const size_t maxRows) override
Will truncate table to less than maxRows by dropping fragments.
InsertOrderFragmenter(const std::vector< int > chunkKeyPrefix, std::vector< Chunk_NS::Chunk > &chunkVec, Data_Namespace::DataMgr *dataMgr, Catalog_Namespace::Catalog *catalog, const int physicalTableId, const int shard, const size_t maxFragmentRows=DEFAULT_FRAGMENT_ROWS, const size_t maxChunkSize=DEFAULT_MAX_CHUNK_SIZE, const size_t pageSize=DEFAULT_PAGE_SIZE, const size_t maxRows=DEFAULT_MAX_ROWS, const Data_Namespace::MemoryLevel defaultInsertLevel=Data_Namespace::DISK_LEVEL, const bool uses_foreign_storage=false)
std::shared_ptr< std::mutex > mutex_access_inmem_states
#define DEFAULT_MAX_CHUNK_SIZE
std::optional< ChunkUpdateStats > updateColumn(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const ColumnDescriptor *cd, const int fragment_id, const std::vector< uint64_t > &frag_offsets, const std::vector< ScalarTargetValue > &rhs_values, const SQLTypeInfo &rhs_type, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll) override
void updateColumns(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< TargetMetaInfo > sourceMetaInfo, const std::vector< const ColumnDescriptor * > columnDescriptors, const RowDataProvider &sourceDataProvider, const size_t indexOffFragmentOffsetColumn, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll, Executor *executor) override
High-level representation of SQL values.
heavyai::shared_mutex fragmentInfoMutex_
The InsertOrderFragmenter is a child class of AbstractFragmenter, and fragments data in insert order...
auto vacuum_fixlen_rows(const FragmentInfo &fragment, const std::shared_ptr< Chunk_NS::Chunk > &chunk, const std::vector< uint64_t > &frag_offsets)
void insertData(InsertData &insert_data_struct) override
appends data onto the most recently occurring fragment, creating a new one if necessary ...
void updateColumnChunkMetadata(const ColumnDescriptor *cd, const int fragment_id, const std::shared_ptr< ChunkMetadata > metadata) override
Updates the metadata for a column chunk.
const int physicalTableId_
void insertChunks(const InsertChunks &insert_chunk) override
Insert chunks into minimal number of fragments.
std::string getFragmenterType() override
get fragmenter's type (as string)
const std::vector< uint64_t > getVacuumOffsets(const std::shared_ptr< Chunk_NS::Chunk > &chunk) override
void conditionallyInstantiateFileMgrWithParams()
std::string fragmenterType_
std::vector< int > chunkKeyPrefix_
void addColumns(const InsertData &insertDataStruct)
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
void deleteFragments(const std::vector< int > &dropFragIds)
void setNumRows(const size_t numTuples) override
void setLastFragmentVarLenColumnSizes()
specifies the content in-memory of a row in the column metadata table
FragmentInfo * createNewFragment(const Data_Namespace::MemoryLevel memory_level=Data_Namespace::DISK_LEVEL)
creates new fragment, calling createChunk() method of BufferMgr to make a new chunk for each column o...
#define DEFAULT_PAGE_SIZE
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
void lockInsertCheckpointData(const InsertData &insertDataStruct)
void insertChunksNoCheckpoint(const InsertChunks &insert_chunk) override
Insert chunks into minimal number of fragments; no locks or checkpoints taken.
void compactRows(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragment_id, const std::vector< uint64_t > &frag_offsets, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll) override
FragmentInfo * getFragmentInfo(const int fragment_id) const override
Retrieve the fragment info object for an individual fragment for editing.
#define DEFAULT_FRAGMENT_ROWS
heavyai::shared_mutex insertMutex_
void alterNonGeoColumnType(const std::list< const ColumnDescriptor * > &columns)
std::vector< std::unique_ptr< Chunk_NS::Chunk > > tracked_in_memory_chunks_
void resetSizesFromFragments() override
void insertDataImpl(InsertData &insert_data)
void updateChunkStats(const ColumnDescriptor *cd, std::unordered_map< int, ChunkStats > &stats_map, std::optional< Data_Namespace::MemoryLevel > memory_level) override
Update chunk stats.
auto getChunksForAllColumns(const TableDescriptor *td, const FragmentInfo &fragment, const Data_Namespace::MemoryLevel memory_level)
bool isAddingNewColumns(const InsertData &insert_data) const
size_t getNumRows() override
std::unordered_map< int, size_t > varLenColInfo_
size_t getNumFragments() override
returns the number of fragments in a table
void updateColumnMetadata(const ColumnDescriptor *cd, FragmentInfo &fragment, std::shared_ptr< Chunk_NS::Chunk > chunk, const UpdateValuesStats &update_values_stats, const SQLTypeInfo &rhs_type, UpdelRoll &updel_roll) override
bool hasMaterializedRowId_
The data to be inserted using the fragment manager.
void insertDataNoCheckpoint(InsertData &insert_data_struct) override
Given data wrapped in an InsertData struct, inserts it into the correct partitions. No locks and check...
void alterColumnGeoType(const std::list< std::pair< const ColumnDescriptor *, std::list< const ColumnDescriptor * >>> &src_dst_column_pairs)
std::shared_timed_mutex shared_mutex
int getFragmenterId() override
get fragmenter's id
std::map< int, Chunk_NS::Chunk > columnMap_
std::vector< int > getChunkKeyPrefix() const
const bool uses_foreign_storage_
auto vacuum_varlen_rows(const FragmentInfo &fragment, const std::shared_ptr< Chunk_NS::Chunk > &chunk, const std::vector< uint64_t > &frag_offsets)
bool hasDeletedRows(const int delete_column_id) override
Iterates through chunk metadata to return whether any rows have been deleted.
~InsertOrderFragmenter() override
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue