OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
AbstractFragmenter.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "Fragmenter/Fragmenter.h"
26 
27 #include <boost/variant.hpp>
28 #include <string>
29 #include <vector>
30 
33 #include "Shared/UpdelRoll.h"
34 #include "Shared/sqltypes.h"
36 
37 // Should the ColumnInfo and FragmentInfo structs be in
38 // AbstractFragmenter?
39 
40 class Executor;
41 
42 namespace Chunk_NS {
43 class Chunk;
44 };
45 
46 namespace Data_Namespace {
47 class AbstractBuffer;
48 class AbstractDataMgr;
49 } // namespace Data_Namespace
50 
51 namespace import_export {
52 class TypedImportBuffer;
53 }
54 
55 namespace Catalog_Namespace {
56 class Catalog;
57 }
58 struct TableDescriptor;
59 struct ColumnDescriptor;
60 
61 namespace Fragmenter_Namespace {
62 
67  public:
// Pure virtual, const accessor that returns a row count by value as size_t.
// NOTE(review): which rows are counted is decided by the implementer and is
// not visible here. The top-level `const` on the by-value return type has no
// effect for callers.
68  virtual size_t const getRowCount() const = 0;
// Pure virtual, const accessor that returns an entry count by value as size_t.
// NOTE(review): how "entries" relate to "rows" is not visible in this
// declaration — confirm with implementers. The top-level `const` on the
// by-value return type has no effect for callers.
69  virtual size_t const getEntryCount() const = 0;
// Pure virtual, const accessor that returns a raw StringDictionaryProxy
// pointer.
// NOTE(review): ownership and nullability of the returned pointer are not
// established by this declaration — confirm with implementers.
70  virtual StringDictionaryProxy* getLiteralDictionary() const = 0;
// Pure virtual, const; returns, by value, a vector of TargetValue for the
// entry at `index`.
// NOTE(review): presumably `index` must be below getEntryCount() and the
// vector holds one value per column — TODO confirm.
71  virtual std::vector<TargetValue> getEntryAt(const size_t index) const = 0;
// Pure virtual, const; returns, by value, a vector of TargetValue for the
// entry at `index`.
// NOTE(review): what "translated" means (which transformation is applied to
// the values) is not visible in this declaration — confirm with implementers.
72  virtual std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const = 0;
73 };
74 
// Data members of a value-statistics aggregate (the struct header line is not
// present in this listing; presumably UpdateValuesStats — confirm).
// Defaults to "no null recorded".
76  bool has_null{false};
// The max_* members start at the smallest representable value and the min_*
// members at the largest. NOTE(review): presumably so that the first real
// value compared against them replaces the initial value — confirm at the
// update sites.
77  double max_double{std::numeric_limits<double>::lowest()};
78  double min_double{std::numeric_limits<double>::max()};
79  int64_t max_int64t{std::numeric_limits<int64_t>::min()};
80  int64_t min_int64t{std::numeric_limits<int64_t>::max()};
81 };
82 
83 /*
84  * @type ChunkUpdateStats
85  * @brief struct containing stats from a column chunk update.
86  * `new_values_stats` represents aggregate stats for the new
87  * values that were put into the chunk. `old_values_stats`
88  * represents aggregate stats for chunk values that were
89  * replaced.
90  */
// Row counters, both zero-initialized.
// NOTE(review): presumably the number of rows touched by the update and the
// total number of rows in the fragment, respectively — confirm at the sites
// that fill them in.
94  int64_t updated_rows_count{0};
95  int64_t fragment_rows_count{0};
// Shared-ownership handle to a chunk; default-constructed (null) until set.
96  std::shared_ptr<Chunk_NS::Chunk> chunk;
97 };
98 
99 /*
100  * @type AbstractFragmenter
101  * @brief abstract base class for all table partitioners
102  *
103  * The virtual methods of this class provide an interface
104  * for getting the id and type of a
105  * partitioner, inserting data into a partitioner, and
106  * getting the partitions (fragments) managed by a
107  * partitioner that must be queried given a predicate
108  */
109 
111  public:
// Virtual destructor with an empty body: lets objects of derived classes be
// destroyed correctly through an AbstractFragmenter pointer.
112  virtual ~AbstractFragmenter() {}
113 
121  // virtual void getFragmentsForQuery(QueryInfo &queryInfo, const void *predicate = 0) =
122  // 0
123 
// Pure virtual; takes no arguments and returns a size_t — presumably the
// number of fragments, going by the name (TODO confirm).
// NOTE(review): the brief shown for this method in the generated docs talks
// about selecting fragments for an optional predicate, which does not match
// this signature; it likely belongs to the commented-out
// getFragmentsForQuery(QueryInfo&, ...) declaration — confirm and fix in the
// header.
// Not const-qualified, so it cannot be called through a const reference.
128  virtual size_t getNumFragments() = 0;
129 
// Get all fragments for the current table; the result is returned by value
// as a TableInfo.
133  virtual TableInfo getFragmentsForQuery() = 0;
134 
// Given data wrapped in an InsertData struct, inserts it into the correct
// partitions, taking locks.
// NOTE(review): the generated brief is cut off after "with locks and che";
// presumably "checkpoints" — confirm against the header.
// `insert_data_struct` is taken by non-const reference, so implementations
// may modify it.
140  virtual void insertData(InsertData& insert_data_struct) = 0;
141 
// Insert chunks into minimal number of fragments.
147  virtual void insertChunks(const InsertChunks& insert_chunk) = 0;
148 
// Given data wrapped in an InsertData struct, inserts it into the correct
// partitions without taking locks.
// NOTE(review): the generated brief is cut off after "No locks and check";
// presumably "checkpoints" — confirm against the header.
// `insert_data_struct` is taken by non-const reference, so implementations
// may modify it.
154  virtual void insertDataNoCheckpoint(InsertData& insert_data_struct) = 0;
155 
// Insert chunks into minimal number of fragments; no locks or checkpoints
// taken.
162  virtual void insertChunksNoCheckpoint(const InsertChunks& insert_chunk) = 0;
163 
// Will truncate table to less than maxRows by dropping fragments.
168  virtual void dropFragmentsToSize(const size_t maxRows) = 0;
169 
// Update chunk stats for column `cd`.
// `stats_map` is keyed by fragment id (see the inline comment in its type)
// and is taken by non-const reference, so implementations may modify it.
// `memory_level` is optional. NOTE(review): the behaviour when it is empty is
// not visible in this declaration — confirm with implementers.
173  virtual void updateChunkStats(
174  const ColumnDescriptor* cd,
175  std::unordered_map</*fragment_id*/ int, ChunkStats>& stats_map,
176  std::optional<Data_Namespace::MemoryLevel> memory_level) = 0;
177 
// Retrieve the fragment info object for an individual fragment for editing.
// The method is const but hands out a pointer to non-const FragmentInfo.
// NOTE(review): the result for an unknown `fragment_id` (null vs. error) is
// not visible in this declaration — confirm with implementers.
181  virtual FragmentInfo* getFragmentInfo(const int fragment_id) const = 0;
182 
// Gets the id of the partitioner.
// Not const-qualified, so it cannot be called through a const reference.
186  virtual int getFragmenterId() = 0;
187 
// Gets the string type of the partitioner.
// Not const-qualified, so it cannot be called through a const reference.
192  virtual std::string getFragmenterType() = 0;
193 
// Pure virtual; returns a size_t — presumably the current row count tracked
// by the fragmenter (TODO confirm).
// Not const-qualified, so it cannot be called through a const reference.
194  virtual size_t getNumRows() = 0;
// Pure virtual setter taking a count in `numTuples`.
// NOTE(review): presumably overwrites the value reported by getNumRows() —
// confirm with implementers.
195  virtual void setNumRows(const size_t numTuples) = 0;
196 
// Pure virtual column update taking a vector of right-hand-side values.
// Inputs: the catalog, table descriptor `td` and column descriptor `cd`, the
// id of the fragment, a vector of offsets within that fragment, the values
// with their SQL type, a memory level, and an UpdelRoll taken by non-const
// reference (so implementations may modify it).
// Returns an optional ChunkUpdateStats.
// NOTE(review): presumably `rhs_values` is either parallel to `frag_offsets`
// or holds a single value applied to all offsets, and the empty optional
// means "nothing updated" — none of this is visible here; confirm.
197  virtual std::optional<ChunkUpdateStats> updateColumn(
198  const Catalog_Namespace::Catalog* catalog,
199  const TableDescriptor* td,
200  const ColumnDescriptor* cd,
201  const int fragment_id,
202  const std::vector<uint64_t>& frag_offsets,
203  const std::vector<ScalarTargetValue>& rhs_values,
204  const SQLTypeInfo& rhs_type,
205  const Data_Namespace::MemoryLevel memory_level,
206  UpdelRoll& updel_roll) = 0;
207 
// Pure virtual multi-column update for one fragment, with the source values
// supplied through a RowDataProvider.
// NOTE(review): `sourceMetaInfo` and `columnDescriptors` are taken by const
// value, so every call copies both vectors; a const reference would avoid the
// copy but changes the signature overriders must match.
// NOTE(review): presumably `indexOffFragmentOffsetColumn` is the position,
// within each provided entry, of the value holding the row's fragment
// offset — confirm with implementers.
// `updelRoll` is taken by non-const reference; `executor` is a raw pointer
// whose nullability is not visible here.
208  virtual void updateColumns(const Catalog_Namespace::Catalog* catalog,
209  const TableDescriptor* td,
210  const int fragmentId,
211  const std::vector<TargetMetaInfo> sourceMetaInfo,
212  const std::vector<const ColumnDescriptor*> columnDescriptors,
213  const RowDataProvider& sourceDataProvider,
214  const size_t indexOffFragmentOffsetColumn,
215  const Data_Namespace::MemoryLevel memoryLevel,
216  UpdelRoll& updelRoll,
217  Executor* executor) = 0;
218 
// Pure virtual column update taking a single right-hand-side value.
// Same inputs as the vector-valued updateColumn overload, except that one
// ScalarTargetValue is passed instead of a vector, and nothing is returned.
// NOTE(review): presumably the single value is written at every offset in
// `frag_offsets` — confirm with implementers.
219  virtual void updateColumn(const Catalog_Namespace::Catalog* catalog,
220  const TableDescriptor* td,
221  const ColumnDescriptor* cd,
222  const int fragment_id,
223  const std::vector<uint64_t>& frag_offsets,
224  const ScalarTargetValue& rhs_value,
225  const SQLTypeInfo& rhs_type,
226  const Data_Namespace::MemoryLevel memory_level,
227  UpdelRoll& updel_roll) = 0;
228 
// Pure virtual metadata update for one column of one fragment, driven by the
// value statistics in `update_values_stats` and the SQL type `rhs_type`.
// `fragment` and `updel_roll` are taken by non-const reference, so
// implementations may modify them.
// `chunk` is a shared_ptr taken by value, so each call adds a temporary
// owner (one reference-count increment/decrement).
229  virtual void updateColumnMetadata(const ColumnDescriptor* cd,
230  FragmentInfo& fragment,
231  std::shared_ptr<Chunk_NS::Chunk> chunk,
232  const UpdateValuesStats& update_values_stats,
233  const SQLTypeInfo& rhs_type,
234  UpdelRoll& updel_roll) = 0;
235 
// Pure virtual metadata update identified by `key`; MetaDataKey is a
// (const TableDescriptor*, FragmentInfo*) pair.
// `updel_roll` is taken by non-const reference, so implementations may
// modify it.
236  virtual void updateMetadata(const Catalog_Namespace::Catalog* catalog,
237  const MetaDataKey& key,
238  UpdelRoll& updel_roll) = 0;
239 
// Pure virtual row compaction for fragment `fragmentId` of table `td`.
// NOTE(review): presumably `fragOffsets` lists the row offsets to remove
// from the fragment (cf. getVacuumOffsets) — confirm with implementers.
// `updelRoll` is taken by non-const reference, so implementations may
// modify it.
240  virtual void compactRows(const Catalog_Namespace::Catalog* catalog,
241  const TableDescriptor* td,
242  const int fragmentId,
243  const std::vector<uint64_t>& fragOffsets,
244  const Data_Namespace::MemoryLevel memoryLevel,
245  UpdelRoll& updelRoll) = 0;
246 
// Pure virtual; returns a vector of uint64_t offsets computed from `chunk`.
// NOTE(review): presumably the offsets of rows flagged as deleted in the
// given chunk — confirm with implementers.
// NOTE(review): the by-value return type is const-qualified, which prevents
// callers from moving out of the returned vector.
247  virtual const std::vector<uint64_t> getVacuumOffsets(
248  const std::shared_ptr<Chunk_NS::Chunk>& chunk) = 0;
249 
// Pure virtual; takes the ids of the columns to drop.
// NOTE(review): what is released per column (chunks, metadata) is not
// visible in this declaration — confirm with implementers.
250  virtual void dropColumns(const std::vector<int>& columnIds) = 0;
251 
// Iterates through chunk metadata to return whether any rows have been
// deleted. `delete_column_id` identifies the column to inspect.
253  virtual bool hasDeletedRows(const int delete_column_id) = 0;
254 
// Updates the metadata for a column chunk, identified by column descriptor
// `cd` and `fragment_id`.
// NOTE(review): `metadata` is a shared_ptr taken by const value, so each call
// copies it (one reference-count increment/decrement); a const reference
// would avoid that but changes the signature overriders must match.
262  virtual void updateColumnChunkMetadata(
263  const ColumnDescriptor* cd,
264  const int fragment_id,
265  const std::shared_ptr<ChunkMetadata> metadata) = 0;
266 
// Pure virtual; takes no arguments and returns nothing.
// NOTE(review): presumably recomputes the fragmenter's cached size/row
// totals from its current fragments — confirm with implementers.
272  virtual void resetSizesFromFragments() = 0;
273 };
274 
275 } // namespace Fragmenter_Namespace
std::shared_ptr< Chunk_NS::Chunk > chunk
virtual size_t getNumFragments()=0
Should get the partitions(fragments) where at least one tuple could satisfy the (optional) provided p...
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143
virtual std::string getFragmenterType()=0
Gets the string type of the partitioner.
virtual std::optional< ChunkUpdateStats > updateColumn(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const ColumnDescriptor *cd, const int fragment_id, const std::vector< uint64_t > &frag_offsets, const std::vector< ScalarTargetValue > &rhs_values, const SQLTypeInfo &rhs_type, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll)=0
std::pair< const TableDescriptor *, Fragmenter_Namespace::FragmentInfo * > MetaDataKey
Definition: UpdelRoll.h:41
virtual std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const =0
virtual bool hasDeletedRows(const int delete_column_id)=0
Iterates through chunk metadata to return whether any rows have been deleted.
High-level representation of SQL values.
Constants for Builtin SQL Types supported by HEAVY.AI.
virtual void insertData(InsertData &insert_data_struct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions with locks and che...
virtual void dropColumns(const std::vector< int > &columnIds)=0
virtual void dropFragmentsToSize(const size_t maxRows)=0
Will truncate table to less than maxRows by dropping fragments.
virtual void compactRows(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< uint64_t > &fragOffsets, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll)=0
virtual const std::vector< uint64_t > getVacuumOffsets(const std::shared_ptr< Chunk_NS::Chunk > &chunk)=0
virtual TableInfo getFragmentsForQuery()=0
Get all fragments for the current table.
virtual FragmentInfo * getFragmentInfo(const int fragment_id) const =0
Retrieve the fragment info object for an individual fragment for editing.
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:86
virtual size_t const getEntryCount() const =0
An AbstractBuffer is a unit of data management for a data manager.
virtual int getFragmenterId()=0
Gets the id of the partitioner.
specifies the content in-memory of a row in the column metadata table
virtual void updateColumnChunkMetadata(const ColumnDescriptor *cd, const int fragment_id, const std::shared_ptr< ChunkMetadata > metadata)=0
Updates the metadata for a column chunk.
virtual void updateColumnMetadata(const ColumnDescriptor *cd, FragmentInfo &fragment, std::shared_ptr< Chunk_NS::Chunk > chunk, const UpdateValuesStats &update_values_stats, const SQLTypeInfo &rhs_type, UpdelRoll &updel_roll)=0
virtual void setNumRows(const size_t numTuples)=0
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:276
virtual void updateMetadata(const Catalog_Namespace::Catalog *catalog, const MetaDataKey &key, UpdelRoll &updel_roll)=0
virtual void insertChunksNoCheckpoint(const InsertChunks &insert_chunk)=0
Insert chunks into minimal number of fragments; no locks or checkpoints taken.
virtual StringDictionaryProxy * getLiteralDictionary() const =0
virtual void updateColumns(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector< TargetMetaInfo > sourceMetaInfo, const std::vector< const ColumnDescriptor * > columnDescriptors, const RowDataProvider &sourceDataProvider, const size_t indexOffFragmentOffsetColumn, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll, Executor *executor)=0
virtual void updateChunkStats(const ColumnDescriptor *cd, std::unordered_map< int, ChunkStats > &stats_map, std::optional< Data_Namespace::MemoryLevel > memory_level)=0
Update chunk stats.
virtual void insertChunks(const InsertChunks &insert_chunk)=0
Insert chunks into minimal number of fragments.
FileBuffer Chunk
A Chunk is the fundamental unit of execution in Map-D.
Definition: FileMgr.h:80
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:68
virtual std::vector< TargetValue > getEntryAt(const size_t index) const =0
virtual void insertDataNoCheckpoint(InsertData &insert_data_struct)=0
Given data wrapped in an InsertData struct, inserts it into the correct partitions No locks and check...
virtual size_t const getRowCount() const =0
boost::variant< int64_t, double, float, NullableString > ScalarTargetValue
Definition: TargetValue.h:180