OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Fragmenter.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef FRAGMENTER_H
18 #define FRAGMENTER_H
19 
20 #include <deque>
21 #include <list>
22 #include <map>
23 #include <mutex>
24 #include "../Catalog/ColumnDescriptor.h"
25 #include "../DataMgr/Chunk/Chunk.h"
26 #include "../DataMgr/ChunkMetadata.h"
27 #include "../Shared/heavyai_shared_mutex.h"
28 #include "../Shared/types.h"
29 
30 namespace Data_Namespace {
31 class AbstractBuffer;
32 }
33 
34 class ResultSet;
35 
36 namespace Fragmenter_Namespace {
37 class InsertOrderFragmenter;
38 
46  INSERT_ORDER = 0 // these values persist in catalog. make explicit
47 };
48 
49 struct InsertChunks {  // Groups a set of per-column chunks with the row indices that are valid for insertion.
50  const int table_id;  // table id; const, so it must be supplied when the struct is initialized
51  const int db_id;  // database id; const, so it must be supplied when the struct is initialized
52  std::map</*column_id=*/int, std::shared_ptr<Chunk_NS::Chunk> > chunks;  // shared chunk handles keyed by column id
53  std::vector<size_t> valid_row_indices; /* specifies which row indices in chunk are valid
54  for insertion */
55 };
56 
68 struct InsertData {  // The data to be inserted using the fragment manager.
69  int databaseId;  // identifies the database into which the data is being inserted
70  int tableId;  // identifies the table into which the data is being inserted
71  std::vector<int> columnIds;  // a vector of column ids for the row(s) being inserted
72  size_t numRows;  // the number of rows being inserted
73  std::vector<DataBlockPtr> data;  // data blocks for the row(s) being inserted; presumably one entry per columnIds entry - TODO confirm
74  std::vector<bool> is_default;  // NOTE(review): looks like a per-column "use default value" flag - confirm against callers
76 };
78 
86 class FragmentInfo {  // Used by Fragmenter classes to store info about each fragment (fragment id, number of tuples, chunk metadata).
87  public:
89  : fragmentId(-1)  // constructor initializer list (the constructor's declaration line is not shown in this listing): ids and shard start at -1, counts at 0, resultSet at nullptr
90  , shadowNumTuples(0)
91  , physicalTableId(-1)
92  , shard(-1)
93  , resultSet(nullptr)
94  , numTuples(0)
97 
98  void setChunkMetadataMap(const ChunkMetadataMap& chunk_metadata_map) {  // replaces the whole metadata map with a copy of the argument
99  this->chunkMetadataMap = chunk_metadata_map;
100  }
101 
102  void setChunkMetadata(const int col, std::shared_ptr<ChunkMetadata> chunkMetadata) {  // inserts or overwrites the metadata entry for column id `col`
103  chunkMetadataMap[col] = chunkMetadata;
104  }
105 
106  const ChunkMetadataMap& getChunkMetadataMap() const;  // declared only; defined outside this header
107 
109 
111 
112  size_t getNumTuples() const;  // declared only; defined outside this header, so it may not equal getPhysicalNumTuples() - TODO confirm
113 
114  size_t getPhysicalNumTuples() const { return numTuples; }  // returns the stored numTuples member as-is
115 
116  bool isEmptyPhysicalFragment() const { return physicalTableId >= 0 && !numTuples; }  // true only when physicalTableId is non-negative and numTuples is zero
117 
118  void setPhysicalNumTuples(const size_t physNumTuples) { numTuples = physNumTuples; }  // overwrites the stored numTuples member
119 
122 
123  // for unit tests
124  static void setUnconditionalVacuum(const double unconditionalVacuum) {  // NOTE(review): parameter is double but unconditionalVacuum_ is declared bool, so any non-zero value is stored as true - confirm the intended type
125  unconditionalVacuum_ = unconditionalVacuum;
126  }
127 
130  std::vector<int> deviceIds;
132  int shard;  // initialized to -1 by the constructor
134  mutable ResultSet* resultSet;  // raw pointer, initialized to nullptr; ownership is not visible here - TODO confirm who owns it
135  mutable std::shared_ptr<std::mutex> resultSetMutex;  // presumably guards resultSet - confirm against users of this class
136 
137  private:
138  mutable size_t numTuples;  // mutable, so const member functions are able to modify it
142 
143  friend class InsertOrderFragmenter;  // grants InsertOrderFragmenter access to the private members
144  static bool unconditionalVacuum_;  // class-wide flag written by setUnconditionalVacuum()
145 };
146 
156 class TableInfo {  // Holds a table's chunk key prefix, its list of FragmentInfo entries and a stored tuple count.
157  public:
159 
160  size_t getNumTuples() const;  // declared only; defined outside this header, so it may not equal getPhysicalNumTuples() - TODO confirm
161 
162  size_t getNumTuplesUpperBound() const;  // declared only; defined outside this header
163 
164  size_t getPhysicalNumTuples() const { return numTuples; }  // returns the stored numTuples member as-is
165 
166  void setPhysicalNumTuples(const size_t physNumTuples) { numTuples = physNumTuples; }  // overwrites the stored numTuples member
167 
168  size_t getFragmentNumTuplesUpperBound() const;  // declared only; defined outside this header
169 
170  std::vector<int> chunkKeyPrefix;
171  std::vector<FragmentInfo> fragments;  // FragmentInfo entries stored by value
172 
173  private:
174  mutable size_t numTuples;  // mutable, so const member functions are able to modify it
175 };
176 
177 } // namespace Fragmenter_Namespace
178 
179 #endif // FRAGMENTER_H
static void setUnconditionalVacuum(const double unconditionalVacuum)
Definition: Fragmenter.h:124
ChunkMetadataMap getChunkMetadataMapPhysicalCopy() const
void setChunkMetadata(const int col, std::shared_ptr< ChunkMetadata > chunkMetadata)
Definition: Fragmenter.h:102
const ChunkMetadataMap & getChunkMetadataMapPhysical() const
Definition: Fragmenter.h:108
std::vector< bool > is_default
Definition: Fragmenter.h:75
size_t getPhysicalNumTuples() const
Definition: Fragmenter.h:114
std::shared_ptr< std::mutex > resultSetMutex
Definition: Fragmenter.h:135
std::vector< FragmentInfo > fragments
Definition: Fragmenter.h:171
The InsertOrderFragmenter is a child class of AbstractFragmenter, and fragments data in insert order...
std::vector< int > chunkKeyPrefix
Definition: Fragmenter.h:170
int tableId
identifies the table into which the data is being inserted
Definition: Fragmenter.h:70
std::map< int, std::shared_ptr< ChunkMetadata >> ChunkMetadataMap
size_t getPhysicalNumTuples() const
Definition: Fragmenter.h:164
size_t numRows
the number of rows being inserted
Definition: Fragmenter.h:72
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:86
size_t getFragmentNumTuplesUpperBound() const
An AbstractBuffer is a unit of data management for a data manager.
const ChunkMetadataMap & getChunkMetadataMap() const
void invalidateChunkMetadataMap() const
Definition: Fragmenter.h:120
std::vector< DataBlockPtr > data
the data blocks for the row(s) being inserted
Definition: Fragmenter.h:73
std::map< int, std::shared_ptr< Chunk_NS::Chunk > > chunks
Definition: Fragmenter.h:52
std::vector< size_t > valid_row_indices
Definition: Fragmenter.h:53
bool g_enable_watchdog false
Definition: Execute.cpp:80
void setPhysicalNumTuples(const size_t physNumTuples)
Definition: Fragmenter.h:166
void setPhysicalNumTuples(const size_t physNumTuples)
Definition: Fragmenter.h:118
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:68
std::vector< int > columnIds
a vector of column ids for the row(s) being inserted
Definition: Fragmenter.h:71
ChunkMetadataMap shadowChunkMetadataMap
Definition: Fragmenter.h:133
ResultSet(const std::vector< TargetInfo > &targets, const ExecutorDeviceType device_type, const QueryMemoryDescriptor &query_mem_desc, const std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const unsigned block_size, const unsigned grid_size)
Definition: ResultSet.cpp:64
void setChunkMetadataMap(const ChunkMetadataMap &chunk_metadata_map)
Definition: Fragmenter.h:98