OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ChunkAccessorTable.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "ChunkAccessorTable.h"
24 #include <exception>
25 #include <memory>
26 #include "Catalog/Catalog.h"
27 #include "DataMgr/Chunk/Chunk.h"
28 
30  const TableDescriptor* td,
31  const std::vector<std::string>& columnNames) {
32  ChunkAccessorTable table;
33 
34  // get fragments
35  const auto tableInfo = td->fragmenter->getFragmentsForQuery();
36  if (tableInfo.fragments.size() == 0) {
37  throw std::runtime_error("No fragments in table '" + td->tableName + "'");
38  }
39 
40  // for each fragment...
41  for (const auto& fragment : tableInfo.fragments) {
42  // add a table entry for it
43  table.emplace_back();
44  std::get<0>(table.back()) = 0;
45 
46  // for each column...
47  bool isFirstColumn = true;
48  for (const auto& columnName : columnNames) {
49  // get column descriptor
50  const auto cd = cat.getMetadataForColumn(td->tableId, columnName);
51  if (!cd) {
52  throw std::runtime_error("Failed to find physical column '" + columnName + "'");
53  }
54 
55  // find the chunk
56  ChunkKey chunkKey{
57  cat.getCurrentDB().dbId, td->tableId, cd->columnId, fragment.fragmentId};
58  auto chunkMetaIt = fragment.getChunkMetadataMap().find(cd->columnId);
59  if (chunkMetaIt == fragment.getChunkMetadataMap().end()) {
60  throw std::runtime_error("Failed to find the chunk for column: " +
61  cd->columnName + " in table: " + td->tableName +
62  ". The column was likely deleted via a table truncate.");
63  }
64 
65  // get the chunk
66  std::shared_ptr<Chunk_NS::Chunk> chunk =
68  &cat.getDataMgr(),
69  chunkKey,
71  0,
72  chunkMetaIt->second->numBytes,
73  chunkMetaIt->second->numElements);
74  CHECK(chunk);
75 
76  // the size
77  size_t chunkSize = chunkMetaIt->second->numElements;
78 
79  // and an iterator
80  ChunkIter chunkIt = chunk->begin_iterator(chunkMetaIt->second);
81 
82  // populate table entry
83  if (isFirstColumn) {
84  // store the size
85  std::get<0>(table.back()) = chunkSize;
86  isFirstColumn = false;
87  } else {
88  // all columns chunks must be the same size
89  CHECK(std::get<0>(table.back()) == chunkSize);
90  }
91  std::get<1>(table.back()).push_back(chunk);
92  std::get<2>(table.back()).push_back(chunkIt);
93  }
94  }
95 
96  // prefix-sum the per-fragment counts
97  // these are now "first row of next fragment"
98  size_t sum = 0;
99  for (auto& entry : table) {
100  sum += std::get<0>(entry);
101  std::get<0>(entry) = sum;
102  }
103 
104  // done
105  return table;
106 }
107 
109  size_t rowid,
110  size_t& rowOffset) {
111  rowOffset = 0;
112  for (auto& entry : table) {
113  if (rowid < std::get<0>(entry)) {
114  return std::get<2>(entry);
115  }
116  rowOffset = std::get<0>(entry);
117  }
118  CHECK(false);
119  static ChunkIterVector emptyChunkIterVector;
120  return emptyChunkIterVector;
121 }
std::vector< int > ChunkKey
Definition: types.h:36
std::string cat(Ts &&...args)
ChunkAccessorTable getChunkAccessorTable(const Catalog_Namespace::Catalog &cat, const TableDescriptor *td, const std::vector< std::string > &columnNames)
Class for a per-database catalog. Also includes metadata for the current database and the current user.
Definition: Catalog.h:143
std::string tableName
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266
This file contains the class specification and related data structures for Catalog.
ChunkIterVector & getChunkItersAndRowOffset(ChunkAccessorTable &table, size_t rowid, size_t &rowOffset)
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:265
const ColumnDescriptor * getMetadataForColumn(int tableId, const std::string &colName) const
std::vector< ChunkIter > ChunkIterVector
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
std::vector< std::tuple< size_t, std::vector< std::shared_ptr< Chunk_NS::Chunk >>, ChunkIterVector >> ChunkAccessorTable
#define CHECK(condition)
Definition: Logger.h:291
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31