OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
FsiChunkUtils.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "FsiChunkUtils.h"
18 #include "Catalog/Catalog.h"
19 
20 namespace foreign_storage {
22  const ChunkKey& chunk_key,
23  const std::map<ChunkKey, std::shared_ptr<ChunkMetadata>>& chunk_metadata_map,
24  const std::map<ChunkKey, AbstractBuffer*>& buffers,
25  Chunk_NS::Chunk& chunk) {
26  auto catalog =
28  CHECK(catalog);
29 
30  ChunkKey data_chunk_key = chunk_key;
31  AbstractBuffer* data_buffer = nullptr;
32  AbstractBuffer* index_buffer = nullptr;
33  const auto column = catalog->getMetadataForColumn(chunk_key[CHUNK_KEY_TABLE_IDX],
34  chunk_key[CHUNK_KEY_COLUMN_IDX]);
35 
36  if (column->columnType.is_varlen_indeed()) {
37  data_chunk_key.push_back(1);
38  ChunkKey index_chunk_key = chunk_key;
39  index_chunk_key.push_back(2);
40 
41  CHECK(buffers.find(data_chunk_key) != buffers.end());
42  CHECK(buffers.find(index_chunk_key) != buffers.end());
43 
44  data_buffer = buffers.find(data_chunk_key)->second;
45  index_buffer = buffers.find(index_chunk_key)->second;
46  CHECK_EQ(data_buffer->size(), static_cast<size_t>(0));
47  CHECK_EQ(index_buffer->size(), static_cast<size_t>(0));
48 
49  size_t index_offset_size{0};
50  if (column->columnType.is_string() || column->columnType.is_geometry()) {
51  index_offset_size = sizeof(StringOffsetT);
52  } else if (column->columnType.is_array()) {
53  index_offset_size = sizeof(ArrayOffsetT);
54  } else {
55  UNREACHABLE();
56  }
57  if (chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end()) {
58  index_buffer->reserve(index_offset_size *
59  (chunk_metadata_map.at(data_chunk_key)->numElements + 1));
60  }
61  } else {
62  data_chunk_key = chunk_key;
63  CHECK(buffers.find(data_chunk_key) != buffers.end());
64  data_buffer = buffers.find(data_chunk_key)->second;
65  }
66  if (chunk_metadata_map.find(data_chunk_key) != chunk_metadata_map.end()) {
67  data_buffer->reserve(chunk_metadata_map.at(data_chunk_key)->numBytes);
68  }
69 
70  chunk.setPinnable(false);
71  chunk.setColumnDesc(column);
72  chunk.setBuffer(data_buffer);
73  chunk.setIndexBuffer(index_buffer);
74  chunk.initEncoder();
75 }
76 
77 std::shared_ptr<ChunkMetadata> get_placeholder_metadata(const SQLTypeInfo& type,
78  size_t num_elements) {
79  ForeignStorageBuffer empty_buffer;
80  // Use default encoder metadata as in parquet wrapper
81  empty_buffer.initEncoder(type);
82 
83  auto chunk_metadata = empty_buffer.getEncoder()->getMetadata(type);
84  chunk_metadata->numElements = num_elements;
85 
86  if (!type.is_varlen_indeed()) {
87  chunk_metadata->numBytes = type.get_size() * num_elements;
88  }
89  // min/max not set by default for arrays, so get from elem type encoder
90  if (type.is_array()) {
91  ForeignStorageBuffer scalar_buffer;
92  scalar_buffer.initEncoder(type.get_elem_type());
93  auto scalar_metadata = scalar_buffer.getEncoder()->getMetadata(type.get_elem_type());
94  chunk_metadata->chunkStats.min = scalar_metadata->chunkStats.min;
95  chunk_metadata->chunkStats.max = scalar_metadata->chunkStats.max;
96  }
97 
98  return chunk_metadata;
99 }
100 
102  auto [db_id, tb_id] = get_table_prefix(key);
103  auto catalog = Catalog_Namespace::SysCatalog::instance().getCatalog(db_id);
104  CHECK(catalog);
105  auto table = catalog->getForeignTable(tb_id);
106  CHECK(table);
107  return *table;
108 }
109 
110 bool is_system_table_chunk_key(const ChunkKey& chunk_key) {
111  return get_foreign_table_for_key(chunk_key).is_system_table;
112 }
113 
114 bool is_replicated_table_chunk_key(const ChunkKey& chunk_key) {
116 }
117 
118 bool is_append_table_chunk_key(const ChunkKey& chunk_key) {
119  return get_foreign_table_for_key(chunk_key).isAppendMode();
120 }
121 
// Predicate on a chunk key. The visible part of the condition requires a distributed
// deployment (dist::is_distributed()) on a node that is not the aggregator
// (!dist::is_aggregator()).
// NOTE(review): listing line 124, the remainder of this `&&` expression, is missing from
// this extract, so the condition shown is incomplete. Restore it from the upstream
// FsiChunkUtils.cpp before relying on this function.
122 bool is_shardable_key(const ChunkKey& key) {
123  return (dist::is_distributed() && !dist::is_aggregator() &&
125 }
126 
127 // If we want to change the way we shard foreign tables we can do it in this function.
// NOTE(review): the body (listing lines 129-132) is missing from this extract, so the
// mapping rule itself is not visible here. Presumably it relates the key's fragment
// (get_fragment) to g_distributed_leaf_idx / g_distributed_num_leaves, which this page's
// symbol index references but no visible line uses. Confirm against the upstream source.
128 bool fragment_maps_to_leaf(const ChunkKey& key) {
133 }
134 
136  return (is_shardable_key(key) && !fragment_maps_to_leaf(key));
137 }
138 } // namespace foreign_storage
#define CHECK_EQ(x, y)
Definition: Logger.h:301
void setPinnable(bool pinnable)
Definition: Chunk.h:63
std::vector< int > ChunkKey
Definition: types.h:36
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
bool is_system_table_chunk_key(const ChunkKey &chunk_key)
void setIndexBuffer(AbstractBuffer *ib)
Definition: Chunk.h:152
#define CHUNK_KEY_DB_IDX
Definition: types.h:38
#define UNREACHABLE()
Definition: Logger.h:338
bool is_append_table_chunk_key(const ChunkKey &chunk_key)
void initEncoder(const SQLTypeInfo &tmp_sql_type)
bool is_replicated_table_chunk_key(const ChunkKey &chunk_key)
void setBuffer(AbstractBuffer *b)
Definition: Chunk.h:150
int32_t StringOffsetT
Definition: sqltypes.h:1495
virtual void getMetadata(const std::shared_ptr< ChunkMetadata > &chunkMetadata)
Definition: Encoder.cpp:231
This file contains the class specification and related data structures for Catalog.
bool key_does_not_shard_to_leaf(const ChunkKey &key)
std::shared_ptr< ChunkMetadata > get_placeholder_metadata(const SQLTypeInfo &type, size_t num_elements)
static SysCatalog & instance()
Definition: SysCatalog.h:343
int get_fragment(const ChunkKey &key)
Definition: types.h:52
#define CHUNK_KEY_TABLE_IDX
Definition: types.h:39
void init_chunk_for_column(const ChunkKey &chunk_key, const std::map< ChunkKey, std::shared_ptr< ChunkMetadata >> &chunk_metadata_map, const std::map< ChunkKey, AbstractBuffer * > &buffers, Chunk_NS::Chunk &chunk)
An AbstractBuffer is a unit of data management for a data manager.
bool fragment_maps_to_leaf(const ChunkKey &key)
bool isAppendMode() const
Checks if the table is in append mode.
bool is_shardable_key(const ChunkKey &key)
const foreign_storage::ForeignTable & get_foreign_table_for_key(const ChunkKey &key)
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:98
bool is_aggregator()
Definition: distributed.cpp:33
bool table_is_replicated(const TableDescriptor *td)
int32_t ArrayOffsetT
Definition: sqltypes.h:1496
void initEncoder()
Definition: Chunk.cpp:290
int32_t g_distributed_num_leaves
Definition: Catalog.cpp:99
std::pair< int, int > get_table_prefix(const ChunkKey &key)
Definition: types.h:62
#define CHECK(condition)
Definition: Logger.h:291
void setColumnDesc(const ColumnDescriptor *cd)
Definition: Chunk.h:67
bool is_varlen_indeed() const
Definition: sqltypes.h:637
#define CHUNK_KEY_COLUMN_IDX
Definition: types.h:40
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977
virtual void reserve(size_t num_bytes)=0
bool is_array() const
Definition: sqltypes.h:585
bool is_distributed()
Definition: distributed.cpp:21