OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
SharedDictionaryValidator.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
18 
19 #include <memory>
20 
21 #include "Logger/Logger.h"
22 
25  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs) {
26  size_t idx = 0;
27  for (; idx < shared_dict_defs.size(); idx++) {
28  if (!shared_dict_defs[idx].get_column().compare(cur_node.get_column()) &&
29  !shared_dict_defs[idx].get_foreign_table().compare(
30  cur_node.get_foreign_table()) &&
31  !shared_dict_defs[idx].get_foreign_column().compare(
32  cur_node.get_foreign_column())) {
33  break;
34  }
35  }
36  // Make sure we have found the shared dictionary definition
37  CHECK_LT(idx, shared_dict_defs.size());
38 
39  size_t ret_val_idx = idx;
40  for (size_t j = 0; j < shared_dict_defs.size(); j++) {
41  for (size_t i = 0; i < shared_dict_defs.size(); ++i) {
42  if (!shared_dict_defs[i].get_column().compare(
43  shared_dict_defs[ret_val_idx].get_foreign_column())) {
44  ret_val_idx = i;
45  break;
46  }
47  }
48  if (shared_dict_defs[ret_val_idx].get_foreign_table().compare(
49  cur_node.get_foreign_table())) {
50  // found a dictionary share definition which shares the dict outside this table to
51  // be created
52  break;
53  }
54  }
55 
56  return shared_dict_defs[ret_val_idx];
57 }
58 
59 // Make sure the dependency of shared dictionaries does not form a cycle
61  const Parser::CreateTableBaseStmt* stmt,
62  const Parser::SharedDictionaryDef* shared_dict_def,
63  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs,
64  const std::list<ColumnDescriptor>& columns) {
65  std::string reference_col_qualified_name =
66  shared_dict_def->get_foreign_column() + "." + shared_dict_def->get_foreign_table();
67  if (!shared_dict_def->get_column().compare(shared_dict_def->get_foreign_column())) {
68  throw std::runtime_error(
69  "Dictionary cannot be shared with itself. For dictionary : " +
70  reference_col_qualified_name);
71  }
72  auto table_name = stmt->get_table();
73  CHECK(!shared_dict_def->get_foreign_table().compare(*table_name));
74  auto col = std::find_if(
75  columns.rbegin(), columns.rend(), [shared_dict_def](const ColumnDescriptor& elem) {
76  return !elem.columnName.compare(shared_dict_def->get_column());
77  });
78  CHECK(col != columns.rend());
79  auto ref_col =
80  std::find_if(col, columns.rend(), [shared_dict_def](const ColumnDescriptor& elem) {
81  return !elem.columnName.compare(shared_dict_def->get_foreign_column());
82  });
83 
84  if (ref_col == columns.rend()) {
85  throw std::runtime_error("Dictionary dependencies might create a cycle for " +
86  shared_dict_def->get_column() + "referencing " +
87  reference_col_qualified_name);
88  }
89 }
90 
91 namespace {
92 const ColumnDescriptor* lookup_column(const std::string& name,
93  const std::list<ColumnDescriptor>& columns) {
94  for (const auto& cd : columns) {
95  if (cd.columnName == name) {
96  return &cd;
97  }
98  }
99  return nullptr;
100 }
101 
102 const ColumnDescriptor* lookup_column(const std::string& name,
103  const std::list<const ColumnDescriptor*>& columns) {
104  for (const auto& cd : columns) {
105  if (cd->columnName == name) {
106  return cd;
107  }
108  }
109  return nullptr;
110 }
111 
113  const std::string& name,
114  const std::list<std::unique_ptr<Parser::TableElement>>& table_element_list) {
115  for (const auto& e : table_element_list) {
116  const auto col_def = dynamic_cast<Parser::ColumnDef*>(e.get());
117  if (!col_def || *col_def->get_column_name() != name) {
118  continue;
119  }
120  return col_def->get_compression();
121  }
122  UNREACHABLE();
123  return nullptr;
124 }
125 
126 } // namespace
127 
128 // Validate shared dictionary directive against the list of columns seen so far.
130  const Parser::CreateTableBaseStmt* stmt,
131  const Parser::SharedDictionaryDef* shared_dict_def,
132  const std::list<ColumnDescriptor>& columns,
133  const std::vector<Parser::SharedDictionaryDef>& shared_dict_defs_so_far,
134  const Catalog_Namespace::Catalog& catalog) {
135  CHECK(shared_dict_def);
136  auto table_name = stmt->get_table();
137  const auto cd_ptr = lookup_column(shared_dict_def->get_column(), columns);
138  const auto col_qualified_name = *table_name + "." + shared_dict_def->get_column();
139  if (!cd_ptr) {
140  throw std::runtime_error("Column " + col_qualified_name + " doesn't exist");
141  }
142  if (!cd_ptr->columnType.is_string() ||
143  cd_ptr->columnType.get_compression() != kENCODING_DICT) {
144  throw std::runtime_error("Column " + col_qualified_name +
145  " must be a dictionary encoded string");
146  }
147  const std::list<std::unique_ptr<Parser::TableElement>>& table_element_list =
148  stmt->get_table_element_list();
149  if (get_compression_for_column(shared_dict_def->get_column(), table_element_list)) {
150  throw std::runtime_error(
151  "Column " + col_qualified_name +
152  " shouldn't specify an encoding, it borrows it from the referenced column");
153  }
154 
155  // NOTE(Misiu): Unfortunately we have overloaded the term "foreign table" here. In
156  // SharedDictionaryDef the "foreign table" is a table that is sharing a dictionary
157  // which it did not create. This is different from an FSI (HeavyConnect) "foreign
158  // table" which is a specific type of table who's data is stored outside of the database
159  // system. Currently string dictionaries have some unique handling in FSI (they are
160  // populated lazily) so we cannot share them with non-foreign (non-FSI) tables.
161  const auto foreign_table_name = shared_dict_def->get_foreign_table();
162  const auto foreign_td = catalog.getMetadataForTable(foreign_table_name);
163  if (!foreign_td && table_name->compare(foreign_table_name)) {
164  throw std::runtime_error("Table " + foreign_table_name + " doesn't exist");
165  }
166 
167  if (foreign_td) { // Dictionary is shared with another table
168  if (foreign_td->isForeignTable()) { // FSI foreign table
169  // The 'create foreign table' syntax does not support sharing dictionaries, so we
170  // only have to worry about foreign tables being the target of sharing, not the
171  // source.
172  throw std::runtime_error(
173  "Attempting to share dictionary with foreign table " + foreign_table_name +
174  ". Foreign table dictionaries cannot currently be shared.");
175  }
176  const auto reference_columns =
177  catalog.getAllColumnMetadataForTable(foreign_td->tableId, false, false, false);
178  const auto reference_cd_ptr =
179  lookup_column(shared_dict_def->get_foreign_column(), reference_columns);
180  if (!reference_cd_ptr) {
181  throw std::runtime_error("Could not find referenced column " +
182  shared_dict_def->get_foreign_column() + " in table " +
183  foreign_td->tableName);
184  }
185  if (!reference_cd_ptr->columnType.is_string() ||
186  reference_cd_ptr->columnType.get_compression() != kENCODING_DICT) {
187  const auto reference_col_qualified_name =
188  reference_cd_ptr->columnName + "." + shared_dict_def->get_foreign_column();
189  throw std::runtime_error("Referenced column " + reference_col_qualified_name +
190  " must be a dictionary encoded string column");
191  }
192  } else {
193  // The dictionary is to be shared within table
194  const auto reference_col_qualified_name =
195  *table_name + "." + shared_dict_def->get_foreign_column();
196  const auto reference_cd_ptr =
197  lookup_column(shared_dict_def->get_foreign_column(), columns);
198  if (!reference_cd_ptr) {
199  throw std::runtime_error("Column " + reference_col_qualified_name +
200  " doesn't exist");
201  }
202  if (!reference_cd_ptr->columnType.is_string() ||
203  reference_cd_ptr->columnType.get_compression() != kENCODING_DICT) {
204  throw std::runtime_error("Column " + reference_col_qualified_name +
205  " must be a dictionary encoded string");
206  }
208  stmt, shared_dict_def, shared_dict_defs_so_far, columns);
209  }
210  const auto it =
211  std::find_if(shared_dict_defs_so_far.begin(),
212  shared_dict_defs_so_far.end(),
213  [shared_dict_def](const Parser::SharedDictionaryDef& elem) {
214  return elem.get_column() == shared_dict_def->get_column();
215  });
216  if (it != shared_dict_defs_so_far.end()) {
217  throw std::runtime_error("Duplicate shared dictionary hint for column " +
218  *table_name + "." + shared_dict_def->get_column());
219  }
220 }
const Parser::SharedDictionaryDef compress_reference_path(Parser::SharedDictionaryDef cur_node, const std::vector< Parser::SharedDictionaryDef > &shared_dict_defs)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143
virtual const std::list< std::unique_ptr< TableElement > > & get_table_element_list() const =0
#define UNREACHABLE()
Definition: Logger.h:338
const std::string & get_foreign_table() const
Definition: ParserNode.h:933
const std::string & get_column() const
Definition: ParserNode.h:931
void validate_shared_dictionary_order(const Parser::CreateTableBaseStmt *stmt, const Parser::SharedDictionaryDef *shared_dict_def, const std::vector< Parser::SharedDictionaryDef > &shared_dict_defs, const std::list< ColumnDescriptor > &columns)
const ColumnDescriptor * lookup_column(const std::string &name, const std::list< ColumnDescriptor > &columns)
const Parser::CompressDef * get_compression_for_column(const std::string &name, const std::list< std::unique_ptr< Parser::TableElement >> &table_element_list)
virtual const std::string * get_table() const =0
const std::string & get_foreign_column() const
Definition: ParserNode.h:935
specifies the content in-memory of a row in the column metadata table
#define CHECK_LT(x, y)
Definition: Logger.h:303
const CompressDef * get_compression() const
Definition: ParserNode.h:829
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Definition: Catalog.cpp:2175
#define CHECK(condition)
Definition: Logger.h:291
void validate_shared_dictionary(const Parser::CreateTableBaseStmt *stmt, const Parser::SharedDictionaryDef *shared_dict_def, const std::list< ColumnDescriptor > &columns, const std::vector< Parser::SharedDictionaryDef > &shared_dict_defs_so_far, const Catalog_Namespace::Catalog &catalog)
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
string name
Definition: setup.in.py:72
std::string columnName