OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
RelAlgDagSerializer.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <boost/archive/text_iarchive.hpp>
20 #include <boost/archive/text_oarchive.hpp>
21 #include <boost/serialization/access.hpp>
22 #include <boost/serialization/shared_ptr.hpp>
23 #include <boost/serialization/unique_ptr.hpp>
24 #include <boost/serialization/unordered_map.hpp>
25 #include <boost/serialization/variant.hpp>
26 #include <boost/serialization/vector.hpp>
27 
28 #include "QueryEngine/RelAlgDag.h"
36 #include "Shared/scope.h"
37 
47 // NOTE: RelTranslatedJoin is not in this list as it is a RelAlgNode only created
48 // during query execution and therefore not relevant here as RelAlgDag serialization
49 // should only be performed before query execution to avoid having to serialize any query
50 // state
// Comma-separated list of the concrete RelAlgNode subclasses handled by this
// serializer. It is expanded in place as a list of template type arguments, so the
// order and spelling of the entries must be kept stable.
#define REL_ALG_NODE_DERIVED_CLASSES                                  \
  RelScan, RelProject, RelAggregate, RelJoin, RelFilter,              \
      RelLeftDeepInnerJoin, RelCompound, RelSort, RelModify,          \
      RelTableFunction, RelLogicalValues, RelLogicalUnion
55 
// Comma-separated list of the concrete Rex subclasses handled by this serializer.
// It is expanded in place as a list of template type arguments, so the order and
// spelling of the entries must be kept stable.
#define REX_DERIVED_CLASSES                                            \
  RexAbstractInput, RexLiteral, RexOperator, RexSubQuery, RexInput,    \
      RexCase, RexFunctionOperator, RexWindowFunctionOperator, RexRef, \
      RexAgg
59 
60 namespace {
61 
65 template <class T, class... Ts>
67  : std::bool_constant<(std::is_same_v<T, typename std::remove_cv_t<Ts>> || ...)> {};
68 
73 template <class T>
76 
77 template <class T>
79 
83 template <class T>
85 
86 template <class T>
87 inline constexpr bool is_rex_class_v = is_rex_class<T>::value;
88 
92 template <class T>
94  : std::bool_constant<is_rel_alg_node_class_v<T> || is_rex_class_v<T> ||
95  std::is_same_v<T, RelAlgDag>> {};
96 
97 template <class T>
98 inline constexpr bool all_serializable_rel_alg_classes_v =
100 
101 } // namespace
102 
119  template <class Archive,
120  class RexClass,
121  typename std::enable_if_t<is_rex_class_v<RexClass>>* = nullptr>
122  static void serialize(Archive& ar, RexClass& obj, const unsigned int version) {
123  if constexpr (std::is_same_v<Rex, RexClass>) {
124  (ar & obj.hash_);
125  } else if constexpr (std::is_same_v<RexScalar, RexClass>) {
126  (ar & boost::serialization::base_object<Rex>(obj));
127  } else if constexpr (std::is_same_v<RexAbstractInput, RexClass>) {
128  (ar & boost::serialization::base_object<RexScalar>(obj));
129  (ar & obj.in_index_);
130  } else if constexpr (std::is_same_v<RexLiteral, RexClass>) {
131  (ar & boost::serialization::base_object<RexScalar>(obj));
132  (ar & obj.literal_);
133  (ar & obj.type_);
134  (ar & obj.target_type_);
135  (ar & obj.scale_);
136  (ar & obj.precision_);
137  (ar & obj.target_scale_);
138  (ar & obj.target_precision_);
139  } else if constexpr (std::is_same_v<RexOperator, RexClass>) {
140  (ar & boost::serialization::base_object<RexScalar>(obj));
141  (ar & obj.op_);
142  (ar & obj.operands_);
143  (ar & obj.type_);
144  } else if constexpr (std::is_same_v<RexSubQuery, RexClass>) {
145  (ar & boost::serialization::base_object<RexScalar>(obj));
146  (ar & obj.type_);
147 
148  // Execution result should not be set before serialization. If it is means
149  // RelAlgExecutor got its hands on it first before serialization. This is not
150  // advised. Serialization should happen before any RelAlgExecutor processing.
151  CHECK(obj.result_);
152  CHECK(*obj.result_ == nullptr);
153 
154  // BUT we still need to serialize the RexSubQuery::result_. It is a shared_ptr of a
155  // shared_ptr. The outer shared ptr should always be defined, pointing to the
156  // interior shared_ptr that should be null. The way it is designed, this 2-tiered
157  // shared ptr acts as a link between RexSubQuery instances that were deep copied
158  // from a parent. A result should not exist, but the link should, so we need to
159  // serialize result_ (or find a better linking mechanism)
160  (ar & obj.result_);
161 
162  (ar & obj.ra_);
163  } else if constexpr (std::is_same_v<RexInput, RexClass>) {
164  (ar & boost::serialization::base_object<RexAbstractInput>(obj));
165  (ar & obj.node_);
166  } else if constexpr (std::is_same_v<RexCase, RexClass>) {
167  (ar & boost::serialization::base_object<RexScalar>(obj));
168  (ar & obj.expr_pair_list_);
169  (ar & obj.else_expr_);
170  } else if constexpr (std::is_same_v<RexFunctionOperator, RexClass>) {
171  (ar & boost::serialization::base_object<RexOperator>(obj));
172  (ar & obj.name_);
173  } else if constexpr (std::is_same_v<RexWindowFunctionOperator, RexClass>) {
174  (ar & boost::serialization::base_object<RexFunctionOperator>(obj));
175  (ar & obj.kind_);
176  (ar & obj.partition_keys_);
177  (ar & obj.order_keys_);
178  (ar & obj.collation_);
179  (ar & obj.frame_start_bound_);
180  (ar & obj.frame_end_bound_);
181  (ar & obj.is_rows_);
182  } else if constexpr (std::is_same_v<RexRef, RexClass>) {
183  (ar & boost::serialization::base_object<RexScalar>(obj));
184  (ar & obj.index_);
185  } else if constexpr (std::is_same_v<RexAgg, RexClass>) {
186  (ar & boost::serialization::base_object<Rex>(obj));
187  (ar & obj.agg_);
188  (ar & obj.distinct_);
189  (ar & obj.type_);
190  (ar & obj.operands_);
191  } else {
192  static_assert(!sizeof(RexClass), "Unhandled Rex class during serialization.");
193  }
194  }
195 
205  template <class Archive, class... RelAlgNodeClasses>
207  (ar.template register_type<RelAlgNodeClasses>(), ...);
208  }
209 
218  template <class Archive,
219  class RelAlgClass,
220  typename std::enable_if_t<is_rel_alg_node_class_v<RelAlgClass>>* = nullptr>
221  static void serialize(Archive& ar, RelAlgClass& obj, const unsigned int version) {
222  if constexpr (std::is_same_v<RelAlgNode, RelAlgClass>) {
223  (ar & obj.inputs_);
224  (ar & obj.id_);
225  (ar & obj.hash_);
226  (ar & obj.is_nop_);
227 
228  // NOTE: not serializing the id_in_plan_tree_, context_data_, targets_metainfo_,
229  // dag_node_id_, query_plan_dag_, & query_plan_dag_hash_ members. They are only
230  // needed for RelAlgExecutor pathways and not needed at the time serialization
231  // is needed.
232  } else if constexpr (std::is_same_v<RelScan, RelAlgClass>) {
233  (ar & boost::serialization::base_object<RelAlgNode>(obj));
234 
235  // NOTE: we're not serializing anything in regard to the member RelScan::td_. The
236  // table descriptor is instead a construction-dependent argument and will be
237  // serialized as part of the save/load contruction data. See
238  // boost::serialization::save_construct_data override below.
239  (ar & obj.field_names_);
240  (ar & obj.hint_applied_);
241  (ar & obj.hints_);
242  } else if constexpr (std::is_same_v<ModifyManipulationTarget, RelAlgClass>) {
243  (ar & obj.is_update_via_select_);
244  (ar & obj.is_delete_via_select_);
245  (ar & obj.varlen_update_required_);
246  (ar & obj.target_columns_);
247  (ar & obj.force_rowwise_output_);
248 
249  // NOTE: we're not serializing table_descriptor_. The table descriptor is
250  // instead a constructor-dependent argument and will be saved/loaded as part of
251  // custom contructor data. See: boost::serializer::load_construct_data below for
252  // more details.
253  } else if constexpr (std::is_same_v<RelProject, RelAlgClass>) {
254  (ar & boost::serialization::base_object<RelAlgNode>(obj));
255  (ar & boost::serialization::base_object<ModifyManipulationTarget>(obj));
256  (ar & obj.scalar_exprs_);
257  (ar & obj.fields_);
258  (ar & obj.hint_applied_);
259  (ar & obj.hints_);
260  (ar & obj.has_pushed_down_window_expr_);
261  } else if constexpr (std::is_same_v<RelAggregate, RelAlgClass>) {
262  (ar & boost::serialization::base_object<RelAlgNode>(obj));
263  (ar & obj.groupby_count_);
264  (ar & obj.agg_exprs_);
265  (ar & obj.fields_);
266  (ar & obj.hint_applied_);
267  (ar & obj.hints_);
268  } else if constexpr (std::is_same_v<RelJoin, RelAlgClass>) {
269  (ar & boost::serialization::base_object<RelAlgNode>(obj));
270  (ar & obj.condition_);
271  (ar & obj.join_type_);
272  (ar & obj.hint_applied_);
273  (ar & obj.hints_);
274  } else if constexpr (std::is_same_v<RelFilter, RelAlgClass>) {
275  (ar & boost::serialization::base_object<RelAlgNode>(obj));
276  (ar & obj.filter_);
277  } else if constexpr (std::is_same_v<RelLeftDeepInnerJoin, RelAlgClass>) {
278  (ar & boost::serialization::base_object<RelAlgNode>(obj));
279  (ar & obj.condition_);
280  (ar & obj.outer_conditions_per_level_);
281  (ar & obj.original_filter_);
282  (ar & obj.original_joins_);
283  } else if constexpr (std::is_same_v<RelCompound, RelAlgClass>) {
284  (ar & boost::serialization::base_object<RelAlgNode>(obj));
285  (ar & boost::serialization::base_object<ModifyManipulationTarget>(obj));
286 
287  (ar & obj.filter_expr_);
288  (ar & obj.groupby_count_);
289  (ar & obj.agg_exprs_);
290  (ar & obj.fields_);
291  (ar & obj.is_agg_);
292  (ar & obj.scalar_sources_);
293  (ar & obj.target_exprs_);
294  (ar & obj.hint_applied_);
295  (ar & obj.hints_);
296  } else if constexpr (std::is_same_v<RelSort, RelAlgClass>) {
297  (ar & boost::serialization::base_object<RelAlgNode>(obj));
298  (ar & obj.collation_);
299  (ar & obj.limit_);
300  (ar & obj.offset_);
301  } else if constexpr (std::is_same_v<RelModify, RelAlgClass>) {
302  (ar & boost::serialization::base_object<RelAlgNode>(obj));
303  // NOTE: not serializing anything in regard to RelModify::catalog_ or
304  // table_descriptor_ members. They will be used as constructor-dependent arguments
305  // instead and will be saved/loaded with custom constuctor data. See:
306  // RelAlgSerializer for more.
307  (ar & obj.flattened_);
308  (ar & obj.operation_);
309  (ar & obj.target_column_list_);
310  } else if constexpr (std::is_same_v<RelTableFunction, RelAlgClass>) {
311  (ar & boost::serialization::base_object<RelAlgNode>(obj));
312  (ar & obj.function_name_);
313  (ar & obj.fields_);
314  (ar & obj.col_inputs_);
315  (ar & obj.table_func_inputs_);
316  (ar & obj.target_exprs_);
317  } else if constexpr (std::is_same_v<RelLogicalValues, RelAlgClass>) {
318  (ar & boost::serialization::base_object<RelAlgNode>(obj));
319  (ar & obj.tuple_type_);
320  (ar & obj.values_);
321  } else if constexpr (std::is_same_v<RelLogicalUnion, RelAlgClass>) {
322  (ar & boost::serialization::base_object<RelAlgNode>(obj));
323  (ar & obj.is_all_);
324  } else {
325  static_assert(!sizeof(RelAlgClass),
326  "Unhandled RelAlgNode class during serialization");
327  }
328  }
329 
333  template <class Archive>
334  static void serialize(Archive& ar, RelAlgDag& rel_alg_dag, const unsigned int version) {
335  // Need to register all RelAlgNode and RexRexScalar-derived classes for
336  // serialization. This is to ensure derived classes referenced via polymorphic
337  // pointer get properly designated for serialization.
338  registerClassesWithArchive<Archive, REL_ALG_NODE_DERIVED_CLASSES>(ar);
339  registerClassesWithArchive<Archive, REX_DERIVED_CLASSES>(ar);
340 
341  // NOTE: we are not archiving RelTranslatedJoin as it is a RelAlgNode only created
342  // during query execution and therefore not relevant here as the serialization
343  // archive for the RelAlgDag should only be saved/loaded before query execution to
344  // avoid having to serialize any query state
345 
346  // now archive relevant RelAlgDag members
347  (ar & rel_alg_dag.build_state_);
348  (ar & rel_alg_dag.nodes_);
349  (ar & rel_alg_dag.subqueries_);
350  (ar & rel_alg_dag.query_hint_);
351  (ar & rel_alg_dag.global_hints_);
352  }
353 };
354 
355 namespace boost {
356 namespace serialization {
357 
367 template <
368  class RelAlgType,
369  typename std::enable_if_t<all_serializable_rel_alg_classes_v<RelAlgType>>* = nullptr>
370 void serialize(boost::archive::text_iarchive& ar,
371  RelAlgType& obj,
372  const unsigned int version) {
373  RelAlgDagSerializer::serialize(ar, obj, version);
374 }
375 
376 template <
377  class RelAlgType,
378  typename std::enable_if_t<all_serializable_rel_alg_classes_v<RelAlgType>>* = nullptr>
379 void serialize(boost::archive::text_oarchive& ar,
380  RelAlgType& obj,
381  const unsigned int version) {
382  RelAlgDagSerializer::serialize(ar, obj, version);
383 }
384 
388 template <class Archive>
389 void serialize(Archive& ar, boost::blank& blank, const unsigned int version) {
390  // no-op. does nothing with an empty class
391 }
392 
393 /*******************************************************************************
394  * The following serializes constructor arguments for TableDescriptor-dependent
395  * classes, which are RelScan, RelProject, RelCompound, & RelModify.
396  *******************************************************************************/
397 
401 template <class T>
403  : std::bool_constant<std::is_same_v<RelScan, typename std::remove_cv_t<T>> ||
404  std::is_same_v<RelProject, typename std::remove_cv_t<T>> ||
405  std::is_same_v<RelCompound, typename std::remove_cv_t<T>> ||
406  std::is_same_v<RelModify, typename std::remove_cv_t<T>>> {};
407 
408 template <class T>
410 
411 template <class T>
413  : std::bool_constant<std::is_same_v<RelProject, typename std::remove_cv_t<T>> ||
414  std::is_same_v<RelCompound, typename std::remove_cv_t<T>>> {};
415 
416 template <class T>
417 inline constexpr bool is_modify_target_rel_alg_node_v =
419 
431 template <class RelAlgNodeType,
432  typename std::enable_if_t<is_catalog_rel_alg_node_v<RelAlgNodeType>>* = nullptr>
433 inline void save_construct_data(boost::archive::text_oarchive& ar,
434  const RelAlgNodeType* node,
435  const unsigned int version) {
436  const Catalog_Namespace::Catalog* catalog{nullptr};
437  if constexpr (is_modify_target_rel_alg_node_v<RelAlgNodeType>) {
438  catalog = node->getModifiedTableCatalog();
439  } else {
440  catalog = &node->getCatalog();
441  }
442 
443  if (catalog) {
444  ar << catalog->name();
445  } else {
446  ar << std::string();
447  }
448 
449  auto* td = node->getTableDescriptor();
450  if (td) {
451  CHECK(!td->tableName.empty());
452  ar << td->tableName;
453  } else {
454  // we need to serialize an empty string as deserialization will expect to see a
455  // string. The empty string will indicate a null table descriptor. There are many
456  // circumstances in which a catalog-dependent RelAlgNode might have a null
457  // TableDescriptor. Generally speaking, RelScan and RelModify nodes require a valid
458  // table descriptor. RelCompound and RelProject do not.
459  ar << std::string();
460  }
461 }
462 
466 template <class RelAlgNodeType>
467 inline void construct_catalog_rel_alg_node(RelAlgNodeType* node,
469  const TableDescriptor* td) {
470  ::new (node) RelAlgNodeType(td, cat);
471 }
472 
478  const TableDescriptor* td) {
479  ::new (node) RelModify(cat, td);
480 }
481 
487  const TableDescriptor* td) {
488  ::new (node) RelScan(td, cat);
489 }
490 
501 template <
502  class RelAlgNodeType,
503  typename std::enable_if_t<is_catalog_rel_alg_node<RelAlgNodeType>::value>* = nullptr>
504 inline void load_construct_data(boost::archive::text_iarchive& ar,
505  RelAlgNodeType* node,
506  const unsigned int version) {
507  std::string db_name;
508  ar >> db_name;
509  const Catalog_Namespace::Catalog* cat{nullptr};
510  const TableDescriptor* td{nullptr};
511  if (!db_name.empty()) {
513  CHECK(cat) << "Catalog not found for database: " << db_name;
514  }
515 
516  std::string table_name;
517  ar >> table_name;
518  if (!table_name.empty()) {
519  CHECK(cat);
520  td = cat->getMetadataForTable(table_name, false);
521  CHECK(td) << "Table metadata not found for table: " << table_name
522  << " in catalog: " << cat->name();
523  }
524 
525  if constexpr (is_modify_target_rel_alg_node_v<RelAlgNodeType>) {
527  } else {
528  CHECK(cat);
530  }
531 }
532 
533 } // namespace serialization
534 } // namespace boost
constexpr bool is_catalog_rel_alg_node_v
std::unordered_map< size_t, std::unordered_map< unsigned, RegisteredQueryHint > > query_hint_
Definition: RelAlgDag.h:3388
std::string cat(Ts &&...args)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143
static void serialize(Archive &ar, RexClass &obj, const unsigned int version)
std::vector< std::shared_ptr< RelAlgNode > > nodes_
Definition: RelAlgDag.h:3381
std::string name() const
Definition: Catalog.h:348
static SysCatalog & instance()
Definition: SysCatalog.h:343
std::vector< std::shared_ptr< RexSubQuery > > subqueries_
Definition: RelAlgDag.h:3382
string version
Definition: setup.in.py:73
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
void serialize(Archive &ar, RegisteredQueryHint &query_hint, const unsigned int version)
void construct_catalog_rel_alg_node(RelAlgNodeType *node, const Catalog_Namespace::Catalog *cat, const TableDescriptor *td)
static void registerClassesWithArchive(Archive &ar)
constexpr bool is_modify_target_rel_alg_node_v
#define CHECK(condition)
Definition: Logger.h:291
static void serialize(Archive &ar, RelAlgClass &obj, const unsigned int version)
RegisteredQueryHint global_hints_
Definition: RelAlgDag.h:3389
static void serialize(Archive &ar, RelAlgDag &rel_alg_dag, const unsigned int version)
void save_construct_data(Archive &ar, const ExplainedQueryHint *query_hint, const unsigned int version)
void load_construct_data(Archive &ar, ExplainedQueryHint *query_hint, const unsigned int version)
BuildState build_state_
Definition: RelAlgDag.h:3379