OmniSciDB  a5dc49c757
InputMetadata.h File Reference
Include dependency graph for InputMetadata.h: omitted.
Included-by graph (files that directly or indirectly include this file): omitted.


Classes

struct  InputTableInfo
 
class  InputTableInfoCache
 

Namespaces

 Catalog_Namespace
 

Typedefs

using TemporaryTables = std::unordered_map< int, const ResultSetPtr & >
 

Functions

ChunkMetadataMap synthesize_metadata (const ResultSet *rows)
 
size_t get_frag_count_of_table (const shared::TableKey &table_key, Executor *executor)
 
std::vector< InputTableInfo > get_table_infos (const std::vector< InputDescriptor > &input_descs, Executor *executor)
 
std::vector< InputTableInfo > get_table_infos (const RelAlgExecutionUnit &ra_exe_unit, Executor *executor)
 
Fragmenter_Namespace::TableInfo build_table_info (const std::vector< const TableDescriptor * > &shard_tables)
 

Typedef Documentation

using TemporaryTables = std::unordered_map<int, const ResultSetPtr&>

Definition at line 31 of file InputMetadata.h.
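Temporary tables are intermediate results of a multi-step query, keyed by table id; the mapped type is a reference to a ResultSetPtr owned elsewhere, so the referenced pointer must outlive the map. A minimal, self-contained sketch of the convention (the ResultSet stand-in and the id value are illustrative, not the real classes; negative ids mark temporary tables, the convention checked by get_frag_count_of_table() below):

#include <cassert>
#include <memory>
#include <unordered_map>

// Stand-in for the real ResultSet; in OmniSciDB, ResultSetPtr is a
// std::shared_ptr<ResultSet>.
struct ResultSet {};
using ResultSetPtr = std::shared_ptr<ResultSet>;
using TemporaryTables = std::unordered_map<int, const ResultSetPtr&>;

int main() {
  // The map stores only a reference, so step_result must outlive it.
  const ResultSetPtr step_result = std::make_shared<ResultSet>();
  TemporaryTables temporary_tables;
  temporary_tables.emplace(-1, step_result);  // temporary tables get negative ids
  assert(temporary_tables.count(-1) == 1);
  return 0;
}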

Function Documentation

Fragmenter_Namespace::TableInfo build_table_info (const std::vector< const TableDescriptor * > &shard_tables)

Definition at line 44 of file InputMetadata.cpp.

References CHECK, Fragmenter_Namespace::TableInfo::fragments, and Fragmenter_Namespace::TableInfo::setPhysicalNumTuples().

Referenced by InputTableInfoCache::getTableInfo().

45  {
46  size_t total_number_of_tuples{0};
47  Fragmenter_Namespace::TableInfo table_info_all_shards;
48  for (const TableDescriptor* shard_table : shard_tables) {
49  CHECK(shard_table->fragmenter);
50  const auto& shard_metainfo = shard_table->fragmenter->getFragmentsForQuery();
51  total_number_of_tuples += shard_metainfo.getPhysicalNumTuples();
52  table_info_all_shards.fragments.reserve(table_info_all_shards.fragments.size() +
53  shard_metainfo.fragments.size());
54  table_info_all_shards.fragments.insert(table_info_all_shards.fragments.end(),
55  shard_metainfo.fragments.begin(),
56  shard_metainfo.fragments.end());
57  }
58  table_info_all_shards.setPhysicalNumTuples(total_number_of_tuples);
59  return table_info_all_shards;
60 }

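build_table_info() concatenates the fragment lists of all shards of a physical table into a single TableInfo and sums their physical tuple counts. A self-contained sketch of that merge pattern, using simplified stand-ins for the Fragmenter_Namespace types:

#include <cstddef>
#include <vector>

// Simplified stand-ins, not the real Fragmenter_Namespace types.
struct FragmentInfo {
  size_t num_tuples{0};
};
struct TableInfo {
  std::vector<FragmentInfo> fragments;
  size_t phys_num_tuples{0};
};

// Mirrors the merge above: append each shard's fragments, then set the
// accumulated tuple total once at the end.
TableInfo merge_shards(const std::vector<TableInfo>& shards) {
  TableInfo merged;
  size_t total_tuples{0};
  for (const auto& shard : shards) {
    total_tuples += shard.phys_num_tuples;
    merged.fragments.insert(
        merged.fragments.end(), shard.fragments.begin(), shard.fragments.end());
  }
  merged.phys_num_tuples = total_tuples;
  return merged;
}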

size_t get_frag_count_of_table (const shared::TableKey &table_key, Executor *executor)

Definition at line 500 of file InputMetadata.cpp.

References CHECK, CHECK_GE, and shared::TableKey::table_id.

Referenced by RelAlgExecutor::getOuterFragmentCount().

500  {
501  const auto temporary_tables = executor->getTemporaryTables();
502  CHECK(temporary_tables);
503  auto it = temporary_tables->find(table_key.table_id);
504  if (it != temporary_tables->end()) {
505  CHECK_GE(int(0), table_key.table_id);
506  return size_t(1);
507  } else {
508  const auto table_info = executor->getTableInfo(table_key);
509  return table_info.fragments.size();
510  }
511 }

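A hedged usage sketch (the include path and helper name are assumptions, not part of this API): callers such as RelAlgExecutor::getOuterFragmentCount() use the returned count to gauge how the outer table's work can be split.

#include "QueryEngine/InputMetadata.h"  // assumed include path

// Hypothetical helper: fragment count for a query's outer table.
size_t outer_fragment_count(Executor* executor, const shared::TableKey& key) {
  // Temporary tables (negative table ids) always report one fragment;
  // physical tables report their fragmenter's actual fragment count.
  return get_frag_count_of_table(key, executor);
}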

std::vector<InputTableInfo> get_table_infos (const std::vector< InputDescriptor > &input_descs, Executor *executor)

Definition at line 513 of file InputMetadata.cpp.

References anonymous_namespace{InputMetadata.cpp}::collect_table_infos().

Referenced by RelAlgExecutor::computeWindow(), RelAlgExecutor::createAggregateWorkUnit(), RelAlgExecutor::createCompoundWorkUnit(), RelAlgExecutor::createFilterWorkUnit(), RelAlgExecutor::createProjectWorkUnit(), RelAlgExecutor::createTableFunctionWorkUnit(), RelAlgExecutor::createUnionWorkUnit(), RelAlgExecutor::executeDelete(), RelAlgExecutor::executeTableFunction(), RelAlgExecutor::executeUpdate(), RelAlgExecutor::executeWorkUnit(), TableOptimizer::getDeletedColumnStats(), RelAlgExecutor::getFilteredCountAll(), RelAlgExecutor::getFilterSelectivity(), RelAlgExecutor::getNDVEstimation(), RelAlgExecutor::handleOutOfMemoryRetry(), TableOptimizer::recomputeColumnMetadata(), and RelAlgExecutor::selectFiltersToBePushedDown().

515  {
516  std::vector<InputTableInfo> table_infos;
517  collect_table_infos(table_infos, input_descs, executor);
518  return table_infos;
519 }


std::vector<InputTableInfo> get_table_infos (const RelAlgExecutionUnit &ra_exe_unit, Executor *executor)

Definition at line 521 of file InputMetadata.cpp.

References anonymous_namespace{InputMetadata.cpp}::collect_table_infos(), and RelAlgExecutionUnit::input_descs.

522  {
523  std::vector<InputTableInfo> table_infos;
524  collect_table_infos(table_infos, ra_exe_unit.input_descs, executor);
525  return table_infos;
526 }

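This overload is a thin wrapper that forwards ra_exe_unit.input_descs to the overload above, so both produce the same table infos. A sketch of the equivalence (include paths and the helper name are assumptions):

#include <cassert>

#include "QueryEngine/InputMetadata.h"        // assumed include path
#include "QueryEngine/RelAlgExecutionUnit.h"  // assumed include path

// Hypothetical helper demonstrating that the two overloads agree.
std::vector<InputTableInfo> infos_for_unit(const RelAlgExecutionUnit& ra_exe_unit,
                                           Executor* executor) {
  const auto from_unit = get_table_infos(ra_exe_unit, executor);
  const auto from_descs = get_table_infos(ra_exe_unit.input_descs, executor);
  assert(from_unit.size() == from_descs.size());  // same inputs either way
  return from_unit;
}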

ChunkMetadataMap synthesize_metadata (const ResultSet *rows)

Definition at line 368 of file InputMetadata.cpp.

References CHECK, CHECK_LT, cpu_threads(), Encoder::Create(), DEBUG_TIMER, inline_fp_null_val(), inline_int_null_val(), kDOUBLE, kFLOAT, threading_serial::parallel_for(), synthesize_metadata_table_function(), heavyai::TableFunction, result_set::use_parallel_algorithms(), and anonymous_namespace{InputMetadata.cpp}::uses_int_meta().

Referenced by Fragmenter_Namespace::FragmentInfo::getChunkMetadataMap().

368  {
369  auto timer = DEBUG_TIMER(__func__);
370  ChunkMetadataMap metadata_map;
371 
372  // If the ResultSet has no rows, fill with dummy metadata and return early.
373  if (rows->definitelyHasNoRows()) {
374  // One dummy encoder per column supplies the default (empty) chunk metadata.
375  std::vector<std::unique_ptr<Encoder>> decoders;
376  for (size_t i = 0; i < rows->colCount(); ++i) {
377  decoders.emplace_back(Encoder::Create(nullptr, rows->getColType(i)));
378  const auto it_ok =
379  metadata_map.emplace(i, decoders.back()->getMetadata(rows->getColType(i)));
380  CHECK(it_ok.second);
381  }
382  return metadata_map;
383  }
384 
385  // Create a vector of Encoder vectors for each worker.
386  std::vector<std::vector<std::unique_ptr<Encoder>>> dummy_encoders;
387  const size_t worker_count =
388  result_set::use_parallel_algorithms(*rows) ? cpu_threads() : 1;
389  for (size_t worker_idx = 0; worker_idx < worker_count; ++worker_idx) {
390  dummy_encoders.emplace_back();
391  for (size_t i = 0; i < rows->colCount(); ++i) {
392  const auto& col_ti = rows->getColType(i);
393  dummy_encoders.back().emplace_back(Encoder::Create(nullptr, col_ti));
394  }
395  }
396 
397  // For TableFunctions, call the optimized function we have for this format.
398  if (rows->getQueryMemDesc().getQueryDescriptionType() ==
399  QueryDescriptionType::TableFunction) {
400  return synthesize_metadata_table_function(rows);
401  }
402  rows->moveToBegin();
403 
404  std::vector<SQLTypeInfo> row_col_ti;
405  std::vector<Number64> col_null_vals(rows->colCount());
406  for (size_t i = 0; i < rows->colCount(); i++) {
407  auto const col_ti = rows->getColType(i);
408  row_col_ti.push_back(col_ti);
409  if (uses_int_meta(col_ti)) {
410  col_null_vals[i].as_int64 = inline_int_null_val(col_ti);
411  } else if (col_ti.is_fp()) {
412  col_null_vals[i].as_double = inline_fp_null_val(col_ti);
413  } else {
414  throw std::runtime_error(col_ti.get_type_name() +
415  " is not supported in temporary table.");
416  }
417  }
418 
419  // Code in the do_work lambda runs for and processes each row.
420  const auto do_work = [rows, &row_col_ti, &col_null_vals](
421  const std::vector<TargetValue>& crt_row,
422  std::vector<std::unique_ptr<Encoder>>& dummy_encoders) {
423  for (size_t i = 0; i < rows->colCount(); ++i) {
424  const auto& col_ti = row_col_ti[i];
425  const auto& col_val = crt_row[i];
426  const auto scalar_col_val = boost::get<ScalarTargetValue>(&col_val);
427  CHECK(scalar_col_val);
428  if (uses_int_meta(col_ti)) {
429  const auto i64_p = boost::get<int64_t>(scalar_col_val);
430  CHECK(i64_p);
431  dummy_encoders[i]->updateStats(*i64_p, *i64_p == col_null_vals[i].as_int64);
432  } else {
433  CHECK(col_ti.is_fp());
434  switch (col_ti.get_type()) {
435  case kFLOAT: {
436  const auto float_p = boost::get<float>(scalar_col_val);
437  CHECK(float_p);
438  dummy_encoders[i]->updateStats(*float_p,
439  *float_p == col_null_vals[i].as_double);
440  break;
441  }
442  case kDOUBLE: {
443  const auto double_p = boost::get<double>(scalar_col_val);
444  CHECK(double_p);
445  dummy_encoders[i]->updateStats(*double_p,
446  *double_p == col_null_vals[i].as_double);
447  break;
448  }
449  default:
450  CHECK(false);
451  }
452  }
453  }
454  };
455 
456  // Parallelize the processing using TBB if parallel algorithms are enabled.
457  if (result_set::use_parallel_algorithms(*rows)) {
458  const size_t entry_count = rows->entryCount();
459  threading::parallel_for(
460  tbb::blocked_range<size_t>(0, entry_count),
461  [&do_work, &rows, &dummy_encoders](const tbb::blocked_range<size_t>& range) {
462  const size_t worker_idx = tbb::this_task_arena::current_thread_index();
463  for (size_t i = range.begin(); i < range.end(); ++i) {
464  const auto crt_row = rows->getRowAtNoTranslations(i);
465  if (!crt_row.empty()) {
466  do_work(crt_row, dummy_encoders[worker_idx]);
467  }
468  }
469  });
470 
471  } else {
472  // If parallel algorithms are not enabled, process the rows sequentially.
473  while (true) {
474  auto crt_row = rows->getNextRow(false, false);
475  if (crt_row.empty()) {
476  break;
477  }
478  do_work(crt_row, dummy_encoders[0]);
479  }
480  }
481  rows->moveToBegin();
482 
483  // Reduce the results from each worker.
484  for (size_t worker_idx = 1; worker_idx < worker_count; ++worker_idx) {
485  CHECK_LT(worker_idx, dummy_encoders.size());
486  const auto& worker_encoders = dummy_encoders[worker_idx];
487  for (size_t i = 0; i < rows->colCount(); ++i) {
488  dummy_encoders[0][i]->reduceStats(*worker_encoders[i]);
489  }
490  }
491  // Add each column's results to the metadata map.
492  for (size_t i = 0; i < rows->colCount(); ++i) {
493  const auto it_ok =
494  metadata_map.emplace(i, dummy_encoders[0][i]->getMetadata(rows->getColType(i)));
495  CHECK(it_ok.second);
496  }
497  return metadata_map;
498 }

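The per-worker "dummy encoder" scheme above is an accumulate-then-reduce pattern: each worker updates its own statistics, and worker 0 absorbs the rest at the end. A minimal, self-contained sketch with a simplified Stats type standing in for Encoder (updateStats/reduceStats):

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <limits>
#include <vector>

// Simplified stand-in for the min/max/null statistics an Encoder keeps.
struct Stats {
  int64_t min{std::numeric_limits<int64_t>::max()};
  int64_t max{std::numeric_limits<int64_t>::min()};
  bool has_nulls{false};

  void update(int64_t v, bool is_null) {  // cf. Encoder::updateStats
    if (is_null) {
      has_nulls = true;
      return;
    }
    min = std::min(min, v);
    max = std::max(max, v);
  }
  void reduce(const Stats& other) {  // cf. Encoder::reduceStats
    min = std::min(min, other.min);
    max = std::max(max, other.max);
    has_nulls = has_nulls || other.has_nulls;
  }
};

int main() {
  std::vector<Stats> per_worker(4);  // one accumulator per worker
  per_worker[1].update(7, false);
  per_worker[2].update(-3, false);
  per_worker[3].update(0, true);  // nulls only set has_nulls
  for (size_t w = 1; w < per_worker.size(); ++w) {
    per_worker[0].reduce(per_worker[w]);  // fold everything into worker 0
  }
  // per_worker[0] now holds min = -3, max = 7, has_nulls = true.
  return 0;
}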