32 : td_(td),
executor_(executor), cat_(cat) {
39 const auto stv = boost::get<ScalarTargetValue>(&tv);
41 const auto val_ptr = boost::get<T>(stv);
47 const std::vector<TargetValue>& row,
49 const bool has_nulls) {
61 int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
62 int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
67 float min_val = read_scalar_target_value<float>(row[0]);
68 float max_val = read_scalar_target_value<float>(row[1]);
73 double min_val = read_scalar_target_value<double>(row[0]);
74 double max_val = read_scalar_target_value<double>(row[1]);
82 int64_t min_val = read_scalar_target_value<int64_t>(row[0]);
83 int64_t max_val = read_scalar_target_value<int64_t>(row[1]);
95 const std::shared_ptr<const InputColDescriptor> input_col_desc,
96 const std::vector<Analyzer::Expr*>& target_exprs) {
142 std::vector<const TableDescriptor*> table_descriptors;
145 table_descriptors.insert(
146 table_descriptors.begin(), physical_tds.begin(), physical_tds.end());
148 table_descriptors.push_back(
td_);
156 for (
const auto td : table_descriptors) {
160 const auto table_id = td->tableId;
165 for (
const auto& cd : col_descs) {
173 if (data_mgr.gpusPresent()) {
181 std::map<int, std::list<const ColumnDescriptor*>> columns_by_table_id;
183 for (
const auto& entry : columns_for_update) {
184 auto column_descriptor = entry.first;
185 columns_by_table_id[column_descriptor->tableId].emplace_back(column_descriptor);
188 for (
const auto& [table_id, columns] : columns_by_table_id) {
191 for (
const auto cd : columns) {
192 CHECK(columns_for_update.find(cd) != columns_for_update.end());
196 stats.visible_row_count_per_fragment,
208 const std::set<size_t>& fragment_indexes)
const {
217 fragmenter->updateChunkStats(cd,
stats.chunk_stats_per_fragment, {});
218 fragmenter->setNumRows(
stats.total_row_count);
224 const std::set<size_t>& fragment_indexes)
const {
230 const auto column_id = cd->
columnId;
232 const auto input_col_desc = std::make_shared<const InputColDescriptor>(
234 const auto col_expr = makeExpr<Analyzer::ColumnVar>(
236 const auto count_expr =
237 makeExpr<Analyzer::AggExpr>(cd->columnType,
kCOUNT, col_expr,
false,
nullptr);
241 CHECK_EQ(table_infos.size(), size_t(1));
248 [&deleted_column_stats, cd](
251 if (cd->isDeletedCol) {
252 deleted_column_stats.
total_row_count += fragment_info.getPhysicalNumTuples();
254 if (fragment_info.getPhysicalNumTuples() == 0) {
256 LOG(
WARNING) <<
"Skipping completely empty fragment for column "
261 const auto row = results->getNextRow(
false,
false);
264 const auto& ti = cd->columnType;
266 auto chunk_metadata = std::make_shared<ChunkMetadata>();
269 const auto count_val = read_scalar_target_value<int64_t>(row[0]);
272 std::vector<TargetValue> fakerow;
274 auto num_tuples =
static_cast<size_t>(count_val);
277 if (num_tuples == fragment_info.getPhysicalNumTuples()) {
284 if (num_tuples == 0) {
301 LOG(
WARNING) <<
"Unable to process new metadata values for column "
307 std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
309 std::make_pair(fragment_info.fragmentId, num_tuples));
312 executor_->executeWorkUnitPerFragment(ra_exe_unit,
317 compute_deleted_callback,
319 return deleted_column_stats;
325 const std::unordered_map</*fragment_id*/ int, size_t>& tuple_count_map,
326 std::optional<Data_Namespace::MemoryLevel> memory_level,
327 const std::set<size_t>& fragment_indexes)
const {
329 if (ti.is_varlen()) {
334 const auto column_id = cd->
columnId;
335 const auto input_col_desc = std::make_shared<const InputColDescriptor>(
337 const auto col_expr = makeExpr<Analyzer::ColumnVar>(
340 makeExpr<Analyzer::AggExpr>(cd->
columnType,
kMAX, col_expr,
false,
nullptr);
342 makeExpr<Analyzer::AggExpr>(cd->
columnType,
kMIN, col_expr,
false,
nullptr);
344 makeExpr<Analyzer::AggExpr>(cd->
columnType,
kCOUNT, col_expr,
false,
nullptr);
346 if (ti.is_string()) {
348 const auto fun_expr = makeExpr<Analyzer::KeyForStringExpr>(col_expr);
349 max_expr = makeExpr<Analyzer::AggExpr>(fun_ti,
kMAX, fun_expr,
false,
nullptr);
350 min_expr = makeExpr<Analyzer::AggExpr>(fun_ti,
kMIN, fun_expr,
false,
nullptr);
353 input_col_desc, {min_expr.get(), max_expr.get(), count_expr.get()});
355 CHECK_EQ(table_infos.size(), size_t(1));
360 std::unordered_map< int,
ChunkStats> stats_map;
363 [&stats_map, &tuple_count_map, cd](
365 if (fragment_info.getPhysicalNumTuples() == 0) {
367 LOG(
WARNING) <<
"Skipping completely empty fragment for column "
372 const auto row = results->getNextRow(
false,
false);
377 auto chunk_metadata = std::make_shared<ChunkMetadata>();
380 const auto count_val = read_scalar_target_value<int64_t>(row[2]);
381 if (count_val == 0) {
386 bool has_nulls =
true;
387 auto tuple_count_itr = tuple_count_map.find(fragment_info.fragmentId);
388 if (tuple_count_itr != tuple_count_map.end()) {
389 has_nulls = !(
static_cast<size_t>(count_val) == tuple_count_itr->second);
393 !(
static_cast<size_t>(count_val) == fragment_info.getPhysicalNumTuples());
397 LOG(
WARNING) <<
"Unable to process new metadata values for column "
403 std::make_pair(fragment_info.fragmentId, chunk_metadata->chunkStats));
406 executor_->executeWorkUnitPerFragment(ra_exe_unit,
411 compute_metadata_callback,
416 fragmenter->updateChunkStats(cd, stats_map, memory_level);
423 const std::set<int>& fragment_ids)
const {
425 auto table_info = td->
fragmenter->getFragmentsForQuery();
426 std::set<size_t> fragment_indexes;
427 for (
size_t i = 0; i < table_info.fragments.size(); i++) {
429 fragment_indexes.emplace(i);
432 return fragment_indexes;
439 const auto table_lock =
444 for (
const auto shard : shards) {
453 for (
auto shard : shards) {
463 int32_t fragment_id) {
465 std::set<ChunkKey> uncached_cpu_chunk_keys;
468 if (cd->columnType.is_varlen_indeed()) {
469 chunk_key.emplace_back(1);
470 if (!data_mgr.isBufferOnDevice(
472 uncached_cpu_chunk_keys.emplace(chunk_key);
474 chunk_key.back() = 2;
475 if (!data_mgr.isBufferOnDevice(
477 uncached_cpu_chunk_keys.emplace(chunk_key);
480 if (!data_mgr.isBufferOnDevice(
482 uncached_cpu_chunk_keys.emplace(chunk_key);
486 return uncached_cpu_chunk_keys;
490 const std::set<ChunkKey>& cpu_chunks_to_delete) {
492 for (
const auto& chunk_key : cpu_chunks_to_delete) {
499 const std::set<int>& fragment_ids)
const {
510 for (
auto& [chunk_key, chunk_metadata] : chunk_metadata_vec) {
514 if (chunk_metadata->chunkStats.max.tinyintval == 1 &&
516 auto cpu_chunks_to_delete =
530 chunk_metadata->numBytes,
531 chunk_metadata->numElements);
552 std::map<const TableDescriptor*, std::set<int32_t>> fragments_to_vacuum;
553 for (
const auto& [table_id, fragment_ids] :
557 if (td->maxRollbackEpochs == -1) {
568 deleted_column_stats =
573 std::set<int32_t> filtered_fragment_ids;
574 for (
const auto [fragment_id, visible_row_count] :
576 auto total_row_count =
577 td->fragmenter->getFragmentInfo(fragment_id)->getPhysicalNumTuples();
578 float deleted_row_count = total_row_count - visible_row_count;
580 filtered_fragment_ids.emplace(fragment_id);
584 if (!filtered_fragment_ids.empty()) {
585 fragments_to_vacuum[td] = filtered_fragment_ids;
589 if (!fragments_to_vacuum.empty()) {
591 const auto table_lock =
595 for (
const auto& [td, fragment_ids] : fragments_to_vacuum) {
598 <<
", table id: " << td->tableId;
bool contains(const T &container, const U &element)
Defines data structures for the semantic analysis phase of query processing.
Data_Namespace::MemoryLevel memoryLevel
std::vector< int > ChunkKey
DeletedColumnStats getDeletedColumnStats(const TableDescriptor *td, const std::set< size_t > &fragment_indexes) const
void vacuumFragmentsAboveMinSelectivity(const TableUpdateMetadata &table_update_metadata) const
class for a per-database catalog. also includes metadata for the current database and the current use...
const TableDescriptor * table_descriptor
const ColumnDescriptor * getDeletedColumn(const TableDescriptor *td) const
void recomputeMetadataUnlocked(const TableUpdateMetadata &table_update_metadata) const
Recomputes column chunk metadata for the given set of fragments. The caller of this method is expecte...
ExecutionOptions get_execution_options()
Data_Namespace::DataMgr & getDataMgr() const
CompilationOptions get_compilation_options(const ExecutorDeviceType &device_type)
#define CHUNK_KEY_FRAGMENT_IDX
void delete_cpu_chunks(const Catalog_Namespace::Catalog &catalog, const std::set< ChunkKey > &cpu_chunks_to_delete)
SQLTypeInfo get_logical_type_info(const SQLTypeInfo &type_info)
std::shared_ptr< ResultSet > ResultSetPtr
HOST DEVICE SQLTypes get_type() const
static constexpr size_t ROW_SET_SIZE
const TableDescriptor * td_
T read_scalar_target_value(const TargetValue &tv)
const Catalog_Namespace::Catalog * catalog
const DBMetadata & getCurrentDB() const
void getChunkMetadataVecForKeyPrefix(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix)
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
std::unique_lock< T > unique_lock
std::unordered_map< int, size_t > visible_row_count_per_fragment
int getDatabaseId() const
void vacuumDeletedRows() const
Compacts fragments to remove deleted rows. When a row is deleted, a boolean deleted system column is ...
int getLogicalTableId(const int physicalTableId) const
specifies the content in-memory of a row in the column metadata table
std::unordered_map< int, ChunkStats > chunk_stats_per_fragment
void checkpointWithAutoRollback(const int logical_table_id) const
std::vector< const TableDescriptor * > getPhysicalTablesDescriptors(const TableDescriptor *logical_table_desc, bool populate_fragmenter=true) const
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
void recomputeColumnMetadata(const TableDescriptor *td, const ColumnDescriptor *cd, const std::unordered_map< int, size_t > &tuple_count_map, std::optional< Data_Namespace::MemoryLevel > memory_level, const std::set< size_t > &fragment_indexes) const
DeletedColumnStats recomputeDeletedColumnMetadata(const TableDescriptor *td, const std::set< size_t > &fragment_indexes={}) const
File_Namespace::GlobalFileMgr * getGlobalFileMgr() const
void checkpoint(const int logicalTableId) const
void deleteChunksWithPrefix(const ChunkKey &keyPrefix)
HOST DEVICE EncodingType get_compression() const
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
std::list< const ColumnDescriptor * > getAllColumnMetadataForTable(const int tableId, const bool fetchSystemColumns, const bool fetchVirtualColumns, const bool fetchPhysicalColumns) const
Returns a list of pointers to constant ColumnDescriptor structs for all the columns from a particular...
Data_Namespace::MemoryLevel persistenceLevel
float g_vacuum_min_selectivity
void removeFragmenterForTable(const int table_id) const
void compactDataFiles(const int32_t db_id, const int32_t tb_id)
bool set_metadata_from_results(ChunkMetadata &chunk_metadata, const std::vector< TargetValue > &row, const SQLTypeInfo &ti, const bool has_nulls)
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
std::set< ChunkKey > get_uncached_cpu_chunk_keys(const Catalog_Namespace::Catalog &catalog, int32_t table_id, int32_t fragment_id)
#define DEBUG_TIMER(name)
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
TableOptimizer(const TableDescriptor *td, Executor *executor, const Catalog_Namespace::Catalog &cat)
void setTableEpochsLogExceptions(const int32_t db_id, const std::vector< TableEpochInfo > &table_epochs) const
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
#define CHUNK_KEY_COLUMN_IDX
RelAlgExecutionUnit build_ra_exe_unit(const std::shared_ptr< const InputColDescriptor > input_col_desc, const std::vector< Analyzer::Expr * > &target_exprs)
const Catalog_Namespace::Catalog & cat_
std::vector< TableEpochInfo > getTableEpochs(const int32_t db_id, const int32_t table_id) const
void recomputeMetadata() const
Recomputes per-chunk metadata for each fragment in the table. Updates and deletes can cause chunk met...
std::set< size_t > getFragmentIndexes(const TableDescriptor *td, const std::set< int > &fragment_ids) const
void vacuumFragments(const TableDescriptor *td, const std::set< int > &fragment_ids={}) const