OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
Execute.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef QUERYENGINE_EXECUTE_H
18 #define QUERYENGINE_EXECUTE_H
19 
20 #include <algorithm>
21 #include <atomic>
22 #include <condition_variable>
23 #include <cstddef>
24 #include <cstdlib>
25 #include <deque>
26 #include <functional>
27 #include <limits>
28 #include <map>
29 #include <mutex>
30 #include <queue>
31 #include <stack>
32 #include <unordered_map>
33 #include <unordered_set>
34 
35 #include <llvm/IR/Function.h>
36 #include <llvm/IR/Value.h>
37 #include <llvm/Linker/Linker.h>
38 #include <llvm/Transforms/Utils/ValueMapper.h>
39 #include <rapidjson/document.h>
40 
44 #include "QueryEngine/CgenState.h"
45 #include "QueryEngine/CodeCache.h"
59 #include "QueryEngine/PlanState.h"
68 
69 #include "DataMgr/Chunk/Chunk.h"
70 #include "Logger/Logger.h"
71 #include "Shared/DbObjectKeys.h"
72 #include "Shared/LruCache.h"
74 #include "Shared/funcannotations.h"
76 #include "Shared/measure.h"
77 #include "Shared/thread_count.h"
78 #include "Shared/toString.h"
82 
83 using QueryCompilationDescriptorOwned = std::unique_ptr<QueryCompilationDescriptor>;
85 using QueryMemoryDescriptorOwned = std::unique_ptr<QueryMemoryDescriptor>;
86 using QuerySessionId = std::string;
87 using CurrentQueryStatus = std::pair<QuerySessionId, std::string>;
88 using InterruptFlagMap = std::map<QuerySessionId, bool>;
90  // A class that is used to describe the query session's info
91  public:
92  /* todo(yoonmin): support more query status
93  * i.e., RUNNING_SORT, RUNNING_CARD_EST, CLEANUP, ... */
94  enum QueryStatus {
95  UNDEFINED = 0,
101  };
102 
103  QuerySessionStatus(const QuerySessionId& query_session,
104  const std::string& query_str,
105  const std::string& submitted_time)
106  : query_session_(query_session)
107  , executor_id_(0)
108  , query_str_(query_str)
109  , submitted_time_(submitted_time)
111  QuerySessionStatus(const QuerySessionId& query_session,
112  const size_t executor_id,
113  const std::string& query_str,
114  const std::string& submitted_time)
115  : query_session_(query_session)
116  , executor_id_(executor_id)
117  , query_str_(query_str)
118  , submitted_time_(submitted_time)
120  QuerySessionStatus(const QuerySessionId& query_session,
121  const size_t executor_id,
122  const std::string& query_str,
123  const std::string& submitted_time,
124  const QuerySessionStatus::QueryStatus& query_status)
125  : query_session_(query_session)
126  , executor_id_(executor_id)
127  , query_str_(query_str)
128  , submitted_time_(submitted_time)
129  , query_status_(query_status) {}
130 
132  const std::string getQueryStr() { return query_str_; }
133  const size_t getExecutorId() { return executor_id_; }
134  const std::string& getQuerySubmittedTime() { return submitted_time_; }
137  query_status_ = status;
138  }
139  void setExecutorId(const size_t executor_id) { executor_id_ = executor_id; }
140 
141  private:
143  size_t executor_id_;
144  const std::string query_str_;
145  const std::string submitted_time_;
146  // Currently we use four query statuses:
147  // 1) PENDING_IN_QUEUE: a task is submitted to the dispatch_queue but hangs due to no
148  // existing worker (= executor) 2) PENDING_IN_EXECUTOR: a task is assigned to the
149  // specific executor but waits to get the resource to run 3) RUNNING: a task is assigned
150  // to the specific executor and its execution has been successfully started
151  // 4) RUNNING_REDUCTION: a task is in the reduction phase
153 };
154 using QuerySessionMap =
155  std::map<const QuerySessionId, std::map<std::string, QuerySessionStatus>>;
156 
157 class ColumnFetcher;
158 
// Exception type derived from std::runtime_error. The `cause` string passed to
// the constructor is forwarded to std::runtime_error and is therefore what
// what() reports.
// NOTE(review): presumably thrown when the query watchdog rejects a query --
// confirm against the throw sites, which are not in this header.
159 class WatchdogException : public std::runtime_error {
160  public:
161  WatchdogException(const std::string& cause) : std::runtime_error(cause) {}
162 };
163 
165 
166 class Executor;
167 
168 inline llvm::Value* get_arg_by_name(llvm::Function* func, const std::string& name) {
169  for (auto& arg : func->args()) {
170  if (arg.getName() == name) {
171  return &arg;
172  }
173  }
174  CHECK(false);
175  return nullptr;
176 }
177 
178 inline llvm::Value* get_arg_by_index(llvm::Function* func, unsigned const index) {
179 #if 10 <= LLVM_VERSION_MAJOR
180  return index < func->arg_size() ? func->getArg(index) : nullptr;
181 #else
182  return index < func->arg_size() ? func->arg_begin() + index : nullptr;
183 #endif
184 }
185 
186 // Returns func->arg_size() if name is not found.
187 inline unsigned get_index_by_name(llvm::Function* func, const std::string& name) {
188  unsigned index = 0;
189  for (auto& arg : func->args()) {
190  if (arg.getName() == name) {
191  break;
192  }
193  ++index;
194  }
195  return index;
196 }
197 
// Maps a byte width to its base-2 logarithm: 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3.
// Any other width terminates the process via std::abort().
//
// The function is constexpr (and therefore still implicitly inline) so the
// mapping can also be used in constant expressions. std::abort is spelled with
// its namespace because <cstdlib>, the header this file includes, only
// guarantees the std:: qualified name.
constexpr uint32_t log2_bytes(const uint32_t bytes) {
  switch (bytes) {
    case 1:
      return 0;
    case 2:
      return 1;
    case 4:
      return 2;
    case 8:
      return 3;
    default:
      // Unsupported width: treated as a programming error.
      std::abort();
  }
}
212 
214  const shared::ColumnKey& column_key) {
215  CHECK_GT(column_key.db_id, 0);
216  CHECK_GT(column_key.table_id, 0);
217  const auto col_desc = Catalog_Namespace::get_metadata_for_column(column_key);
218  CHECK(col_desc);
219  return col_desc;
220 }
221 
222 inline const Analyzer::Expr* extract_cast_arg(const Analyzer::Expr* expr) {
223  const auto cast_expr = dynamic_cast<const Analyzer::UOper*>(expr);
224  if (!cast_expr || cast_expr->get_optype() != kCAST) {
225  return expr;
226  }
227  return cast_expr->get_operand();
228 }
229 
230 inline std::string numeric_type_name(const SQLTypeInfo& ti) {
231  CHECK(ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
232  ti.is_fp() || (ti.is_string() && ti.get_compression() == kENCODING_DICT) ||
233  ti.is_timeinterval());
234  if (ti.is_integer() || ti.is_decimal() || ti.is_boolean() || ti.is_time() ||
235  ti.is_string() || ti.is_timeinterval()) {
236  return "int" + std::to_string(ti.get_logical_size() * 8) + "_t";
237  }
238  return ti.get_type() == kDOUBLE ? "double" : "float";
239 }
240 
242  const shared::ColumnKey& column_key) {
243  return column_key.table_id > 0 ? get_column_descriptor(column_key) : nullptr;
244 }
245 
// Returns a reference to the result set stored in `temporary_tables` under
// `table_id`.
// Preconditions enforced by CHECKs: `table_id` must be negative and an entry
// for it must exist in the map.
// NOTE(review): `temporary_tables` is dereferenced without a null check here;
// callers are expected to pass a valid pointer.
// The returned reference refers to the element inside `*temporary_tables`.
246 inline const ResultSetPtr& get_temporary_table(const TemporaryTables* temporary_tables,
247  const int table_id) {
248  CHECK_LT(table_id, 0);
249  const auto it = temporary_tables->find(table_id);
250  CHECK(it != temporary_tables->end());
251  return it->second;
252 }
253 
254 inline const SQLTypeInfo get_column_type(const int col_id,
255  const int table_id,
256  const ColumnDescriptor* cd,
257  const TemporaryTables* temporary_tables) {
258  CHECK(cd || temporary_tables);
259  if (cd) {
260  CHECK_EQ(col_id, cd->columnId);
261  CHECK_EQ(table_id, cd->tableId);
262  return cd->columnType;
263  }
264  const auto& temp = get_temporary_table(temporary_tables, table_id);
265  return temp->getColType(col_id);
266 }
267 
// Exception type derived from std::runtime_error that always carries the fixed
// message "Retry query compilation with no GPU lazy fetch.".
// NOTE(review): the constructor's declarator line (listing line 270) is absent
// from this listing; only its initializer list is visible below.
268 class CompilationRetryNoLazyFetch : public std::runtime_error {
269  public:
271  : std::runtime_error("Retry query compilation with no GPU lazy fetch.") {}
272 };
273 
// Exception type derived from std::runtime_error with the fixed message
// "Retry query compilation with new scan limit.". The constructor also records
// the requested limit in the new_scan_limit_ member.
// NOTE(review): listing line 280 (presumably the declaration of
// new_scan_limit_) is absent from this listing.
274 class CompilationRetryNewScanLimit : public std::runtime_error {
275  public:
276  CompilationRetryNewScanLimit(const size_t new_scan_limit)
277  : std::runtime_error("Retry query compilation with new scan limit.")
278  , new_scan_limit_(new_scan_limit) {}
279 
281 };
282 
// Exception type derived from std::runtime_error that always carries the fixed
// message "Too many literals in the query".
283 class TooManyLiterals : public std::runtime_error {
284  public:
285  TooManyLiterals() : std::runtime_error("Too many literals in the query") {}
286 };
287 
// Exception type derived from std::runtime_error that always carries the fixed
// message "Retry query compilation with no compaction.".
// NOTE(review): the constructor's declarator line (listing line 290) is absent
// from this listing; only its initializer list is visible below.
288 class CompilationRetryNoCompaction : public std::runtime_error {
289  public:
291  : std::runtime_error("Retry query compilation with no compaction.") {}
292 };
293 
294 // Throwing QueryMustRunOnCpu allows us to retry a query step on CPU if
295 // g_allow_query_step_cpu_retry is true (on by default) by catching
296 // the exception at the query step execution level in RelAlgExecutor,
297 // or if g_allow_query_step_cpu_retry is false but g_allow_cpu_retry is true,
298 // by retrying the entire query on CPU (if both flags are false, we return an
299 // error). This exception is thrown for the following broad categories of conditions:
300 // 1) we have not implemented an operator on GPU and so cannot codegen for GPU
301 // 2) we catch an unexpected GPU compilation/linking error (perhaps due
302 // to an outdated driver/CUDA installation not allowing a modern operator)
303 // 3) when we detect up front that we will not have enough GPU memory to execute
304 // a query.
305 // There is a fourth scenario where our pre-flight GPU memory check passed but for
306 // whatever reason we still run out of memory. In those cases we go down the
307 // handleOutOfMemoryRetry path, which will first try per-fragment execution on GPU,
308 // and if that fails, CPU execution.
309 // Note that for distributed execution failures on leaves, we do not retry queries.
310 // TODO(todd): See if CPU retry of individual steps can be turned on safely for
311 // distributed
312 
// Exception type derived from std::runtime_error.
// The default constructor uses the fixed message "Query must run in cpu mode.";
// the second constructor forwards a caller-supplied message `err` instead.
313 class QueryMustRunOnCpu : public std::runtime_error {
314  public:
315  QueryMustRunOnCpu() : std::runtime_error("Query must run in cpu mode.") {}
316 
317  QueryMustRunOnCpu(const std::string& err) : std::runtime_error(err) {}
318 };
319 
// Exception type derived from std::runtime_error; `message` becomes the text
// reported by what().
// NOTE(review): presumably signals a failure while parsing IR -- confirm
// against the throw sites, which are not in this header.
class ParseIRError : public std::runtime_error {
 public:
  // The message is taken by const reference. Passing it by value as a const
  // std::string made an extra copy that could not even be moved from, and it
  // differed from the sibling exception classes in this header.
  ParseIRError(const std::string& message) : std::runtime_error(message) {}
};
324 
// Exception type derived from std::runtime_error that always carries the fixed
// message "NONE ENCODED String types are not supported as input result set.".
// NOTE(review): the constructor's declarator line (listing line 327) is absent
// from this listing; only its initializer list is visible below.
325 class StringConstInResultSet : public std::runtime_error {
326  public:
328  : std::runtime_error(
329  "NONE ENCODED String types are not supported as input result set.") {}
330 };
331 
332 class ExtensionFunction;
333 
335 using ColumnToFragmentsMap = std::map<const ColumnDescriptor*, std::set<int32_t>>;
336 using TableToFragmentIds = std::map<int32_t, std::set<int32_t>>;
337 
341 };
342 
344  public:
346 
347  UpdateLogForFragment(FragmentInfoType const& fragment_info,
348  size_t const,
349  const std::shared_ptr<ResultSet>& rs);
350 
351  std::vector<TargetValue> getEntryAt(const size_t index) const override;
352  std::vector<TargetValue> getTranslatedEntryAt(const size_t index) const override;
353 
354  size_t const getRowCount() const override;
356  return rs_->getRowSetMemOwner()->getLiteralStringDictProxy();
357  }
358  size_t const getEntryCount() const override;
359  size_t const getFragmentIndex() const;
360  FragmentInfoType const& getFragmentInfo() const;
363  }
364  decltype(FragmentInfoType::fragmentId) const getFragmentId() const {
365  return fragment_info_.fragmentId;
366  }
367 
368  SQLTypeInfo getColumnType(const size_t col_idx) const;
369 
370  using Callback = std::function<void(const UpdateLogForFragment&, TableUpdateMetadata&)>;
371 
372  auto getResultSet() const { return rs_; }
373 
374  private:
377  std::shared_ptr<ResultSet> rs_;
378 };
379 
380 using LLVMValueVector = std::vector<llvm::Value*>;
381 
383 
384 std::ostream& operator<<(std::ostream&, FetchResult const&);
385 
386 namespace ExecutorResourceMgr_Namespace {
387 class ExecutorResourceMgr;
388 struct ChunkRequestInfo;
389 struct ResourcePoolInfo;
390 }; // namespace ExecutorResourceMgr_Namespace
391 
393  CardinalityCacheKey(const RelAlgExecutionUnit& ra_exe_unit);
394 
395  bool operator==(const CardinalityCacheKey& other) const;
396 
397  size_t hash() const;
398 
399  bool containsTableKey(const shared::TableKey& table_key) const;
400 
401  private:
402  std::string key;
403  std::unordered_set<shared::TableKey> table_keys;
404 };
405 
// Specialization of std::hash for CardinalityCacheKey. The call operator simply
// delegates to CardinalityCacheKey::hash(), which lets the key type be used
// with the standard unordered containers without naming a custom hasher.
406 namespace std {
407 template <>
408 struct hash<CardinalityCacheKey> {
409  size_t operator()(const CardinalityCacheKey& cache_key) const {
410  return cache_key.hash();
411  }
412 };
413 } // namespace std
414 
415 class Executor {
416  static_assert(sizeof(float) == 4 && sizeof(double) == 8,
417  "Host hardware not supported, unexpected size of float / double.");
418  static_assert(sizeof(time_t) == 8,
419  "Host hardware not supported, 64-bit time support is required.");
420 
421  public:
422  using ExecutorId = size_t;
423  static constexpr ExecutorId UNITARY_EXECUTOR_ID = 0;
424  static constexpr ExecutorId INVALID_EXECUTOR_ID = SIZE_MAX;
425 
426  Executor(const ExecutorId id,
427  Data_Namespace::DataMgr* data_mgr,
428  const size_t block_size_x,
429  const size_t grid_size_x,
430  const size_t max_gpu_slab_size,
431  const std::string& debug_dir,
432  const std::string& debug_file);
433 
434  void clearCaches(bool runtime_only = false);
435 
436  std::string dumpCache() const;
437 
// Clears caches for one table or for everything.
// If `td` is non-null and td->getTableChunkKey(current_db_id) is non-empty, a
// table key is derived from that chunk key prefix with boost::hash_value and
// clearEntireCache is set to false. Otherwise the `clearEntireCache` branch at
// the end runs. Both paths branch on `for_update`.
// NOTE(review): the statements that perform the actual invalidation (listing
// lines 446, 448, 450, 452, 457, 459, 461, 463) are absent from this listing,
// so which caches are touched cannot be determined from here.
438  static void clearExternalCaches(bool for_update,
439  const TableDescriptor* td,
440  const int current_db_id) {
441  bool clearEntireCache = true;
442  if (td) {
443  const auto& table_chunk_key_prefix = td->getTableChunkKey(current_db_id);
444  if (!table_chunk_key_prefix.empty()) {
445  auto table_key = boost::hash_value(table_chunk_key_prefix);
447  if (for_update) {
449  } else {
451  }
453  clearEntireCache = false;
454  }
455  }
456  if (clearEntireCache) {
458  if (for_update) {
460  } else {
462  }
464  }
465  }
466 
467  void reset(bool discard_runtime_modules_only = false);
468 
// Runs the caller-supplied registration callback `register_extension_functions`
// between resetting every executor and refreshing them.
// Visible sequence: take the locks described by the comments below, call
// reset(/*discard_runtime_modules_only=*/true) on every entry of
// Executor::executors_, invoke the callback, then call
// update_after_registration(/*update_runtime_modules_only=*/true).
// NOTE(review): the lock statements on listing lines 472, 474 and 482 are
// absent from this listing; only the comments describing them and the start of
// the std::lock_guard declaration remain.
469  template <typename F>
470  static void registerExtensionFunctions(F register_extension_functions) {
471  // Don't want native code to vanish while executing:
473  // Blocks Executor::getExecutor:
475  // Lock registration to avoid
476  // java.util.ConcurrentModificationException from calcite server
477  // when client registrations arrive too fast. Also blocks
478  // Executor::get_rt_udf_module for retrieving runtime UDF/UDTF
479  // module until this registration has rebuilt it via
480  // Executor::update_after_registration:
481  std::lock_guard<std::mutex> register_lock(
483 
484  // Reset all executors:
485  for (auto& executor_item : Executor::executors_) {
486  executor_item.second->reset(/*discard_runtime_modules_only=*/true);
487  }
488  // Call registration worker, see
489  // DBHandler::register_runtime_extension_functions for details. In
490  // short, updates Executor::extension_module_sources,
491  // table_functions::TableFunctionsFactory, and registers runtime
492  // extension functions with Calcite:
493  register_extension_functions();
494 
495  // Update executors with registered LLVM modules:
496  update_after_registration(/*update_runtime_modules_only=*/true);
497  }
498 
499  static std::shared_ptr<Executor> getExecutor(
500  const ExecutorId id,
501  const std::string& debug_dir = "",
502  const std::string& debug_file = "",
503  const SystemParameters& system_parameters = SystemParameters());
504 
505  static void nukeCacheOfExecutors() {
507  execute_mutex_); // don't want native code to vanish while executing
509  executors_.clear();
510  }
511 
512  static void clearMemory(const Data_Namespace::MemoryLevel memory_level);
513 
514  static size_t getArenaBlockSize();
515 
516  static void addUdfIrToModule(const std::string& udf_ir_filename, const bool is_cuda_ir);
517 
518  enum class ExtModuleKinds {
519  template_module, // RuntimeFunctions.bc
520  udf_cpu_module, // Load-time UDFs for CPU execution
521  udf_gpu_module, // Load-time UDFs for GPU execution
522  rt_udf_cpu_module, // Run-time UDF/UDTFs for CPU execution
523  rt_udf_gpu_module, // Run-time UDF/UDTFs for GPU execution
524  rt_geos_module, // geos functions
525  rt_libdevice_module // math library functions for GPU execution
526  };
527  // Globally available mapping of extension module sources. Not thread-safe.
528  static std::map<ExtModuleKinds, std::string> extension_module_sources;
530 
531  // Convenience functions for retrieving executor-local extension modules, thread-safe:
532  const std::unique_ptr<llvm::Module>& get_rt_module() const {
534  }
535  const std::unique_ptr<llvm::Module>& get_udf_module(bool is_gpu = false) const {
536  return get_extension_module(
538  }
539  const std::unique_ptr<llvm::Module>& get_rt_udf_module(bool is_gpu = false) const {
540  std::lock_guard<std::mutex> lock(
542  return get_extension_module(
544  }
545  const std::unique_ptr<llvm::Module>& get_geos_module() const {
547  }
548  const std::unique_ptr<llvm::Module>& get_libdevice_module() const {
550  }
551 
552  bool has_rt_module() const {
554  }
555  bool has_udf_module(bool is_gpu = false) const {
556  return has_extension_module(
558  }
559  bool has_rt_udf_module(bool is_gpu = false) const {
560  return has_extension_module(
562  }
563  bool has_geos_module() const {
565  }
566  bool has_libdevice_module() const {
568  }
569 
574 
579  const bool with_generation) const {
581  return getStringDictionaryProxy(dict_key, row_set_mem_owner_, with_generation);
582  }
583 
585  const shared::StringDictKey& dict_key,
586  const std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
587  const bool with_generation) const;
588 
590  const shared::StringDictKey& source_dict_key,
591  const shared::StringDictKey& dest_dict_key,
592  const RowSetMemoryOwner::StringTranslationType translation_type,
593  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
594  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
595  const bool with_generation) const;
596 
598  const StringDictionaryProxy* source_proxy,
599  StringDictionaryProxy* dest_proxy,
600  const std::vector<StringOps_Namespace::StringOpInfo>& source_string_op_infos,
601  const std::vector<StringOps_Namespace::StringOpInfo>& dest_source_string_op_infos,
602  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner) const;
603 
605  const shared::StringDictKey& source_dict_key,
606  const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos,
607  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
608  const bool with_generation) const;
609 
610  bool isCPUOnly() const;
611 
612  bool isArchMaxwell(const ExecutorDeviceType dt) const;
613 
615  return cgen_state_->contains_left_deep_outer_join_;
616  }
617 
619 
621  int) const;
622 
624  CHECK(data_mgr_);
625  return data_mgr_;
626  }
627 
628  const std::shared_ptr<RowSetMemoryOwner> getRowSetMemoryOwner() const;
629 
630  const TemporaryTables* getTemporaryTables() const;
631 
633 
634  const TableGeneration& getTableGeneration(const shared::TableKey& table_key) const;
635 
637 
639  const std::set<shared::TableKey>& table_keys_to_fetch) const;
640 
641  std::map<shared::ColumnKey, size_t> getColumnByteWidthMap(
642  const std::set<shared::TableKey>& table_ids_to_fetch,
643  const bool include_lazy_fetched_cols) const;
644 
645  size_t getNumBytesForFetchedRow(const std::set<int>& table_ids_to_fetch) const;
646 
674  const ExecutorDeviceType device_type,
675  const std::vector<InputDescriptor>& input_descs,
676  const std::vector<InputTableInfo>& query_infos,
677  const std::vector<std::pair<int32_t, FragmentsList>>& device_fragment_lists) const;
678 
679  bool hasLazyFetchColumns(const std::vector<Analyzer::Expr*>& target_exprs) const;
680  std::vector<ColumnLazyFetchInfo> getColLazyFetchInfo(
681  const std::vector<Analyzer::Expr*>& target_exprs) const;
682 
683  static void registerActiveModule(void* module, const int device_id);
684  static void unregisterActiveModule(const int device_id);
685  void interrupt(const QuerySessionId& query_session = "",
686  const QuerySessionId& interrupt_session = "");
687  void resetInterrupt();
688 
689  // only for testing usage
690  void enableRuntimeQueryInterrupt(const double runtime_query_check_freq,
691  const unsigned pending_query_check_freq) const;
692 
693  int8_t warpSize() const;
694  unsigned gridSize() const;
695  void setGridSize(unsigned grid_size);
696  void resetGridSize();
697  unsigned numBlocksPerMP() const;
698  unsigned blockSize() const;
699  void setBlockSize(unsigned block_size);
700  void resetBlockSize();
701  size_t maxGpuSlabSize() const;
702 
703  ResultSetPtr executeWorkUnit(size_t& max_groups_buffer_entry_guess,
704  const bool is_agg,
705  const std::vector<InputTableInfo>&,
706  const RelAlgExecutionUnit&,
707  const CompilationOptions&,
708  const ExecutionOptions& options,
709  RenderInfo* render_info,
710  const bool has_cardinality_estimation,
711  ColumnCacheMap& column_cache);
712 
714  const std::vector<InputTableInfo>& table_infos,
715  const TableDescriptor* updated_table_desc,
716  const CompilationOptions& co,
717  const ExecutionOptions& eo,
719  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner,
721  const bool is_agg);
722 
724  const RelAlgExecutionUnit& ra_exe_unit,
725  const std::shared_ptr<RowSetMemoryOwner>& row_set_mem_owner);
726 
727  int deviceCount(const ExecutorDeviceType) const;
728 
729  void logSystemCPUMemoryStatus(std::string const& tag, size_t const thread_idx) const;
730 
731  void logSystemGPUMemoryStatus(std::string const& tag, size_t const thread_idx) const;
732 
733  private:
734  void clearMetaInfoCache();
735 
736  int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const;
737 
738  // Generate code for a window function target.
739  llvm::Value* codegenWindowFunction(const size_t target_index,
740  const CompilationOptions& co);
741 
743  llvm::Value* cond_lv,
744  SQLAgg const aggKind,
745  CompilationOptions const& co) const;
746 
747  // Generate code for an aggregate window function target.
748  llvm::Value* codegenWindowFunctionAggregate(CodeGenerator* code_generator,
749  const CompilationOptions& co);
750 
751  // The aggregate state requires a state reset when starting a new partition. Generate
752  // the new partition check and return the continuation basic block.
753  std::pair<llvm::BasicBlock*, llvm::Value*> codegenWindowResetStateControlFlow(
754  CodeGenerator* code_generator,
755  const CompilationOptions& co);
756 
757  // Generate code for initializing the state of a window aggregate.
758  void codegenWindowFunctionStateInit(CodeGenerator* code_generator,
759  const CompilationOptions& co,
760  llvm::Value* aggregate_state);
761 
762  // Generates the required calls for an aggregate window function and returns the final
763  // result.
764  llvm::Value* codegenWindowFunctionAggregateCalls(llvm::Value* aggregate_state,
765  const CompilationOptions& co);
766 
767  // Generate code for computing window navigation function on frame
769 
770  // Generate code for computing current partition index from a given row_pos
771  llvm::Value* codegenCurrentPartitionIndex(
772  const WindowFunctionContext* window_func_context,
773  CodeGenerator* code_generator,
774  const CompilationOptions& co,
775  llvm::Value* current_row_pos_lv);
776 
777  // Generate code to analyze user-given window frame bound expr
778  llvm::Value* codegenFrameBoundExpr(const Analyzer::WindowFunction* window_func,
779  const Analyzer::WindowFrame* frame_bound,
780  CodeGenerator& code_generator,
781  const CompilationOptions& co);
782 
783  // Generate code for a given frame bound
784  llvm::Value* codegenFrameBound(bool for_start_bound,
785  bool for_range_mode,
786  bool for_window_frame_naviation,
787  const Analyzer::WindowFrame* frame_bound,
788  bool is_timestamp_type_frame,
789  llvm::Value* order_key_null_val,
791 
792  std::pair<std::string, llvm::Value*> codegenLoadOrderKeyBufPtr(
793  WindowFunctionContext* window_func_context,
794  CodeGenerator* code_generator,
795  const CompilationOptions& co) const;
796 
797  // Generate code to load null range of the window partition
798  std::pair<llvm::Value*, llvm::Value*> codegenFrameNullRange(
799  WindowFunctionContext* window_func_context,
800  CodeGenerator* code_generator,
801  const CompilationOptions& co,
802  llvm::Value* partition_index_lv) const;
803 
804  // Generate codes for loading various buffers of window partitions
806  WindowFunctionContext* window_func_context,
807  CodeGenerator* code_generator,
808  const CompilationOptions& co,
809  llvm::Value* partition_index_lv) const;
810 
811  // Generate code for computing a window frame bound
812  std::pair<llvm::Value*, llvm::Value*> codegenWindowFrameBounds(
813  WindowFunctionContext* window_func_context,
814  const Analyzer::WindowFrame* frame_start_bound,
815  const Analyzer::WindowFrame* frame_end_bound,
816  llvm::Value* order_key_col_null_val_lv,
818  CodeGenerator& code_generator);
819 
820  // Generate codes for computing a pair of window frame bounds
821  std::pair<llvm::Value*, llvm::Value*> codegenFrameBoundRange(
822  const Analyzer::WindowFunction* window_func,
823  CodeGenerator& code_generator,
824  const CompilationOptions& co);
825 
826  // frequently used utility functions to generate code for window framing
827  std::vector<llvm::Value*> prepareRowModeFuncArgs(
828  bool for_start_bound,
829  SqlWindowFrameBoundType bound_type,
830  const WindowFrameBoundFuncArgs& args) const;
831  std::vector<llvm::Value*> prepareRangeModeFuncArgs(
832  bool for_start_bound,
833  const Analyzer::WindowFrame* frame_bound,
834  bool is_timestamp_type_frame,
835  llvm::Value* order_key_null_val,
836  const WindowFrameBoundFuncArgs& frame_args) const;
837  const std::string getOrderKeyTypeName(WindowFunctionContext* window_func_context) const;
839  WindowFunctionContext* window_func_context,
840  CodeGenerator& code_generator,
842  size_t getOrderKeySize(WindowFunctionContext* window_func_context) const;
844  WindowFunctionContext* window_func_context) const;
845  std::string getFramingFuncName(const std::string& bound_type,
846  const std::string& order_col_type,
847  const std::string& op_type,
848  bool for_timestamp_type) const;
849 
850  // The AVG window function requires some post-processing: the sum is divided by count
851  // and the result is stored back for the current row.
852  void codegenWindowAvgEpilogue(CodeGenerator* code_generator,
853  const CompilationOptions& co,
854  llvm::Value* crt_val,
855  llvm::Value* window_func_null_val);
856 
857  // Generates code which loads the current aggregate value for the window context.
858  llvm::Value* codegenAggregateWindowState(CodeGenerator* code_generator,
859  const CompilationOptions& co,
860  llvm::Value* aggregate_state);
861 
862  llvm::Value* aggregateWindowStatePtr(CodeGenerator* code_generator,
863  const CompilationOptions& co);
864 
866  CHECK(data_mgr_);
867  auto cuda_mgr = data_mgr_->getCudaMgr();
868  CHECK(cuda_mgr);
869  return cuda_mgr;
870  }
871 
873  if (dt == ExecutorDeviceType::GPU) {
874  return cudaMgr()->isArchPascalOrLater();
875  }
876  return false;
877  }
878 
879  bool needFetchAllFragments(const InputColDescriptor& col_desc,
880  const RelAlgExecutionUnit& ra_exe_unit,
881  const FragmentsList& selected_fragments) const;
882 
884  const InputColDescriptor& inner_col_desc,
885  const RelAlgExecutionUnit& ra_exe_unit,
886  const FragmentsList& selected_fragments,
887  const Data_Namespace::MemoryLevel memory_level) const;
888 
889  using PerFragmentCallBack =
890  std::function<void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo&)>;
891 
897  void executeWorkUnitPerFragment(const RelAlgExecutionUnit& ra_exe_unit,
898  const InputTableInfo& table_info,
899  const CompilationOptions& co,
900  const ExecutionOptions& eo,
903  const std::set<size_t>& fragment_indexes_param);
904 
906 
913  const std::vector<InputTableInfo>& table_infos,
914  const CompilationOptions& co,
915  const ExecutionOptions& eo);
916 
918  const RelAlgExecutionUnit& ra_exe_unit,
919  const ExecutorDeviceType requested_device_type);
920 
922  SharedKernelContext& shared_context,
923  const RelAlgExecutionUnit& ra_exe_unit,
925  const ExecutorDeviceType device_type,
926  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner);
927 
929  SharedKernelContext& shared_context,
930  const RelAlgExecutionUnit& ra_exe_unit,
931  const ExecutorDeviceType device_type) const;
932 
933  std::unordered_map<shared::TableKey, const Analyzer::BinOper*> getInnerTabIdToJoinCond()
934  const;
935 
940  std::vector<std::unique_ptr<ExecutionKernel>> createKernels(
941  SharedKernelContext& shared_context,
942  const RelAlgExecutionUnit& ra_exe_unit,
943  ColumnFetcher& column_fetcher,
944  const std::vector<InputTableInfo>& table_infos,
945  const ExecutionOptions& eo,
946  const bool is_agg,
947  const bool allow_single_frag_table_opt,
948  const size_t context_count,
949  const QueryCompilationDescriptor& query_comp_desc,
951  RenderInfo* render_info,
952  std::unordered_set<int>& available_gpus,
953  int& available_cpus);
954 
959  void launchKernelsImpl(SharedKernelContext& shared_context,
960  std::vector<std::unique_ptr<ExecutionKernel>>&& kernels,
961  const ExecutorDeviceType device_type,
962  const size_t requested_num_threads);
963 
964  void launchKernelsLocked(SharedKernelContext& shared_context,
965  std::vector<std::unique_ptr<ExecutionKernel>>&& kernels,
966  const ExecutorDeviceType device_type);
967 
993  SharedKernelContext& shared_context,
994  std::vector<std::unique_ptr<ExecutionKernel>>&& kernels,
995  const ExecutorDeviceType device_type,
996  const std::vector<InputDescriptor>& input_descs,
998 
999  std::vector<size_t> getTableFragmentIndices(
1000  const RelAlgExecutionUnit& ra_exe_unit,
1001  const ExecutorDeviceType device_type,
1002  const size_t table_idx,
1003  const size_t outer_frag_idx,
1004  std::map<shared::TableKey, const TableFragments*>& selected_tables_fragments,
1005  const std::unordered_map<shared::TableKey, const Analyzer::BinOper*>&
1006  inner_table_id_to_join_condition);
1007 
1008  bool skipFragmentPair(
1009  const Fragmenter_Namespace::FragmentInfo& outer_fragment_info,
1010  const Fragmenter_Namespace::FragmentInfo& inner_fragment_info,
1011  const int inner_table_id,
1012  const std::unordered_map<shared::TableKey, const Analyzer::BinOper*>&
1013  inner_table_id_to_join_condition,
1014  const RelAlgExecutionUnit& ra_exe_unit,
1015  const ExecutorDeviceType device_type);
1016 
1018  const RelAlgExecutionUnit& ra_exe_unit,
1019  const int device_id,
1021  const std::map<shared::TableKey, const TableFragments*>&,
1022  const FragmentsList& selected_fragments,
1023  std::list<ChunkIter>&,
1024  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
1025  DeviceAllocator* device_allocator,
1026  const size_t thread_idx,
1027  const bool allow_runtime_interrupt);
1028 
1030  const RelAlgExecutionUnit& ra_exe_unit,
1031  const int device_id,
1033  const std::map<shared::TableKey, const TableFragments*>&,
1034  const FragmentsList& selected_fragments,
1035  std::list<ChunkIter>&,
1036  std::list<std::shared_ptr<Chunk_NS::Chunk>>&,
1037  DeviceAllocator* device_allocator,
1038  const size_t thread_idx,
1039  const bool allow_runtime_interrupt);
1040 
1041  std::pair<std::vector<std::vector<int64_t>>, std::vector<std::vector<uint64_t>>>
1043  const RelAlgExecutionUnit& ra_exe_unit,
1044  const CartesianProduct<std::vector<std::vector<size_t>>>& frag_ids_crossjoin,
1045  const std::vector<InputDescriptor>& input_descs,
1046  const std::map<shared::TableKey, const TableFragments*>& all_tables_fragments);
1047 
1049  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
1050  std::vector<size_t>& local_col_to_frag_pos,
1051  const std::list<std::shared_ptr<const InputColDescriptor>>& col_global_ids,
1052  const FragmentsList& selected_fragments,
1053  const RelAlgExecutionUnit& ra_exe_unit);
1054 
1056  std::vector<std::vector<size_t>>& selected_fragments_crossjoin,
1057  const FragmentsList& selected_fragments,
1058  const RelAlgExecutionUnit& ra_exe_unit);
1059 
// Returns a vector of size_t values derived from `selected_fragments` for the
// input at position `scan_idx`.
// NOTE(review): whether the entries are counts or fragment ids is not visible
// from this declaration; confirm in the definition.
1060  std::vector<size_t> getFragmentCount(const FragmentsList& selected_fragments,
1061  const size_t scan_idx,
1062  const RelAlgExecutionUnit& ra_exe_unit);
1063 
1064  // pass nullptr to results if it shouldn't be extracted from the execution context
1065  int32_t executePlanWithGroupBy(const RelAlgExecutionUnit& ra_exe_unit,
1066  const CompilationResult&,
1067  const bool hoist_literals,
1068  ResultSetPtr* results,
1069  const ExecutorDeviceType device_type,
1070  std::vector<std::vector<const int8_t*>>& col_buffers,
1071  const std::vector<size_t> outer_tab_frag_ids,
1073  const std::vector<std::vector<int64_t>>& num_rows,
1074  const std::vector<std::vector<uint64_t>>& frag_offsets,
1076  const int device_id,
1077  const shared::TableKey& outer_table_key,
1078  const int64_t limit,
1079  const uint32_t start_rowid,
1080  const uint32_t num_tables,
1081  const bool allow_runtime_interrupt,
1082  RenderInfo* render_info,
1083  const bool optimize_cuda_block_and_grid_sizes,
1084  const int64_t rows_to_process = -1);
// Runs a compiled plan for an execution unit that (per the name) has no GROUP BY.
// Returns an int32_t; presumably an error code -- confirm in the definition.
// `results` is an out-parameter. `rows_to_process` defaults to -1.
// NOTE(review): -1 presumably means "no explicit row limit" -- confirm.
1085  // pass nullptr to results if it shouldn't be extracted from the execution context
1086  int32_t executePlanWithoutGroupBy(
1087  const RelAlgExecutionUnit& ra_exe_unit,
1088  const CompilationResult&,
1089  const bool hoist_literals,
1090  ResultSetPtr* results,
1091  const std::vector<Analyzer::Expr*>& target_exprs,
1092  const ExecutorDeviceType device_type,
1093  std::vector<std::vector<const int8_t*>>& col_buffers,
1094  QueryExecutionContext* query_exe_context,
1095  const std::vector<std::vector<int64_t>>& num_rows,
1096  const std::vector<std::vector<uint64_t>>& frag_offsets,
1097  Data_Namespace::DataMgr* data_mgr,
1098  const int device_id,
1099  const uint32_t start_rowid,
1100  const uint32_t num_tables,
1101  const bool allow_runtime_interrupt,
1102  RenderInfo* render_info,
1103  const bool optimize_cuda_block_and_grid_sizes,
1104  const int64_t rows_to_process = -1);
1105 
1106  public: // Temporary, ask saman about this
// Static helper that reduces the `out_vec_sz` entries of `out_vec` for the
// aggregate kind `agg` and result type `ti`. Defined in Execute.cpp.
// Returns a pair<int64_t, int32_t>; presumably (reduced value, error code) --
// confirm in the definition.
1107  static std::pair<int64_t, int32_t> reduceResults(const SQLAgg agg,
1108  const SQLTypeInfo& ti,
1109  const int64_t agg_init_val,
1110  const int8_t out_byte_width,
1111  const int64_t* out_vec,
1112  const size_t out_vec_sz,
1113  const bool is_group_by,
1114  const bool float_argument_input);
1115 
1116  private:
1118  const RelAlgExecutionUnit& ra_exe_unit);
// Returns raw int8_t* pointers for the given device type and device id.
// Defined in Execute.cpp. Presumably one pointer per join hash table of the
// current query -- inferred from the name; confirm in the definition.
1119  std::vector<int8_t*> getJoinHashTablePtrs(const ExecutorDeviceType device_type,
1120  const int device_id);
1122  const RelAlgExecutionUnit&,
1123  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
1124  std::shared_ptr<RowSetMemoryOwner>,
1125  const QueryMemoryDescriptor&) const;
1126  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>
1128  const std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& results_per_device)
1129  const;
1131  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
1132  std::shared_ptr<RowSetMemoryOwner>,
1133  const QueryMemoryDescriptor&) const;
1135  const RelAlgExecutionUnit&,
1136  std::vector<std::pair<ResultSetPtr, std::vector<size_t>>>& all_fragment_results,
1137  std::shared_ptr<RowSetMemoryOwner>,
1138  const QueryMemoryDescriptor&) const;
1139 
// Executes a work unit and returns its result set.
// `max_groups_buffer_entry_guess` and `column_cache` are taken by non-const
// reference, so the callee may modify them.
// NOTE(review): the "Impl" suffix suggests this is the worker behind a public
// executeWorkUnit entry point -- confirm.
1140  ResultSetPtr executeWorkUnitImpl(size_t& max_groups_buffer_entry_guess,
1141  const bool is_agg,
1142  const bool allow_single_frag_table_opt,
1143  const std::vector<InputTableInfo>&,
1144  const RelAlgExecutionUnit&,
1145  const CompilationOptions&,
1146  const ExecutionOptions& options,
1147  std::shared_ptr<RowSetMemoryOwner>,
1148  RenderInfo* render_info,
1149  const bool has_cardinality_estimation,
1150  ColumnCacheMap& column_cache);
1151 
// Returns a vector of LLVM values. Presumably the literals that were hoisted
// during codegen, re-materialized inline -- inferred from the name; confirm.
1152  std::vector<llvm::Value*> inlineHoistedLiterals();
1153 
1155 
// Compiles the given execution unit.
// Returns the CompilationResult together with an owning pointer to a
// QueryMemoryDescriptor.
// `column_cache` is taken by non-const reference; `render_info` is optional and
// defaults to nullptr.
1156  std::tuple<CompilationResult, std::unique_ptr<QueryMemoryDescriptor>> compileWorkUnit(
1157  const std::vector<InputTableInfo>& query_infos,
1158  const PlanState::DeletedColumnsMap& deleted_cols_map,
1159  const RelAlgExecutionUnit& ra_exe_unit,
1160  const CompilationOptions& co,
1161  const ExecutionOptions& eo,
1162  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
1163  const bool allow_lazy_fetch,
1164  std::shared_ptr<RowSetMemoryOwner>,
1165  const size_t max_groups_buffer_entry_count,
1166  const int8_t crt_min_byte_width,
1167  const bool has_cardinality_estimation,
1168  ColumnCacheMap& column_cache,
1169  RenderInfo* render_info = nullptr);
1170  // Generate code to skip the deleted rows in the outermost table.
// Returns an llvm::BasicBlock*.
// NOTE(review): which block is returned, and whether it can be null when the
// table has no deleted column, is not visible here -- confirm in the definition.
1171  llvm::BasicBlock* codegenSkipDeletedOuterTableRow(
1172  const RelAlgExecutionUnit& ra_exe_unit,
1173  const CompilationOptions& co);
// Builds the JoinLoop descriptors for `ra_exe_unit` and returns them by value.
// Both `ra_exe_unit` and `column_cache` are non-const references, so this call
// may modify them.
1174  std::vector<JoinLoop> buildJoinLoops(RelAlgExecutionUnit& ra_exe_unit,
1175  const CompilationOptions& co,
1176  const ExecutionOptions& eo,
1177  const std::vector<InputTableInfo>& query_infos,
1178  ColumnCacheMap& column_cache);
1179  // Create a callback which hoists left hand side filters above the join for left joins,
1180  // eliminating extra computation of the probe and matches if the row does not pass the
1181  // filters
1183  const RelAlgExecutionUnit& ra_exe_unit,
1184  const size_t level_idx,
1185  const shared::TableKey& inner_table_key,
1186  const CompilationOptions& co);
1187  // Create a callback which generates code which returns true iff the row on the given
1188  // level is deleted.
// The returned std::function takes a vector of LLVM values plus one more LLVM
// value and yields an llvm::Value*.
// NOTE(review): the vector is presumably the previous join-loop iterators --
// confirm in the definition.
1189  std::function<llvm::Value*(const std::vector<llvm::Value*>&, llvm::Value*)>
1190  buildIsDeletedCb(const RelAlgExecutionUnit& ra_exe_unit,
1191  const size_t level_idx,
1192  const CompilationOptions& co);
1193  // Builds a join hash table for the provided conditions on the current level.
1194  // Returns null on failure and reports the reasons in `fail_reasons`.
// `ra_exe_unit`, `column_cache` and `fail_reasons` are non-const references and
// may be modified by the call.
1195  std::shared_ptr<HashJoin> buildCurrentLevelHashTable(
1196  const JoinCondition& current_level_join_conditions,
1197  size_t level_idx,
1198  RelAlgExecutionUnit& ra_exe_unit,
1199  const CompilationOptions& co,
1200  const std::vector<InputTableInfo>& query_infos,
1201  ColumnCacheMap& column_cache,
1202  std::vector<std::string>& fail_reasons);
// NOTE(review): presumably re-emits the declaration of the generated filter
// function in the current module -- inferred from the name; confirm.
1203  void redeclareFilterFunction();
// Returns an llvm::Value* for join level `level_idx`, given the iterator values
// of the preceding levels in `prev_iters`.
1204  llvm::Value* addJoinLoopIterator(const std::vector<llvm::Value*>& prev_iters,
1205  const size_t level_idx);
1206  void codegenJoinLoops(const std::vector<JoinLoop>& join_loops,
1207  const RelAlgExecutionUnit& ra_exe_unit,
1208  GroupByAndAggregate& group_by_and_aggregate,
1209  llvm::Function* query_func,
1210  llvm::BasicBlock* entry_bb,
1212  const CompilationOptions& co,
1213  const ExecutionOptions& eo);
1214  bool compileBody(const RelAlgExecutionUnit& ra_exe_unit,
1215  GroupByAndAggregate& group_by_and_aggregate,
1217  const CompilationOptions& co,
1218  const GpuSharedMemoryContext& gpu_smem_context = {});
1219 
// Operates on the generated `query_func`. Per the name, it adds the control flow
// that checks for errors; the two bool flags select the dynamic-watchdog and
// runtime-interrupt variants.
// NOTE(review): exact insertion points are only visible in the definition.
1220  void createErrorCheckControlFlow(llvm::Function* query_func,
1221  bool run_with_dynamic_watchdog,
1222  bool run_with_allowing_runtime_interrupt,
1223  const std::vector<JoinLoop>& join_loops,
1224  ExecutorDeviceType device_type,
1225  const std::vector<InputTableInfo>& input_table_infos);
1226 
// Operates on the generated `query_func`.
// NOTE(review): `error_code_idx` is presumably the index of the error-code
// argument of `query_func` -- confirm in the definition.
1227  void insertErrorCodeChecker(llvm::Function* query_func,
1228  unsigned const error_code_idx,
1229  bool hoist_literals,
1230  bool allow_runtime_query_interrupt);
1231 
// NOTE(review): presumably emits code that loads per-input fragment offsets up
// front -- inferred from the name; confirm in the definition.
1232  void preloadFragOffsets(const std::vector<InputDescriptor>& input_descs,
1233  const std::vector<InputTableInfo>& query_infos);
1234 
1236  std::shared_ptr<HashJoin> hash_table;
1237  std::string fail_reason;
1238  };
1239 
1241  const std::shared_ptr<Analyzer::BinOper>& qual_bin_oper,
1242  const std::vector<InputTableInfo>& query_infos,
1243  const MemoryLevel memory_level,
1244  const JoinType join_type,
1245  const HashType preferred_hash_type,
1246  ColumnCacheMap& column_cache,
1247  const HashTableBuildDagMap& hashtable_build_dag_map,
1248  const RegisteredQueryHint& query_hint,
1249  const TableIdToNodeMap& table_id_to_node_map);
// NOTE(review): presumably discards the previous codegen/plan state and creates
// fresh state for the given unit -- confirm in the definition.
// `ra_exe_unit` is a pointer rather than a reference, so it may be allowed to be
// null -- confirm.
1250  void nukeOldState(const bool allow_lazy_fetch,
1251  const std::vector<InputTableInfo>& query_infos,
1252  const PlanState::DeletedColumnsMap& deleted_cols_map,
1253  const RelAlgExecutionUnit* ra_exe_unit);
1254 
// CPU code generation. Takes two LLVM functions and a read-only set of LLVM
// functions, and returns a shared CompilationContext.
// NOTE(review): the parameters are unnamed here; their roles (query function,
// multifrag wrapper, live-function set?) must be confirmed in the definition.
1255  std::shared_ptr<CompilationContext> optimizeAndCodegenCPU(
1256  llvm::Function*,
1257  llvm::Function*,
1258  const std::unordered_set<llvm::Function*>&,
1259  const CompilationOptions&);
// GPU code generation; returns a shared CompilationContext.
// Unlike the CPU variant, the function set is a non-const reference, so it may
// be modified by the call.
// NOTE(review): the roles of the two unnamed llvm::Function* parameters must be
// confirmed in the definition.
1260  std::shared_ptr<CompilationContext> optimizeAndCodegenGPU(
1261  llvm::Function*,
1262  llvm::Function*,
1263  std::unordered_set<llvm::Function*>&,
1264  const bool no_inline,
1265  const CudaMgr_Namespace::CudaMgr* cuda_mgr,
1266  const bool is_gpu_smem_used,
1267  const CompilationOptions&);
// Const member taking a string and returning a string; presumably PTX text
// generated from the input -- inferred from the name; confirm.
1268  std::string generatePTX(const std::string&) const;
// Const member. NOTE(review): likely initializes the mutable
// nvptx_target_machine_ member -- confirm in the definition.
1269  void initializeNVPTXBackend() const;
1270 
// NOTE(review): presumably converts a duration in milliseconds to a device
// clock-cycle count -- inferred from the name; confirm.
1271  int64_t deviceCycles(int milliseconds) const;
1272 
1274  llvm::Value* translated_value;
1275  llvm::Value* original_value;
1276  };
1277 
1279  const size_t col_width,
1280  const CompilationOptions&,
1281  const bool translate_null_val,
1282  const int64_t translated_null_val,
1283  DiamondCodegen&,
1284  std::stack<llvm::BasicBlock*>&,
1285  const bool thread_mem_shared);
1286 
// Returns an llvm::Value* for the input value cast according to `from_ti` and
// `to_ti`; per the name the target is a floating-point type.
1287  llvm::Value* castToFP(llvm::Value*,
1288  SQLTypeInfo const& from_ti,
1289  SQLTypeInfo const& to_ti);
// NOTE(review): presumably casts `val` to a pointer to an integer type of
// `bit_width` bits -- inferred from the name; confirm in the definition.
1290  llvm::Value* castToIntPtrTyIn(llvm::Value* val, const size_t bit_width);
1291 
// Takes the execution unit by const reference and returns a RelAlgExecutionUnit
// by value together with a DeletedColumnsMap (table key -> column descriptor);
// the input unit is not modified. Defined in Execute.cpp.
1292  std::tuple<RelAlgExecutionUnit, PlanState::DeletedColumnsMap> addDeletedColumn(
1293  const RelAlgExecutionUnit& ra_exe_unit,
1294  const CompilationOptions& co);
1295 
// NOTE(review): presumably true when every row of `fragment` is marked deleted
// -- inferred from the name; confirm in the definition.
1296  bool isFragmentFullyDeleted(const InputDescriptor& table_desc,
1297  const Fragmenter_Namespace::FragmentInfo& fragment);
1298 
1300  const Analyzer::BinOper* comp_expr,
1301  const Analyzer::ColumnVar* lhs_col,
1302  const Fragmenter_Namespace::FragmentInfo& fragment,
1303  const Analyzer::Constant* rhs_const) const;
1304 
// Evaluates one fragment against `simple_quals`.
// Returns a pair<bool, int64_t>.
// NOTE(review): the bool is presumably the "can skip" flag; the meaning of the
// int64_t is not visible here -- confirm in the definition.
1305  std::pair<bool, int64_t> skipFragment(
1306  const InputDescriptor& table_desc,
1307  const Fragmenter_Namespace::FragmentInfo& frag_info,
1308  const std::list<std::shared_ptr<Analyzer::Expr>>& simple_quals,
1309  const std::vector<uint64_t>& frag_offsets,
1310  const size_t frag_idx);
1311 
// Returns the same pair<bool, int64_t> type as skipFragment, but takes the whole
// execution unit instead of a list of simple qualifiers.
// NOTE(review): presumably applies the skip test using inner-join qualifiers --
// inferred from the name; confirm in the definition.
1312  std::pair<bool, int64_t> skipFragmentInnerJoins(
1313  const InputDescriptor& table_desc,
1314  const RelAlgExecutionUnit& ra_exe_unit,
1315  const Fragmenter_Namespace::FragmentInfo& fragment,
1316  const std::vector<uint64_t>& frag_offsets,
1317  const size_t frag_idx);
1318 
1320  const std::unordered_set<PhysicalInput>& phys_inputs);
1322  const std::unordered_set<PhysicalInput>& phys_inputs);
1324  const std::unordered_set<shared::TableKey>& phys_table_keys);
1325 
1326  public:
// Public entry point taking the physical inputs and physical table keys of a
// query.
// NOTE(review): presumably populates the executor's per-query caches -- confirm
// in the definition.
1327  void setupCaching(const std::unordered_set<PhysicalInput>& phys_inputs,
1328  const std::unordered_set<shared::TableKey>& phys_table_keys);
// Replaces agg_col_range_cache_ with a copy of `aggregated_col_range`.
1329  void setColRangeCache(const AggregatedColRange& aggregated_col_range) {
1330  agg_col_range_cache_ = aggregated_col_range;
1331  }
1336  const QuerySessionId& candidate_query_session,
1338  bool checkCurrentQuerySession(const std::string& candidate_query_session,
1342  bool addToQuerySessionList(const QuerySessionId& query_session,
1343  const std::string& query_str,
1344  const std::string& submitted,
1345  const size_t executor_id,
1346  const QuerySessionStatus::QueryStatus query_status,
1349  const QuerySessionId& query_session,
1350  const std::string& submitted_time_str,
1353  const QuerySessionId& query_session,
1356  const std::string& query_session,
1359  const QuerySessionId& query_session,
1362  const QuerySessionId& query_session,
1363  const std::string& submitted_time_str,
1364  const QuerySessionStatus::QueryStatus updated_query_status,
1367  const QuerySessionId& query_session,
1368  const std::string& submitted_time_str,
1369  const size_t executor_id,
1371  std::vector<QuerySessionStatus> getQuerySessionInfo(
1372  const QuerySessionId& query_session,
1374 
1377  const QuerySessionId& query_session_id,
1378  const std::string& query_str,
1379  const std::string& query_submitted_time);
// Defined in Execute.cpp.
// NOTE(review): returns void, so any "interrupted while pending" outcome is
// presumably signalled some other way (e.g. by throwing) -- confirm.
1380  void checkPendingQueryStatus(const QuerySessionId& query_session);
// Identifies a query by its session id plus its submitted-time string.
// NOTE(review): presumably removes that query's bookkeeping entry -- confirm.
1381  void clearQuerySessionStatus(const QuerySessionId& query_session,
1382  const std::string& submitted_time_str);
// Sets the status of the query identified by (`query_session`,
// `submitted_time_str`) to `new_query_status`. Defined in Execute.cpp.
1383  void updateQuerySessionStatus(const QuerySessionId& query_session,
1384  const std::string& submitted_time_str,
1385  const QuerySessionStatus::QueryStatus new_query_status);
// Registers a query for `query_session`, recording its text, submitted-time
// string, owning executor id and initial status.
// NOTE(review): locking behaviour is not visible from this declaration.
1386  void enrollQuerySession(const QuerySessionId& query_session,
1387  const std::string& query_str,
1388  const std::string& submitted_time_str,
1389  const size_t executor_id,
1390  const QuerySessionStatus::QueryStatus query_session_status);
// Returns a count of enrolled sessions.
// NOTE: "Curent" is a misspelling of "Current" in the public name; it is kept
// as-is because callers depend on it.
1391  size_t getNumCurentSessionsEnrolled() const;
1392  // get a set of executor ids that a given session has fired regardless of
1393  // each executor's status: pending or running
// The ids are returned by value as a std::vector<size_t>, not a std::set.
1394  const std::vector<size_t> getExecutorIdsRunningQuery(
1395  const QuerySessionId& interrupt_session) const;
1396  // Checks whether the session managed by this executor has been interrupted
1397  // while it was performing a non-kernel-time task.
1398  bool checkNonKernelTimeInterrupted() const;
// Records `query_plan_dag`; QueryPlanDAG is an alias for std::string.
// NOTE(review): presumably what getLatestQueryPlanDagExtracted() later returns
// -- confirm in Execute.cpp.
1399  void registerExtractedQueryPlanDag(const QueryPlanDAG& query_plan_dag);
1401 
1402  // true when we have matched cardinality, and false otherwise
1403  using CachedCardinality = std::pair<bool, size_t>;
// Stores `cache_value` under `cache_key`.
// NOTE(review): the backing store is presumably the static cardinality_cache_
// map declared later in this class -- confirm in the definition.
1404  void addToCardinalityCache(const CardinalityCacheKey& cache_key,
1405  const size_t cache_value);
// Static: presumably empties the whole cardinality cache -- confirm.
1407  static void clearCardinalityCache();
// Static: presumably drops only the cached entries that involve `table_key` --
// confirm in the definition.
1408  static void invalidateCardinalityCacheForTable(const shared::TableKey& table_key);
1409 
1413 
// Non-owning raw pointer to the CgenState held by cgen_state_ (may be null).
1414  CgenState* getCgenStatePtr() const { return cgen_state_.get(); }
// Non-owning raw pointer to the PlanState held by plan_state_ (may be null).
1415  PlanState* getPlanStatePtr() const { return plan_state_.get(); }
1416 
// Reference to the LLVM context owned by context_. Dereferences the pointer
// unconditionally, so context_ must be non-null when this is called.
1417  llvm::LLVMContext& getContext() { return *context_.get(); }
// NOTE(review): presumably reloads this executor's extension modules; with
// `update_runtime_modules_only` set, only the runtime ones -- confirm.
1418  void update_extension_modules(bool update_runtime_modules_only = false);
1419 
1420  static void update_after_registration(bool update_runtime_modules_only = false) {
1421  for (auto executor_item : Executor::executors_) {
1422  executor_item.second->update_extension_modules(update_runtime_modules_only);
1423  }
1424  }
// Static initializer for the executor resource manager.
// Takes slot counts, memory sizes, per-query ratios (doubles) and a set of
// concurrency / oversubscription switches.
// NOTE(review): units of the *_mem parameters (presumably bytes) and the valid
// range of the ratios (presumably (0, 1]) are not visible here -- confirm.
1425  static void init_resource_mgr(const size_t num_cpu_slots,
1426  const size_t num_gpu_slots,
1427  const size_t cpu_result_mem,
1428  const size_t cpu_buffer_pool_mem,
1429  const size_t gpu_buffer_pool_mem,
1430  const double per_query_max_cpu_slots_ratio,
1431  const double per_query_max_cpu_result_mem_ratio,
1432  const bool allow_cpu_kernel_concurrency,
1433  const bool allow_cpu_gpu_kernel_concurrency,
1434  const bool allow_cpu_slot_oversubscription_concurrency,
1435  const bool allow_cpu_result_mem_oversubscription,
1436  const double max_available_resource_use_ratio);
1437 
// Static pause/resume pair for the executor queue.
// NOTE(review): whether in-flight requests are drained or merely held back is
// not visible from these declarations -- confirm in the definitions.
1438  static void pause_executor_queue();
1439  static void resume_executor_queue();
1441  const ExecutorResourceMgr_Namespace::ResourceType resource_type);
1445  const ExecutorResourceMgr_Namespace::ResourceType resource_type,
1446  const size_t resource_quantity);
1447 
1448  static size_t getBaselineThreshold(bool for_count_distinct,
1449  ExecutorDeviceType device_type) {
1450  return for_count_distinct ? (device_type == ExecutorDeviceType::GPU
1454  }
1457  const ExecutorResourceMgr_Namespace::ResourceType resource_type);
1458 
1461  concurrent_resource_grant_policy);
1462 
1463  private:
// Returns a byte buffer built from `literals`, a map from int keys to
// CgenState::LiteralValues, for `device_id`.
// NOTE(review): the map key is presumably a device id -- confirm.
1464  std::vector<int8_t> serializeLiterals(
1465  const std::unordered_map<int, CgenState::LiteralValues>& literals,
1466  const int device_id);
1467 
// Rounds `off_in` up to the nearest multiple of `alignment`.
//
// Returns `off_in` unchanged when it is already a multiple of `alignment`.
// An `alignment` of 0 is treated as "no alignment requirement" and also returns
// `off_in` unchanged; without this guard the modulo below would divide by zero,
// which is undefined behaviour.
static size_t align(const size_t off_in, const size_t alignment) {
  if (alignment == 0) {
    return off_in;
  }
  // Compute the remainder once; it is both the test and the padding input.
  const size_t remainder = off_in % alignment;
  return remainder == 0 ? off_in : off_in + (alignment - remainder);
}
1475 
1477  std::unique_ptr<llvm::LLVMContext> context_;
1478 
1479  public:
1480  // CgenStateManager uses RAII pattern to ensure that recursive code
1481  // generation (e.g. as in multi-step multi-subqueries) uses a new
1482  // CgenState instance for each recursion depth while restoring the
1483  // old CgenState instances when returning from recursion.
1485  public:
1486  CgenStateManager(Executor& executor);
1487  CgenStateManager(Executor& executor,
1488  const bool allow_lazy_fetch,
1489  const std::vector<InputTableInfo>& query_infos,
1490  const PlanState::DeletedColumnsMap& deleted_cols_map,
1491  const RelAlgExecutionUnit* ra_exe_unit);
1493 
1494  private:
1496  std::chrono::steady_clock::time_point lock_queue_clock_;
1497  std::lock_guard<std::mutex> lock_;
1498  std::unique_ptr<CgenState> cgen_state_;
1499  };
1500 
1501  private:
1502  std::unique_ptr<CgenState> cgen_state_;
1503 
1504  const std::unique_ptr<llvm::Module>& get_extension_module(ExtModuleKinds kind) const {
1505  auto it = extension_modules_.find(kind);
1506  if (it != extension_modules_.end()) {
1507  return it->second;
1508  }
1509  static const std::unique_ptr<llvm::Module> empty;
1510  return empty;
1511  }
1512 
1514  return extension_modules_.find(kind) != extension_modules_.end();
1515  }
1516 
1517  std::map<ExtModuleKinds, std::unique_ptr<llvm::Module>> extension_modules_;
1518 
1520  public:
1522  : cgen_state_(cgen_state), saved_fetch_cache(cgen_state_->fetch_cache_) {}
1524 
1525  private:
1527  std::unordered_map<size_t, std::vector<llvm::Value*>> saved_fetch_cache;
1528  };
1529 
// Returns an llvm::Value* derived from `elem_val` and `elem_ty`.
// NOTE(review): presumably stores a double element to memory and returns a
// pointer to it -- inferred from "spill"; confirm in the definition.
1530  llvm::Value* spillDoubleElement(llvm::Value* elem_val, llvm::Type* elem_ty);
1531 
1532  std::unique_ptr<PlanState> plan_state_;
1533  std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner_;
1534 
1535  static const int max_gpu_count{16};
1536  static const size_t auto_num_threads{size_t(0)};
1538 
1539  static std::mutex gpu_active_modules_mutex_;
1542  // indicates whether this executor has been interrupted
1543  std::atomic<bool> interrupted_{false};
1544 
1545  mutable std::mutex str_dict_mutex_;
1546 
1547  mutable std::unique_ptr<llvm::TargetMachine> nvptx_target_machine_;
1548 
1549  static const size_t baseline_threshold{
1550  1000000}; // if a perfect hash needs more entries, use baseline
1551 
1552  unsigned block_size_x_;
1553  unsigned grid_size_x_;
1554  const size_t max_gpu_slab_size_;
1555  const std::string debug_dir_;
1556  const std::string debug_file_;
1557 
1561 
1564 
1565  // Singleton instance used for an execution unit which is a project with window
1566  // functions.
1567  std::unique_ptr<WindowProjectNodeContext> window_project_node_context_owned_;
1568  // The active window function.
1570 
1575  // a query session that this executor manages
1577  // a pair of <QuerySessionId, interrupted_flag>
1579  // a pair of <QuerySessionId, query_session_status>
1581  static std::map<int, std::shared_ptr<Executor>> executors_;
1582 
1583  // SQL queries take a shared lock, exclusive options (cache clear, memory clear) take a
1584  // write lock
1586 
1590  };
1592  ExecutorMutexHolder ret;
1594  // Only one unitary executor can run at a time
1596  } else {
1598  }
1599  return ret;
1600  }
1601 
1603 
1606 
1607  static std::unordered_map<CardinalityCacheKey, size_t> cardinality_cache_;
1609 
1610  // a variable used for testing query plan DAG extractor when a query has a table
1611  // function
1613 
1614  public:
1615  // Although compilation is Executor-local, an executor may trigger
1616  // threaded compilations (see executeWorkUnitPerFragment) that share
1617  // executor cgen_state and LLVM context, for instance.
1619 
1620  // Runtime extension function registration updates
1621  // extension_modules_ that needs to be kept blocked from codegen
1622  // until the update is complete.
1624  static std::mutex kernel_mutex_; // TODO: should this be executor-local mutex?
1625 
1626  static const size_t auto_cpu_mem_bytes{size_t(0)};
1627  static std::shared_ptr<ExecutorResourceMgr_Namespace::ExecutorResourceMgr>
1629 
1631  friend class CodeGenerator;
1632  friend class ColumnFetcher;
1633  friend struct DiamondCodegen; // cgen_state_
1634  friend class ExecutionKernel;
1635  friend class KernelSubtask;
1636  friend class HashJoin; // cgen_state_
1638  friend class RangeJoinHashTable;
1639  friend class GroupByAndAggregate;
1645  friend class ResultSet;
1646  friend class InValuesBitmap;
1648  friend class LeafAggregator;
1649  friend class PerfectJoinHashTable;
1650  friend class QueryRewriter;
1652  friend class RelAlgExecutor;
1653  friend class TableOptimizer;
1657  friend struct TargetExprCodegen;
1659 };
1660 
1661 inline std::string get_null_check_suffix(const SQLTypeInfo& lhs_ti,
1662  const SQLTypeInfo& rhs_ti) {
1663  if (lhs_ti.get_notnull() && rhs_ti.get_notnull()) {
1664  return "";
1665  }
1666  std::string null_check_suffix{"_nullable"};
1667  if (lhs_ti.get_notnull()) {
1668  CHECK(!rhs_ti.get_notnull());
1669  null_check_suffix += "_rhs";
1670  } else if (rhs_ti.get_notnull()) {
1671  CHECK(!lhs_ti.get_notnull());
1672  null_check_suffix += "_lhs";
1673  }
1674  return null_check_suffix;
1675 }
1676 
1677 inline bool is_unnest(const Analyzer::Expr* expr) {
1678  return dynamic_cast<const Analyzer::UOper*>(expr) &&
1679  static_cast<const Analyzer::UOper*>(expr)->get_optype() == kUNNEST;
1680 }
1681 
1682 inline bool is_constructed_point(const Analyzer::Expr* expr) {
1683  auto uoper = dynamic_cast<const Analyzer::UOper*>(expr);
1684  auto oper = (uoper && uoper->get_optype() == kCAST) ? uoper->get_operand() : expr;
1685  auto arr = dynamic_cast<const Analyzer::ArrayExpr*>(oper);
1686  return (arr && arr->isLocalAlloc() && arr->get_type_info().is_fixlen_array());
1687 }
1688 
// NOTE(review): presumably a size estimate for a loop join over the unit's
// inputs -- inferred from the name; confirm in the definition.
1689 size_t get_loop_join_size(const std::vector<InputTableInfo>& query_infos,
1690  const RelAlgExecutionUnit& ra_exe_unit);
1691 
// Returns a set of int GPU ids.
// NOTE(review): the cross-reference index for this header lists
// get_available_gpus(const Data_Namespace::DataMgr*) as the overload defined in
// Execute.cpp; verify that this Catalog-based overload still has a definition.
1692 std::unordered_set<int> get_available_gpus(const Catalog_Namespace::Catalog& cat);
1693 
// NOTE(review): presumably the number of execution contexts for `device_type`,
// given the available CPU and GPU counts -- confirm in the definition.
1694 size_t get_context_count(const ExecutorDeviceType device_type,
1695  const size_t cpu_count,
1696  const size_t gpu_count);
1697 
// C-linkage entry point exported via RUNTIME_EXPORT.
// NOTE(review): `exec` is an int64_t, presumably an Executor pointer passed as
// an integer from generated code -- confirm in the definition.
1698 extern "C" RUNTIME_EXPORT void register_buffer_with_executor_rsm(int64_t exec,
1699  int8_t* buffer);
1700 
1702 
1703 inline std::string toString(const Executor::ExtModuleKinds& kind) {
1704  switch (kind) {
1706  return "template_module";
1708  return "rt_geos_module";
1710  return "rt_libdevice_module";
1712  return "udf_cpu_module";
1714  return "udf_gpu_module";
1716  return "rt_udf_cpu_module";
1718  return "rt_udf_gpu_module";
1719  }
1720  LOG(FATAL) << "Invalid LLVM module kind.";
1721  return "";
1722 }
1723 
// Forward declaration of a foreign_storage function so that it can be called
// without including the foreign-storage headers here.
// NOTE(review): presumably populates the string dictionary of column `col_id`
// in table `table_id` of database `db_id` -- confirm in the definition.
1724 namespace foreign_storage {
1725 void populate_string_dictionary(int32_t table_id, int32_t col_id, int32_t db_id);
1726 }
1727 
1728 #endif // QUERYENGINE_EXECUTE_H
void createErrorCheckControlFlow(llvm::Function *query_func, bool run_with_dynamic_watchdog, bool run_with_allowing_runtime_interrupt, const std::vector< JoinLoop > &join_loops, ExecutorDeviceType device_type, const std::vector< InputTableInfo > &input_table_infos)
void logSystemGPUMemoryStatus(std::string const &tag, size_t const thread_idx) const
Definition: Execute.cpp:776
A container for various stats about the current state of the ExecutorResourcePool. Note that ExecutorResourcePool does not persist a struct of this type, but rather builds one on the fly when ExecutorResourcePool::get_resource_info() is called.
SQLTypeInfo getColumnType(const size_t col_idx) const
CudaMgr_Namespace::CudaMgr * getCudaMgr() const
Definition: DataMgr.h:177
const std::string debug_dir_
Definition: Execute.h:1555
llvm::Value * translated_value
Definition: Execute.h:1274
void executeWorkUnitPerFragment(const RelAlgExecutionUnit &ra_exe_unit, const InputTableInfo &table_info, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, PerFragmentCallBack &cb, const std::set< size_t > &fragment_indexes_param)
Compiles and dispatches a work unit per fragment processing results with the per fragment callback...
Definition: Execute.cpp:2365
bool is_agg(const Analyzer::Expr *expr)
SqlWindowFrameBoundType
Definition: sqldefs.h:202
static void invalidateCachesByTable(size_t table_key)
AggregatedColRange computeColRangesCache(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:4894
void enableRuntimeQueryInterrupt(const double runtime_query_check_freq, const unsigned pending_query_check_freq) const
Definition: Execute.cpp:5274
SQLAgg
Definition: sqldefs.h:76
#define CHECK_EQ(x, y)
Definition: Logger.h:301
const QueryPlanDAG getLatestQueryPlanDagExtracted() const
Definition: Execute.cpp:5382
std::vector< std::unique_ptr< ExecutionKernel > > createKernels(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, ColumnFetcher &column_fetcher, const std::vector< InputTableInfo > &table_infos, const ExecutionOptions &eo, const bool is_agg, const bool allow_single_frag_table_opt, const size_t context_count, const QueryCompilationDescriptor &query_comp_desc, const QueryMemoryDescriptor &query_mem_desc, RenderInfo *render_info, std::unordered_set< int > &available_gpus, int &available_cpus)
Definition: Execute.cpp:2907
ExtModuleKinds
Definition: Execute.h:518
void codegenJoinLoops(const std::vector< JoinLoop > &join_loops, const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, llvm::Function *query_func, llvm::BasicBlock *entry_bb, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const ExecutionOptions &eo)
Definition: IRCodegen.cpp:1204
static heavyai::shared_mutex execute_mutex_
Definition: Execute.h:1585
const std::unique_ptr< llvm::Module > & get_udf_module(bool is_gpu=false) const
Definition: Execute.h:535
static QuerySessionMap queries_session_map_
Definition: Execute.h:1580
CudaMgr_Namespace::CudaMgr * cudaMgr() const
Definition: Execute.h:865
std::unique_ptr< llvm::TargetMachine > nvptx_target_machine_
Definition: Execute.h:1547
bool checkIsQuerySessionInterrupted(const std::string &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5254
bool has_libdevice_module() const
Definition: Execute.h:566
int64_t kernel_queue_time_ms_
Definition: Execute.h:1562
JoinType
Definition: sqldefs.h:238
size_t maxGpuSlabSize() const
Definition: Execute.cpp:4392
ExecutorMutexHolder acquireExecuteMutex()
Definition: Execute.h:1591
Data_Namespace::DataMgr * data_mgr_
Definition: Execute.h:1558
std::vector< size_t > getTableFragmentIndices(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type, const size_t table_idx, const size_t outer_frag_idx, std::map< shared::TableKey, const TableFragments * > &selected_tables_fragments, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition)
Definition: Execute.cpp:3236
int64_t compilation_queue_time_ms_
Definition: Execute.h:1563
const std::string & getQuerySubmittedTime()
Definition: Execute.h:134
friend class ResultSet
Definition: Execute.h:1645
std::map< const ColumnDescriptor *, std::set< int32_t >> ColumnToFragmentsMap
Definition: Execute.h:335
const std::unique_ptr< llvm::Module > & get_geos_module() const
Definition: Execute.h:545
std::string cat(Ts &&...args)
static void initialize_extension_module_sources()
Definition: Execute.cpp:298
void codegenWindowFunctionStateInit(CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
void checkPendingQueryStatus(const QuerySessionId &query_session)
Definition: Execute.cpp:5035
const StringDictionaryProxy::IdMap * getJoinIntersectionStringProxyTranslationMap(const StringDictionaryProxy *source_proxy, StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &source_string_op_infos, const std::vector< StringOps_Namespace::StringOpInfo > &dest_source_string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner) const
Definition: Execute.cpp:621
static void registerActiveModule(void *module, const int device_id)
const Analyzer::Expr * remove_cast_to_int(const Analyzer::Expr *expr)
Definition: ColumnIR.cpp:618
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143
std::unordered_map< size_t, std::vector< llvm::Value * > > fetch_cache_
Definition: CgenState.h:385
std::vector< int8_t * > getJoinHashTablePtrs(const ExecutorDeviceType device_type, const int device_id)
Definition: Execute.cpp:4253
std::unordered_map< shared::TableKey, const ColumnDescriptor * > DeletedColumnsMap
Definition: PlanState.h:44
FetchCacheAnchor(CgenState *cgen_state)
Definition: Execute.h:1521
const std::unique_ptr< llvm::Module > & get_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1504
const std::shared_ptr< RowSetMemoryOwner > getRowSetMemoryOwner() const
Definition: Execute.cpp:728
StringDictionaryProxy * getLiteralDictionary() const override
Definition: Execute.h:355
std::atomic< bool > interrupted_
Definition: Execute.h:1543
static ResultSetRecyclerHolder resultset_recycler_holder_
Definition: Execute.h:1608
Fragmenter_Namespace::RowDataProvider RowDataProvider
Definition: Execute.h:334
static const int max_gpu_count
Definition: Execute.h:1535
GroupColLLVMValue groupByColumnCodegen(Analyzer::Expr *group_by_col, const size_t col_width, const CompilationOptions &, const bool translate_null_val, const int64_t translated_null_val, DiamondCodegen &, std::stack< llvm::BasicBlock * > &, const bool thread_mem_shared)
Definition: IRCodegen.cpp:1384
static ExecutorResourceMgr_Namespace::ResourcePoolInfo get_executor_resource_pool_info()
Definition: Execute.cpp:5448
std::map< const QuerySessionId, std::map< std::string, QuerySessionStatus >> QuerySessionMap
Definition: Execute.h:155
size_t const getFragmentIndex() const
#define LOG(tag)
Definition: Logger.h:285
std::string QueryPlanDAG
bool isArchPascalOrLater(const ExecutorDeviceType dt) const
Definition: Execute.h:872
void AutoTrackBuffersInRuntimeIR()
std::ostream & operator<<(std::ostream &os, const SessionInfo &session_info)
Definition: SessionInfo.cpp:57
bool is_fp() const
Definition: sqltypes.h:573
Cache for physical column ranges. Set by the aggregator on the leaves.
std::pair< QuerySessionId, std::string > CurrentQueryStatus
Definition: Execute.h:87
bool is_constructed_point(const Analyzer::Expr *expr)
Definition: Execute.h:1682
llvm::Value * codegenFrameBound(bool for_start_bound, bool for_range_mode, bool for_window_frame_naviation, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &args)
heavyai::unique_lock< heavyai::shared_mutex > unique_lock
Definition: Execute.h:1589
static std::shared_ptr< ExecutorResourceMgr_Namespace::ExecutorResourceMgr > executor_resource_mgr_
Definition: Execute.h:1628
std::function< llvm::BasicBlock *(llvm::BasicBlock *, llvm::BasicBlock *, const std::string &, llvm::Function *, CgenState *)> HoistedFiltersCallback
Definition: JoinLoop.h:62
std::vector< ColumnLazyFetchInfo > getColLazyFetchInfo(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:1017
static const size_t baseline_threshold
Definition: Execute.h:1549
void updateQuerySessionStatus(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus new_query_status)
Definition: Execute.cpp:5075
const std::unique_ptr< llvm::Module > & get_rt_udf_module(bool is_gpu=false) const
Definition: Execute.h:539
std::unordered_set< int > get_available_gpus(const Data_Namespace::DataMgr *data_mgr)
Definition: Execute.cpp:1752
static constexpr ExecutorId INVALID_EXECUTOR_ID
Definition: Execute.h:424
std::unordered_map< size_t, std::vector< llvm::Value * > > saved_fetch_cache
Definition: Execute.h:1527
std::tuple< RelAlgExecutionUnit, PlanState::DeletedColumnsMap > addDeletedColumn(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
Definition: Execute.cpp:4475
bool isArchPascalOrLater() const
Definition: CudaMgr.h:156
TableToFragmentIds fragments_with_deleted_rows
Definition: Execute.h:340
bool hasLazyFetchColumns(const std::vector< Analyzer::Expr * > &target_exprs) const
Definition: Execute.cpp:1006
void launchKernelsImpl(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const size_t requested_num_threads)
Definition: Execute.cpp:3040
llvm::Value * aggregateWindowStatePtr(CodeGenerator *code_generator, const CompilationOptions &co)
const SQLTypeInfo get_column_type(const int col_id, const int table_id, const ColumnDescriptor *cd, const TemporaryTables *temporary_tables)
Definition: Execute.h:254
const ColumnDescriptor * get_metadata_for_column(const ::shared::ColumnKey &column_key)
ResourceType
Stores the resource type for an ExecutorResourcePool request.
FetchResult fetchUnionChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
Definition: Execute.cpp:3642
Driver for running cleanup processes on a table. TableOptimizer provides functions for various cleanu...
ResultSetPtr collectAllDeviceShardedTopResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type) const
Definition: Execute.cpp:2830
static std::pair< int64_t, int32_t > reduceResults(const SQLAgg agg, const SQLTypeInfo &ti, const int64_t agg_init_val, const int8_t out_byte_width, const int64_t *out_vec, const size_t out_vec_sz, const bool is_group_by, const bool float_argument_input)
Definition: Execute.cpp:1337
Definition: sqldefs.h:51
void insertErrorCodeChecker(llvm::Function *query_func, unsigned const error_code_idx, bool hoist_literals, bool allow_runtime_query_interrupt)
Macros and functions for groupby buffer compaction.
const StringDictionaryProxy::IdMap * getStringProxyTranslationMap(const shared::StringDictKey &source_dict_key, const shared::StringDictKey &dest_dict_key, const RowSetMemoryOwner::StringTranslationType translation_type, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
Definition: Execute.cpp:606
QuerySessionId current_query_session_
Definition: Execute.h:1576
ResultSetRecyclerHolder & getResultSetRecyclerHolder()
Definition: Execute.cpp:4978
const Analyzer::Expr * extract_cast_arg(const Analyzer::Expr *expr)
Definition: Execute.h:222
heavyai::shared_mutex & getSessionLock()
Definition: Execute.cpp:4982
std::string getFramingFuncName(const std::string &bound_type, const std::string &order_col_type, const std::string &op_type, bool for_timestamp_type) const
std::unordered_set< shared::TableKey > table_keys
Definition: Execute.h:403
const QuerySessionStatus::QueryStatus getQueryStatus()
Definition: Execute.h:135
const std::string query_str_
Definition: Execute.h:144
QuerySessionStatus(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time)
Definition: Execute.h:103
ExecutorResourceMgr_Namespace::ChunkRequestInfo getChunkRequestInfo(const ExecutorDeviceType device_type, const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos, const std::vector< std::pair< int32_t, FragmentsList >> &device_fragment_lists) const
Determines a unique list of chunks and their associated byte sizes for a given query plan...
Definition: Execute.cpp:877
AggregatedColRange agg_col_range_cache_
Definition: Execute.h:1572
std::shared_ptr< ResultSet > ResultSetPtr
static void * gpu_active_modules_[max_gpu_count]
Definition: Execute.h:1541
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1502
llvm::Value * original_value
Definition: Execute.h:1275
void enrollQuerySession(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted_time_str, const size_t executor_id, const QuerySessionStatus::QueryStatus query_session_status)
Definition: Execute.cpp:5091
static void init_resource_mgr(const size_t num_cpu_slots, const size_t num_gpu_slots, const size_t cpu_result_mem, const size_t cpu_buffer_pool_mem, const size_t gpu_buffer_pool_mem, const double per_query_max_cpu_slots_ratio, const double per_query_max_cpu_result_mem_ratio, const bool allow_cpu_kernel_concurrency, const bool allow_cpu_gpu_kernel_concurrency, const bool allow_cpu_slot_oversubscription_concurrency, const bool allow_cpu_result_mem_oversubscription, const double max_available_resource_use_ratio)
Definition: Execute.cpp:5387
ParseIRError(const std::string message)
Definition: Execute.h:322
Specifies the policies for resource grants in the presence of other requests, both under situations o...
int32_t executePlanWithoutGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const std::vector< Analyzer::Expr * > &target_exprs, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, QueryExecutionContext *query_exe_context, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *data_mgr, const int device_id, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
Definition: Execute.cpp:3834
static uint32_t gpu_active_modules_device_mask_
Definition: Execute.h:1540
TableUpdateMetadata executeUpdate(const RelAlgExecutionUnit &ra_exe_unit, const std::vector< InputTableInfo > &table_infos, const TableDescriptor *updated_table_desc, const CompilationOptions &co, const ExecutionOptions &eo, const Catalog_Namespace::Catalog &cat, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const UpdateLogForFragment::Callback &cb, const bool is_agg)
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
FragmentSkipStatus canSkipFragmentForFpQual(const Analyzer::BinOper *comp_expr, const Analyzer::ColumnVar *lhs_col, const Fragmenter_Namespace::FragmentInfo &fragment, const Analyzer::Constant *rhs_const) const
Definition: Execute.cpp:4598
static void invalidateCaches()
int deviceCount(const ExecutorDeviceType) const
Definition: Execute.cpp:1322
llvm::Value * castToIntPtrTyIn(llvm::Value *val, const size_t bit_width)
Definition: Execute.cpp:4428
void reset(bool discard_runtime_modules_only=false)
Definition: Execute.cpp:327
static std::mutex kernel_mutex_
Definition: Execute.h:1624
unsigned numBlocksPerMP() const
Definition: Execute.cpp:4361
StringDictionaryProxy * getStringDictionaryProxy(const shared::StringDictKey &dict_key, const bool with_generation) const
Definition: Execute.h:578
std::unique_ptr< CgenState > cgen_state_
Definition: Execute.h:1498
#define CHECK_GT(x, y)
Definition: Logger.h:305
Container for compilation results and assorted options for a single execution unit.
static const size_t auto_cpu_mem_bytes
Definition: Execute.h:1626
bool isCPUOnly() const
Definition: Execute.cpp:706
void resetGridSize()
Definition: Execute.cpp:4380
bool checkCurrentQuerySession(const std::string &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4991
void clearCaches(bool runtime_only=false)
std::unique_ptr< WindowProjectNodeContext > window_project_node_context_owned_
Definition: Execute.h:1567
void addTransientStringLiterals(const RelAlgExecutionUnit &ra_exe_unit, const std::shared_ptr< RowSetMemoryOwner > &row_set_mem_owner)
Definition: Execute.cpp:2523
std::vector< FragmentsPerTable > FragmentsList
bool is_time() const
Definition: sqltypes.h:579
bool needFetchAllFragments(const InputColDescriptor &col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments) const
Definition: Execute.cpp:3416
const QuerySessionId query_session_
Definition: Execute.h:142
std::shared_ptr< HashJoin > hash_table
Definition: Execute.h:1236
ExecutorDeviceType
std::string to_string(char const *&&v)
bool updateQuerySessionStatusWithLock(const QuerySessionId &query_session, const std::string &submitted_time_str, const QuerySessionStatus::QueryStatus updated_query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5158
ResultSetPtr executeWorkUnit(size_t &max_groups_buffer_entry_guess, const bool is_agg, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:2099
static void clearCardinalityCache()
Definition: Execute.cpp:5309
bool checkNonKernelTimeInterrupted() const
Definition: Execute.cpp:5363
static void clearMemory(const Data_Namespace::MemoryLevel memory_level)
Definition: Execute.cpp:535
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::string &submitted_time)
Definition: Execute.h:111
std::function< void(const UpdateLogForFragment &, TableUpdateMetadata &)> Callback
Definition: Execute.h:370
bool removeFromQuerySessionList(const QuerySessionId &query_session, const std::string &submitted_time_str, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5209
static void resume_executor_queue()
Definition: Execute.cpp:5429
RUNTIME_EXPORT void register_buffer_with_executor_rsm(int64_t exec, int8_t *buffer)
CardinalityCacheKey(const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1960
std::unordered_map< shared::TableKey, const Analyzer::BinOper * > getInnerTabIdToJoinCond() const
Definition: Execute.cpp:2882
const std::unique_ptr< llvm::Module > & get_libdevice_module() const
Definition: Execute.h:548
QueryMustRunOnCpu(const std::string &err)
Definition: Execute.h:317
std::shared_lock< T > shared_lock
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
Definition: Execute.cpp:513
void preloadFragOffsets(const std::vector< InputDescriptor > &input_descs, const std::vector< InputTableInfo > &query_infos)
Definition: Execute.cpp:4288
std::pair< std::vector< std::vector< int64_t > >, std::vector< std::vector< uint64_t > > > getRowCountAndOffsetForAllFrags(const RelAlgExecutionUnit &ra_exe_unit, const CartesianProduct< std::vector< std::vector< size_t >>> &frag_ids_crossjoin, const std::vector< InputDescriptor > &input_descs, const std::map< shared::TableKey, const TableFragments * > &all_tables_fragments)
Definition: Execute.cpp:3367
void setQueryStatus(const QuerySessionStatus::QueryStatus &status)
Definition: Execute.h:136
std::pair< llvm::Value *, llvm::Value * > codegenFrameNullRange(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
std::unordered_map< int, const ResultSetPtr & > TemporaryTables
Definition: InputMetadata.h:31
const ExecutorId executor_id_
Definition: Execute.h:1476
bool updateQuerySessionExecutorAssignment(const QuerySessionId &query_session, const std::string &submitted_time_str, const size_t executor_id, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5184
const ResultSetPtr & get_temporary_table(const TemporaryTables *temporary_tables, const int table_id)
Definition: Execute.h:246
int8_t warpSize() const
Definition: Execute.cpp:4344
std::map< QuerySessionId, bool > InterruptFlagMap
Definition: Execute.h:88
const size_t max_gpu_slab_size_
Definition: Execute.h:1554
ResultSetPtr reduceSpeculativeTopN(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1724
ResultSetPtr collectAllDeviceResults(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner)
Definition: Execute.cpp:2715
const ColumnDescriptor * getPhysicalColumnDescriptor(const Analyzer::ColumnVar *, int) const
Definition: Execute.cpp:716
std::lock_guard< std::mutex > lock_
Definition: Execute.h:1497
static void unregisterActiveModule(const int device_id)
std::tuple< CompilationResult, std::unique_ptr< QueryMemoryDescriptor > > compileWorkUnit(const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool allow_lazy_fetch, std::shared_ptr< RowSetMemoryOwner >, const size_t max_groups_buffer_entry_count, const int8_t crt_min_byte_width, const bool has_cardinality_estimation, ColumnCacheMap &column_cache, RenderInfo *render_info=nullptr)
TableIdToNodeMap table_id_to_node_map_
Definition: Execute.h:1560
bool compileBody(const RelAlgExecutionUnit &ra_exe_unit, GroupByAndAggregate &group_by_and_aggregate, QueryMemoryDescriptor &query_mem_desc, const CompilationOptions &co, const GpuSharedMemoryContext &gpu_smem_context={})
std::unordered_map< size_t, HashTableBuildDag > HashTableBuildDagMap
static void set_executor_resource_pool_resource(const ExecutorResourceMgr_Namespace::ResourceType resource_type, const size_t resource_quantity)
Definition: Execute.cpp:5456
llvm::Value * get_arg_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:168
std::vector< llvm::Value * > LLVMValueVector
Definition: Execute.h:380
void logSystemCPUMemoryStatus(std::string const &tag, size_t const thread_idx) const
Definition: Execute.cpp:765
size_t get_context_count(const ExecutorDeviceType device_type, const size_t cpu_count, const size_t gpu_count)
Definition: Execute.cpp:1766
std::vector< TargetValue > getEntryAt(const size_t index) const override
int get_logical_size() const
Definition: sqltypes.h:421
decltype(FragmentInfoType::fragmentId) const getFragmentId() const
Definition: Execute.h:364
int64_t deviceCycles(int milliseconds) const
Definition: Execute.cpp:4396
std::string generatePTX(const std::string &) const
std::vector< llvm::Value * > prepareRangeModeFuncArgs(bool for_start_bound, const Analyzer::WindowFrame *frame_bound, bool is_timestamp_type_frame, llvm::Value *order_key_null_val, const WindowFrameBoundFuncArgs &frame_args) const
std::mutex str_dict_mutex_
Definition: Execute.h:1545
bool is_integer() const
Definition: sqltypes.h:567
friend class PendingExecutionClosure
Definition: Execute.h:1651
const ColumnDescriptor * get_column_descriptor_maybe(const shared::ColumnKey &column_key)
Definition: Execute.h:241
Fragmenter_Namespace::TableInfo getTableInfo(const shared::TableKey &table_key) const
Definition: Execute.cpp:736
ResultSetPtr reduceMultiDeviceResults(const RelAlgExecutionUnit &, std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1589
const SQLTypeInfo getFirstOrderColTypeInfo(WindowFunctionContext *window_func_context) const
static void set_concurrent_resource_grant_policy(const ExecutorResourceMgr_Namespace::ConcurrentResourceGrantPolicy &concurrent_resource_grant_policy)
Definition: Execute.cpp:5477
const std::string debug_file_
Definition: Execute.h:1556
void populate_string_dictionary(int32_t table_id, int32_t col_id, int32_t db_id)
Definition: Execute.cpp:237
decltype(FragmentInfoType::physicalTableId) const getPhysicalTableId() const
Definition: Execute.h:361
std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner_
Definition: Execute.h:1533
Used by Fragmenter classes to store info about each fragment - the fragment id and number of tuples(r...
Definition: Fragmenter.h:86
void setColRangeCache(const AggregatedColRange &aggregated_col_range)
Definition: Execute.h:1329
bool containsLeftDeepOuterJoin() const
Definition: Execute.h:614
static QueryPlanDAG latest_query_plan_extracted_
Definition: Execute.h:1612
void addToCardinalityCache(const CardinalityCacheKey &cache_key, const size_t cache_value)
Definition: Execute.cpp:5289
int32_t executePlanWithGroupBy(const RelAlgExecutionUnit &ra_exe_unit, const CompilationResult &, const bool hoist_literals, ResultSetPtr *results, const ExecutorDeviceType device_type, std::vector< std::vector< const int8_t * >> &col_buffers, const std::vector< size_t > outer_tab_frag_ids, QueryExecutionContext *, const std::vector< std::vector< int64_t >> &num_rows, const std::vector< std::vector< uint64_t >> &frag_offsets, Data_Namespace::DataMgr *, const int device_id, const shared::TableKey &outer_table_key, const int64_t limit, const uint32_t start_rowid, const uint32_t num_tables, const bool allow_runtime_interrupt, RenderInfo *render_info, const bool optimize_cuda_block_and_grid_sizes, const int64_t rows_to_process=-1)
Definition: Execute.cpp:4061
size_t getNumCurentSessionsEnrolled() const
Definition: Execute.cpp:5115
bool has_rt_module() const
Definition: Execute.h:552
unsigned get_index_by_name(llvm::Function *func, const std::string &name)
Definition: Execute.h:187
std::shared_ptr< CompilationContext > optimizeAndCodegenCPU(llvm::Function *, llvm::Function *, const std::unordered_set< llvm::Function * > &, const CompilationOptions &)
const ColumnDescriptor * get_column_descriptor(const shared::ColumnKey &column_key)
Definition: Execute.h:213
std::pair< llvm::Value *, llvm::Value * > codegenWindowFrameBounds(WindowFunctionContext *window_func_context, const Analyzer::WindowFrame *frame_start_bound, const Analyzer::WindowFrame *frame_end_bound, llvm::Value *order_key_col_null_val_lv, WindowFrameBoundFuncArgs &args, CodeGenerator &code_generator)
bool is_timeinterval() const
Definition: sqltypes.h:594
CachedCardinality getCachedCardinality(const CardinalityCacheKey &cache_key)
Definition: Execute.cpp:5298
static InterruptFlagMap queries_interrupt_flag_
Definition: Execute.h:1578
FragmentInfoType const & getFragmentInfo() const
std::unique_lock< T > unique_lock
std::unique_ptr< PlanState > plan_state_
Definition: Execute.h:1532
void initializeNVPTXBackend() const
static const ExecutorResourceMgr_Namespace::ConcurrentResourceGrantPolicy get_concurrent_resource_grant_policy(const ExecutorResourceMgr_Namespace::ResourceType resource_type)
Definition: Execute.cpp:5467
std::map< int32_t, std::set< int32_t >> TableToFragmentIds
Definition: Execute.h:336
const std::string submitted_time_
Definition: Execute.h:145
ResultSetPtr executeTableFunction(const TableFunctionExecutionUnit exe_unit, const std::vector< InputTableInfo > &table_infos, const CompilationOptions &co, const ExecutionOptions &eo)
Compiles and dispatches a table function; that is, a function that takes as input one or more columns...
Definition: Execute.cpp:2445
llvm::Value * castToFP(llvm::Value *, SQLTypeInfo const &from_ti, SQLTypeInfo const &to_ti)
Definition: Execute.cpp:4401
size_t fragment_index_
Definition: Execute.h:376
std::pair< bool, size_t > CachedCardinality
Definition: Execute.h:1403
void setupCaching(const std::unordered_set< PhysicalInput > &phys_inputs, const std::unordered_set< shared::TableKey > &phys_table_keys)
Definition: Execute.cpp:4960
static void invalidateCardinalityCacheForTable(const shared::TableKey &table_key)
Definition: Execute.cpp:5316
const ColumnDescriptor * getColumnDescriptor(const Analyzer::ColumnVar *) const
Definition: Execute.cpp:711
bool containsTableKey(const shared::TableKey &table_key) const
Definition: Execute.cpp:2020
bool checkIsQuerySessionEnrolled(const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5265
Specifies the in-memory content of a row in the column metadata table.
bool is_boolean() const
Definition: sqltypes.h:582
std::vector< TargetValue > getTranslatedEntryAt(const size_t index) const override
static std::map< int, std::shared_ptr< Executor > > executors_
Definition: Execute.h:1581
unsigned grid_size_x_
Definition: Execute.h:1553
std::pair< llvm::BasicBlock *, llvm::Value * > codegenWindowResetStateControlFlow(CodeGenerator *code_generator, const CompilationOptions &co)
QuerySessionStatus::QueryStatus getQuerySessionStatus(const QuerySessionId &candidate_query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5001
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
static const size_t auto_num_threads
Definition: Execute.h:1536
const TemporaryTables * getTemporaryTables()
Definition: Execute.h:573
std::string get_null_check_suffix(const SQLTypeInfo &lhs_ti, const SQLTypeInfo &rhs_ti)
Definition: Execute.h:1661
const std::unique_ptr< llvm::Module > & get_rt_module() const
Definition: Execute.h:532
size_t hash() const
Definition: Execute.cpp:2016
std::string key
Definition: Execute.h:402
#define RUNTIME_EXPORT
void launchKernelsLocked(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type)
Definition: Execute.cpp:3123
Executor(const ExecutorId id, Data_Namespace::DataMgr *data_mgr, const size_t block_size_x, const size_t grid_size_x, const size_t max_gpu_slab_size, const std::string &debug_dir, const std::string &debug_file)
Definition: Execute.cpp:276
static std::unordered_map< CardinalityCacheKey, size_t > cardinality_cache_
Definition: Execute.h:1607
std::string dumpCache() const
Definition: Execute.cpp:5520
ColumnToFragmentsMap columns_for_metadata_update
Definition: Execute.h:339
std::unordered_map< shared::TableKey, const RelAlgNode * > TableIdToNodeMap
const std::vector< size_t > getExecutorIdsRunningQuery(const QuerySessionId &interrupt_session) const
Definition: Execute.cpp:5347
#define CHECK_LT(x, y)
Definition: Logger.h:303
ResultSetPtr resultsUnion(SharedKernelContext &shared_context, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1563
void registerExtractedQueryPlanDag(const QueryPlanDAG &query_plan_dag)
Definition: Execute.cpp:5376
std::shared_ptr< ResultSet > rs_
Definition: Execute.h:377
JoinHashTableOrError buildHashTableForQualifier(const std::shared_ptr< Analyzer::BinOper > &qual_bin_oper, const std::vector< InputTableInfo > &query_infos, const MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, ColumnCacheMap &column_cache, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)
Definition: Execute.cpp:4309
QuerySessionId & getCurrentQuerySession(heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:4986
std::vector< size_t > getFragmentCount(const FragmentsList &selected_fragments, const size_t scan_idx, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3760
llvm::Value * codegenLoadCurrentValueFromColBuf(WindowFunctionContext *window_func_context, CodeGenerator &code_generator, WindowFrameBoundFuncArgs &args) const
static void addUdfIrToModule(const std::string &udf_ir_filename, const bool is_cuda_ir)
llvm::BasicBlock * codegenSkipDeletedOuterTableRow(const RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co)
std::unique_ptr< QueryCompilationDescriptor > QueryCompilationDescriptorOwned
Definition: Execute.h:83
size_t ExecutorId
Definition: Execute.h:422
void setGridSize(unsigned grid_size)
Definition: Execute.cpp:4376
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
void codegenWindowAvgEpilogue(CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *crt_val, llvm::Value *window_func_null_val)
std::shared_ptr< HashJoin > buildCurrentLevelHashTable(const JoinCondition &current_level_join_conditions, size_t level_idx, RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache, std::vector< std::string > &fail_reasons)
Definition: IRCodegen.cpp:1027
const std::string getOrderKeyTypeName(WindowFunctionContext *window_func_context) const
static heavyai::shared_mutex recycler_mutex_
Definition: Execute.h:1605
static void update_after_registration(bool update_runtime_modules_only=false)
Definition: Execute.h:1420
std::vector< int8_t > serializeLiterals(const std::unordered_map< int, CgenState::LiteralValues > &literals, const int device_id)
Definition: Execute.cpp:1060
std::shared_ptr< CompilationContext > optimizeAndCodegenGPU(llvm::Function *, llvm::Function *, std::unordered_set< llvm::Function * > &, const bool no_inline, const CudaMgr_Namespace::CudaMgr *cuda_mgr, const bool is_gpu_smem_used, const CompilationOptions &)
InputTableInfoCache input_table_info_cache_
Definition: Execute.h:1571
size_t getNumBytesForFetchedRow(const std::set< shared::TableKey > &table_keys_to_fetch) const
void setBlockSize(unsigned block_size)
Definition: Execute.cpp:4384
const Expr * get_operand() const
Definition: Analyzer.h:384
std::chrono::steady_clock::time_point lock_queue_clock_
Definition: Execute.h:1496
llvm::Value * codegenConditionalAggregateCondValSelector(llvm::Value *cond_lv, SQLAgg const aggKind, CompilationOptions const &co) const
std::pair< bool, int64_t > skipFragment(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &frag_info, const std::list< std::shared_ptr< Analyzer::Expr >> &simple_quals, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
Definition: Execute.cpp:4658
std::pair< llvm::Value *, llvm::Value * > codegenFrameBoundRange(const Analyzer::WindowFunction *window_func, CodeGenerator &code_generator, const CompilationOptions &co)
unsigned gridSize() const
Definition: Execute.cpp:4352
std::unordered_map< shared::TableKey, std::unordered_map< int, std::shared_ptr< const ColumnarResults >>> ColumnCacheMap
llvm::Value * spillDoubleElement(llvm::Value *elem_val, llvm::Type *elem_ty)
TableGenerations computeTableGenerations(const std::unordered_set< shared::TableKey > &phys_table_keys)
Definition: Execute.cpp:4948
friend class KernelSubtask
Definition: Execute.h:1635
PlanState * getPlanStatePtr() const
Definition: Execute.h:1415
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:528
StringDictionaryGenerations computeStringDictionaryGenerations(const std::unordered_set< PhysicalInput > &phys_inputs)
Definition: Execute.cpp:4922
unsigned block_size_x_
Definition: Execute.h:1552
bool has_udf_module(bool is_gpu=false) const
Definition: Execute.h:555
Data_Namespace::DataMgr * getDataMgr() const
Definition: Execute.h:623
bool needLinearizeAllFragments(const ColumnDescriptor *cd, const InputColDescriptor &inner_col_desc, const RelAlgExecutionUnit &ra_exe_unit, const FragmentsList &selected_fragments, const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:3435
void setExecutorId(const size_t executor_id)
Definition: Execute.h:139
llvm::Value * codegenWindowFunctionAggregate(CodeGenerator *code_generator, const CompilationOptions &co)
std::unique_ptr< llvm::LLVMContext > context_
Definition: Execute.h:1477
llvm::Value * codegenCurrentPartitionIndex(const WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *current_row_pos_lv)
CgenState * getCgenStatePtr() const
Definition: Execute.h:1414
FragmentInfoType const & fragment_info_
Definition: Execute.h:375
void nukeOldState(const bool allow_lazy_fetch, const std::vector< InputTableInfo > &query_infos, const PlanState::DeletedColumnsMap &deleted_cols_map, const RelAlgExecutionUnit *ra_exe_unit)
Definition: Execute.cpp:4268
static heavyai::shared_mutex executor_session_mutex_
Definition: Execute.h:1574
const std::string getQueryStr()
Definition: Execute.h:132
std::pair< bool, int64_t > skipFragmentInnerJoins(const InputDescriptor &table_desc, const RelAlgExecutionUnit &ra_exe_unit, const Fragmenter_Namespace::FragmentInfo &fragment, const std::vector< uint64_t > &frag_offsets, const size_t frag_idx)
Definition: Execute.cpp:4861
void buildSelectedFragsMapping(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, std::vector< size_t > &local_col_to_frag_pos, const std::list< std::shared_ptr< const InputColDescriptor >> &col_global_ids, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3774
TableGenerations table_generations_
Definition: Execute.h:1573
llvm::Value * codegenWindowNavigationFunctionOnFrame(const CompilationOptions &co)
std::size_t hash_value(RexAbstractInput const &rex_ab_input)
Definition: RelAlgDag.cpp:3548
std::function< void(ResultSetPtr, const Fragmenter_Namespace::FragmentInfo &)> PerFragmentCallBack
Definition: Execute.h:890
void resetInterrupt()
const size_t getExecutorId()
Definition: Execute.h:133
size_t executor_id_
Definition: Execute.h:143
llvm::Value * codegenFrameBoundExpr(const Analyzer::WindowFunction *window_func, const Analyzer::WindowFrame *frame_bound, CodeGenerator &code_generator, const CompilationOptions &co)
void buildSelectedFragsMappingForUnion(std::vector< std::vector< size_t >> &selected_fragments_crossjoin, const FragmentsList &selected_fragments, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:3805
static void registerExtensionFunctions(F register_extension_functions)
Definition: Execute.h:470
std::string QuerySessionId
Definition: Execute.h:86
llvm::Value * addJoinLoopIterator(const std::vector< llvm::Value * > &prev_iters, const size_t level_idx)
Definition: IRCodegen.cpp:1186
ResultSetPtr reduceMultiDeviceResultSets(std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &all_fragment_results, std::shared_ptr< RowSetMemoryOwner >, const QueryMemoryDescriptor &) const
Definition: Execute.cpp:1664
std::vector< std::pair< ResultSetPtr, std::vector< size_t > > > getUniqueThreadSharedResultSets(const std::vector< std::pair< ResultSetPtr, std::vector< size_t >>> &results_per_device) const
Definition: Execute.cpp:1624
#define CHECK(condition)
Definition: Logger.h:291
QueryPlanDagCache & getQueryPlanDagCache()
Definition: Execute.cpp:4974
bool has_extension_module(ExtModuleKinds kind) const
Definition: Execute.h:1513
std::map< shared::ColumnKey, size_t > getColumnByteWidthMap(const std::set< shared::TableKey > &table_ids_to_fetch, const bool include_lazy_fetched_cols) const
Definition: Execute.cpp:819
WindowPartitionBufferPtrs codegenLoadPartitionBuffers(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *partition_index_lv) const
void resetBlockSize()
Definition: Execute.cpp:4388
heavyai::shared_lock< heavyai::shared_mutex > shared_lock
Definition: Execute.h:1588
static void clearExternalCaches(bool for_update, const TableDescriptor *td, const int current_db_id)
Definition: Execute.h:438
CgenStateManager(Executor &executor)
Definition: Execute.cpp:438
std::mutex compilation_mutex_
Definition: Execute.h:1618
void interrupt(const QuerySessionId &query_session="", const QuerySessionId &interrupt_session="")
heavyai::shared_mutex & getDataRecyclerLock()
Definition: Execute.cpp:4970
bool has_rt_udf_module(bool is_gpu=false) const
Definition: Execute.h:559
static void pause_executor_queue()
Definition: Execute.cpp:5420
std::vector< llvm::Value * > inlineHoistedLiterals()
static size_t getBaselineThreshold(bool for_count_distinct, ExecutorDeviceType device_type)
Definition: Execute.h:1448
ExecutorDeviceType getDeviceTypeForTargets(const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType requested_device_type)
Definition: Execute.cpp:2575
void invalidateRunningQuerySession(heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5013
std::pair< std::string, llvm::Value * > codegenLoadOrderKeyBufPtr(WindowFunctionContext *window_func_context, CodeGenerator *code_generator, const CompilationOptions &co) const
ExecutorResourceMgr is the central manager for resources available to all executors in the system...
uint32_t log2_bytes(const uint32_t bytes)
Definition: Execute.h:198
ExpressionRange getColRange(const PhysicalInput &) const
Definition: Execute.cpp:746
std::string numeric_type_name(const SQLTypeInfo &ti)
Definition: Execute.h:230
CurrentQueryStatus attachExecutorToQuerySession(const QuerySessionId &query_session_id, const std::string &query_str, const std::string &query_submitted_time)
Definition: Execute.cpp:5018
bool skipFragmentPair(const Fragmenter_Namespace::FragmentInfo &outer_fragment_info, const Fragmenter_Namespace::FragmentInfo &inner_fragment_info, const int inner_table_id, const std::unordered_map< shared::TableKey, const Analyzer::BinOper * > &inner_table_id_to_join_condition, const RelAlgExecutionUnit &ra_exe_unit, const ExecutorDeviceType device_type)
Definition: Execute.cpp:3278
void redeclareFilterFunction()
Definition: IRCodegen.cpp:1087
SQLTypeInfo columnType
QuerySessionStatus(const QuerySessionId &query_session, const size_t executor_id, const std::string &query_str, const std::string &submitted_time, const QuerySessionStatus::QueryStatus &query_status)
Definition: Execute.h:120
void launchKernelsViaResourceMgr(SharedKernelContext &shared_context, std::vector< std::unique_ptr< ExecutionKernel >> &&kernels, const ExecutorDeviceType device_type, const std::vector< InputDescriptor > &input_descs, const QueryMemoryDescriptor &query_mem_desc)
Launches a vector of kernels for a given query step, gated/scheduled by ExecutorResourceMgr.
Definition: Execute.cpp:3135
UpdateLogForFragment(FragmentInfoType const &fragment_info, size_t const, const std::shared_ptr< ResultSet > &rs)
bool is_unnest(const Analyzer::Expr *expr)
Definition: Execute.h:1677
static constexpr ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:423
bool is_string() const
Definition: sqltypes.h:561
const TableGeneration & getTableGeneration(const shared::TableKey &table_key) const
Definition: Execute.cpp:741
std::vector< JoinLoop > buildJoinLoops(RelAlgExecutionUnit &ra_exe_unit, const CompilationOptions &co, const ExecutionOptions &eo, const std::vector< InputTableInfo > &query_infos, ColumnCacheMap &column_cache)
Definition: IRCodegen.cpp:610
std::function< llvm::Value *(const std::vector< llvm::Value * > &, llvm::Value *)> buildIsDeletedCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const CompilationOptions &co)
Definition: IRCodegen.cpp:968
auto getResultSet() const
Definition: Execute.h:372
unsigned blockSize() const
Definition: Execute.cpp:4366
string name
Definition: setup.in.py:72
std::shared_timed_mutex shared_mutex
static std::mutex register_runtime_extension_functions_mutex_
Definition: Execute.h:1623
Specifies all DataMgr chunks needed for a query step/request, along with their sizes in bytes...
size_t getOrderKeySize(WindowFunctionContext *window_func_context) const
JoinLoop::HoistedFiltersCallback buildHoistLeftHandSideFiltersCb(const RelAlgExecutionUnit &ra_exe_unit, const size_t level_idx, const shared::TableKey &inner_table_key, const CompilationOptions &co)
Definition: IRCodegen.cpp:859
Execution unit for relational algebra. It's a low-level description of any relational algebra operati...
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
ExecutorId getExecutorId() const
Definition: Execute.h:1332
static size_t align(const size_t off_in, const size_t alignment)
Definition: Execute.h:1468
static heavyai::shared_mutex executors_cache_mutex_
Definition: Execute.h:1602
std::unique_ptr< QueryMemoryDescriptor > QueryMemoryDescriptorOwned
Definition: Execute.h:85
size_t const getRowCount() const override
bool operator==(const CardinalityCacheKey &other) const
Definition: Execute.cpp:2012
const QuerySessionId getQuerySession()
Definition: Execute.h:131
void clearQuerySessionStatus(const QuerySessionId &query_session, const std::string &submitted_time_str)
Definition: Execute.cpp:5061
llvm::Value * codegenAggregateWindowState(CodeGenerator *code_generator, const CompilationOptions &co, llvm::Value *aggregate_state)
QuerySessionStatus::QueryStatus query_status_
Definition: Execute.h:152
bool is_decimal() const
Definition: sqltypes.h:570
void setQuerySessionAsInterrupted(const QuerySessionId &query_session, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5243
int deviceCountForMemoryLevel(const Data_Namespace::MemoryLevel memory_level) const
Definition: Execute.cpp:1330
static size_t get_executor_resource_pool_total_resource_quantity(const ExecutorResourceMgr_Namespace::ResourceType resource_type)
Definition: Execute.cpp:5438
ResultSetPtr executeWorkUnitImpl(size_t &max_groups_buffer_entry_guess, const bool is_agg, const bool allow_single_frag_table_opt, const std::vector< InputTableInfo > &, const RelAlgExecutionUnit &, const CompilationOptions &, const ExecutionOptions &options, std::shared_ptr< RowSetMemoryOwner >, RenderInfo *render_info, const bool has_cardinality_estimation, ColumnCacheMap &column_cache)
Definition: Execute.cpp:2166
std::vector< QuerySessionStatus > getQuerySessionInfo(const QuerySessionId &query_session, heavyai::shared_lock< heavyai::shared_mutex > &read_lock)
Definition: Execute.cpp:5329
Descriptor for the fragments required for an execution kernel.
llvm::Value * codegenWindowFunctionAggregateCalls(llvm::Value *aggregate_state, const CompilationOptions &co)
Fragmenter_Namespace::FragmentInfo FragmentInfoType
Definition: Execute.h:345
static size_t getArenaBlockSize()
Definition: Execute.cpp:562
const StringDictionaryProxy::TranslationMap< Datum > * getStringProxyNumericTranslationMap(const shared::StringDictKey &source_dict_key, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos, std::shared_ptr< RowSetMemoryOwner > row_set_mem_owner, const bool with_generation) const
Definition: Execute.cpp:640
std::mutex gpu_exec_mutex_[max_gpu_count]
Definition: Execute.h:1537
HashType
Definition: HashTable.h:19
llvm::LLVMContext & getContext()
Definition: Execute.h:1417
llvm::Value * codegenWindowFunction(const size_t target_index, const CompilationOptions &co)
std::vector< int > getTableChunkKey(const int getCurrentDBId) const
bool addToQuerySessionList(const QuerySessionId &query_session, const std::string &query_str, const std::string &submitted, const size_t executor_id, const QuerySessionStatus::QueryStatus query_status, heavyai::unique_lock< heavyai::shared_mutex > &write_lock)
Definition: Execute.cpp:5120
SQLOps get_optype() const
Definition: Analyzer.h:383
static QueryPlanDagCache query_plan_dag_cache_
Definition: Execute.h:1604
size_t operator()(const CardinalityCacheKey &cache_key) const
Definition: Execute.h:409
WindowFunctionContext * active_window_function_
Definition: Execute.h:1569
llvm::Value * get_arg_by_index(llvm::Function *func, unsigned const index)
Definition: Execute.h:178
static std::mutex gpu_active_modules_mutex_
Definition: Execute.h:1539
static void nukeCacheOfExecutors()
Definition: Execute.h:505
void clearMetaInfoCache()
Definition: Execute.cpp:1054
FragmentSkipStatus
Definition: Execute.h:164
size_t const getEntryCount() const override
const TemporaryTables * temporary_tables_
Definition: Execute.h:1559
CompilationRetryNewScanLimit(const size_t new_scan_limit)
Definition: Execute.h:276
std::vector< llvm::Value * > prepareRowModeFuncArgs(bool for_start_bound, SqlWindowFrameBoundType bound_type, const WindowFrameBoundFuncArgs &args) const
WatchdogException(const std::string &cause)
Definition: Execute.h:161
bool has_geos_module() const
Definition: Execute.h:563
void update_extension_modules(bool update_runtime_modules_only=false)
Definition: Execute.cpp:350
FetchResult fetchChunks(const ColumnFetcher &, const RelAlgExecutionUnit &ra_exe_unit, const int device_id, const Data_Namespace::MemoryLevel, const std::map< shared::TableKey, const TableFragments * > &, const FragmentsList &selected_fragments, std::list< ChunkIter > &, std::list< std::shared_ptr< Chunk_NS::Chunk >> &, DeviceAllocator *device_allocator, const size_t thread_idx, const bool allow_runtime_interrupt)
Definition: Execute.cpp:3458
bool isArchMaxwell(const ExecutorDeviceType dt) const
size_t get_loop_join_size(const std::vector< InputTableInfo > &query_infos, const RelAlgExecutionUnit &ra_exe_unit)
Definition: Execute.cpp:1905
ResultSetPtr executeExplain(const QueryCompilationDescriptor &)
Definition: Execute.cpp:2519
std::map< ExtModuleKinds, std::unique_ptr< llvm::Module > > extension_modules_
Definition: Execute.h:1517
bool isFragmentFullyDeleted(const InputDescriptor &table_desc, const Fragmenter_Namespace::FragmentInfo &fragment)
Definition: Execute.cpp:4561