OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DBHandler.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #include "DBHandler.h"
24 #include "DistributedLoader.h"
25 #include "TokenCompletionHints.h"
26 
27 #ifdef HAVE_PROFILER
28 #include <gperftools/heap-profiler.h>
29 #endif // HAVE_PROFILER
30 
31 #include "MapDRelease.h"
32 
33 #include "Calcite/Calcite.h"
34 #include "gen-cpp/CalciteServer.h"
35 
38 
39 #include "Catalog/Catalog.h"
44 #include "DistributedHandler.h"
46 #include "Geospatial/ColumnNames.h"
47 #include "Geospatial/Compression.h"
48 #include "Geospatial/GDAL.h"
49 #include "Geospatial/Types.h"
50 #include "ImportExport/Importer.h"
51 #include "LockMgr/LockMgr.h"
53 #include "Parser/ParserWrapper.h"
57 #include "QueryEngine/Execute.h"
67 #include "RequestInfo.h"
68 #ifdef HAVE_RUNTIME_LIBS
70 #endif
71 #include "Shared/ArrowUtil.h"
72 #include "Shared/DateTimeParser.h"
73 #include "Shared/StringTransform.h"
74 #include "Shared/SysDefinitions.h"
75 #include "Shared/file_path_util.h"
77 #include "Shared/import_helpers.h"
78 #include "Shared/measure.h"
79 #include "Shared/misc.h"
80 #include "Shared/scope.h"
82 
83 #ifdef HAVE_AWS_S3
84 #include <aws/core/auth/AWSCredentialsProviderChain.h>
85 #endif
86 #include <fcntl.h>
87 #include <picosha2.h>
88 #include <sys/types.h>
89 #include <algorithm>
90 #include <boost/algorithm/string.hpp>
91 #include <boost/filesystem.hpp>
92 #include <boost/make_shared.hpp>
93 #include <boost/process/search_path.hpp>
94 #include <boost/program_options.hpp>
95 #include <boost/tokenizer.hpp>
96 #include <chrono>
97 #include <cmath>
98 #include <csignal>
99 #include <fstream>
100 #include <future>
101 #include <map>
102 #include <memory>
103 #include <random>
104 #include <string>
105 #include <thread>
106 #include <typeinfo>
107 
108 #include <arrow/api.h>
109 #include <arrow/io/api.h>
110 #include <arrow/ipc/api.h>
111 
112 #include "Shared/ArrowUtil.h"
113 #include "Shared/distributed.h"
114 
115 #ifdef ENABLE_IMPORT_PARQUET
116 extern bool g_enable_parquet_import_fsi;
117 #endif
118 
119 #ifdef HAVE_AWS_S3
120 extern bool g_allow_s3_server_privileges;
121 #endif
122 
123 extern bool g_enable_system_tables;
126 extern bool g_allow_memory_status_log;
127 
130 
131 #define INVALID_SESSION_ID ""
132 
// Chooses the logger request id for the current call.
// When g_uniform_request_ids_per_thrift_call is set and parent_request_id is
// truthy, the parent's id is reused through logger::set_request_id(). Otherwise
// logger::set_new_request_id() is called and, if a parent id was supplied, that
// parent id is reported at INFO level.
// NOTE(review): the expansion is a bare if / else-if (no do { } while (0)
// wrapper) and parent_request_id appears several times in it, so pass a
// side-effect-free expression and do not use the macro as the unbraced body of
// another if/else.
133 #define SET_REQUEST_ID(parent_request_id) \
134  if (g_uniform_request_ids_per_thrift_call && parent_request_id) \
135  logger::set_request_id(parent_request_id); \
136  else if (logger::set_new_request_id(); parent_request_id) \
137  LOG(INFO) << "This request has parent request_id(" << parent_request_id << ')'
138 
// Creates a TDBException whose error_msg is errstr, logs that message at ERROR
// level and throws the exception.
// NOTE(review): the expansion is a plain brace-enclosed block, not a
// do { } while (0) statement; `if (c) THROW_DB_EXCEPTION(x); else ...` without
// braces therefore does not parse. Keep call sites braced.
139 #define THROW_DB_EXCEPTION(errstr) \
140  { \
141  TDBException ex; \
142  ex.error_msg = errstr; \
143  LOG(ERROR) << ex.error_msg; \
144  throw ex; \
145  }
146 
147 thread_local std::string TrackingProcessor::client_address;
149 
150 namespace {
151 
153  const int32_t user_id,
154  const std::string& dashboard_name) {
155  return (cat.getMetadataForDashboard(std::to_string(user_id), dashboard_name));
156 }
157 
// File-local runtime error that carries a human-readable cause in what().
// Judging by its name it is thrown to abort a client connection -- confirm at
// the throw sites.
struct ForceDisconnect : public std::runtime_error {
  // explicit: a std::string must never convert silently into an exception object.
  explicit ForceDisconnect(const std::string& cause) : std::runtime_error(cause) {}
};
161 
162 } // namespace
163 
164 #ifdef ENABLE_GEOS
165 // from Geospatial/GeosValidation.cpp
166 extern std::unique_ptr<std::string> g_libgeos_so_filename;
167 #endif
168 
// Constructs the handler.
// Copies the supplied configuration into the corresponding members, seeds
// random_gen_ from std::random_device, creates the query dispatch queue sized
// by system_parameters.num_executors, logs the release banner, and then runs
// initialize(is_new_db) followed by resetSessionsStore().
//
// idle_session_duration / max_session_duration are multiplied by 60 before
// being stored (presumably minutes converted to seconds -- confirm with callers).
// libgeos_so_filename and torch_lib_path are only part of the signature when
// ENABLE_GEOS / HAVE_TORCH_TFS are defined.
// system_parameters is taken by non-const reference and bound to
// system_parameters_; initialize() writes num_gpus through it.
169 DBHandler::DBHandler(const std::vector<LeafHostInfo>& db_leaves,
170  const std::vector<LeafHostInfo>& string_leaves,
171  const std::string& base_data_path,
172  const bool allow_multifrag,
173  const bool jit_debug,
174  const bool intel_jit_profile,
175  const bool read_only,
176  const bool allow_loop_joins,
177  const bool enable_rendering,
178  const bool renderer_prefer_igpu,
179  const unsigned renderer_vulkan_timeout_ms,
180  const bool renderer_use_parallel_executors,
181  const bool enable_auto_clear_render_mem,
182  const int render_oom_retry_threshold,
183  const size_t render_mem_bytes,
184  const size_t max_concurrent_render_sessions,
185  const size_t reserved_gpu_mem,
186  const bool render_compositor_use_last_gpu,
187  const bool renderer_enable_slab_allocation,
188  const size_t num_reader_threads,
189  const AuthMetadata& authMetadata,
190  SystemParameters& system_parameters,
191  const bool legacy_syntax,
192  const int idle_session_duration,
193  const int max_session_duration,
194  const std::string& udf_filename,
195  const std::string& clang_path,
196  const std::vector<std::string>& clang_options,
197 #ifdef ENABLE_GEOS
198  const std::string& libgeos_so_filename,
199 #endif
200 #ifdef HAVE_TORCH_TFS
201  const std::string& torch_lib_path,
202 #endif
203  const File_Namespace::DiskCacheConfig& disk_cache_config,
204  const bool is_new_db)
205  : leaf_aggregator_(db_leaves)
206  , db_leaves_(db_leaves)
207  , string_leaves_(string_leaves)
208  , base_data_path_(base_data_path)
209  , random_gen_(std::random_device{}())
210  , session_id_dist_(0, INT32_MAX)
211  , jit_debug_(jit_debug)
212  , intel_jit_profile_(intel_jit_profile)
213  , allow_multifrag_(allow_multifrag)
214  , read_only_(read_only)
215  , allow_loop_joins_(allow_loop_joins)
216  , authMetadata_(authMetadata)
217  , system_parameters_(system_parameters)
218  , legacy_syntax_(legacy_syntax)
219  , dispatch_queue_(
220  std::make_unique<QueryDispatchQueue>(system_parameters.num_executors))
221  , super_user_rights_(false)
222  , idle_session_duration_(idle_session_duration * 60)
223  , max_session_duration_(max_session_duration * 60)
224  , enable_rendering_(enable_rendering)
225  , renderer_prefer_igpu_(renderer_prefer_igpu)
226  , renderer_vulkan_timeout_(renderer_vulkan_timeout_ms)
227  , renderer_use_parallel_executors_(renderer_use_parallel_executors)
228  , enable_auto_clear_render_mem_(enable_auto_clear_render_mem)
229  , render_oom_retry_threshold_(render_oom_retry_threshold)
230  , render_mem_bytes_(render_mem_bytes)
231  , max_concurrent_render_sessions_(max_concurrent_render_sessions)
232  , reserved_gpu_mem_(reserved_gpu_mem)
233  , render_compositor_use_last_gpu_(render_compositor_use_last_gpu)
234  , renderer_enable_slab_allocation_{renderer_enable_slab_allocation}
235  , num_reader_threads_(num_reader_threads)
236 #ifdef ENABLE_GEOS
237  , libgeos_so_filename_(libgeos_so_filename)
238 #endif
239 #ifdef HAVE_TORCH_TFS
240  , torch_lib_path_(torch_lib_path)
241 #endif
242  , disk_cache_config_(disk_cache_config)
243  , udf_filename_(udf_filename)
244  , clang_path_(clang_path)
245  , clang_options_(clang_options)
246  , max_num_sessions_(-1) {
247  LOG(INFO) << "HeavyDB Server " << MAPD_RELEASE;
     // Bring up all subsystems first, then create the (empty) session store.
248  initialize(is_new_db);
249  resetSessionsStore();
250 }
251 
253  size_t num_cpu_slots{0};
254  size_t num_gpu_slots{0};
255  size_t cpu_result_mem{0};
256  size_t cpu_buffer_pool_mem{0};
257  size_t gpu_buffer_pool_mem{0};
258  LOG(INFO) << "Initializing Executor Resource Manager";
259 
260  if (g_cpu_threads_override != 0) {
261  LOG(INFO) << "\tSetting Executor resource pool avaiable CPU threads/slots to "
262  "user-specified value of "
263  << g_cpu_threads_override << ".";
264  num_cpu_slots = g_cpu_threads_override;
265  } else {
266  LOG(INFO) << "\tSetting Executor resource pool avaiable CPU threads/slots to default "
267  "value of "
268  << cpu_threads() << ".";
269  // Setting the number of CPU slots to cpu_threads() will cause the ExecutorResourceMgr
270  // to set the logical number of available cpu slots to mirror the number of threads in
271  // the tbb thread pool and used elsewhere in the system, but we may want to consider a
272  // capability to allow the executor resource pool number of threads to be set
273  // independently as some fraction of the what cpu_threads() will return, to give some
274  // breathing room for all the other processes in the system that use CPU threadds
275  num_cpu_slots = cpu_threads();
276  }
277  LOG(INFO) << "\tSetting max per-query CPU threads to ratio of "
279  << num_cpu_slots << " available threads, or "
281  num_cpu_slots)
282  << " threads.";
283 
284  // system_parameters_.num_gpus will be -1 if there are no GPUs enabled so we need to
285  // guard against this
286  num_gpu_slots = system_parameters_.num_gpus < 0 ? static_cast<size_t>(0)
288 
289  cpu_buffer_pool_mem = data_mgr_->getCpuBufferPoolSize();
292  } else {
293  const size_t system_mem_bytes = DataMgr::getTotalSystemMemory();
294  CHECK_GT(system_mem_bytes, size_t(0));
295  const size_t remaining_cpu_mem_bytes = system_mem_bytes >= cpu_buffer_pool_mem
296  ? system_mem_bytes - cpu_buffer_pool_mem
297  : 0UL;
298  cpu_result_mem =
299  std::max(static_cast<size_t>(remaining_cpu_mem_bytes *
301  static_cast<size_t>(1UL << 32));
302  }
303  // Below gets total combined size of all gpu buffer pools
304  // Likely will move to per device pool resource management,
305  // but keeping simple for now
306  gpu_buffer_pool_mem = data_mgr_->getGpuBufferPoolSize();
307 
308  // When we move to using the BufferMgrs directly in
309  // ExecutorResourcePool, there won't be a need for
310  // the buffer_pool_max_occupancy variable - a
311  // safety "fudge" factor as what the resource pool sees
312  // and what the BufferMgrs see will be exactly the same.
313 
314  // However we need to ensure we can quickly access
315  // chunk state of BufferMgrs without going through coarse lock
316  // before we do this, so use this fudge ratio for now
317 
318  // Note that if we are not conservative enough with the below and
319  // overshoot, the error will still be caught and if on GPU, the query
320  // can be re-run on CPU
321 
322  constexpr double buffer_pool_max_occupancy{0.95};
323  const size_t conservative_cpu_buffer_pool_mem =
324  static_cast<size_t>(cpu_buffer_pool_mem * buffer_pool_max_occupancy);
325  const size_t conservative_gpu_buffer_pool_mem =
326  static_cast<size_t>(gpu_buffer_pool_mem * buffer_pool_max_occupancy);
327 
328  LOG(INFO)
329  << "\tSetting Executor resource pool reserved space for CPU buffer pool memory to "
330  << format_num_bytes(conservative_cpu_buffer_pool_mem) << ".";
331  if (gpu_buffer_pool_mem > 0UL) {
332  LOG(INFO) << "\tSetting Executor resource pool reserved space for GPU buffer pool "
333  "memory to "
334  << format_num_bytes(conservative_gpu_buffer_pool_mem) << ".";
335  }
336  LOG(INFO) << "\tSetting Executor resource pool reserved space for CPU result memory to "
337  << format_num_bytes(cpu_result_mem) << ".";
338 
340  num_cpu_slots,
341  num_gpu_slots,
342  cpu_result_mem,
343  conservative_cpu_buffer_pool_mem,
344  conservative_gpu_buffer_pool_mem,
352 }
353 
355 #ifndef _WIN32
356  size_t temp;
357  CHECK(!__builtin_mul_overflow(g_num_tuple_threshold_switch_to_baseline,
359  &temp))
360  << "The product of g_num_tuple_threshold_switch_to_baseline and "
361  "g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline exceeds 64 bits.";
362 #endif
363 }
364 
366  if (sessions_store_) {
367  // Disconnect any existing sessions.
368  auto sessions = sessions_store_->getAllSessions();
369  for (auto session : sessions) {
370  sessions_store_->disconnect(session->get_session_id());
371  }
372  }
375  1,
379  [this](auto& session_ptr) { disconnect_impl(session_ptr); });
380 }
381 
// One-time server initialization; a second call only reports that the server
// is already initialized and returns.
//
// In the order visible here it: decides between CPU-only and GPU mode, creates
// the CUDA manager (HAVE_CUDA builds only), creates the data manager, compiles
// an optional UDF file, starts Calcite, registers extension and table
// functions, initializes the system catalog, records the import path and the
// start time, optionally creates the render handler, and optionally overrides
// the GEOS library name.
//
// Error policy as written: failures of the data manager, UDF compiler,
// Calcite, extension functions, table functions factory, compile-time table
// function registration and system catalog are reported with LOG(FATAL).
// Failures of the CUDA manager, the runtime libraries and the render handler
// are reported with LOG(ERROR) and the corresponding feature is left disabled.
//
// is_new_db is forwarded to the system catalog initialization call.
382 void DBHandler::initialize(const bool is_new_db) {
     // Guard against repeated initialization.
383  if (!initialized_) {
384  initialized_ = true;
385  } else {
387  "Server already initialized; service restart required to activate any new "
388  "entitlements.");
389  return;
390  }
391 
394  cpu_mode_only_ = true;
395  } else {
396 #ifdef HAVE_CUDA
398  cpu_mode_only_ = false;
399 #else
     // Builds without CUDA always end up in CPU mode.
401  LOG(WARNING) << "This build isn't CUDA enabled, will run on CPU";
402  cpu_mode_only_ = true;
403 #endif
404  }
405 
     // Rendering starts from the configured flag but is switched off when zero
     // GPUs are configured.
406  bool is_rendering_enabled = enable_rendering_;
407  if (system_parameters_.num_gpus == 0) {
408  is_rendering_enabled = false;
409  }
410 
411  const auto data_path =
412  boost::filesystem::path(base_data_path_) / shared::kDataDirectoryName;
413  // calculate the total amount of memory we need to reserve from each gpu that the Buffer
414  // manager cannot ask for
415  size_t total_reserved = reserved_gpu_mem_;
416  if (is_rendering_enabled) {
417  total_reserved += render_mem_bytes_;
418  }
419 
     // Stays null unless the HAVE_CUDA section below succeeds.
420  std::unique_ptr<CudaMgr_Namespace::CudaMgr> cuda_mgr;
421 #ifdef HAVE_CUDA
422  if (!cpu_mode_only_ || is_rendering_enabled) {
423  try {
424  cuda_mgr = std::make_unique<CudaMgr_Namespace::CudaMgr>(
     // A negative num_gpus is replaced by the detected device count; the else
     // branch takes the minimum of the configured and the detected count.
426  if (system_parameters_.num_gpus < 0) {
427  system_parameters_.num_gpus = cuda_mgr->getDeviceCount();
428  } else {
430  std::min(system_parameters_.num_gpus, cuda_mgr->getDeviceCount());
431  }
432  } catch (const std::exception& e) {
     // No usable CUDA manager: continue in CPU mode without rendering.
433  LOG(ERROR) << "Unable to instantiate CudaMgr, falling back to CPU-only mode. "
434  << e.what();
436  cpu_mode_only_ = true;
437  is_rendering_enabled = false;
438  }
439  }
440 #endif // HAVE_CUDA
441 
443 
     // The data manager takes ownership of cuda_mgr (moved in).
444  try {
445  data_mgr_.reset(new Data_Namespace::DataMgr(data_path.string(),
447  std::move(cuda_mgr),
449  total_reserved,
452  } catch (const std::exception& e) {
453  LOG(FATAL) << "Failed to initialize data manager: " << e.what();
454  }
457  }
458 
     // Remains empty when no UDF file is configured; handed to Calcite below.
459  std::string udf_ast_filename("");
460 
461  try {
462  if (!udf_filename_.empty()) {
     // Compile the UDF file and register the resulting IR with the Executor;
     // the CUDA IR file is only added when the compiler produced one.
463  const auto cuda_mgr = data_mgr_->getCudaMgr();
464  const CudaMgr_Namespace::NvidiaDeviceArch device_arch =
465  cuda_mgr ? cuda_mgr->getDeviceArch()
467  UdfCompiler compiler(device_arch, clang_path_, clang_options_);
468 
469  const auto [cpu_udf_ir_file, cuda_udf_ir_file] = compiler.compileUdf(udf_filename_);
470  Executor::addUdfIrToModule(cpu_udf_ir_file, /*is_cuda_ir=*/false);
471  if (!cuda_udf_ir_file.empty()) {
472  Executor::addUdfIrToModule(cuda_udf_ir_file, /*is_cuda_ir=*/true);
473  }
474  udf_ast_filename = compiler.getAstFileName(udf_filename_);
475  }
476  } catch (const std::exception& e) {
477  LOG(FATAL) << "Failed to initialize UDF compiler: " << e.what();
478  }
479 
480  try {
481  calcite_ =
482  std::make_shared<Calcite>(system_parameters_, base_data_path_, udf_ast_filename);
483  } catch (const std::exception& e) {
484  LOG(FATAL) << "Failed to initialize Calcite server: " << e.what();
485  }
486 
     // Extension function whitelists come from Calcite; the UDF whitelist is
     // only added when a UDF file was configured.
487  try {
488  ExtensionFunctionsWhitelist::add(calcite_->getExtensionFunctionWhitelist());
489  if (!udf_filename_.empty()) {
490  ExtensionFunctionsWhitelist::addUdfs(calcite_->getUserDefinedFunctionWhitelist());
491  }
492  } catch (const std::exception& e) {
493  LOG(FATAL) << "Failed to initialize extension functions: " << e.what();
494  }
495 
496  try {
498  } catch (const std::exception& e) {
499  LOG(FATAL) << "Failed to initialize table functions factory: " << e.what();
500  }
501 
502 #ifdef HAVE_RUNTIME_LIBS
     // Runtime libraries are optional: a load failure is logged, not fatal.
503  try {
504 #ifdef HAVE_TORCH_TFS
505  RuntimeLibManager::loadRuntimeLibs(torch_lib_path_);
506 #else
508 #endif
509  } catch (const std::exception& e) {
510  LOG(ERROR) << "Failed to load runtime libraries: " << e.what();
511  LOG(ERROR) << "Support for runtime library table functions is disabled.";
512  }
513 #endif
514 
     // Hand the table functions to Calcite; the UDF list is intentionally empty
     // and is_runtime is false.
515  try {
516  auto udtfs = ThriftSerializers::to_thrift(
518  std::vector<TUserDefinedFunction> udfs = {};
519  calcite_->setRuntimeExtensionFunctions(udfs, udtfs, /*is_runtime=*/false);
520  } catch (const std::exception& e) {
521  LOG(FATAL) << "Failed to register compile-time table functions: " << e.what();
522  }
523 
     // GPU mode was selected but the data manager reports no GPUs.
524  if (!data_mgr_->gpusPresent() && !cpu_mode_only_) {
526  LOG(ERROR) << "No GPUs detected, falling back to CPU mode";
527  cpu_mode_only_ = true;
528  }
529 
530  LOG(INFO) << "Started in " << executor_device_type_ << " mode.";
531 
532  try {
535  data_mgr_,
537  calcite_,
538  is_new_db,
539  !db_leaves_.empty(),
541  } catch (const std::exception& e) {
542  LOG(FATAL) << "Failed to initialize system catalog: " << e.what();
543  }
544 
545  import_path_ = boost::filesystem::path(base_data_path_) / shared::kDefaultImportDirName;
546  start_time_ = std::time(nullptr);
547 
     // A failing render handler only disables rendering; render_handler_ then
     // keeps its previous value.
548  if (is_rendering_enabled) {
549  try {
550  render_handler_.reset(new RenderHandler(this,
554  false,
555  false,
561  } catch (const std::exception& e) {
562  LOG(ERROR) << "Backend rendering disabled: " << e.what();
563  }
564  }
565 
567 
568 #ifdef ENABLE_GEOS
     // Publish a user-supplied GEOS library name through the global declared
     // at file scope.
569  if (!libgeos_so_filename_.empty()) {
570  g_libgeos_so_filename.reset(new std::string(libgeos_so_filename_));
571  LOG(INFO) << "Overriding default geos library with '" + *g_libgeos_so_filename + "'";
572  }
573 #endif
574 }
575 
577  shutdown();
578 }
579 
580 void DBHandler::check_read_only(const std::string& str) {
581  if (DBHandler::read_only_) {
582  THROW_DB_EXCEPTION(str + " disabled: server running in read-only mode.");
583  }
584 }
585 
587  const std::shared_ptr<Catalog_Namespace::Catalog>& catalog_ptr) {
588  // We would create an in memory session for calcite with super user privileges which
589  // would be used for getting all tables metadata when a user runs the query. The
590  // session would be under the name of a proxy user/password which would only persist
591  // till server's lifetime or execution of calcite query(in memory) whichever is the
592  // earliest.
594  std::string session_id;
595  do {
597  } while (calcite_sessions_.find(session_id) != calcite_sessions_.end());
598  Catalog_Namespace::UserMetadata user_meta(-1,
599  calcite_->getInternalSessionProxyUserName(),
600  calcite_->getInternalSessionProxyPassword(),
601  true,
602  -1,
603  true,
604  false);
605  const auto emplace_ret = calcite_sessions_.emplace(
606  session_id,
607  std::make_shared<Catalog_Namespace::SessionInfo>(
608  catalog_ptr, user_meta, executor_device_type_, session_id));
609  CHECK(emplace_ret.second);
610  return session_id;
611 }
612 
// Drops the entry stored under session_id from calcite_sessions_.
// The CHECK requires that erase() actually removed an entry and reports the
// offending session_id otherwise.
613 void DBHandler::removeInMemoryCalciteSession(const std::string& session_id) {
614  // Remove InMemory calcite Session.
616  CHECK(calcite_sessions_.erase(session_id)) << session_id;
617 }
618 
619 // internal connection for connections with no password
// Logs in through SysCatalog::login() with an empty password string, checks
// that the user holds privileges on the database object, and creates the
// session through connect_impl() (again with an empty password).
// session_id [out] receives the id assigned by connect_impl().
// Throws TDBException (via THROW_DB_EXCEPTION) when login() throws or when the
// privilege check fails.
620 void DBHandler::internal_connect(TSessionId& session_id,
621  const std::string& username,
622  const std::string& dbname) {
624  auto stdlog = STDLOG(); // session_id set by connect_impl()
625  std::string username2 = username; // login() may reset username given as argument
626  std::string dbname2 = dbname; // login() may reset dbname given as argument
628  std::shared_ptr<Catalog> cat = nullptr;
629  try {
630  cat =
631  SysCatalog::instance().login(dbname2, username2, std::string(), user_meta, false);
632  } catch (std::exception& e) {
     // Surface any login failure to the caller as a TDBException.
633  THROW_DB_EXCEPTION(e.what());
634  }
635 
     // Build the database-level object the privilege check below is run against.
636  DBObject dbObject(dbname2, DatabaseDBObjectType);
637  dbObject.loadKey(*cat);
639  std::vector<DBObject> dbObjects;
640  dbObjects.push_back(dbObject);
641  if (!SysCatalog::instance().checkPrivileges(user_meta, dbObjects)) {
642  THROW_DB_EXCEPTION("Unauthorized Access: user " + user_meta.userLoggable() +
643  " is not allowed to access database " + dbname2 + ".");
644  }
645  connect_impl(session_id, std::string(), dbname2, user_meta, cat, stdlog);
646 }
647 
649  return leaf_aggregator_.leafCount() > 0;
650 }
651 
652 void DBHandler::krb5_connect(TKrb5Session& session,
653  const std::string& inputToken,
654  const std::string& dbname) {
655  THROW_DB_EXCEPTION("Unauthrorized Access. Kerberos login not supported");
656 }
657 
// Password-based login.
// Authenticates through SysCatalog::login(), verifies that the user holds
// privileges on the database object, creates the session through
// connect_impl(), and finally passes passwd and session_id to
// SysCatalog::check_for_session_encryption().
// session_id [out] receives the new session id.
// Throws TDBException (via THROW_DB_EXCEPTION) when login() throws or when the
// privilege check fails; in both cases user, db and the failure reason are
// appended to the request log first.
658 void DBHandler::connect(TSessionId& session_id,
659  const std::string& username,
660  const std::string& passwd,
661  const std::string& dbname) {
663  auto stdlog = STDLOG(); // session_info set by connect_impl()
664  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
665  std::string username2 = username; // login() may reset username given as argument
666  std::string dbname2 = dbname; // login() may reset dbname given as argument
668  std::shared_ptr<Catalog> cat = nullptr;
669  try {
670  cat = SysCatalog::instance().login(
671  dbname2, username2, passwd, user_meta, !super_user_rights_);
672  } catch (std::exception& e) {
     // The log entry records the names exactly as the client sent them.
673  stdlog.appendNameValuePairs("user", username, "db", dbname, "exception", e.what());
674  THROW_DB_EXCEPTION(e.what());
675  }
676 
     // Build the database-level object the privilege check below is run against.
677  DBObject dbObject(dbname2, DatabaseDBObjectType);
678  dbObject.loadKey(*cat);
680  std::vector<DBObject> dbObjects;
681  dbObjects.push_back(dbObject);
682  if (!SysCatalog::instance().checkPrivileges(user_meta, dbObjects)) {
683  stdlog.appendNameValuePairs(
684  "user", username, "db", dbname, "exception", "Missing Privileges");
685  THROW_DB_EXCEPTION("Unauthorized Access: user " + user_meta.userLoggable() +
686  " is not allowed to access database " + dbname2 + ".");
687  }
688  connect_impl(session_id, passwd, dbname2, user_meta, cat, stdlog);
689 
690  // if pki auth session_id will come back encrypted with user pubkey
691  SysCatalog::instance().check_for_session_encryption(passwd, session_id);
692 }
693 
694 void DBHandler::connect_impl(TSessionId& session_id,
695  const std::string& passwd,
696  const std::string& dbname,
697  const Catalog_Namespace::UserMetadata& user_meta,
698  std::shared_ptr<Catalog> cat,
699  query_state::StdLog& stdlog) {
700  // TODO(sy): Is there any reason to have dbname as a parameter
701  // here when the cat parameter already provides cat->name()?
702  // Should dbname and cat->name() ever differ?
703  auto session_ptr = sessions_store_->add(user_meta, cat, executor_device_type_);
704  session_id = session_ptr->get_session_id();
705  LOG(INFO) << "User " << user_meta.userLoggable() << " connected to database " << dbname;
706  stdlog.setSessionInfo(session_ptr);
707  session_ptr->set_connection_info(getConnectionInfo().toString());
708  if (!super_user_rights_) { // no need to connect to leaf_aggregator_ at this time
709  // while doing warmup
710  }
711  auto const roles =
712  stdlog.getConstSessionInfo()->get_currentUser().isSuper
713  ? std::vector<std::string>{{"super"}}
714  : SysCatalog::instance().getRoles(
715  false, false, stdlog.getConstSessionInfo()->get_currentUser().userName);
716  stdlog.appendNameValuePairs("roles", boost::algorithm::join(roles, ","));
717 }
718 
719 void DBHandler::disconnect(const TSessionId& session_id_or_json) {
720  heavyai::RequestInfo const request_info(session_id_or_json);
721  SET_REQUEST_ID(request_info.requestId());
722  auto session_ptr = get_session_ptr(request_info.sessionId());
723  auto stdlog = STDLOG(session_ptr, "client", getConnectionInfo().toString());
724  sessions_store_->disconnect(request_info.sessionId());
725 }
726 
728  const auto session_id = session_ptr->get_session_id();
729  std::exception_ptr leaf_exception = nullptr;
730  try {
731  if (leaf_aggregator_.leafCount() > 0) {
732  leaf_aggregator_.disconnect(session_id);
733  }
734  } catch (...) {
735  leaf_exception = std::current_exception();
736  }
737 
738  if (render_handler_) {
739  render_handler_->disconnect(session_id);
740  }
741 
742  if (leaf_exception) {
743  std::rethrow_exception(leaf_exception);
744  }
745 }
746 
747 void DBHandler::switch_database(const TSessionId& session_id_or_json,
748  const std::string& dbname) {
749  heavyai::RequestInfo const request_info(session_id_or_json);
750  SET_REQUEST_ID(request_info.requestId());
751  auto session_ptr = get_session_ptr(request_info.sessionId());
752  auto stdlog = STDLOG(session_ptr);
753  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
754  std::string dbname2 = dbname; // switchDatabase() may reset dbname given as argument
755  try {
756  std::shared_ptr<Catalog> cat = SysCatalog::instance().switchDatabase(
757  dbname2, session_ptr->get_currentUser().userName);
758  session_ptr->set_catalog_ptr(cat);
759  if (leaf_aggregator_.leafCount() > 0) {
760  leaf_aggregator_.switch_database(request_info.sessionId(), dbname);
761  return;
762  }
763  } catch (std::exception& e) {
764  THROW_DB_EXCEPTION(e.what());
765  }
766 }
767 
768 void DBHandler::clone_session(TSessionId& session2_id,
769  const TSessionId& session1_id_or_json) {
770  heavyai::RequestInfo const request_info(session1_id_or_json);
771  SET_REQUEST_ID(request_info.requestId());
772  auto session1_ptr = get_session_ptr(request_info.sessionId());
773  auto stdlog = STDLOG(session1_ptr);
774  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
775 
776  try {
777  const Catalog_Namespace::UserMetadata& user_meta = session1_ptr->get_currentUser();
778  std::shared_ptr<Catalog> cat = session1_ptr->get_catalog_ptr();
779  auto session2_ptr = sessions_store_->add(user_meta, cat, executor_device_type_);
780  session2_id = session2_ptr->get_session_id();
781  LOG(INFO) << "User " << user_meta.userLoggable() << " connected to database "
782  << cat->name();
783  if (leaf_aggregator_.leafCount() > 0) {
784  leaf_aggregator_.clone_session(request_info.sessionId(), session2_id);
785  return;
786  }
787  } catch (std::exception& e) {
788  THROW_DB_EXCEPTION(e.what());
789  }
790 }
791 
// Interrupts the query that runs under the session named by
// query_session_id_or_json, on behalf of the session named by
// interrupt_session_id_or_json (the latter is the one that is looked up,
// logged and used for the request id).
//
// Nothing happens unless g_enable_dynamic_watchdog or allow_query_interrupt is
// set. Otherwise: with leaves present the interrupt is first forwarded to
// leaf_aggregator_; then every executor returned by
// getExecutorIdsRunningQuery() is interrupted. If that list is empty, the
// executor obtained at the top is interrupted instead, but only when
// checkIsQuerySessionEnrolled() reports the query session as enrolled.
792 void DBHandler::interrupt(const TSessionId& query_session_id_or_json,
793  const TSessionId& interrupt_session_id_or_json) {
794  // if this is for distributed setting, query_session becomes a parent session (agg)
795  // and the interrupt session is one of existing session in the leaf node (leaf)
796  // so we can think there exists a logical mapping
797  // between query_session (agg) and interrupt_session (leaf)
798  heavyai::RequestInfo const query_request_info(query_session_id_or_json);
799  heavyai::RequestInfo const interrupt_request_info(interrupt_session_id_or_json);
800  SET_REQUEST_ID(interrupt_request_info.requestId());
801  auto session_ptr = get_session_ptr(interrupt_request_info.sessionId());
802  auto& cat = session_ptr->getCatalog();
803  auto stdlog = STDLOG(session_ptr);
804  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
805  const auto allow_query_interrupt =
807  if (g_enable_dynamic_watchdog || allow_query_interrupt) {
808  const auto dbname = cat.getCurrentDB().dbName;
810  jit_debug_ ? "/tmp" : "",
811  jit_debug_ ? "mapdquery" : "",
813  CHECK(executor);
814 
815  if (leaf_aggregator_.leafCount() > 0) {
816  leaf_aggregator_.interrupt(query_request_info.sessionId(),
817  interrupt_request_info.sessionId());
818  }
819  auto target_executor_ids =
820  executor->getExecutorIdsRunningQuery(query_request_info.sessionId());
821  if (target_executor_ids.empty()) {
     // No executor reports the query as running: interrupt through `executor`
     // itself if the query session is enrolled there.
823  executor->getSessionLock());
824  if (executor->checkIsQuerySessionEnrolled(query_request_info.sessionId(),
825  session_read_lock)) {
     // The session lock is released before the interrupt call is made.
826  session_read_lock.unlock();
827  VLOG(1) << "Received interrupt: "
828  << "User " << session_ptr->get_currentUser().userLoggable()
829  << ", Database " << dbname << std::endl;
830  executor->interrupt(query_request_info.sessionId(),
831  interrupt_request_info.sessionId());
832  }
833  } else {
     // Interrupt each executor that reports it is running the query.
834  for (auto& executor_id : target_executor_ids) {
835  VLOG(1) << "Received interrupt: "
836  << "Executor " << executor_id << ", User "
837  << session_ptr->get_currentUser().userLoggable() << ", Database "
838  << dbname << std::endl;
839  auto target_executor = Executor::getExecutor(executor_id);
840  target_executor->interrupt(query_request_info.sessionId(),
841  interrupt_request_info.sessionId());
842  }
843  }
844 
     // Logged whenever interrupts are enabled, whether or not a query was found.
845  LOG(INFO) << "User " << session_ptr->get_currentUser().userName
846  << " interrupted session with database " << dbname << std::endl;
847  }
848 }
849 
851  if (g_cluster) {
852  if (leaf_aggregator_.leafCount() > 0) {
853  return TRole::type::AGGREGATOR;
854  }
855  return TRole::type::LEAF;
856  }
857  return TRole::type::SERVER;
858 }
859 void DBHandler::get_server_status(TServerStatus& _return,
860  const TSessionId& session_id_or_json) {
861  heavyai::RequestInfo const request_info(session_id_or_json);
862  SET_REQUEST_ID(request_info.requestId());
863  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
864  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
865  const auto rendering_enabled = bool(render_handler_);
866  _return.read_only = read_only_;
867  _return.version = MAPD_RELEASE;
868  _return.rendering_enabled = rendering_enabled;
869  _return.start_time = start_time_;
870  _return.edition = MAPD_EDITION;
871  _return.host_name = heavyai::get_hostname();
872  _return.poly_rendering_enabled = rendering_enabled;
873  _return.role = getServerRole();
874  _return.renderer_status_json =
875  render_handler_ ? render_handler_->get_renderer_status_json() : "";
876 }
877 
878 void DBHandler::get_status(std::vector<TServerStatus>& _return,
879  const TSessionId& session_id_or_json) {
880  //
881  // get_status() is now called locally at startup on the aggregator
882  // in order to validate that all nodes of a cluster are running the
883  // same software version and the same renderer status
884  //
885  // In that context, it is called with the InvalidSessionID, and
886  // with the local super-user flag set.
887  //
888  // Hence, we allow this session-less mode only in distributed mode, and
889  // then on a leaf (always), or on the aggregator (only in super-user mode)
890  //
891  heavyai::RequestInfo const request_info(session_id_or_json);
892  SET_REQUEST_ID(request_info.requestId());
893  auto const allow_invalid_session = g_cluster && (!isAggregator() || super_user_rights_);
894 
895  if (!allow_invalid_session || request_info.sessionId() != getInvalidSessionId()) {
896  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
897  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
898  } else {
899  LOG(INFO) << "get_status() called in session-less mode";
900  }
901  const auto rendering_enabled = bool(render_handler_);
902  TServerStatus ret;
903  ret.read_only = read_only_;
904  ret.version = MAPD_RELEASE;
905  ret.rendering_enabled = rendering_enabled;
906  ret.start_time = start_time_;
907  ret.edition = MAPD_EDITION;
908  ret.host_name = heavyai::get_hostname();
909  ret.poly_rendering_enabled = rendering_enabled;
910  ret.role = getServerRole();
911  ret.renderer_status_json =
912  render_handler_ ? render_handler_->get_renderer_status_json() : "";
913  ret.host_id = "";
914 
915  _return.push_back(ret);
916  if (leaf_aggregator_.leafCount() > 0) {
917  std::vector<TServerStatus> leaf_status =
918  leaf_aggregator_.getLeafStatus(request_info.sessionId());
919  _return.insert(_return.end(), leaf_status.begin(), leaf_status.end());
920  }
921 }
922 
923 void DBHandler::get_hardware_info(TClusterHardwareInfo& _return,
924  const TSessionId& session_id_or_json) {
925  heavyai::RequestInfo const request_info(session_id_or_json);
926  SET_REQUEST_ID(request_info.requestId());
927  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
928  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
929  THardwareInfo ret;
930  const auto cuda_mgr = data_mgr_->getCudaMgr();
931  if (cuda_mgr) {
932  ret.num_gpu_hw = cuda_mgr->getDeviceCount();
933  ret.start_gpu = cuda_mgr->getStartGpu();
934  if (ret.start_gpu >= 0) {
935  ret.num_gpu_allocated = cuda_mgr->getDeviceCount() - cuda_mgr->getStartGpu();
936  // ^ This will break as soon as we allow non contiguous GPU allocations to MapD
937  }
938  for (int16_t device_id = 0; device_id < ret.num_gpu_hw; device_id++) {
939  TGpuSpecification gpu_spec;
940  auto deviceProperties = cuda_mgr->getDeviceProperties(device_id);
941  gpu_spec.num_sm = deviceProperties->numMPs;
942  gpu_spec.clock_frequency_kHz = deviceProperties->clockKhz;
943  gpu_spec.memory = deviceProperties->globalMem;
944  gpu_spec.compute_capability_major = deviceProperties->computeMajor;
945  gpu_spec.compute_capability_minor = deviceProperties->computeMinor;
946  ret.gpu_info.push_back(gpu_spec);
947  }
948  }
949 
950  // start hardware/OS dependent code
951  ret.num_cpu_hw = std::thread::hardware_concurrency();
952  // ^ This might return diffrent results in case of hyper threading
953  // end hardware/OS dependent code
954 
955  _return.hardware_info.push_back(ret);
956 }
957 
958 void DBHandler::get_session_info(TSessionInfo& _return,
959  const TSessionId& session_id_or_json) {
960  heavyai::RequestInfo const request_info(session_id_or_json);
961  SET_REQUEST_ID(request_info.requestId());
962  auto session_ptr = get_session_ptr(request_info.sessionId());
963  CHECK(session_ptr);
964  auto stdlog = STDLOG(session_ptr);
965  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
966  auto user_metadata = session_ptr->get_currentUser();
967  _return.user = user_metadata.userName;
968  _return.database = session_ptr->getCatalog().getCurrentDB().dbName;
969  _return.start_time = session_ptr->get_start_time();
970  _return.is_super = user_metadata.isSuper;
971 }
972 
973 void DBHandler::set_leaf_info(const TSessionId& session, const TLeafInfo& info) {
974  g_distributed_leaf_idx = info.leaf_id;
975  g_distributed_num_leaves = info.num_leaves;
976 }
977 
979  const SQLTypeInfo& ti,
980  TColumn& column) {
981  if (ti.is_array()) {
983  << "element types of arrays should always be nullable";
984  TColumn tColumn;
985  const auto array_tv = boost::get<ArrayTargetValue>(&tv);
986  CHECK(array_tv);
987  bool is_null = !array_tv->is_initialized();
988  if (!is_null) {
989  const auto& vec = array_tv->get();
990  for (const auto& elem_tv : vec) {
991  value_to_thrift_column(elem_tv, ti.get_elem_type(), tColumn);
992  }
993  }
994  column.data.arr_col.push_back(tColumn);
995  column.nulls.push_back(is_null && !ti.get_notnull());
996  } else if (ti.is_geometry()) {
997  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
998  if (scalar_tv) {
999  auto s_n = boost::get<NullableString>(scalar_tv);
1000  auto s = boost::get<std::string>(s_n);
1001  if (s) {
1002  column.data.str_col.push_back(*s);
1003  } else {
1004  column.data.str_col.emplace_back(""); // null string
1005  auto null_p = boost::get<void*>(s_n);
1006  CHECK(null_p && !*null_p);
1007  }
1008  column.nulls.push_back(!s && !ti.get_notnull());
1009  } else {
1010  const auto array_tv = boost::get<ArrayTargetValue>(&tv);
1011  CHECK(array_tv);
1012  bool is_null = !array_tv->is_initialized();
1013  if (!is_null) {
1014  auto elem_type = SQLTypeInfo(kDOUBLE, false);
1015  TColumn tColumn;
1016  const auto& vec = array_tv->get();
1017  for (const auto& elem_tv : vec) {
1018  value_to_thrift_column(elem_tv, elem_type, tColumn);
1019  }
1020  column.data.arr_col.push_back(tColumn);
1021  column.nulls.push_back(false);
1022  } else {
1023  TColumn tColumn;
1024  column.data.arr_col.push_back(tColumn);
1025  column.nulls.push_back(is_null && !ti.get_notnull());
1026  }
1027  }
1028  } else {
1029  CHECK(!ti.is_column());
1030  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
1031  CHECK(scalar_tv);
1032  if (boost::get<int64_t>(scalar_tv)) {
1033  int64_t data = *(boost::get<int64_t>(scalar_tv));
1034 
1035  if (ti.is_decimal()) {
1036  double val = static_cast<double>(data);
1037  if (ti.get_scale() > 0) {
1038  val /= pow(10.0, std::abs(ti.get_scale()));
1039  }
1040  column.data.real_col.push_back(val);
1041  } else {
1042  column.data.int_col.push_back(data);
1043  }
1044 
1045  switch (ti.get_type()) {
1046  case kBOOLEAN:
1047  column.nulls.push_back(data == NULL_BOOLEAN && !ti.get_notnull());
1048  break;
1049  case kTINYINT:
1050  column.nulls.push_back(data == NULL_TINYINT && !ti.get_notnull());
1051  break;
1052  case kSMALLINT:
1053  column.nulls.push_back(data == NULL_SMALLINT && !ti.get_notnull());
1054  break;
1055  case kINT:
1056  column.nulls.push_back(data == NULL_INT && !ti.get_notnull());
1057  break;
1058  case kNUMERIC:
1059  case kDECIMAL:
1060  case kBIGINT:
1061  column.nulls.push_back(data == NULL_BIGINT && !ti.get_notnull());
1062  break;
1063  case kTIME:
1064  case kTIMESTAMP:
1065  case kDATE:
1066  case kINTERVAL_DAY_TIME:
1067  case kINTERVAL_YEAR_MONTH:
1068  column.nulls.push_back(data == NULL_BIGINT && !ti.get_notnull());
1069  break;
1070  default:
1071  column.nulls.push_back(false);
1072  }
1073  } else if (boost::get<double>(scalar_tv)) {
1074  double data = *(boost::get<double>(scalar_tv));
1075  column.data.real_col.push_back(data);
1076  if (ti.get_type() == kFLOAT) {
1077  column.nulls.push_back(data == NULL_FLOAT && !ti.get_notnull());
1078  } else {
1079  column.nulls.push_back(data == NULL_DOUBLE && !ti.get_notnull());
1080  }
1081  } else if (boost::get<float>(scalar_tv)) {
1082  CHECK_EQ(kFLOAT, ti.get_type());
1083  float data = *(boost::get<float>(scalar_tv));
1084  column.data.real_col.push_back(data);
1085  column.nulls.push_back(data == NULL_FLOAT && !ti.get_notnull());
1086  } else if (boost::get<NullableString>(scalar_tv)) {
1087  auto s_n = boost::get<NullableString>(scalar_tv);
1088  auto s = boost::get<std::string>(s_n);
1089  if (s) {
1090  column.data.str_col.push_back(*s);
1091  } else {
1092  column.data.str_col.emplace_back(""); // null string
1093  auto null_p = boost::get<void*>(s_n);
1094  CHECK(null_p && !*null_p);
1095  }
1096  column.nulls.push_back(!s && !ti.get_notnull());
1097  } else {
1098  CHECK(false);
1099  }
1100  }
1101 }
1102 
1104  TDatum datum;
1105  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
1106  if (!scalar_tv) {
1107  CHECK(ti.is_array());
1109  << "element types of arrays should always be nullable";
1110  const auto array_tv = boost::get<ArrayTargetValue>(&tv);
1111  CHECK(array_tv);
1112  if (array_tv->is_initialized()) {
1113  const auto& vec = array_tv->get();
1114  for (const auto& elem_tv : vec) {
1115  const auto scalar_col_val = value_to_thrift(elem_tv, ti.get_elem_type());
1116  datum.val.arr_val.push_back(scalar_col_val);
1117  }
1118  // Datum is not null, at worst it's an empty array Datum
1119  datum.is_null = false;
1120  } else {
1121  datum.is_null = true;
1122  }
1123  return datum;
1124  }
1125  if (boost::get<int64_t>(scalar_tv)) {
1126  int64_t data = *(boost::get<int64_t>(scalar_tv));
1127 
1128  if (ti.is_decimal()) {
1129  double val = static_cast<double>(data);
1130  if (ti.get_scale() > 0) {
1131  val /= pow(10.0, std::abs(ti.get_scale()));
1132  }
1133  datum.val.real_val = val;
1134  } else {
1135  datum.val.int_val = data;
1136  }
1137 
1138  switch (ti.get_type()) {
1139  case kBOOLEAN:
1140  datum.is_null = (datum.val.int_val == NULL_BOOLEAN);
1141  break;
1142  case kTINYINT:
1143  datum.is_null = (datum.val.int_val == NULL_TINYINT);
1144  break;
1145  case kSMALLINT:
1146  datum.is_null = (datum.val.int_val == NULL_SMALLINT);
1147  break;
1148  case kINT:
1149  datum.is_null = (datum.val.int_val == NULL_INT);
1150  break;
1151  case kDECIMAL:
1152  case kNUMERIC:
1153  case kBIGINT:
1154  datum.is_null = (datum.val.int_val == NULL_BIGINT);
1155  break;
1156  case kTIME:
1157  case kTIMESTAMP:
1158  case kDATE:
1159  case kINTERVAL_DAY_TIME:
1160  case kINTERVAL_YEAR_MONTH:
1161  datum.is_null = (datum.val.int_val == NULL_BIGINT);
1162  break;
1163  default:
1164  datum.is_null = false;
1165  }
1166  } else if (boost::get<double>(scalar_tv)) {
1167  datum.val.real_val = *(boost::get<double>(scalar_tv));
1168  if (ti.get_type() == kFLOAT) {
1169  datum.is_null = (datum.val.real_val == NULL_FLOAT);
1170  } else {
1171  datum.is_null = (datum.val.real_val == NULL_DOUBLE);
1172  }
1173  } else if (boost::get<float>(scalar_tv)) {
1174  CHECK_EQ(kFLOAT, ti.get_type());
1175  datum.val.real_val = *(boost::get<float>(scalar_tv));
1176  datum.is_null = (datum.val.real_val == NULL_FLOAT);
1177  } else if (boost::get<NullableString>(scalar_tv)) {
1178  auto s_n = boost::get<NullableString>(scalar_tv);
1179  auto s = boost::get<std::string>(s_n);
1180  if (s) {
1181  datum.val.str_val = *s;
1182  } else {
1183  auto null_p = boost::get<void*>(s_n);
1184  CHECK(null_p && !*null_p);
1185  }
1186  datum.is_null = !s;
1187  } else {
1188  CHECK(false);
1189  }
1190  return datum;
1191 }
1192 
1194  TQueryResult& _return,
1195  const QueryStateProxy& query_state_proxy,
1196  const std::shared_ptr<Catalog_Namespace::SessionInfo> session_ptr,
1197  const std::string& query_str,
1198  const bool column_format,
1199  const std::string& nonce,
1200  const int32_t first_n,
1201  const int32_t at_most_n,
1202  const bool use_calcite) {
1203  _return.total_time_ms = 0;
1204  _return.nonce = nonce;
1205  ParserWrapper pw{query_str};
1206  switch (pw.getQueryType()) {
1208  _return.query_type = TQueryType::READ;
1209  VLOG(1) << "query type: READ";
1210  break;
1211  }
1213  _return.query_type = TQueryType::WRITE;
1214  VLOG(1) << "query type: WRITE";
1215  break;
1216  }
1218  _return.query_type = TQueryType::SCHEMA_READ;
1219  VLOG(1) << "query type: SCHEMA READ";
1220  break;
1221  }
1223  _return.query_type = TQueryType::SCHEMA_WRITE;
1224  VLOG(1) << "query type: SCHEMA WRITE";
1225  break;
1226  }
1227  default: {
1228  _return.query_type = TQueryType::UNKNOWN;
1229  LOG(WARNING) << "query type: UNKNOWN";
1230  break;
1231  }
1232  }
1233 
1236  _return.total_time_ms += measure<>::execution([&]() {
1238  query_state_proxy,
1239  column_format,
1240  session_ptr->get_executor_device_type(),
1241  first_n,
1242  at_most_n,
1243  use_calcite,
1244  locks);
1246  _return, result, query_state_proxy, column_format, first_n, at_most_n);
1247  });
1248 }
1249 
// Converts an execution result into the Thrift TQueryResult `_return`.
//
// The result's execution time is added to `_return.execution_time_ms`; an empty
// result leaves `_return` otherwise untouched. Non-empty results are dispatched
// on `result.getResultType()` to convertRows(), convertResult() or
// convertExplain().
//
// NOTE(review): this listing omits the declaration of the `result` parameter and
// the `case` labels of the switch below, so which result type maps to which
// converter cannot be confirmed from here.
1250 void DBHandler::convertData(TQueryResult& _return,
1252  const QueryStateProxy& query_state_proxy,
1253  const bool column_format,
1254  const int32_t first_n,
1255  const int32_t at_most_n) {
// Accumulate (rather than overwrite) so that repeated calls add up.
1256  _return.execution_time_ms += result.getExecutionTime();
1257  if (result.empty()) {
1258  return;
1259  }
1260 
1261  switch (result.getResultType()) {
// Row conversion honouring the caller's first_n / at_most_n limits.
1263  convertRows(_return,
1264  query_state_proxy,
1265  result.getTargetsMeta(),
1266  *result.getRows(),
1267  column_format,
1268  first_n,
1269  at_most_n);
1270  break;
1272  convertResult(_return, *result.getRows(), true);
1273  break;
1275  convertExplain(_return, *result.getRows(), true);
1276  break;
// Row conversion that passes -1 for both limits instead of first_n / at_most_n.
1278  convertRows(_return,
1279  query_state_proxy,
1280  result.getTargetsMeta(),
1281  *result.getRows(),
1282  column_format,
1283  -1,
1284  -1);
1285  break;
1286  }
1287 }
1288 
// Thrift endpoint: executes a SQL statement and returns the result in `_return`.
//
// A query beginning with "execute relalg" bypasses Calcite: the prefix is removed,
// the remainder is trimmed and `use_calcite` is false. With at least one leaf the
// query is forwarded to the aggregator handler, otherwise it runs through
// sql_execute_local(). The time of any deferred COPY FROM is then added to
// `_return.total_time_ms`.
//
// Any std::exception is rethrown via THROW_DB_EXCEPTION; three known message
// patterns are replaced by friendlier messages.
//
// NOTE(review): one argument line of the cluster_execute() call is missing from
// this listing.
1289 void DBHandler::sql_execute(TQueryResult& _return,
1290  const TSessionId& session_id_or_json,
1291  const std::string& query_str,
1292  const bool column_format,
1293  const std::string& nonce,
1294  const int32_t first_n,
1295  const int32_t at_most_n) {
1296  heavyai::RequestInfo const request_info(session_id_or_json);
1297  SET_REQUEST_ID(request_info.requestId());
// Queries with this prefix carry relational algebra and skip Calcite.
1298  const std::string exec_ra_prefix = "execute relalg";
1299  const bool use_calcite = !boost::starts_with(query_str, exec_ra_prefix);
1300  auto actual_query =
1301  use_calcite ? query_str : boost::trim_copy(query_str.substr(exec_ra_prefix.size()));
1302  auto session_ptr = get_session_ptr(request_info.sessionId());
1303  auto query_state = create_query_state(session_ptr, actual_query);
1304  auto stdlog = STDLOG(session_ptr, query_state);
1305  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
1306  stdlog.appendNameValuePairs("nonce", nonce);
1307  auto timer = DEBUG_TIMER(__func__);
1308  try {
// Drop any deferred COPY FROM state for this session when leaving the try block,
// whether normally or through an exception.
1309  ScopeGuard reset_was_deferred_copy_from = [this, &session_ptr] {
1310  deferred_copy_from_sessions.remove(session_ptr->get_session_id());
1311  };
1312 
// Both limits being non-negative is rejected.
1313  if (first_n >= 0 && at_most_n >= 0) {
1314  THROW_DB_EXCEPTION(std::string("At most one of first_n and at_most_n can be set"));
1315  }
1316 
1317  if (leaf_aggregator_.leafCount() > 0) {
// Distributed path: requires an aggregator handler.
1318  if (!agg_handler_) {
1319  THROW_DB_EXCEPTION("Distributed support is disabled.");
1320  }
1321  _return.total_time_ms = measure<>::execution([&]() {
1322  agg_handler_->cluster_execute(_return,
1323  query_state->createQueryStateProxy(),
1324  query_state->getQueryStr(),
1325  column_format,
1326  nonce,
1327  first_n,
1328  at_most_n,
1330  });
1331  _return.nonce = nonce;
1332  } else {
// Single-node path.
1333  sql_execute_local(_return,
1334  query_state->createQueryStateProxy(),
1335  session_ptr,
1336  actual_query,
1337  column_format,
1338  nonce,
1339  first_n,
1340  at_most_n,
1341  use_calcite);
1342  }
// Run a deferred COPY FROM (if the statement queued one) and count its time.
1343  _return.total_time_ms += process_deferred_copy_from(request_info.sessionId());
// Attach the debug timer output only when there is some.
1344  std::string debug_json = timer.stopAndGetJson();
1345  if (!debug_json.empty()) {
1346  _return.__set_debug(std::move(debug_json));
1347  }
1348  stdlog.appendNameValuePairs(
1349  "execution_time_ms",
1350  _return.execution_time_ms,
1351  "total_time_ms", // BE-3420 - Redundant with duration field
1352  stdlog.duration<std::chrono::milliseconds>());
1353  VLOG(1) << "Table Schema Locks:\n" << lockmgr::TableSchemaLockMgr::instance();
1354  VLOG(1) << "Table Data Locks:\n" << lockmgr::TableDataLockMgr::instance();
1355  } catch (const std::exception& e) {
// Translate a few known low-level messages into user-facing ones; everything
// else is forwarded with its original text.
1356  if (strstr(e.what(), "java.lang.NullPointerException")) {
1357  THROW_DB_EXCEPTION("query failed from broken view or other schema related issue");
1358  } else if (strstr(e.what(), "SQL Error: Encountered \";\"")) {
1359  THROW_DB_EXCEPTION("multiple SQL statements not allowed");
1360  } else if (strstr(e.what(), "SQL Error: Encountered \"<EOF>\" at line 0, column 0")) {
1361  THROW_DB_EXCEPTION("empty SQL statment not allowed");
1362  } else {
1363  THROW_DB_EXCEPTION(e.what());
1364  }
1365  }
1366 }
1367 
1369  const TSessionId& session_id_or_json,
1370  const std::string& query_str,
1371  const bool column_format,
1372  const int32_t first_n,
1373  const int32_t at_most_n,
1375  heavyai::RequestInfo const request_info(session_id_or_json);
1376  SET_REQUEST_ID(request_info.requestId());
1377  const std::string exec_ra_prefix = "execute relalg";
1378  const bool use_calcite = !boost::starts_with(query_str, exec_ra_prefix);
1379  auto actual_query =
1380  use_calcite ? query_str : boost::trim_copy(query_str.substr(exec_ra_prefix.size()));
1381 
1382  auto session_ptr = get_session_ptr(request_info.sessionId());
1383  CHECK(session_ptr);
1384  auto query_state = create_query_state(session_ptr, actual_query);
1385  auto stdlog = STDLOG(session_ptr, query_state);
1386  auto timer = DEBUG_TIMER(__func__);
1387 
1388  try {
1389  ScopeGuard reset_was_deferred_copy_from = [this, &session_ptr] {
1390  deferred_copy_from_sessions.remove(session_ptr->get_session_id());
1391  };
1392 
1393  if (first_n >= 0 && at_most_n >= 0) {
1394  THROW_DB_EXCEPTION(std::string("At most one of first_n and at_most_n can be set"));
1395  }
1396  auto total_time_ms = measure<>::execution([&]() {
1398  query_state->createQueryStateProxy(),
1399  column_format,
1400  session_ptr->get_executor_device_type(),
1401  first_n,
1402  at_most_n,
1403  use_calcite,
1404  locks);
1405  });
1406 
1407  _return.setExecutionTime(total_time_ms +
1408  process_deferred_copy_from(request_info.sessionId()));
1409 
1410  stdlog.appendNameValuePairs(
1411  "execution_time_ms",
1412  _return.getExecutionTime(),
1413  "total_time_ms", // BE-3420 - Redundant with duration field
1414  stdlog.duration<std::chrono::milliseconds>());
1415  VLOG(1) << "Table Schema Locks:\n" << lockmgr::TableSchemaLockMgr::instance();
1416  VLOG(1) << "Table Data Locks:\n" << lockmgr::TableDataLockMgr::instance();
1417  } catch (const std::exception& e) {
1418  if (strstr(e.what(), "java.lang.NullPointerException")) {
1419  THROW_DB_EXCEPTION("query failed from broken view or other schema related issue");
1420  } else if (strstr(e.what(), "SQL Error: Encountered \";\"")) {
1421  THROW_DB_EXCEPTION("multiple SQL statements not allowed");
1422  } else if (strstr(e.what(), "SQL Error: Encountered \"<EOF>\" at line 0, column 0")) {
1423  THROW_DB_EXCEPTION("empty SQL statment not allowed");
1424  } else {
1425  THROW_DB_EXCEPTION(e.what());
1426  }
1427  }
1428 }
1429 
1430 int64_t DBHandler::process_deferred_copy_from(const TSessionId& session_id) {
1431  int64_t total_time_ms(0);
1432  // if the SQL statement we just executed was a geo COPY FROM, the import
1433  // parameters were captured, and this flag set, so we do the actual import here
1434  if (auto deferred_copy_from_state = deferred_copy_from_sessions(session_id)) {
1435  // import_geo_table() calls create_table() which calls this function to
1436  // do the work, so reset the flag now to avoid executing this part a
1437  // second time at the end of that, which would fail as the table was
1438  // already created! Also reset the flag with a ScopeGuard on exiting
1439  // this function any other way, such as an exception from the code above!
1440  deferred_copy_from_sessions.remove(session_id);
1441 
1442  // create table as replicated?
1443  TCreateParams create_params;
1444  if (deferred_copy_from_state->partitions == "REPLICATED") {
1445  create_params.is_replicated = true;
1446  }
1447 
1448  // now do (and time) the import
1449  total_time_ms = measure<>::execution([&]() {
1450  importGeoTableGlobFilterSort(session_id,
1451  deferred_copy_from_state->table,
1452  deferred_copy_from_state->file_name,
1453  deferred_copy_from_state->copy_params,
1454  TRowDescriptor(),
1455  create_params);
1456  });
1457  }
1458  return total_time_ms;
1459 }
1460 
// Thrift endpoint: executes a read-only query and returns the result as an Arrow
// data frame described by `_return` (handles, sizes, buffer and timings).
//
// GPU results require a GPU-mode session, GPUs present on the server and a valid
// `device_id`. Non-read queries and Calcite EXPLAIN are rejected.
//
// NOTE(review): this listing omits several lines (the THROW_DB_EXCEPTION opening
// for the device_id check, the declaration of `locks`, and the tail of the
// `executor_results_device_type` initializer).
1461 void DBHandler::sql_execute_df(TDataFrame& _return,
1462  const TSessionId& session_id_or_json,
1463  const std::string& query_str,
1464  const TDeviceType::type results_device_type,
1465  const int32_t device_id,
1466  const int32_t first_n,
1467  const TArrowTransport::type transport_method) {
1468  heavyai::RequestInfo const request_info(session_id_or_json);
1469  SET_REQUEST_ID(request_info.requestId());
1470  auto session_ptr = get_session_ptr(request_info.sessionId());
1471  CHECK(session_ptr);
1472  auto query_state = create_query_state(session_ptr, query_str);
1473  auto stdlog = STDLOG(session_ptr, query_state);
1474 
1475  const auto executor_device_type = session_ptr->get_executor_device_type();
1476 
// Validate that GPU results can actually be produced for this request.
1477  if (results_device_type == TDeviceType::GPU) {
1478  if (executor_device_type != ExecutorDeviceType::GPU) {
1479  THROW_DB_EXCEPTION(std::string("GPU mode is not allowed in this session"));
1480  }
1481  if (!data_mgr_->gpusPresent()) {
1482  THROW_DB_EXCEPTION(std::string("No GPU is available in this server"));
1483  }
1484  if (device_id < 0 || device_id >= data_mgr_->getCudaMgr()->getDeviceCount()) {
1486  std::string("Invalid device_id or unavailable GPU with this ID"));
1487  }
1488  }
// Only plain read queries are supported by this endpoint.
1489  ParserWrapper pw{query_str};
1490  if (pw.getQueryType() != ParserWrapper::QueryType::Read) {
1491  THROW_DB_EXCEPTION(std::string(
1492  "Only read queries supported for the Arrow sql_execute_df endpoint."));
1493  }
1494  if (ExplainInfo(query_str).isCalciteExplain()) {
1495  THROW_DB_EXCEPTION(std::string(
1496  "Explain is currently unsupported by the Arrow sql_execute_df endpoint."));
1497  }
1498 
1499  ExecutionResult execution_result;
1501  sql_execute_impl(execution_result,
1502  query_state->createQueryStateProxy(),
1503  true, /* column_format - does this do anything? */
1504  executor_device_type,
1505  first_n,
1506  -1, /* at_most_n */
1507  true,
1508  locks);
1509 
1510  const auto result_set = execution_result.getRows();
1511  const auto executor_results_device_type = results_device_type == TDeviceType::CPU
// Reported execution time excludes the time the result set spent queued.
1514  _return.execution_time_ms =
1515  execution_result.getExecutionTime() - result_set->getQueueTime();
1516  const auto converter = std::make_unique<ArrowResultSetConverter>(
1517  result_set,
1518  data_mgr_,
1519  executor_results_device_type,
1520  device_id,
1521  getTargetNames(execution_result.getTargetsMeta()),
1522  first_n,
1523  ArrowTransport(transport_method));
1524  ArrowResult arrow_result;
// Time the Arrow conversion separately from query execution.
1525  _return.arrow_conversion_time_ms +=
1526  measure<>::execution([&] { arrow_result = converter->getArrowResult(); });
// Copy the handle/buffer byte ranges into the Thrift string fields.
1527  _return.sm_handle =
1528  std::string(arrow_result.sm_handle.begin(), arrow_result.sm_handle.end());
1529  _return.sm_size = arrow_result.sm_size;
1530  _return.df_handle =
1531  std::string(arrow_result.df_handle.begin(), arrow_result.df_handle.end());
1532  _return.df_buffer =
1533  std::string(arrow_result.df_buffer.begin(), arrow_result.df_buffer.end());
// For GPU results, remember the serialized CUDA handle under the data frame
// handle; deallocate_df() looks it up and erases it later.
1534  if (executor_results_device_type == ExecutorDeviceType::GPU) {
1535  std::lock_guard<std::mutex> map_lock(handle_to_dev_ptr_mutex_);
1536  CHECK(!ipc_handle_to_dev_ptr_.count(_return.df_handle));
1537  ipc_handle_to_dev_ptr_.insert(
1538  std::make_pair(_return.df_handle, arrow_result.serialized_cuda_handle));
1539  }
1540  _return.df_size = arrow_result.df_size;
1541 }
1542 
1543 void DBHandler::sql_execute_gdf(TDataFrame& _return,
1544  const TSessionId& session_id_or_json,
1545  const std::string& query_str,
1546  const int32_t device_id,
1547  const int32_t first_n) {
1548  heavyai::RequestInfo request_info(session_id_or_json);
1549  SET_REQUEST_ID(request_info.requestId());
1550  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
1551  request_info.setRequestId(logger::request_id());
1552  sql_execute_df(_return,
1553  request_info.json(),
1554  query_str,
1555  TDeviceType::GPU,
1556  device_id,
1557  first_n,
1558  TArrowTransport::SHARED_MEMORY);
1559 }
1560 
// Thrift endpoint: releases the resources behind a data frame previously returned
// to the client.
//
// For GPU data frames the serialized CUDA handle recorded under `df.df_handle` is
// looked up and erased; a TDBException is thrown when the handle is not recorded
// exactly once.
//
// NOTE(review): the lines that open the ArrowResult initializer and the
// deallocation call are missing from this listing.
1561 // For now we have only one user of a data frame in all cases.
1562 void DBHandler::deallocate_df(const TSessionId& session_id_or_json,
1563  const TDataFrame& df,
1564  const TDeviceType::type device_type,
1565  const int32_t device_id) {
1566  heavyai::RequestInfo const request_info(session_id_or_json);
1567  SET_REQUEST_ID(request_info.requestId());
1568  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
1569  std::string serialized_cuda_handle = "";
1570  if (device_type == TDeviceType::GPU) {
// The handle map is guarded by handle_to_dev_ptr_mutex_.
1571  std::lock_guard<std::mutex> map_lock(handle_to_dev_ptr_mutex_);
1572  if (ipc_handle_to_dev_ptr_.count(df.df_handle) != size_t(1)) {
1573  TDBException ex;
1574  ex.error_msg = std::string(
1575  "Current data frame handle is not bookkept or been inserted "
1576  "twice");
1577  LOG(ERROR) << ex.error_msg;
1578  throw ex;
1579  }
// Take the recorded handle and forget it so it cannot be released twice.
1580  serialized_cuda_handle = ipc_handle_to_dev_ptr_[df.df_handle];
1581  ipc_handle_to_dev_ptr_.erase(df.df_handle);
1582  }
// Convert the Thrift string fields back into byte vectors.
1583  std::vector<char> sm_handle(df.sm_handle.begin(), df.sm_handle.end());
1584  std::vector<char> df_handle(df.df_handle.begin(), df.df_handle.end());
1586  sm_handle, df.sm_size, df_handle, df.df_size, serialized_cuda_handle};
1588  result,
1589  device_type == TDeviceType::CPU ? ExecutorDeviceType::CPU : ExecutorDeviceType::GPU,
1590  device_id,
1591  data_mgr_);
1592 }
1593 
// Thrift endpoint: validates a SELECT statement without returning its rows and
// stores the resulting row descriptor in `_return`.
//
// EXPLAIN, DDL and update-DML statements are rejected. The query is parsed to
// relational algebra (with privilege checking) and handed to validateRelAlg().
// Any std::exception is rethrown via THROW_DB_EXCEPTION with its message.
//
// NOTE(review): the declaration of `locks` and one argument line of the
// parse_to_ra() call are missing from this listing.
1594 void DBHandler::sql_validate(TRowDescriptor& _return,
1595  const TSessionId& session_id_or_json,
1596  const std::string& query_str) {
1597  heavyai::RequestInfo const request_info(session_id_or_json);
1598  SET_REQUEST_ID(request_info.requestId());
1599  try {
1600  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
1601  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
1602  auto query_state = create_query_state(stdlog.getSessionInfo(), query_str);
1603  stdlog.setQueryState(query_state);
1604 
// Only plain SELECT statements can be validated.
1605  ParserWrapper pw{query_str};
1606  if (ExplainInfo(query_str).isExplain() || pw.is_ddl || pw.is_update_dml) {
1607  throw std::runtime_error("Can only validate SELECT statements.");
1608  }
1609 
// Held for the remainder of the try block.
1610  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
1611 
1612  TPlanResult parse_result;
1614  std::tie(parse_result, locks) = parse_to_ra(query_state->createQueryStateProxy(),
1615  query_state->getQueryStr(),
1616  {},
1617  true,
1619  /*check_privileges=*/true);
1620  const auto query_ra = parse_result.plan_result;
1621  _return = validateRelAlg(query_ra, query_state->createQueryStateProxy());
1622  } catch (const std::exception& e) {
1623  THROW_DB_EXCEPTION(std::string(e.what()));
1624  }
1625 }
1626 
// File-local helpers for SQL auto-completion.
//
// NOTE(review): the struct header and the function signature line are missing
// from this listing. Judging from the call in get_completion_hints(), the
// function is extract_projection_tokens_for_completion(sql) and it returns the
// two-set aggregate declared below; confirm against the full file.
1627 namespace {
1628 
// Upper-cased unqualified identifiers, and upper-cased table qualifiers taken
// from identifiers of the form "qualifier.name".
1630  std::unordered_set<std::string> uc_column_names;
1631  std::unordered_set<std::string> uc_column_table_qualifiers;
1632 };
1633 
1634 // Extract what looks like a (qualified) identifier from the partial query.
1635 // The results will be used to rank the auto-completion results: tables which
1636 // contain at least one of the identifiers first.
1638  const std::string& sql) {
// A token is a run of alphanumerics, underscores and dots.
1639  boost::regex id_regex{R"(([[:alnum:]]|_|\.)+)",
1640  boost::regex::extended | boost::regex::icase};
1641  boost::sregex_token_iterator tok_it(sql.begin(), sql.end(), id_regex, 0);
1642  boost::sregex_token_iterator end;
1643  std::unordered_set<std::string> uc_column_names;
1644  std::unordered_set<std::string> uc_column_table_qualifiers;
1645  for (; tok_it != end; ++tok_it) {
1646  std::string column_name = *tok_it;
1647  std::vector<std::string> column_tokens;
1648  boost::split(column_tokens, column_name, boost::is_any_of("."));
// Exactly two dot-separated parts: record the first part as a table qualifier.
// Any other token (no dot, or more than one) is recorded whole as a column name.
1649  if (column_tokens.size() == 2) {
1650  // If the column name is qualified, take user's word.
1651  uc_column_table_qualifiers.insert(to_upper(column_tokens.front()));
1652  } else {
1653  uc_column_names.insert(to_upper(column_name));
1654  }
1655  }
1656  return {uc_column_names, uc_column_table_qualifiers};
1657 }
1658 
1659 } // namespace
1660 
1661 void DBHandler::get_completion_hints(std::vector<TCompletionHint>& hints,
1662  const TSessionId& session_id_or_json,
1663  const std::string& sql,
1664  const int cursor) {
1665  heavyai::RequestInfo const request_info(session_id_or_json);
1666  SET_REQUEST_ID(request_info.requestId());
1667  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
1668  std::vector<std::string> visible_tables; // Tables allowed for the given session.
1669  get_completion_hints_unsorted(hints, visible_tables, stdlog, sql, cursor);
1670  const auto proj_tokens = extract_projection_tokens_for_completion(sql);
1671  auto compatible_table_names = get_uc_compatible_table_names_by_column(
1672  proj_tokens.uc_column_names, visible_tables, stdlog);
1673  // Add the table qualifiers explicitly specified by the user.
1674  compatible_table_names.insert(proj_tokens.uc_column_table_qualifiers.begin(),
1675  proj_tokens.uc_column_table_qualifiers.end());
1676  // Sort the hints by category, from COLUMN (most specific) to KEYWORD.
1677  std::sort(
1678  hints.begin(),
1679  hints.end(),
1680  [&compatible_table_names](const TCompletionHint& lhs, const TCompletionHint& rhs) {
1681  if (lhs.type == TCompletionHintType::TABLE &&
1682  rhs.type == TCompletionHintType::TABLE) {
1683  // Between two tables, one which is compatible with the specified
1684  // projections and one which isn't, pick the one which is compatible.
1685  if (compatible_table_names.find(to_upper(lhs.hints.back())) !=
1686  compatible_table_names.end() &&
1687  compatible_table_names.find(to_upper(rhs.hints.back())) ==
1688  compatible_table_names.end()) {
1689  return true;
1690  }
1691  }
1692  return lhs.type < rhs.type;
1693  });
1694 }
1695 
// Collects completion hints for `sql` at `cursor` without ranking them.
//
// `visible_tables` is filled with the tables and views visible to the session.
// Hints come from Calcite; when no FROM keyword precedes the cursor, token-based
// completions are added as well. Failures while listing tables or querying
// Calcite are logged and rethrown as TDBException.
//
// NOTE(review): the line that opens the statement consuming
// calcite_->getCompletionHints(...) is missing from this listing.
1696 void DBHandler::get_completion_hints_unsorted(std::vector<TCompletionHint>& hints,
1697  std::vector<std::string>& visible_tables,
1698  query_state::StdLog& stdlog,
1699  const std::string& sql,
1700  const int cursor) {
1701  const auto& session_info = *stdlog.getConstSessionInfo();
1702  try {
1703  get_tables_impl(visible_tables, session_info, GET_PHYSICAL_TABLES_AND_VIEWS);
1704 
1705  // Filter out keywords suggested by Calcite which we don't support.
1707  calcite_->getCompletionHints(session_info, visible_tables, sql, cursor));
1708  } catch (const std::exception& e) {
1709  TDBException ex;
1710  ex.error_msg = std::string(e.what());
1711  LOG(ERROR) << ex.error_msg;
1712  throw ex;
1713  }
1714  boost::regex from_expr{R"(\s+from\s+)", boost::regex::extended | boost::regex::icase};
// A negative cursor means "end of the text"; otherwise clamp to the text length.
1715  const size_t length_to_cursor =
1716  cursor < 0 ? sql.size() : std::min(sql.size(), static_cast<size_t>(cursor));
1717  // Trust hints from Calcite after the FROM keyword.
1718  if (boost::regex_search(sql.cbegin(), sql.cbegin() + length_to_cursor, from_expr)) {
1719  return;
1720  }
1721  // Before FROM, the query is too incomplete for context-sensitive completions.
1722  get_token_based_completions(hints, stdlog, visible_tables, sql, cursor);
1723 }
1724 
1725 void DBHandler::get_token_based_completions(std::vector<TCompletionHint>& hints,
1726  query_state::StdLog& stdlog,
1727  std::vector<std::string>& visible_tables,
1728  const std::string& sql,
1729  const int cursor) {
1730  const auto last_word =
1731  find_last_word_from_cursor(sql, cursor < 0 ? sql.size() : cursor);
1732  boost::regex select_expr{R"(\s*select\s+)",
1733  boost::regex::extended | boost::regex::icase};
1734  const size_t length_to_cursor =
1735  cursor < 0 ? sql.size() : std::min(sql.size(), static_cast<size_t>(cursor));
1736  // After SELECT but before FROM, look for all columns in all tables which match the
1737  // prefix.
1738  if (boost::regex_search(sql.cbegin(), sql.cbegin() + length_to_cursor, select_expr)) {
1739  const auto column_names_by_table = fill_column_names_by_table(visible_tables, stdlog);
1740  // Trust the fully qualified columns the most.
1741  if (get_qualified_column_hints(hints, last_word, column_names_by_table)) {
1742  return;
1743  }
1744  // Not much information to use, just retrieve column names which match the prefix.
1745  if (should_suggest_column_hints(sql)) {
1746  get_column_hints(hints, last_word, column_names_by_table);
1747  return;
1748  }
1749  const std::string kFromKeyword{"FROM"};
1750  if (boost::istarts_with(kFromKeyword, last_word)) {
1751  TCompletionHint keyword_hint;
1752  keyword_hint.type = TCompletionHintType::KEYWORD;
1753  keyword_hint.replaced = last_word;
1754  keyword_hint.hints.emplace_back(kFromKeyword);
1755  hints.push_back(keyword_hint);
1756  }
1757  } else {
1758  const std::string kSelectKeyword{"SELECT"};
1759  if (boost::istarts_with(kSelectKeyword, last_word)) {
1760  TCompletionHint keyword_hint;
1761  keyword_hint.type = TCompletionHintType::KEYWORD;
1762  keyword_hint.replaced = last_word;
1763  keyword_hint.hints.emplace_back(kSelectKeyword);
1764  hints.push_back(keyword_hint);
1765  }
1766  }
1767 }
1768 
1769 std::unordered_map<std::string, std::unordered_set<std::string>>
1770 DBHandler::fill_column_names_by_table(std::vector<std::string>& table_names,
1771  query_state::StdLog& stdlog) {
1772  std::unordered_map<std::string, std::unordered_set<std::string>> column_names_by_table;
1773  for (auto it = table_names.begin(); it != table_names.end();) {
1774  TTableDetails table_details;
1775  try {
1776  get_table_details_impl(table_details, stdlog, *it, false, false);
1777  } catch (const TDBException& e) {
1778  // Remove the corrupted Table/View name from the list for further processing.
1779  it = table_names.erase(it);
1780  continue;
1781  }
1782  for (const auto& column_type : table_details.row_desc) {
1783  column_names_by_table[*it].emplace(column_type.col_name);
1784  }
1785  ++it;
1786  }
1787  return column_names_by_table;
1788 }
1789 
1793 }
1794 
1796  const std::unordered_set<std::string>& uc_column_names,
1797  std::vector<std::string>& table_names,
1798  query_state::StdLog& stdlog) {
1799  std::unordered_set<std::string> compatible_table_names_by_column;
1800  for (auto it = table_names.begin(); it != table_names.end();) {
1801  TTableDetails table_details;
1802  try {
1803  get_table_details_impl(table_details, stdlog, *it, false, false);
1804  } catch (const TDBException& e) {
1805  // Remove the corrupted Table/View name from the list for further processing.
1806  it = table_names.erase(it);
1807  continue;
1808  }
1809  for (const auto& column_type : table_details.row_desc) {
1810  if (uc_column_names.find(to_upper(column_type.col_name)) != uc_column_names.end()) {
1811  compatible_table_names_by_column.emplace(to_upper(*it));
1812  break;
1813  }
1814  }
1815  ++it;
1816  }
1817  return compatible_table_names_by_column;
1818 }
1819 
// Submits query_task to dispatch_queue_, moving the shared pointer into submit()
// and forwarding the is_update_delete flag unchanged.
// NOTE(review): listing line 1822 is absent from this extract, so one statement
// between the signature and the submit() call is not visible here.
1820 void DBHandler::dispatch_query_task(std::shared_ptr<QueryDispatchQueue::Task> query_task,
1821  const bool is_update_delete) {
1823  dispatch_queue_->submit(std::move(query_task), is_update_delete);
1824 }
1825 
// Validates a relational-algebra plan and returns the row descriptor of its result.
//
// The plan is run through execute_rel_alg() with just_validate=true on the query
// dispatch queue, the execution result is converted into a TQueryResult, and the
// resulting column descriptors are copied into the returned TRowDescriptor. For
// dictionary-encoded columns with a positive comp_param, comp_param is replaced
// by the dictionary's dictNBits.
//
// NOTE(review): listing lines 1842 (one execute_rel_alg argument) and 1868 (the
// start of the statement that defines `cat`) are absent from this extract.
1826 TRowDescriptor DBHandler::validateRelAlg(const std::string& query_ra,
1827  QueryStateProxy query_state_proxy) {
1828  TQueryResult query_result;
1829  ExecutionResult execution_result;
 // execution_result and query_ra are captured by reference; the task's future is
 // waited on below before either of them is used or goes out of scope.
1830  auto execute_rel_alg_task = std::make_shared<QueryDispatchQueue::Task>(
1831  [this,
1832  &execution_result,
1833  query_state_proxy,
1834  &query_ra,
1835  parent_thread_local_ids =
1836  logger::thread_local_ids()](const size_t executor_index) {
1837  logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
1838  execute_rel_alg(execution_result,
1839  query_state_proxy,
1840  query_ra,
1841  true,
1843  -1,
1844  -1,
1845  /*just_validate=*/true,
1846  /*find_filter_push_down_candidates=*/false,
1847  ExplainInfo(),
1848  executor_index);
1849  });
1850  dispatch_query_task(execute_rel_alg_task, /*is_update_delete=*/false);
1851  auto result_future = execute_rel_alg_task->get_future();
1852  result_future.get();
1853  DBHandler::convertData(query_result, execution_result, query_state_proxy, true, -1, -1);
1854 
1855  const auto& row_desc = query_result.row_set.row_desc;
1856  const auto& targets_meta = execution_result.getTargetsMeta();
 // Both sequences are indexed in lockstep below, so they must have equal length.
1857  CHECK_EQ(row_desc.size(), targets_meta.size());
1858 
1859  // TODO: Below fixup logic should no longer be needed after the comp_param refactor
1860  TRowDescriptor fixedup_row_desc;
1861  for (size_t i = 0; i < row_desc.size(); i++) {
1862  const auto& col_desc = row_desc[i];
1863  auto fixedup_col_desc = col_desc;
1864  if (col_desc.col_type.encoding == TEncodingType::DICT &&
1865  col_desc.col_type.comp_param > 0) {
1866  const auto& type_info = targets_meta[i].get_type_info();
1867  CHECK_EQ(type_info.get_compression(), kENCODING_DICT);
1869  type_info.getStringDictKey().db_id);
 // Here comp_param is used as the dictionary id for the lookup, then overwritten
 // with the dictionary's dictNBits in the copied descriptor.
1870  const auto dd = cat->getMetadataForDict(col_desc.col_type.comp_param, false);
1871  CHECK(dd);
1872  fixedup_col_desc.col_type.comp_param = dd->dictNBits;
1873  }
1874  fixedup_row_desc.push_back(fixedup_col_desc);
1875  }
1876  return fixedup_row_desc;
1877 }
1878 
1879 void DBHandler::get_roles(std::vector<std::string>& roles,
1880  const TSessionId& session_id_or_json) {
1881  heavyai::RequestInfo const request_info(session_id_or_json);
1882  SET_REQUEST_ID(request_info.requestId());
1883  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
1884  auto session_ptr = stdlog.getConstSessionInfo();
1885  if (!session_ptr->get_currentUser().isSuper) {
1886  // WARNING: This appears to not include roles a user is a member of,
1887  // if the role has no permissions granted to it.
1888  roles =
1889  SysCatalog::instance().getRoles(session_ptr->get_currentUser().userName,
1890  session_ptr->getCatalog().getCurrentDB().dbId);
1891  } else {
1892  roles = SysCatalog::instance().getRoles(
1893  false, true, session_ptr->get_currentUser().userName);
1894  }
1895 }
1896 
// Returns the result of SysCatalog::isRoleGrantedToGrantee(granteeName, roleName,
// false) after checking that the session's user is allowed to ask.
//
// Superusers may query any grantee. For other users, the request is rejected
// with a DB exception when granteeName is a different user. A second check,
// isRoleGrantedToGrantee(current_user.userName, granteeName, true), guards the
// remaining case.
// NOTE(review): listing line 1911 is absent from this extract; it presumably
// opens the THROW_DB_EXCEPTION call whose message follows - confirm against
// the full source.
1897 bool DBHandler::has_role(const TSessionId& session_id_or_json,
1898  const std::string& granteeName,
1899  const std::string& roleName) {
1900  heavyai::RequestInfo const request_info(session_id_or_json);
1901  SET_REQUEST_ID(request_info.requestId());
1902  const auto session_ptr = get_session_ptr(request_info.sessionId());
1903  const auto stdlog = STDLOG(session_ptr);
1904  const auto current_user = session_ptr->get_currentUser();
1905  if (!current_user.isSuper) {
 // getUserGrantee() yields a non-null pointer only when granteeName names a user.
1906  if (const auto* user = SysCatalog::instance().getUserGrantee(granteeName);
1907  user && current_user.userName != granteeName) {
1908  THROW_DB_EXCEPTION("Only super users can check other user's roles.");
1909  } else if (!SysCatalog::instance().isRoleGrantedToGrantee(
1910  current_user.userName, granteeName, true)) {
1912  "Only super users can check roles assignment that have not been directly "
1913  "granted to a user.");
1914  }
1915  }
1916  return SysCatalog::instance().isRoleGrantedToGrantee(granteeName, roleName, false);
1917 }
1918 
1919 static TDBObject serialize_db_object(const std::string& roleName,
1920  const DBObject& inObject) {
1921  TDBObject outObject;
1922  outObject.objectName = inObject.getName();
1923  outObject.grantee = roleName;
1924  outObject.objectId = inObject.getObjectKey().objectId;
1925  const auto ap = inObject.getPrivileges();
1926  switch (inObject.getObjectKey().permissionType) {
1927  case DatabaseDBObjectType:
1928  outObject.privilegeObjectType = TDBObjectType::DatabaseDBObjectType;
1929  outObject.privs.push_back(ap.hasPermission(DatabasePrivileges::CREATE_DATABASE));
1930  outObject.privs.push_back(ap.hasPermission(DatabasePrivileges::DROP_DATABASE));
1931  outObject.privs.push_back(ap.hasPermission(DatabasePrivileges::VIEW_SQL_EDITOR));
1932  outObject.privs.push_back(ap.hasPermission(DatabasePrivileges::ACCESS));
1933 
1934  break;
1935  case TableDBObjectType:
1936  outObject.privilegeObjectType = TDBObjectType::TableDBObjectType;
1937  outObject.privs.push_back(ap.hasPermission(TablePrivileges::CREATE_TABLE));
1938  outObject.privs.push_back(ap.hasPermission(TablePrivileges::DROP_TABLE));
1939  outObject.privs.push_back(ap.hasPermission(TablePrivileges::SELECT_FROM_TABLE));
1940  outObject.privs.push_back(ap.hasPermission(TablePrivileges::INSERT_INTO_TABLE));
1941  outObject.privs.push_back(ap.hasPermission(TablePrivileges::UPDATE_IN_TABLE));
1942  outObject.privs.push_back(ap.hasPermission(TablePrivileges::DELETE_FROM_TABLE));
1943  outObject.privs.push_back(ap.hasPermission(TablePrivileges::TRUNCATE_TABLE));
1944  outObject.privs.push_back(ap.hasPermission(TablePrivileges::ALTER_TABLE));
1945 
1946  break;
1947  case DashboardDBObjectType:
1948  outObject.privilegeObjectType = TDBObjectType::DashboardDBObjectType;
1949  outObject.privs.push_back(ap.hasPermission(DashboardPrivileges::CREATE_DASHBOARD));
1950  outObject.privs.push_back(ap.hasPermission(DashboardPrivileges::DELETE_DASHBOARD));
1951  outObject.privs.push_back(ap.hasPermission(DashboardPrivileges::VIEW_DASHBOARD));
1952  outObject.privs.push_back(ap.hasPermission(DashboardPrivileges::EDIT_DASHBOARD));
1953 
1954  break;
1955  case ViewDBObjectType:
1956  outObject.privilegeObjectType = TDBObjectType::ViewDBObjectType;
1957  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::CREATE_VIEW));
1958  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::DROP_VIEW));
1959  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::SELECT_FROM_VIEW));
1960  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::INSERT_INTO_VIEW));
1961  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::UPDATE_IN_VIEW));
1962  outObject.privs.push_back(ap.hasPermission(ViewPrivileges::DELETE_FROM_VIEW));
1963 
1964  break;
1965  case ServerDBObjectType:
1966  outObject.privilegeObjectType = TDBObjectType::ServerDBObjectType;
1967  outObject.privs.push_back(ap.hasPermission(ServerPrivileges::CREATE_SERVER));
1968  outObject.privs.push_back(ap.hasPermission(ServerPrivileges::DROP_SERVER));
1969  outObject.privs.push_back(ap.hasPermission(ServerPrivileges::ALTER_SERVER));
1970  outObject.privs.push_back(ap.hasPermission(ServerPrivileges::SERVER_USAGE));
1971 
1972  break;
1973  default:
1974  CHECK(false);
1975  }
1976  const int type_val = static_cast<int>(inObject.getType());
1977  CHECK(type_val >= 0 && type_val < 6);
1978  outObject.objectType = static_cast<TDBObjectType::type>(type_val);
1979  return outObject;
1980 }
1981 
1983  const TDBObjectPermissions& permissions) {
1984  if (!permissions.__isset.database_permissions_) {
1985  THROW_DB_EXCEPTION("Database permissions not set for check.")
1986  }
1987  auto perms = permissions.database_permissions_;
1988  if ((perms.create_ && !privs.hasPermission(DatabasePrivileges::CREATE_DATABASE)) ||
1989  (perms.delete_ && !privs.hasPermission(DatabasePrivileges::DROP_DATABASE)) ||
1990  (perms.view_sql_editor_ &&
1992  (perms.access_ && !privs.hasPermission(DatabasePrivileges::ACCESS))) {
1993  return false;
1994  } else {
1995  return true;
1996  }
1997 }
1998 
2000  const TDBObjectPermissions& permissions) {
2001  if (!permissions.__isset.table_permissions_) {
2002  THROW_DB_EXCEPTION("Table permissions not set for check.")
2003  }
2004  auto perms = permissions.table_permissions_;
2005  if ((perms.create_ && !privs.hasPermission(TablePrivileges::CREATE_TABLE)) ||
2006  (perms.drop_ && !privs.hasPermission(TablePrivileges::DROP_TABLE)) ||
2007  (perms.select_ && !privs.hasPermission(TablePrivileges::SELECT_FROM_TABLE)) ||
2008  (perms.insert_ && !privs.hasPermission(TablePrivileges::INSERT_INTO_TABLE)) ||
2009  (perms.update_ && !privs.hasPermission(TablePrivileges::UPDATE_IN_TABLE)) ||
2010  (perms.delete_ && !privs.hasPermission(TablePrivileges::DELETE_FROM_TABLE)) ||
2011  (perms.truncate_ && !privs.hasPermission(TablePrivileges::TRUNCATE_TABLE)) ||
2012  (perms.alter_ && !privs.hasPermission(TablePrivileges::ALTER_TABLE))) {
2013  return false;
2014  } else {
2015  return true;
2016  }
2017 }
2018 
2020  const TDBObjectPermissions& permissions) {
2021  if (!permissions.__isset.dashboard_permissions_) {
2022  THROW_DB_EXCEPTION("Dashboard permissions not set for check.")
2023  }
2024  auto perms = permissions.dashboard_permissions_;
2025  if ((perms.create_ && !privs.hasPermission(DashboardPrivileges::CREATE_DASHBOARD)) ||
2026  (perms.delete_ && !privs.hasPermission(DashboardPrivileges::DELETE_DASHBOARD)) ||
2027  (perms.view_ && !privs.hasPermission(DashboardPrivileges::VIEW_DASHBOARD)) ||
2028  (perms.edit_ && !privs.hasPermission(DashboardPrivileges::EDIT_DASHBOARD))) {
2029  return false;
2030  } else {
2031  return true;
2032  }
2033 }
2034 
2036  const TDBObjectPermissions& permissions) {
2037  if (!permissions.__isset.view_permissions_) {
2038  THROW_DB_EXCEPTION("View permissions not set for check.")
2039  }
2040  auto perms = permissions.view_permissions_;
2041  if ((perms.create_ && !privs.hasPermission(ViewPrivileges::CREATE_VIEW)) ||
2042  (perms.drop_ && !privs.hasPermission(ViewPrivileges::DROP_VIEW)) ||
2043  (perms.select_ && !privs.hasPermission(ViewPrivileges::SELECT_FROM_VIEW)) ||
2044  (perms.insert_ && !privs.hasPermission(ViewPrivileges::INSERT_INTO_VIEW)) ||
2045  (perms.update_ && !privs.hasPermission(ViewPrivileges::UPDATE_IN_VIEW)) ||
2046  (perms.delete_ && !privs.hasPermission(ViewPrivileges::DELETE_FROM_VIEW))) {
2047  return false;
2048  } else {
2049  return true;
2050  }
2051 }
2052 
2054  const TDBObjectPermissions& permissions) {
2055  CHECK(permissions.__isset.server_permissions_);
2056  auto perms = permissions.server_permissions_;
2057  if ((perms.create_ && !privs.hasPermission(ServerPrivileges::CREATE_SERVER)) ||
2058  (perms.drop_ && !privs.hasPermission(ServerPrivileges::DROP_SERVER)) ||
2059  (perms.alter_ && !privs.hasPermission(ServerPrivileges::ALTER_SERVER)) ||
2060  (perms.usage_ && !privs.hasPermission(ServerPrivileges::SERVER_USAGE))) {
2061  return false;
2062  } else {
2063  return true;
2064  }
2065 }
2066 
// Checks whether granteeName holds every privilege requested in `permissions` on
// the object objectName of kind objectType.
//
// Visible flow: a non-superuser caller for whom isRoleGrantedToGrantee(
// current_user.userName, granteeName, false) is false is rejected; a grantee that
// is a superuser user short-circuits to true; an unknown grantee raises a DB
// exception. Otherwise the object's key is loaded from the session catalog and,
// if the grantee has an entry for it, the checker registered in
// permissionFuncMap_ under func_name decides the result. No entry means false.
//
// NOTE(review): this extract drops several listing lines (2080, 2085, 2094,
// 2097-2098, 2101-2102, 2105-2106, 2109-2110, 2113-2114). Missing are the opening
// of the first THROW_DB_EXCEPTION, the declarations of `user_meta` and `type`, and
// the case labels/assignments of the switch - consult the full source.
2067 bool DBHandler::has_object_privilege(const TSessionId& session_id_or_json,
2068  const std::string& granteeName,
2069  const std::string& objectName,
2070  const TDBObjectType::type objectType,
2071  const TDBObjectPermissions& permissions) {
2072  heavyai::RequestInfo const request_info(session_id_or_json);
2073  SET_REQUEST_ID(request_info.requestId());
2074  auto session_ptr = get_session_ptr(request_info.sessionId());
2075  auto stdlog = STDLOG(session_ptr);
2076  auto const& cat = session_ptr->getCatalog();
2077  auto const& current_user = session_ptr->get_currentUser();
2078  if (!current_user.isSuper && !SysCatalog::instance().isRoleGrantedToGrantee(
2079  current_user.userName, granteeName, false)) {
2081  "Users except superusers can only check privileges for self or roles granted "
2082  "to "
2083  "them.")
2084  }
 // A grantee that is a superuser user is reported as having every privilege.
2086  if (SysCatalog::instance().getMetadataForUser(granteeName, user_meta) &&
2087  user_meta.isSuper) {
2088  return true;
2089  }
2090  Grantee* grnt = SysCatalog::instance().getGrantee(granteeName);
2091  if (!grnt) {
2092  THROW_DB_EXCEPTION("User or Role " + granteeName + " does not exist.")
2093  }
 // func_name is the key used below to look up the checker in permissionFuncMap_.
2095  std::string func_name;
2096  switch (objectType) {
2099  func_name = "database";
2100  break;
2103  func_name = "table";
2104  break;
2107  func_name = "dashboard";
2108  break;
2111  func_name = "view";
2112  break;
2115  func_name = "server";
2116  break;
2117  default:
2118  THROW_DB_EXCEPTION("Invalid object type (" + std::to_string(objectType) + ").");
2119  }
2120  DBObject req_object(objectName, type);
2121  req_object.loadKey(cat);
2122 
2123  auto grantee_object = grnt->findDbObject(req_object.getObjectKey(), false);
2124  if (grantee_object) {
2125  // if grantee has privs on the object
2126  return permissionFuncMap_[func_name](grantee_object->getPrivileges(), permissions);
2127  } else {
2128  // no privileges on that object
2129  return false;
2130  }
2131 }
2132 
2133 void DBHandler::get_db_objects_for_grantee(std::vector<TDBObject>& TDBObjectsForRole,
2134  const TSessionId& session_id_or_json,
2135  const std::string& roleName) {
2136  heavyai::RequestInfo const request_info(session_id_or_json);
2137  SET_REQUEST_ID(request_info.requestId());
2138  auto session_ptr = get_session_ptr(request_info.sessionId());
2139  auto stdlog = STDLOG(session_ptr);
2140  auto const& user = session_ptr->get_currentUser();
2141  if (!user.isSuper &&
2142  !SysCatalog::instance().isRoleGrantedToGrantee(user.userName, roleName, false)) {
2143  return;
2144  }
2145  auto* rl = SysCatalog::instance().getGrantee(roleName);
2146  if (rl) {
2147  auto dbId = session_ptr->getCatalog().getCurrentDB().dbId;
2148  for (auto& dbObject : *rl->getDbObjects(true)) {
2149  if (dbObject.first.dbId != dbId) {
2150  // TODO (max): it doesn't scale well in case we have many DBs (not a typical
2151  // usecase for now, though)
2152  continue;
2153  }
2154  TDBObject tdbObject = serialize_db_object(roleName, *dbObject.second);
2155  TDBObjectsForRole.push_back(tdbObject);
2156  }
2157  } else {
2158  THROW_DB_EXCEPTION("User or role " + roleName + " does not exist.");
2159  }
2160 }
2161 
// Collects, into TDBObjects, the privileges that grantees hold on the object
// objectName of Thrift type `type`.
//
// Visible flow: the Thrift type is mapped to a DBObjectType; the object's key is
// resolved against the session catalog (dashboards by numeric id, tables/views by
// looking up the table metadata); a superuser caller first gets a synthetic entry
// named "super"; then, for every grantee returned by SysCatalog::getRoles(), both
// the object-level entry and the database-level entry of the same object type
// are appended when present.
//
// NOTE(review): this extract drops listing lines 2173, 2176, 2179-2180, 2182,
// 2185 (the case labels of the switch and the dashboard assignment) and 2223
// (the middle constructor argument of `dbObj`) - consult the full source.
2162 void DBHandler::get_db_object_privs(std::vector<TDBObject>& TDBObjects,
2163  const TSessionId& session_id_or_json,
2164  const std::string& objectName,
2165  const TDBObjectType::type type) {
2166  heavyai::RequestInfo const request_info(session_id_or_json);
2167  SET_REQUEST_ID(request_info.requestId());
2168  auto session_ptr = get_session_ptr(request_info.sessionId());
2169  auto stdlog = STDLOG(session_ptr);
2170  const auto& cat = session_ptr->getCatalog();
2171  DBObjectType object_type;
2172  switch (type) {
2174  object_type = DBObjectType::DatabaseDBObjectType;
2175  break;
2177  object_type = DBObjectType::TableDBObjectType;
2178  break;
2181  break;
2183  object_type = DBObjectType::ViewDBObjectType;
2184  break;
2186  object_type = DBObjectType::ServerDBObjectType;
2187  break;
2188  default:
2189  THROW_DB_EXCEPTION("Failed to get object privileges for " + objectName +
2190  ": unknown object type (" + std::to_string(type) + ").");
2191  }
2192  DBObject object_to_find(objectName, object_type);
2193 
2194  // TODO(adb): Use DatabaseLock to protect method
 // Any std::exception raised while resolving the key (including std::stoi on a
 // non-numeric dashboard id) is reported as "does not exist".
2195  try {
2196  if (object_type == DashboardDBObjectType) {
2197  if (objectName == "") {
2198  object_to_find = DBObject(-1, object_type);
2199  } else {
2200  object_to_find = DBObject(std::stoi(objectName), object_type);
2201  }
2202  } else if ((object_type == TableDBObjectType || object_type == ViewDBObjectType) &&
2203  !objectName.empty()) {
2204  // special handling for view / table
 // The catalog's isView flag, not the caller-supplied type, decides between
 // table and view here.
2205  auto td = cat.getMetadataForTable(objectName, false);
2206  if (td) {
2207  object_type = td->isView ? ViewDBObjectType : TableDBObjectType;
2208  object_to_find = DBObject(objectName, object_type);
2209  }
2210  }
2211  object_to_find.loadKey(cat);
2212  } catch (const std::exception&) {
2213  THROW_DB_EXCEPTION("Object with name " + objectName + " does not exist.");
2214  }
2215 
2216  // object type on database level
2217  DBObject object_to_find_dblevel("", object_type);
2218  object_to_find_dblevel.loadKey(cat);
2219  // if user is superuser respond with a full priv
2220  if (session_ptr->get_currentUser().isSuper) {
2221  // using ALL_TABLE here to set max permissions
2222  DBObject dbObj{object_to_find.getObjectKey(),
2224  session_ptr->get_currentUser().userId};
2225  dbObj.setName("super");
2226  TDBObjects.push_back(
2227  serialize_db_object(session_ptr->get_currentUser().userName, dbObj));
2228  };
2229 
2230  std::vector<std::string> grantees =
2231  SysCatalog::instance().getRoles(true,
2232  session_ptr->get_currentUser().isSuper,
2233  session_ptr->get_currentUser().userName);
2234  for (const auto& grantee : grantees) {
2235  DBObject* object_found;
2236  auto* gr = SysCatalog::instance().getGrantee(grantee);
 // object_found is assigned inside each condition and only read when the
 // corresponding lookup returned a non-null pointer.
2237  if (gr && (object_found = gr->findDbObject(object_to_find.getObjectKey(), true))) {
2238  TDBObjects.push_back(serialize_db_object(grantee, *object_found));
2239  }
2240  // check object permissions on Database level
2241  if (gr &&
2242  (object_found = gr->findDbObject(object_to_find_dblevel.getObjectKey(), true))) {
2243  TDBObjects.push_back(serialize_db_object(grantee, *object_found));
2244  }
2245  }
2246 }
2247 
2249  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr,
2250  std::vector<std::string>& roles,
2251  const std::string& granteeName,
2252  bool effective) {
2253  auto* grantee = SysCatalog::instance().getGrantee(granteeName);
2254  if (grantee) {
2255  if (session_ptr->get_currentUser().isSuper) {
2256  roles = grantee->getRoles(/*only_direct=*/!effective);
2257  } else if (grantee->isUser()) {
2258  if (session_ptr->get_currentUser().userName == granteeName) {
2259  roles = grantee->getRoles(/*only_direct=*/!effective);
2260  } else {
2262  "Only a superuser is authorized to request list of roles granted to another "
2263  "user.");
2264  }
2265  } else {
2266  CHECK(!grantee->isUser());
2267  // granteeName is actually a roleName here and we can check a role
2268  // only if it is granted to us
2269  if (SysCatalog::instance().isRoleGrantedToGrantee(
2270  session_ptr->get_currentUser().userName, granteeName, false)) {
2271  roles = grantee->getRoles(/*only_direct=*/!effective);
2272  } else {
2273  THROW_DB_EXCEPTION("A user can check only roles granted to him.");
2274  }
2275  }
2276  } else {
2277  THROW_DB_EXCEPTION("Grantee " + granteeName + " does not exist.");
2278  }
2279 }
2280 
2281 void DBHandler::get_all_roles_for_user(std::vector<std::string>& roles,
2282  const TSessionId& session_id_or_json,
2283  const std::string& granteeName) {
2284  // WARNING: This function only returns directly granted roles.
2285  // See also: get_all_effective_roles_for_user() for all of a user's roles.
2286  heavyai::RequestInfo const request_info(session_id_or_json);
2287  SET_REQUEST_ID(request_info.requestId());
2288  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2289  auto session_ptr = stdlog.getConstSessionInfo();
2290  getAllRolesForUserImpl(session_ptr, roles, granteeName, /*effective=*/false);
2291 }
2292 
2293 void DBHandler::get_all_effective_roles_for_user(std::vector<std::string>& roles,
2294  const TSessionId& session_id_or_json,
2295  const std::string& granteeName) {
2296  heavyai::RequestInfo const request_info(session_id_or_json);
2297  SET_REQUEST_ID(request_info.requestId());
2298  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2299  auto session_ptr = stdlog.getConstSessionInfo();
2300  getAllRolesForUserImpl(session_ptr, roles, granteeName, /*effective=*/true);
2301 }
2302 
2303 namespace {
2305  const std::map<std::string, std::vector<std::string>>& table_col_names) {
2306  std::ostringstream oss;
2307  for (const auto& [table_name, col_names] : table_col_names) {
2308  oss << ":" << table_name;
2309  for (const auto& col_name : col_names) {
2310  oss << "," << col_name;
2311  }
2312  }
2313  return oss.str();
2314 }
2315 } // namespace
2316 
2318  TPixelTableRowResult& _return,
2319  const TSessionId& session_id_or_json,
2320  const int64_t widget_id,
2321  const TPixel& pixel,
2322  const std::map<std::string, std::vector<std::string>>& table_col_names,
2323  const bool column_format,
2324  const int32_t pixel_radius,
2325  const std::string& nonce) {
2326  heavyai::RequestInfo const request_info(session_id_or_json);
2327  SET_REQUEST_ID(request_info.requestId());
2328  auto session_ptr = get_session_ptr(request_info.sessionId());
2329  auto stdlog = STDLOG(session_ptr,
2330  "widget_id",
2331  widget_id,
2332  "pixel.x",
2333  pixel.x,
2334  "pixel.y",
2335  pixel.y,
2336  "column_format",
2337  column_format,
2338  "pixel_radius",
2339  pixel_radius,
2340  "table_col_names",
2341  dump_table_col_names(table_col_names),
2342  "nonce",
2343  nonce);
2344  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2345  if (!render_handler_) {
2346  THROW_DB_EXCEPTION("Backend rendering is disabled.");
2347  }
2348 
2349  try {
2350  render_handler_->get_result_row_for_pixel(_return,
2351  session_ptr,
2352  widget_id,
2353  pixel,
2354  table_col_names,
2355  column_format,
2356  pixel_radius,
2357  nonce);
2358  } catch (std::exception& e) {
2359  THROW_DB_EXCEPTION(e.what());
2360  }
2361 }
2362 
2364  const ColumnDescriptor* cd) {
2365  TColumnType col_type;
2366  col_type.col_name = cd->columnName;
2367  col_type.src_name = cd->sourceName;
2368  col_type.col_id = cd->columnId;
2369  col_type.col_type.type = type_to_thrift(cd->columnType);
2370  col_type.col_type.encoding = encoding_to_thrift(cd->columnType);
2371  col_type.col_type.nullable = !cd->columnType.get_notnull();
2372  col_type.col_type.is_array = cd->columnType.get_type() == kARRAY;
2373  if (col_type.col_type.is_array || cd->columnType.get_type() == kDATE) {
2374  col_type.col_type.size = cd->columnType.get_size(); // only for arrays and dates
2375  }
2376  if (IS_GEO(cd->columnType.get_type())) {
2378  col_type, cd->columnType.get_subtype(), cd->columnType.get_output_srid());
2379  } else {
2380  col_type.col_type.precision = cd->columnType.get_precision();
2381  col_type.col_type.scale = cd->columnType.get_scale();
2382  }
2383  col_type.is_system = cd->isSystemCol;
2385  cat != nullptr) {
2386  // have to get the actual size of the encoding from the dictionary definition
2387  const int dict_id = cd->columnType.get_comp_param();
2388  if (!cat->getMetadataForDict(dict_id, false)) {
2389  col_type.col_type.comp_param = 0;
2390  return col_type;
2391  }
2392  auto dd = cat->getMetadataForDict(dict_id, false);
2393  if (!dd) {
2394  THROW_DB_EXCEPTION("Dictionary doesn't exist");
2395  }
2396  col_type.col_type.comp_param = dd->dictNBits;
2397  } else {
2398  col_type.col_type.comp_param =
2399  (cd->columnType.is_date_in_days() && cd->columnType.get_comp_param() == 0)
2400  ? 32
2401  : cd->columnType.get_comp_param();
2402  }
2403  col_type.is_reserved_keyword = ImportHelpers::is_reserved_name(col_type.col_name);
2404  if (cd->default_value.has_value()) {
2405  col_type.__set_default_value(cd->getDefaultValueLiteral());
2406  }
2407  return col_type;
2408 }
2409 
2410 void DBHandler::get_internal_table_details(TTableDetails& _return,
2411  const TSessionId& session_id_or_json,
2412  const std::string& table_name,
2413  const bool include_system_columns) {
2414  heavyai::RequestInfo const request_info(session_id_or_json);
2415  SET_REQUEST_ID(request_info.requestId());
2416  auto stdlog =
2417  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
2418  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2419  get_table_details_impl(_return, stdlog, table_name, include_system_columns, false);
2420 }
2421 
2423  TTableDetails& _return,
2424  const TSessionId& session_id_or_json,
2425  const std::string& table_name,
2426  const std::string& database_name) {
2427  heavyai::RequestInfo const request_info(session_id_or_json);
2428  SET_REQUEST_ID(request_info.requestId());
2429  auto stdlog =
2430  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
2431  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2432  get_table_details_impl(_return, stdlog, table_name, true, false, database_name);
2433 }
2434 
2435 void DBHandler::get_table_details(TTableDetails& _return,
2436  const TSessionId& session_id_or_json,
2437  const std::string& table_name) {
2438  heavyai::RequestInfo const request_info(session_id_or_json);
2439  SET_REQUEST_ID(request_info.requestId());
2440  auto stdlog =
2441  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
2442  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2443 
2444  auto execute_read_lock = legacylockmgr::getExecuteReadLock();
2445  get_table_details_impl(_return, stdlog, table_name, false, false);
2446 }
2447 
2448 void DBHandler::get_table_details_for_database(TTableDetails& _return,
2449  const TSessionId& session_id_or_json,
2450  const std::string& table_name,
2451  const std::string& database_name) {
2452  heavyai::RequestInfo const request_info(session_id_or_json);
2453  SET_REQUEST_ID(request_info.requestId());
2454  auto stdlog =
2455  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
2456  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2457 
2458  auto execute_read_lock = legacylockmgr::getExecuteReadLock();
2459  get_table_details_impl(_return, stdlog, table_name, false, false, database_name);
2460 }
2461 
2462 namespace {
// Builds the Thrift refresh description (TTableRefreshInfo) of a foreign table
// from the table's options.
//
// td must be a foreign table (checked). The update type maps to ALL or APPEND.
// The timing type maps to MANUAL (interval_count = -1) or SCHEDULED; for
// SCHEDULED the start date/time is re-formatted to ISO form and the interval
// option is split into a leading integer count and a one-character unit
// ('H' -> HOUR, 'D' -> DAY, 'S' -> NONE, case-insensitive). last_refresh_time and
// next_refresh_time are only filled when they differ from a value that is
// compared against on lines not visible here.
//
// NOTE(review): this extract drops listing lines 2469, 2474, 2481, 2487, 2490,
// 2516 and 2521 (option-key arguments and right-hand sides of comparisons) -
// consult the full source.
2463 TTableRefreshInfo get_refresh_info(const TableDescriptor* td) {
2464  CHECK(td->isForeignTable());
2465  auto foreign_table = dynamic_cast<const foreign_storage::ForeignTable*>(td);
2466  CHECK(foreign_table);
2467  TTableRefreshInfo refresh_info;
2468  const auto& update_type =
2470  CHECK(update_type.has_value());
2471  if (update_type.value() == foreign_storage::ForeignTable::ALL_REFRESH_UPDATE_TYPE) {
2472  refresh_info.update_type = TTableRefreshUpdateType::ALL;
2473  } else if (update_type.value() ==
2475  refresh_info.update_type = TTableRefreshUpdateType::APPEND;
2476  } else {
2477  UNREACHABLE() << "Unexpected refresh update type: " << update_type.value();
2478  }
2479 
2480  const auto& timing_type =
2482  CHECK(timing_type.has_value());
2483  if (timing_type.value() == foreign_storage::ForeignTable::MANUAL_REFRESH_TIMING_TYPE) {
2484  refresh_info.timing_type = TTableRefreshTimingType::MANUAL;
2485  refresh_info.interval_count = -1;
2486  } else if (timing_type.value() ==
2488  refresh_info.timing_type = TTableRefreshTimingType::SCHEDULED;
2489  const auto& start_date_time = foreign_table->getOption(
2491  CHECK(start_date_time.has_value());
2492  auto start_date_time_epoch = dateTimeParse<kTIMESTAMP>(start_date_time.value(), 0);
2493  refresh_info.start_date_time =
2494  shared::convert_temporal_to_iso_format({kTIMESTAMP}, start_date_time_epoch);
2495  const auto& interval =
2496  foreign_table->getOption(foreign_storage::ForeignTable::REFRESH_INTERVAL_KEY);
2497  CHECK(interval.has_value());
2498  const auto& interval_str = interval.value();
 // Everything but the last character is parsed as the count; the last character
 // is the unit.
2499  refresh_info.interval_count =
2500  std::stoi(interval_str.substr(0, interval_str.length() - 1));
2501  auto interval_type = std::toupper(interval_str[interval_str.length() - 1]);
2502  if (interval_type == 'H') {
2503  refresh_info.interval_type = TTableRefreshIntervalType::HOUR;
2504  } else if (interval_type == 'D') {
2505  refresh_info.interval_type = TTableRefreshIntervalType::DAY;
2506  } else if (interval_type == 'S') {
2507  // This use case is for development only.
2508  refresh_info.interval_type = TTableRefreshIntervalType::NONE;
2509  } else {
2510  UNREACHABLE() << "Unexpected interval type: " << interval_str;
2511  }
2512  } else {
2513  UNREACHABLE() << "Unexpected refresh timing type: " << timing_type.value();
2514  }
2515  if (foreign_table->last_refresh_time !=
2517  refresh_info.last_refresh_time = shared::convert_temporal_to_iso_format(
2518  {kTIMESTAMP}, foreign_table->last_refresh_time);
2519  }
2520  if (foreign_table->next_refresh_time !=
2522  refresh_info.next_refresh_time = shared::convert_temporal_to_iso_format(
2523  {kTIMESTAMP}, foreign_table->next_refresh_time);
2524  }
2525  return refresh_info;
2526 }
2527 } // namespace
2528 
// Shared implementation behind the get_*table_details* entry points: fills
// _return with the column descriptors and table-level properties of table_name.
//
// Parameters:
//  _return        output structure to populate.
//  stdlog         request log; supplies the session and, for views, receives the
//                 query state created for the view SQL.
//  table_name     table or view to describe.
//  get_system     forwarded to getAllColumnMetadataForTable().
//  get_physical   forwarded to getAllColumnMetadataForTable().
//  database_name  when empty the session's catalog is used, otherwise the catalog
//                 of the named database.
//
// For views the row descriptor comes from validating the view's SQL; for tables
// it comes from the catalog's column metadata, skipping the deleted column. Every
// std::runtime_error raised inside is re-thrown as a DB exception carrying the
// same message.
//
// NOTE(review): listing lines 2544 (the call that produces td_with_lock) and 2559
// (one parse_to_ra argument) are absent from this extract.
2529 void DBHandler::get_table_details_impl(TTableDetails& _return,
2530  query_state::StdLog& stdlog,
2531  const std::string& table_name,
2532  const bool get_system,
2533  const bool get_physical,
2534  const std::string& database_name) {
2535  try {
2536  auto session_info = stdlog.getSessionInfo();
2537  auto cat = (database_name.empty())
2538  ? &session_info->getCatalog()
2539  : SysCatalog::instance().getCatalog(database_name).get();
2540  if (!cat) {
2541  THROW_DB_EXCEPTION("Database " + database_name + " does not exist.");
2542  }
2543  const auto td_with_lock =
2545  *cat, table_name, false);
2546  const auto td = td_with_lock();
2547  CHECK(td);
2548 
2549  bool have_privileges_on_view_sources = true;
2550  if (td->isView) {
2551  auto query_state = create_query_state(session_info, td->viewSQL);
2552  stdlog.setQueryState(query_state);
2553  try {
2554  if (hasTableAccessPrivileges(td, *session_info)) {
2555  const auto [query_ra, locks] = parse_to_ra(query_state->createQueryStateProxy(),
2556  query_state->getQueryStr(),
2557  {},
2558  true,
2560  false);
 // Lacking privileges on the view's sources is not fatal: the row descriptor
 // is still produced below, only the view SQL is withheld from the result.
2561  try {
2562  calcite_->checkAccessedObjectsPrivileges(query_state->createQueryStateProxy(),
2563  query_ra);
2564  } catch (const std::runtime_error&) {
2565  have_privileges_on_view_sources = false;
2566  }
2567 
2568  _return.row_desc =
2569  validateRelAlg(query_ra.plan_result, query_state->createQueryStateProxy());
2570  } else {
2571  throw std::runtime_error(
2572  "Unable to access view " + table_name +
2573  ". The view may not exist, or the logged in user may not "
2574  "have permission to access the view.");
2575  }
2576  } catch (const std::exception& e) {
 // Any failure while handling the view, including the access error thrown just
 // above, is wrapped into this message.
2577  throw std::runtime_error("View '" + table_name +
2578  "' query has failed with an error: '" +
2579  std::string(e.what()) +
2580  "'.\nThe view must be dropped and re-created to "
2581  "resolve the error. \nQuery:\n" +
2582  query_state->getQueryStr());
2583  }
2584  } else {
2585  if (hasTableAccessPrivileges(td, *session_info)) {
2586  const auto col_descriptors = cat->getAllColumnMetadataForTable(
2587  td->tableId, get_system, true, get_physical);
2588  const auto deleted_cd = cat->getDeletedColumn(td);
2589  for (const auto cd : col_descriptors) {
2590  if (cd == deleted_cd) {
2591  continue;
2592  }
2593  _return.row_desc.push_back(populateThriftColumnType(cat, cd));
2594  }
2595  } else {
2596  throw std::runtime_error(
2597  "Unable to access table " + table_name +
2598  ". The table may not exist, or the logged in user may not "
2599  "have permission to access the table.");
2600  }
2601  }
2602  _return.fragment_size = td->maxFragRows;
2603  _return.page_size = td->fragPageSize;
2604  _return.max_rows = td->maxRows;
2605  _return.view_sql =
2606  (have_privileges_on_view_sources ? td->viewSQL
2607  : "[Not enough privileges to see the view SQL]");
 // The reported shard count is the table's shard count multiplied by the leaf
 // count, with the leaf count treated as at least 1.
2608  _return.shard_count = td->nShards * std::max(g_leaf_count, size_t(1));
2609  if (td->nShards > 0) {
2610  auto cd = cat->getMetadataForColumn(td->tableId, td->shardedColumnId);
2611  CHECK(cd);
2612  _return.sharded_column_name = cd->columnName;
2613  }
2614  _return.key_metainfo = td->keyMetainfo;
2615  _return.is_temporary = td->persistenceLevel == Data_Namespace::MemoryLevel::CPU_LEVEL;
2616  _return.partition_detail =
2617  td->partitions.empty()
2618  ? TPartitionDetail::DEFAULT
2619  : (table_is_replicated(td)
2620  ? TPartitionDetail::REPLICATED
2621  : (td->partitions == "SHARDED" ? TPartitionDetail::SHARDED
2622  : TPartitionDetail::OTHER));
 // The first matching kind wins: view, then temporary, then foreign, else default.
2623  if (td->isView) {
2624  _return.table_type = TTableType::VIEW;
2625  } else if (td->isTemporaryTable()) {
2626  _return.table_type = TTableType::TEMPORARY;
2627  } else if (td->isForeignTable()) {
2628  _return.table_type = TTableType::FOREIGN;
2629  _return.refresh_info = get_refresh_info(td);
2630  } else {
2631  _return.table_type = TTableType::DEFAULT;
2632  }
2633 
2634  } catch (const std::runtime_error& e) {
2635  THROW_DB_EXCEPTION(std::string(e.what()));
2636  }
2637 }
2638 
2639 void DBHandler::get_link_view(TFrontendView& _return,
2640  const TSessionId& session_id_or_json,
2641  const std::string& link) {
2642  heavyai::RequestInfo const request_info(session_id_or_json);
2643  SET_REQUEST_ID(request_info.requestId());
2644  auto session_ptr = get_session_ptr(request_info.sessionId());
2645  auto stdlog = STDLOG(session_ptr);
2646  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2647  auto const& cat = session_ptr->getCatalog();
2648  auto ld = cat.getMetadataForLink(std::to_string(cat.getCurrentDB().dbId) + link);
2649  if (!ld) {
2650  THROW_DB_EXCEPTION("Link " + link + " is not valid.");
2651  }
2652  _return.view_state = ld->viewState;
2653  _return.view_name = ld->link;
2654  _return.update_time = ld->updateTime;
2655  _return.view_metadata = ld->viewMetadata;
2656 }
2657 
// Decides whether the session's current user may access the table/view `td`.
// Superusers are always allowed; for everyone else the answer comes from
// SysCatalog::hasAnyPrivileges on a single DBObject.
// NOTE(review): this listing omits source line 2658 (the head of the signature;
// callers in this file use the name hasTableAccessPrivileges) and line 2668 (the
// declaration of `dbObject`) -- confirm against the full file.
2659  const TableDescriptor* td,
2660  const Catalog_Namespace::SessionInfo& session_info) {
2661  auto& cat = session_info.getCatalog();
2662  auto user_metadata = session_info.get_currentUser();
2663 
// Superusers bypass the privilege lookup entirely.
2664  if (user_metadata.isSuper) {
2665  return true;
2666  }
2667 
2669  dbObject.loadKey(cat);
2670  std::vector<DBObject> privObjects = {dbObject};
2671 
2672  return SysCatalog::instance().hasAnyPrivileges(user_metadata, privObjects);
2673 }
2674 
2675 void DBHandler::get_tables_impl(std::vector<std::string>& table_names,
2676  const Catalog_Namespace::SessionInfo& session_info,
2677  const GetTablesType get_tables_type,
2678  const std::string& database_name) {
2679  if (database_name.empty()) {
2680  table_names = session_info.getCatalog().getTableNamesForUser(
2681  session_info.get_currentUser(), get_tables_type);
2682  } else {
2683  auto request_cat = SysCatalog::instance().getCatalog(database_name);
2684  if (!request_cat) {
2685  THROW_DB_EXCEPTION("Database " + database_name + " does not exist.");
2686  }
2687  table_names = request_cat->getTableNamesForUser(session_info.get_currentUser(),
2688  get_tables_type);
2689  }
2690 }
2691 
// Thrift endpoint: resolves the session from session_id_or_json, logs the client
// connection, and requests the GET_PHYSICAL_TABLES_AND_VIEWS listing into
// table_names.
// NOTE(review): source line 2698 is missing from this listing; presumably it is the
// opening `get_tables_impl(` of the call whose arguments follow -- confirm.
2692 void DBHandler::get_tables(std::vector<std::string>& table_names,
2693  const TSessionId& session_id_or_json) {
2694  heavyai::RequestInfo const request_info(session_id_or_json);
2695  SET_REQUEST_ID(request_info.requestId());
2696  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2697  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2699  table_names, *stdlog.getConstSessionInfo(), GET_PHYSICAL_TABLES_AND_VIEWS);
2700 }
2701 
// Thrift endpoint: like get_tables, but forwards an explicit database_name to
// get_tables_impl so tables of another database can be listed.
// NOTE(review): source line 2712 (the GetTablesType argument between the session
// info and database_name) is missing from this listing -- confirm which value is
// passed.
2702 void DBHandler::get_tables_for_database(std::vector<std::string>& table_names,
2703  const TSessionId& session_id_or_json,
2704  const std::string& database_name) {
2705  heavyai::RequestInfo const request_info(session_id_or_json);
2706  SET_REQUEST_ID(request_info.requestId());
2707  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2708  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2709 
2710  get_tables_impl(table_names,
2711  *stdlog.getConstSessionInfo(),
2713  database_name);
2714 }
2715 
2716 void DBHandler::get_physical_tables(std::vector<std::string>& table_names,
2717  const TSessionId& session_id_or_json) {
2718  heavyai::RequestInfo const request_info(session_id_or_json);
2719  SET_REQUEST_ID(request_info.requestId());
2720  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2721  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2722  get_tables_impl(table_names, *stdlog.getConstSessionInfo(), GET_PHYSICAL_TABLES);
2723 }
2724 
2725 void DBHandler::get_views(std::vector<std::string>& table_names,
2726  const TSessionId& session_id_or_json) {
2727  heavyai::RequestInfo const request_info(session_id_or_json);
2728  SET_REQUEST_ID(request_info.requestId());
2729  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2730  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2731  get_tables_impl(table_names, *stdlog.getConstSessionInfo(), GET_VIEWS);
2732 }
2733 
// Appends one TTableMeta to _return for every table or view in the session's
// catalog that is not a shard and that the session user has privileges on.
//  - For views, column names/types come from parsing the view SQL and running it
//    through execute_rel_alg with just_validate=true; a view that fails is logged
//    and still reported, with zero columns.
//  - For tables, column names/types come from the catalog's column descriptors,
//    with the deleted column left out of the name/type lists.
// NOTE(review): this listing omits source line 2768 (the declaration of `locks`)
// and line 2778 (an argument to execute_rel_alg) -- confirm against the full file.
2734 void DBHandler::get_tables_meta_impl(std::vector<TTableMeta>& _return,
2735  QueryStateProxy query_state_proxy,
2736  const Catalog_Namespace::SessionInfo& session_info,
2737  const bool with_table_locks) {
2738  const auto& cat = session_info.getCatalog();
2739  // Get copies of table descriptors here in order to avoid possible use of dangling
2740  // pointers, if tables are concurrently dropped.
2741  const auto tables = cat.getAllTableMetadataCopy();
2742  _return.reserve(tables.size());
2743 
2744  for (const auto& td : tables) {
2745  if (td.shard >= 0) {
2746  // skip shards, they're not standalone tables
2747  continue;
2748  }
2749  if (!hasTableAccessPrivileges(&td, session_info)) {
2750  // skip table, as there are no privileges to access it
2751  continue;
2752  }
2753 
2754  TTableMeta ret;
2755  ret.table_name = td.tableName;
2756  ret.is_view = td.isView;
2757  ret.is_replicated = table_is_replicated(&td);
2758  ret.shard_count = td.nShards;
2759  ret.max_rows = td.maxRows;
2760  ret.table_id = td.tableId;
2761 
2762  std::vector<TTypeInfo> col_types;
2763  std::vector<std::string> col_names;
2764  size_t num_cols = 0;
2765  if (td.isView) {
2766  try {
2767  TPlanResult parse_result;
2769  std::tie(parse_result, locks) = parse_to_ra(
2770  query_state_proxy, td.viewSQL, {}, with_table_locks, system_parameters_);
2771  const auto query_ra = parse_result.plan_result;
2772 
2773  ExecutionResult ex_result;
2774  execute_rel_alg(ex_result,
2775  query_state_proxy,
2776  query_ra,
2777  true,
2779  -1,
2780  -1,
2781  /*just_validate=*/true,
2782  /*find_push_down_candidates=*/false,
2783  ExplainInfo());
2784  TQueryResult result;
2785  DBHandler::convertData(result, ex_result, query_state_proxy, true, -1, -1);
// Start from the full row descriptor size and subtract each physical column.
2786  num_cols = result.row_set.row_desc.size();
2787  for (const auto& col : result.row_set.row_desc) {
2788  if (col.is_physical) {
2789  num_cols--;
2790  continue;
2791  }
2792  col_types.push_back(col.col_type);
2793  col_names.push_back(col.col_name);
2794  }
2795  } catch (std::exception& e) {
// Best effort: a failing view does not abort the listing; it is reported with
// whatever column info was gathered before the failure.
2796  LOG(WARNING) << "get_tables_meta: Ignoring broken view: " << td.tableName;
2797  }
2798  } else {
2799  try {
// NOTE(review): this repeats the privilege check already made at the top of the
// loop body, so the `else continue` branch looks unreachable unless privileges
// change in between -- confirm.
2800  if (hasTableAccessPrivileges(&td, session_info)) {
2801  const auto col_descriptors =
2802  cat.getAllColumnMetadataForTable(td.tableId, false, true, false);
2803  const auto deleted_cd = cat.getDeletedColumn(&td);
2804  for (const auto cd : col_descriptors) {
2805  if (cd == deleted_cd) {
2806  continue;
2807  }
2808  col_types.push_back(ThriftSerializers::type_info_to_thrift(cd->columnType));
2809  col_names.push_back(cd->columnName);
2810  }
// num_cols is the size of the descriptor list itself, so it also counts the
// deleted column when that descriptor is part of the list.
2811  num_cols = col_descriptors.size();
2812  } else {
2813  continue;
2814  }
2815  } catch (const std::runtime_error& e) {
2816  THROW_DB_EXCEPTION(e.what());
2817  }
2818  }
2819 
2820  ret.num_cols = num_cols;
2821  std::copy(col_types.begin(), col_types.end(), std::back_inserter(ret.col_types));
2822  std::copy(col_names.begin(), col_names.end(), std::back_inserter(ret.col_names));
2823 
2824  _return.push_back(ret);
2825  }
2826 }
2827 
2828 void DBHandler::get_tables_meta(std::vector<TTableMeta>& _return,
2829  const TSessionId& session_id_or_json) {
2830  heavyai::RequestInfo const request_info(session_id_or_json);
2831  SET_REQUEST_ID(request_info.requestId());
2832  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2833  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2834  auto session_ptr = stdlog.getConstSessionInfo();
2835  auto query_state = create_query_state(session_ptr, "");
2836  stdlog.setQueryState(query_state);
2837 
2838  auto execute_read_lock = legacylockmgr::getExecuteReadLock();
2839 
2840  try {
2841  get_tables_meta_impl(_return, query_state->createQueryStateProxy(), *session_ptr);
2842  } catch (const std::exception& e) {
2843  THROW_DB_EXCEPTION(e.what());
2844  }
2845 }
2846 
2847 void DBHandler::get_users(std::vector<std::string>& user_names,
2848  const TSessionId& session_id_or_json) {
2849  heavyai::RequestInfo const request_info(session_id_or_json);
2850  SET_REQUEST_ID(request_info.requestId());
2851  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2852  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2853  auto session_ptr = stdlog.getConstSessionInfo();
2854  std::list<Catalog_Namespace::UserMetadata> user_list;
2855 
2856  if (!session_ptr->get_currentUser().isSuper) {
2857  user_list = SysCatalog::instance().getAllUserMetadata(
2858  session_ptr->getCatalog().getCurrentDB().dbId);
2859  } else {
2860  user_list = SysCatalog::instance().getAllUserMetadata();
2861  }
2862  for (auto u : user_list) {
2863  user_names.push_back(u.userName);
2864  }
2865 }
2866 
2867 void DBHandler::get_version(std::string& version) {
2868  version = MAPD_RELEASE;
2869 }
2870 
// File-local helper(s).
// NOTE(review): this listing omits source lines 2873-2875 (the helper's signature
// and the condition guarding the first return) and line 2879 (the body of the
// returned lambda). What is visible: the helper returns a callable -- in one branch
// a lambda that, per its inline comment, is meant to resume the executor resource
// manager queue, and otherwise a no-op lambda. Confirm against the full file.
2871 namespace {
2872 
2876  return [] {
2877  // we need to resume erm queue if we throw any exception
2878  // that heavydb server can handle w/o shutting it down
2880  };
2881  }
2882  return [] {};
2883 }
2884 
2885 } // namespace
2886 
// Thrift endpoint: superuser-only request to clear GPU memory. The render
// handler's GPU memory (if a render handler exists) is cleared first, then the
// query engine's; a std::exception from the latter is re-thrown as a DB exception.
// NOTE(review): this listing omits source line 2896 (a statement right after the
// privilege check) and line 2905 (the statement inside the try block, i.e. the
// actual query-engine clear call) -- confirm against the full file.
2887 void DBHandler::clear_gpu_memory(const TSessionId& session_id_or_json) {
2888  heavyai::RequestInfo const request_info(session_id_or_json);
2889  SET_REQUEST_ID(request_info.requestId());
2890  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2891  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2892  auto session_ptr = stdlog.getConstSessionInfo();
2893  if (!session_ptr->get_currentUser().isSuper) {
2894  THROW_DB_EXCEPTION("Superuser privilege is required to run clear_gpu_memory");
2895  }
2897  // clear renderer memory first
2898  // this will block until any running render finishes
2899  if (render_handler_) {
2900  render_handler_->clear_gpu_memory();
2901  }
2902  // then clear the QE memory
2903  // the renderer will have disconnected from any QE memory
2904  try {
2906  } catch (const std::exception& e) {
2907  THROW_DB_EXCEPTION(e.what());
2908  }
2909 }
2910 
// Thrift endpoint: superuser-only request to clear CPU memory. The render
// handler's CPU memory (if a render handler exists) is cleared first, then the
// query engine's; a std::exception from the latter is re-thrown as a DB exception.
// NOTE(review): unlike clear_gpu_memory, this function does not append the "client"
// name/value pair to stdlog -- confirm whether that is intentional.
// NOTE(review): this listing omits source line 2919 (a statement right after the
// privilege check) and line 2928 (the statement inside the try block) -- confirm
// against the full file.
2911 void DBHandler::clear_cpu_memory(const TSessionId& session_id_or_json) {
2912  heavyai::RequestInfo const request_info(session_id_or_json);
2913  SET_REQUEST_ID(request_info.requestId());
2914  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2915  auto session_ptr = stdlog.getConstSessionInfo();
2916  if (!session_ptr->get_currentUser().isSuper) {
2917  THROW_DB_EXCEPTION("Superuser privilege is required to run clear_cpu_memory");
2918  }
2920  // clear renderer memory first
2921  // this will block until any running render finishes
2922  if (render_handler_) {
2923  render_handler_->clear_cpu_memory();
2924  }
2925  // then clear the QE memory
2926  // the renderer will have disconnected from any QE memory
2927  try {
2929  } catch (const std::exception& e) {
2930  THROW_DB_EXCEPTION(e.what());
2931  }
2932 }
2933 
// Superuser-only request that clears both the CPU and GPU memory held by the
// render handler; does nothing beyond the privilege check when there is no render
// handler.
// NOTE(review): this listing omits source line 2944 (the first statement inside the
// `if (render_handler_)` block) -- confirm against the full file.
2934 void DBHandler::clearRenderMemory(const TSessionId& session_id_or_json) {
2935  heavyai::RequestInfo const request_info(session_id_or_json);
2936  SET_REQUEST_ID(request_info.requestId());
2937  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
2938  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2939  auto session_ptr = stdlog.getConstSessionInfo();
2940  if (!session_ptr->get_currentUser().isSuper) {
2941  THROW_DB_EXCEPTION("Superuser privilege is required to run clear_render_memory");
2942  }
2943  if (render_handler_) {
2945  render_handler_->clear_cpu_memory();
2946  render_handler_->clear_gpu_memory();
2947  }
2948 }
2949 
// Thrift endpoint: superuser-only request to pause the executor queue. Note that
// `session` is passed straight to get_session_ptr (no RequestInfo parsing here).
// A std::exception from the pause operation is re-thrown as a DB exception.
// NOTE(review): this listing omits source line 2958 (the statement inside the try
// block, i.e. the actual pause call) -- confirm against the full file.
2950 void DBHandler::pause_executor_queue(const TSessionId& session) {
2951  auto stdlog = STDLOG(get_session_ptr(session));
2952  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2953  auto session_ptr = stdlog.getConstSessionInfo();
2954  if (!session_ptr->get_currentUser().isSuper) {
2955  THROW_DB_EXCEPTION("Superuser privilege is required to run PAUSE EXECUTOR QUEUE");
2956  }
2957  try {
2959  } catch (const std::exception& e) {
2960  THROW_DB_EXCEPTION(e.what());
2961  }
2962 }
2963 
// Thrift endpoint: superuser-only request to resume the executor queue. Note that
// `session` is passed straight to get_session_ptr (no RequestInfo parsing here).
// A std::exception from the resume operation is re-thrown as a DB exception.
// NOTE(review): this listing omits source line 2972 (the statement inside the try
// block, i.e. the actual resume call) -- confirm against the full file.
2964 void DBHandler::resume_executor_queue(const TSessionId& session) {
2965  auto stdlog = STDLOG(get_session_ptr(session));
2966  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2967  auto session_ptr = stdlog.getConstSessionInfo();
2968  if (!session_ptr->get_currentUser().isSuper) {
2969  THROW_DB_EXCEPTION("Superuser privilege is required to run RESUME EXECUTOR QUEUE");
2970  }
2971  try {
2973  } catch (const std::exception& e) {
2974  THROW_DB_EXCEPTION(e.what());
2975  }
2976 }
2977 
// Internal endpoint used for query interruption in distributed mode: validates the
// leaf session, then enrolls the *parent* session id with the executor together
// with label, start time and a status that depends on for_running_query_kernel
// (RUNNING_QUERY_KERNEL when true, RUNNING_IMPORTER otherwise).
// NOTE(review): this listing omits source line 2991 (the definition of `executor`)
// and line 2995 (one argument of enrollQuerySession between start_time_str and the
// status) -- confirm against the full file.
2978 void DBHandler::set_cur_session(const TSessionId& parent_session_id_or_json,
2979  const TSessionId& leaf_session_id_or_json,
2980  const std::string& start_time_str,
2981  const std::string& label,
2982  bool for_running_query_kernel) {
2983  // internal API to manage query interruption in distributed mode
2984  heavyai::RequestInfo const parent_request_info(parent_session_id_or_json);
2985  heavyai::RequestInfo const leaf_request_info(leaf_session_id_or_json);
2986  SET_REQUEST_ID(leaf_request_info.requestId());
2987  auto stdlog = STDLOG(get_session_ptr(leaf_request_info.sessionId()));
2988  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
2989  auto session_ptr = stdlog.getConstSessionInfo();
2990 
2992  executor->enrollQuerySession(parent_request_info.sessionId(),
2993  label,
2994  start_time_str,
2996  for_running_query_kernel
2997  ? QuerySessionStatus::QueryStatus::RUNNING_QUERY_KERNEL
2998  : QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
2999 }
3000 
// Internal endpoint used for query interruption in distributed mode: validates the
// leaf session, then clears the executor's query session status recorded for the
// *parent* session id and start_time_str. `label` and for_running_query_kernel are
// not used in the visible code.
// NOTE(review): this listing omits source line 3012 (the definition of `executor`)
// -- confirm against the full file.
3001 void DBHandler::invalidate_cur_session(const TSessionId& parent_session_id_or_json,
3002  const TSessionId& leaf_session_id_or_json,
3003  const std::string& start_time_str,
3004  const std::string& label,
3005  bool for_running_query_kernel) {
3006  // internal API to manage query interruption in distributed mode
3007  heavyai::RequestInfo const parent_request_info(parent_session_id_or_json);
3008  heavyai::RequestInfo const leaf_request_info(leaf_session_id_or_json);
3009  SET_REQUEST_ID(leaf_request_info.requestId());
3010  auto stdlog = STDLOG(get_session_ptr(leaf_request_info.sessionId()));
3011  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3013  executor->clearQuerySessionStatus(parent_request_info.sessionId(), start_time_str);
3014 }
3015 
// Body of an accessor that returns the INVALID_SESSION_ID constant.
// NOTE(review): the signature (source line 3016) is missing from this listing --
// confirm the function name and return type against the full file.
3017  return INVALID_SESSION_ID;
3018 }
3019 
3020 void DBHandler::get_memory(std::vector<TNodeMemoryInfo>& _return,
3021  const TSessionId& session_id_or_json,
3022  const std::string& memory_level) {
3023  heavyai::RequestInfo const request_info(session_id_or_json);
3024  SET_REQUEST_ID(request_info.requestId());
3025  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
3026  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3027  std::vector<Data_Namespace::MemoryInfo> internal_memory;
3028  if (!memory_level.compare("gpu")) {
3029  internal_memory =
3030  SysCatalog::instance().getDataMgr().getMemoryInfo(MemoryLevel::GPU_LEVEL);
3031  } else {
3032  internal_memory =
3033  SysCatalog::instance().getDataMgr().getMemoryInfo(MemoryLevel::CPU_LEVEL);
3034  }
3035 
3036  for (auto memInfo : internal_memory) {
3037  TNodeMemoryInfo nodeInfo;
3038  nodeInfo.page_size = memInfo.pageSize;
3039  nodeInfo.max_num_pages = memInfo.maxNumPages;
3040  nodeInfo.num_pages_allocated = memInfo.numPageAllocated;
3041  nodeInfo.is_allocation_capped = memInfo.isAllocationCapped;
3042  for (auto gpu : memInfo.nodeMemoryData) {
3043  TMemoryData md;
3044  md.slab = gpu.slabNum;
3045  md.start_page = gpu.startPage;
3046  md.num_pages = gpu.numPages;
3047  md.touch = gpu.touch;
3048  md.chunk_key.insert(md.chunk_key.end(), gpu.chunk_key.begin(), gpu.chunk_key.end());
3049  md.is_free = gpu.memStatus == Buffer_Namespace::MemStatus::FREE;
3050  nodeInfo.node_memory_data.push_back(md);
3051  }
3052  _return.push_back(nodeInfo);
3053  }
3054 }
3055 
// Thrift endpoint: appends one TDBInfo (name and owner) to dbinfos for every
// database SysCatalog lists for the session's current user.
// NOTE(review): this listing omits source line 3064 (the declaration of `dbs`, the
// left-hand side of the getDatabaseListForUser call) -- confirm against the full
// file.
3056 void DBHandler::get_databases(std::vector<TDBInfo>& dbinfos,
3057  const TSessionId& session_id_or_json) {
3058  heavyai::RequestInfo const request_info(session_id_or_json);
3059  SET_REQUEST_ID(request_info.requestId());
3060  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
3061  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3062  auto session_ptr = stdlog.getConstSessionInfo();
3063  const auto& user = session_ptr->get_currentUser();
3065  SysCatalog::instance().getDatabaseListForUser(user);
// `dbs` is a local that is not used afterwards, so its strings are moved out
// rather than copied.
3066  for (auto& db : dbs) {
3067  TDBInfo dbinfo;
3068  dbinfo.db_name = std::move(db.dbName);
3069  dbinfo.db_owner = std::move(db.dbOwnerName);
3070  dbinfos.push_back(std::move(dbinfo));
3071  }
3072 }
3073 
// Maps the session's executor device type to the Thrift TExecuteMode enum.
// Any device type not handled by a case label hits UNREACHABLE().
// NOTE(review): this listing omits source lines 3077 and 3079 (the two `case`
// labels preceding the returns) -- confirm against the full file.
3074 TExecuteMode::type DBHandler::getExecutionMode(const TSessionId& session_id) {
3075  auto executor = get_session_ptr(session_id)->get_executor_device_type();
3076  switch (executor) {
3078  return TExecuteMode::CPU;
3080  return TExecuteMode::GPU;
3081  default:
3082  UNREACHABLE();
3083  }
// Not expected to be reached; the trailing return gives every path a value.
3084  UNREACHABLE();
3085  return TExecuteMode::CPU;
3086 }
3087 void DBHandler::set_execution_mode(const TSessionId& session_id_or_json,
3088  const TExecuteMode::type mode) {
3089  heavyai::RequestInfo const request_info(session_id_or_json);
3090  SET_REQUEST_ID(request_info.requestId());
3091  auto session_ptr = get_session_ptr(request_info.sessionId());
3092  auto stdlog = STDLOG(session_ptr);
3093  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3094  DBHandler::set_execution_mode_nolock(session_ptr.get(), mode);
3095 }
3096 
3097 namespace {
3098 
// Rejects direct loads into a sharded table: throws std::runtime_error when `td`
// is non-null and has a non-zero shard count.
// NOTE(review): the signature (source line 3099) is missing from this listing --
// confirm the function name and parameter type against the full file.
3100  if (td && td->nShards) {
3101  throw std::runtime_error("Cannot import a sharded table directly to a leaf");
3102  }
3103 }
3104 
3105 void check_valid_column_names(const std::list<const ColumnDescriptor*>& descs,
3106  const std::vector<std::string>& column_names) {
3107  std::unordered_set<std::string> unique_names;
3108  for (const auto& name : column_names) {
3109  auto lower_name = to_lower(name);
3110  if (unique_names.find(lower_name) != unique_names.end()) {
3111  THROW_DB_EXCEPTION("Column " + name + " is mentioned multiple times");
3112  } else {
3113  unique_names.insert(lower_name);
3114  }
3115  }
3116  for (const auto& cd : descs) {
3117  auto iter = unique_names.find(to_lower(cd->columnName));
3118  if (iter != unique_names.end()) {
3119  unique_names.erase(iter);
3120  }
3121  }
3122  if (!unique_names.empty()) {
3123  THROW_DB_EXCEPTION("Column " + *unique_names.begin() + " does not exist");
3124  }
3125 }
3126 
3127 // Return vector of IDs mapping column descriptors to the list of comumn names.
3128 // The size of the vector is the number of actual columns (geophisical columns excluded).
3129 // ID is either a position in column_names matching the descriptor, or -1 if the column
3130 // is missing from the column_names
3131 std::vector<int> column_ids_by_names(const std::list<const ColumnDescriptor*>& descs,
3132  const std::vector<std::string>& column_names) {
3133  std::vector<int> desc_to_column_ids;
3134  if (column_names.empty()) {
3135  int col_idx = 0;
3136  for (const auto& cd : descs) {
3137  if (!cd->isGeoPhyCol) {
3138  desc_to_column_ids.push_back(col_idx);
3139  ++col_idx;
3140  }
3141  }
3142  } else {
3143  for (const auto& cd : descs) {
3144  if (!cd->isGeoPhyCol) {
3145  bool found = false;
3146  for (size_t j = 0; j < column_names.size(); ++j) {
3147  if (to_lower(cd->columnName) == to_lower(column_names[j])) {
3148  found = true;
3149  desc_to_column_ids.push_back(j);
3150  break;
3151  }
3152  }
3153  if (!found) {
3154  if (!cd->columnType.get_notnull()) {
3155  desc_to_column_ids.push_back(-1);
3156  } else {
3157  THROW_DB_EXCEPTION("Column '" + cd->columnName +
3158  "' cannot be omitted due to NOT NULL constraint");
3159  }
3160  }
3161  }
3162  }
3163  }
3164  return desc_to_column_ids;
3165 }
3166 
// Builds a single "Cache size information {...}" line from several cache sizes
// (result-set recycler, hash-table caches, chunk metadata recycler, query plan DAG
// cache, compiled GPU code cache, string dictionaries) and logs it at INFO level.
// NOTE(review): this listing omits many source lines: 3167 (signature; the body
// uses a catalog named `cat`), 3170 (definition of `executor`), 3184-3185,
// 3187-3188, 3190-3192, 3194-3196 (initializers of the four hash-table cache
// sizes) and 3207 (last argument of getCurrentCacheSizeForDevice). Confirm against
// the full file.
3168  std::ostringstream oss;
3169  oss << "Cache size information {";
3171  // 1. Data recycler
3172  // 1.a Resultset Recycler
3173  auto resultset_cache_size =
3174  executor->getResultSetRecyclerHolder()
3175  .getResultSetRecycler()
3176  ->getResultSetRecyclerMetricTracker()
3177  .getCurrentCacheSize(DataRecyclerUtil::CPU_DEVICE_IDENTIFIER);
// The result-set entry is only printed when a size value is available.
3178  if (resultset_cache_size) {
3179  oss << "\"query_resultset\": " << *resultset_cache_size << " bytes, ";
3180  }
3181 
3182  // 1.b Join Hash Table Recycler
3183  auto perfect_join_ht_cache_size =
3186  auto baseline_join_ht_cache_size =
3189  auto bbox_intersect_ht_cache_size =
3193  auto bbox_intersect_ht_tuner_cache_size =
// All hash-table related caches are reported as one combined figure.
3197  auto sum_hash_table_cache_size =
3198  perfect_join_ht_cache_size + baseline_join_ht_cache_size +
3199  bbox_intersect_ht_cache_size + bbox_intersect_ht_tuner_cache_size;
3200  oss << "\"hash_tables\": " << sum_hash_table_cache_size << " bytes, ";
3201 
3202  // 1.c Chunk Metadata Recycler
3203  auto chunk_metadata_cache_size =
3204  executor->getResultSetRecyclerHolder()
3205  .getChunkMetadataRecycler()
3206  ->getCurrentCacheSizeForDevice(CacheItemType::CHUNK_METADATA,
3208  oss << "\"chunk_metadata\": " << chunk_metadata_cache_size << " bytes, ";
3209 
3210  // 2. Query Plan Dag
3211  auto query_plan_dag_cache_size =
3212  executor->getQueryPlanDagCache().getCurrentNodeMapSize();
3213  oss << "\"query_plan_dag\": " << query_plan_dag_cache_size << " bytes, ";
3214 
3215  // 3. Compiled (GPU) Code
3216  oss << "\"compiled_GPU code\": "
3217  << QueryEngine::getInstance()->gpu_code_accessor->getCacheSize() << " bytes, ";
3218 
3219  // 4. String Dictionary
3220  oss << "\"string_dictionary\": " << cat.getTotalMemorySizeForDictionariesForDatabase()
3221  << " bytes";
3222  oss << "}";
3223  LOG(INFO) << oss.str();
3224 }
3225 
// Logs `query` followed by the data manager's system memory usage at INFO level,
// then logs the cache sizes via log_cache_size.
// NOTE(review): this listing omits source lines 3227-3228 (the remaining parameter
// -- a catalog named `cat` -- and the condition that opens the block closed on line
// 3233) -- confirm against the full file.
3226 void log_system_cpu_memory_status(std::string const& query,
3229  std::ostringstream oss;
3230  oss << query << "\n" << cat.getDataMgr().getSystemMemoryUsage();
3231  LOG(INFO) << oss.str();
3232  log_cache_size(cat);
3233  }
3234 }
3235 } // namespace
3236 
// Converts the geo strings buffered for geometry column `cd` into coordinate,
// bounds, ring-size and poly-ring vectors and hands them on to populate the
// column's physical buffers.
// On entry col_idx is expected to be one past the geo column's own buffer (the geo
// strings are read from import_buffers[col_idx - 1]); per the inline comment the
// populate step advances col_idx.
// Throws a DB exception ("Invalid geometry in column ...") when the number of
// buffered strings differs from num_rows or when the conversion fails.
// session_id, catalog and table_name are not referenced in the visible lines.
// NOTE(review): this listing omits source line 3237 (head of the signature) and
// line 3265 (the callee of the final call) -- confirm against the full file.
3238  const TSessionId& session_id,
3239  const Catalog& catalog,
3240  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
3241  const ColumnDescriptor* cd,
3242  size_t& col_idx,
3243  size_t num_rows,
3244  const std::string& table_name) {
3245  auto geo_col_idx = col_idx - 1;
3246  const auto wkt_or_wkb_hex_column = import_buffers[geo_col_idx]->getGeoStringBuffer();
3247  std::vector<std::vector<double>> coords_column, bounds_column;
3248  std::vector<std::vector<int>> ring_sizes_column, poly_rings_column;
3249  SQLTypeInfo ti = cd->columnType;
3250  const bool validate_with_geos_if_available = false;
// The row-count check comes first, so a size mismatch short-circuits the geometry
// conversion.
3251  if (num_rows != wkt_or_wkb_hex_column->size() ||
3252  !Geospatial::GeoTypesFactory::getGeoColumns(wkt_or_wkb_hex_column,
3253  ti,
3254  coords_column,
3255  bounds_column,
3256  ring_sizes_column,
3257  poly_rings_column,
3258  validate_with_geos_if_available)) {
3259  std::ostringstream oss;
3260  oss << "Invalid geometry in column " << cd->columnName;
3261  THROW_DB_EXCEPTION(oss.str());
3262  }
3263 
3264  // Populate physical columns, advance col_idx
3266  cd,
3267  import_buffers,
3268  col_idx,
3269  coords_column,
3270  bounds_column,
3271  ring_sizes_column,
3272  poly_rings_column);
3273 }
3274 
// Fills import buffers of columns the caller did not supply (those whose entry in
// desc_id_to_column_id is -1) with num_rows default values. For an omitted
// geometry column a further call (see note) is made with the same col_idx.
// Index bookkeeping: col_idx walks import_buffers (which include geo physical
// columns), import_idx walks desc_id_to_column_id (which does not).
// NOTE(review): this listing omits source line 3275 (head of the signature) and
// line 3297 (the callee of the call on line 3298; its arguments match
// fillGeoColumns' parameter list) -- confirm against the full file.
3276  const TSessionId& session_id,
3277  const Catalog& catalog,
3278  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
3279  const std::list<const ColumnDescriptor*>& cds,
3280  const std::vector<int>& desc_id_to_column_id,
3281  size_t num_rows,
3282  const std::string& table_name) {
3283  size_t skip_physical_cols = 0;
3284  size_t col_idx = 0, import_idx = 0;
3285  for (const auto& cd : cds) {
// Physical columns that follow a geometry column are skipped here; they have no
// entry of their own in desc_id_to_column_id.
3286  if (skip_physical_cols > 0) {
3287  CHECK(cd->isGeoPhyCol);
3288  skip_physical_cols--;
3289  continue;
3290  } else if (cd->columnType.is_geometry()) {
3291  skip_physical_cols = cd->columnType.get_physical_cols();
3292  }
3293  if (desc_id_to_column_id[import_idx] == -1) {
3294  import_buffers[col_idx]->addDefaultValues(cd, num_rows);
3295  col_idx++;
3296  if (cd->columnType.is_geometry()) {
3298  session_id, catalog, import_buffers, cd, col_idx, num_rows, table_name);
3299  }
3300  } else {
// Column was supplied by the caller: step over its buffer and, for geometry, over
// its physical-column buffers as well.
3301  col_idx++;
3302  col_idx += skip_physical_cols;
3303  }
3304  import_idx++;
3305  }
3306 }
3307 
3308 namespace {
// Builds the tag used to label load log entries: "<load_tag>(<table_name>)".
std::string get_load_tag(const std::string& load_tag, const std::string& table_name) {
  // Plain append with a single up-front reservation; a string stream is not needed
  // for a fixed three-piece concatenation.
  std::string tag;
  tag.reserve(load_tag.size() + table_name.size() + 2);
  tag += load_tag;
  tag += '(';
  tag += table_name;
  tag += ')';
  return tag;
}
3314 
// Builds the tag used to label import log entries:
// "<import_tag>(<table_name>, file_path:<file_path>)".
std::string get_import_tag(const std::string& import_tag,
                           const std::string& table_name,
                           const std::string& file_path) {
  static const char kPathLabel[] = ", file_path:";
  // Plain append with a single up-front reservation; a string stream is not needed
  // for a fixed concatenation.
  std::string tag;
  tag.reserve(import_tag.size() + table_name.size() + file_path.size() +
              sizeof(kPathLabel) + 1);
  tag += import_tag;
  tag += '(';
  tag += table_name;
  tag += kPathLabel;
  tag += file_path;
  tag += ')';
  return tag;
}
3322 } // namespace
3323 
3324 void DBHandler::load_table_binary(const TSessionId& session_id_or_json,
3325  const std::string& table_name,
3326  const std::vector<TRow>& rows,
3327  const std::vector<std::string>& column_names) {
3328  try {
3329  heavyai::RequestInfo const request_info(session_id_or_json);
3330  SET_REQUEST_ID(request_info.requestId());
3331  auto stdlog =
3332  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
3333  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3334  auto session_ptr = stdlog.getConstSessionInfo();
3335 
3336  if (rows.empty()) {
3337  THROW_DB_EXCEPTION("No rows to insert");
3338  }
3339 
3340  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
3341  std::unique_ptr<import_export::Loader> loader;
3342  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3343  auto schema_read_lock = prepare_loader_generic(*session_ptr,
3344  table_name,
3345  rows.front().cols.size(),
3346  &loader,
3347  &import_buffers,
3348  column_names,
3349  "load_table_binary");
3350 
3351  auto col_descs = loader->get_column_descs();
3352  auto desc_id_to_column_id = column_ids_by_names(col_descs, column_names);
3353 
3354  size_t rows_completed = 0;
3355  auto const load_tag = get_load_tag("load_table_binary", table_name);
3356  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
3357  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
3358  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
3359  };
3360  for (auto const& row : rows) {
3361  size_t col_idx = 0;
3362  try {
3363  for (auto cd : col_descs) {
3364  auto mapped_idx = desc_id_to_column_id[col_idx];
3365  if (mapped_idx != -1) {
3366  import_buffers[col_idx]->add_value(
3367  cd, row.cols[mapped_idx], row.cols[mapped_idx].is_null);
3368  }
3369  col_idx++;
3370  }
3371  rows_completed++;
3372  } catch (const std::exception& e) {
3373  for (size_t col_idx_to_pop = 0; col_idx_to_pop < col_idx; ++col_idx_to_pop) {
3374  import_buffers[col_idx_to_pop]->pop_value();
3375  }
3376  LOG(ERROR) << "Input exception thrown: " << e.what()
3377  << ". Row discarded, issue at column : " << (col_idx + 1)
3378  << " data :" << row;
3379  }
3380  }
3381  fillMissingBuffers(request_info.sessionId(),
3382  session_ptr->getCatalog(),
3383  import_buffers,
3384  col_descs,
3385  desc_id_to_column_id,
3386  rows_completed,
3387  table_name);
3388  auto insert_data_lock = lockmgr::InsertDataLockMgr::getWriteLockForTable(
3389  session_ptr->getCatalog(), table_name);
3390  if (!loader->load(import_buffers, rows.size(), session_ptr.get())) {
3391  THROW_DB_EXCEPTION(loader->getErrorMessage());
3392  }
3393  } catch (const std::exception& e) {
3394  THROW_DB_EXCEPTION(std::string(e.what()));
3395  }
3396 }
3397 
// Common setup for the load_table_* endpoints. In order, it:
//  - rejects num_cols == 0 and calls check_read_only(load_type),
//  - acquires a schema read lock on table_name (returned to the caller, who must
//    keep it alive for the duration of the load),
//  - checks load privileges,
//  - creates the Loader into *loader,
//  - validates column_names and checks num_cols against either the table's column
//    count (column_names empty) or column_names.size(),
//  - creates the per-column import buffers into *import_buffers.
// NOTE(review): this listing omits source lines 3399 (function name / opening of
// the parameter list; callers in this file use prepare_loader_generic), 3414
// (callee that produces the lock container), 3421 (statement inside the g_cluster
// branch; per the comment it concerns sharded tables) and 3443 (start of the
// statement that raises the column-count mismatch error) -- confirm against the
// full file.
3398 std::unique_ptr<lockmgr::AbstractLockContainer<const TableDescriptor*>>
3400  const Catalog_Namespace::SessionInfo& session_info,
3401  const std::string& table_name,
3402  size_t num_cols,
3403  std::unique_ptr<import_export::Loader>* loader,
3404  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>* import_buffers,
3405  const std::vector<std::string>& column_names,
3406  std::string load_type) {
3407  if (num_cols == 0) {
3408  THROW_DB_EXCEPTION("No columns to insert");
3409  }
3410  check_read_only(load_type);
3411  auto& cat = session_info.getCatalog();
3412  auto td_with_lock =
3413  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
3415  cat, table_name, true));
3416  const auto td = (*td_with_lock)();
3417  CHECK(td);
3418 
3419  if (g_cluster && !leaf_aggregator_.leafCount()) {
3420  // Sharded table rows need to be routed to the leaf by an aggregator.
3422  }
3423  check_table_load_privileges(session_info, table_name);
3424 
3425  loader->reset(new import_export::Loader(cat, td));
3426 
3427  auto col_descs = (*loader)->get_column_descs();
3428  check_valid_column_names(col_descs, column_names);
3429  if (column_names.empty()) {
3430  // TODO(andrew): nColumns should be number of non-virtual/non-system columns.
3431  // Subtracting 1 (rowid) until TableDescriptor is updated.
3432  auto geo_physical_cols = std::count_if(
3433  col_descs.begin(), col_descs.end(), [](auto cd) { return cd->isGeoPhyCol; });
// Expected count = all columns minus geo physical columns, minus 1 (rowid, per the
// TODO above) and minus 1 more when the table has a deleted column.
3434  const auto num_table_cols = static_cast<size_t>(td->nColumns) - geo_physical_cols -
3435  (td->hasDeletedCol ? 2 : 1);
3436  if (num_cols != num_table_cols) {
3437  throw std::runtime_error("Number of columns to load (" + std::to_string(num_cols) +
3438  ") does not match number of columns in table " +
3439  td->tableName + " (" + std::to_string(num_table_cols) +
3440  ")");
3441  }
3442  } else if (num_cols != column_names.size()) {
3444  "Number of columns specified does not match the "
3445  "number of columns given (" +
3446  std::to_string(num_cols) + " vs " + std::to_string(column_names.size()) + ")");
3447  }
3448 
3449  *import_buffers = import_export::setup_column_loaders(td, loader->get());
// The local's type differs from the declared return type, hence the explicit move.
3450  return std::move(td_with_lock);
3451 }
3452 namespace {
3453 
3454 size_t get_column_size(const TColumn& column) {
3455  if (!column.nulls.empty()) {
3456  return column.nulls.size();
3457  } else {
3458  // it is a very bold estimate but later we check it against REAL data
3459  // and if this function returns a wrong result (e.g. both int and string
3460  // vectors are filled with values), we get an error
3461  return column.data.int_col.size() + column.data.arr_col.size() +
3462  column.data.real_col.size() + column.data.str_col.size();
3463  }
3464 }
3465 
3466 } // namespace
3467 
3468 void DBHandler::load_table_binary_columnar(const TSessionId& session_id_or_json,
3469  const std::string& table_name,
3470  const std::vector<TColumn>& cols,
3471  const std::vector<std::string>& column_names) {
3472  heavyai::RequestInfo const request_info(session_id_or_json);
3473  SET_REQUEST_ID(request_info.requestId());
3474  auto stdlog =
3475  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
3476  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3477  auto session_ptr = stdlog.getConstSessionInfo();
3478 
3479  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
3480  std::unique_ptr<import_export::Loader> loader;
3481  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3482  auto schema_read_lock = prepare_loader_generic(*session_ptr,
3483  table_name,
3484  cols.size(),
3485  &loader,
3486  &import_buffers,
3487  column_names,
3488  "load_table_binary_columnar");
3489 
3490  auto desc_id_to_column_id =
3491  column_ids_by_names(loader->get_column_descs(), column_names);
3492  size_t num_rows = get_column_size(cols.front());
3493  size_t import_idx = 0; // index into the TColumn vector being loaded
3494  size_t col_idx = 0; // index into column description vector
3495  auto const load_tag = get_load_tag("load_table_binary_columnar", table_name);
3496  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
3497  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
3498  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
3499  };
3500  try {
3501  size_t skip_physical_cols = 0;
3502  for (auto cd : loader->get_column_descs()) {
3503  if (skip_physical_cols > 0) {
3504  CHECK(cd->isGeoPhyCol);
3505  skip_physical_cols--;
3506  continue;
3507  }
3508  auto mapped_idx = desc_id_to_column_id[import_idx];
3509  if (mapped_idx != -1) {
3510  size_t col_rows = import_buffers[col_idx]->add_values(cd, cols[mapped_idx]);
3511  if (col_rows != num_rows) {
3512  std::ostringstream oss;
3513  oss << "load_table_binary_columnar: Inconsistent number of rows in column "
3514  << cd->columnName << " , expecting " << num_rows << " rows, column "
3515  << col_idx << " has " << col_rows << " rows";
3516  THROW_DB_EXCEPTION(oss.str());
3517  }
3518  // Advance to the next column in the table
3519  col_idx++;
3520  // For geometry columns: process WKT strings and fill physical columns
3521  if (cd->columnType.is_geometry()) {
3522  fillGeoColumns(request_info.sessionId(),
3523  session_ptr->getCatalog(),
3524  import_buffers,
3525  cd,
3526  col_idx,
3527  num_rows,
3528  table_name);
3529  skip_physical_cols = cd->columnType.get_physical_cols();
3530  }
3531  } else {
3532  col_idx++;
3533  if (cd->columnType.is_geometry()) {
3534  skip_physical_cols = cd->columnType.get_physical_cols();
3535  col_idx += skip_physical_cols;
3536  }
3537  }
3538  // Advance to the next column of values being loaded
3539  import_idx++;
3540  }
3541  } catch (const std::exception& e) {
3542  std::ostringstream oss;
3543  oss << "load_table_binary_columnar: Input exception thrown: " << e.what()
3544  << ". Issue at column : " << (col_idx + 1) << ". Import aborted";
3545  THROW_DB_EXCEPTION(oss.str());
3546  }
3547  fillMissingBuffers(request_info.sessionId(),
3548  session_ptr->getCatalog(),
3549  import_buffers,
3550  loader->get_column_descs(),
3551  desc_id_to_column_id,
3552  num_rows,
3553  table_name);
3554  auto insert_data_lock = lockmgr::InsertDataLockMgr::getWriteLockForTable(
3555  session_ptr->getCatalog(), table_name);
3556  if (!loader->load(import_buffers, num_rows, session_ptr.get())) {
3557  THROW_DB_EXCEPTION(loader->getErrorMessage());
3558  }
3559 }
3560 
3561 using RecordBatchVector = std::vector<std::shared_ptr<arrow::RecordBatch>>;
3562 
// Evaluates `s` (an expression yielding an arrow::Status) exactly once. If the status
// is not OK, its text is logged at ERROR level and thrown as a TDBException.
// The captured status `_s` is used for both the exception and the log entry so that
// the argument expression is never re-evaluated on the failure path.
#define ARROW_THRIFT_THROW_NOT_OK(s) \
  do {                               \
    ::arrow::Status _s = (s);        \
    if (UNLIKELY(!_s.ok())) {        \
      TDBException ex;               \
      ex.error_msg = _s.ToString();  \
      LOG(ERROR) << ex.error_msg;    \
      throw ex;                      \
    }                                \
  } while (0)
3573 
3574 namespace {
3575 
3576 RecordBatchVector loadArrowStream(const std::string& stream) {
3577  RecordBatchVector batches;
3578  try {
3579  // TODO(wesm): Make this simpler in general, see ARROW-1600
3580  auto stream_buffer =
3581  std::make_shared<arrow::Buffer>(reinterpret_cast<const uint8_t*>(stream.c_str()),
3582  static_cast<int64_t>(stream.size()));
3583 
3584  arrow::io::BufferReader buf_reader(stream_buffer);
3585  std::shared_ptr<arrow::RecordBatchReader> batch_reader;
3586  ARROW_ASSIGN_OR_THROW(batch_reader,
3587  arrow::ipc::RecordBatchStreamReader::Open(&buf_reader));
3588 
3589  while (true) {
3590  std::shared_ptr<arrow::RecordBatch> batch;
3591  // Read batch (zero-copy) from the stream
3592  ARROW_THRIFT_THROW_NOT_OK(batch_reader->ReadNext(&batch));
3593  if (batch == nullptr) {
3594  break;
3595  }
3596  batches.emplace_back(std::move(batch));
3597  }
3598  } catch (const std::exception& e) {
3599  LOG(ERROR) << "Error parsing Arrow stream: " << e.what() << ". Import aborted";
3600  }
3601  return batches;
3602 }
3603 
3604 } // namespace
3605 
3606 void DBHandler::load_table_binary_arrow(const TSessionId& session_id_or_json,
3607  const std::string& table_name,
3608  const std::string& arrow_stream,
3609  const bool use_column_names) {
3610  heavyai::RequestInfo const request_info(session_id_or_json);
3611  SET_REQUEST_ID(request_info.requestId());
3612  auto stdlog =
3613  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
3614  auto session_ptr = stdlog.getConstSessionInfo();
3615 
3616  RecordBatchVector batches = loadArrowStream(arrow_stream);
3617  // Assuming have one batch for now
3618  if (batches.size() != 1) {
3619  THROW_DB_EXCEPTION("Expected a single Arrow record batch. Import aborted");
3620  }
3621 
3622  std::shared_ptr<arrow::RecordBatch> batch = batches[0];
3623  std::unique_ptr<import_export::Loader> loader;
3624  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3625  std::vector<std::string> column_names;
3626  if (use_column_names) {
3627  column_names = batch->schema()->field_names();
3628  }
3629  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
3630  auto schema_read_lock =
3631  prepare_loader_generic(*session_ptr,
3632  table_name,
3633  static_cast<size_t>(batch->num_columns()),
3634  &loader,
3635  &import_buffers,
3636  column_names,
3637  "load_table_binary_arrow");
3638 
3639  auto desc_id_to_column_id =
3640  column_ids_by_names(loader->get_column_descs(), column_names);
3641  size_t num_rows = 0;
3642 
3643  // col_idx indexes "desc_id_to_column_id"
3644  size_t col_idx = 0;
3645  auto const load_tag = get_load_tag("load_table_binary_arrow", table_name);
3646  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
3647  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
3648  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
3649  };
3650  try {
3651  for (auto cd : loader->get_column_descs()) {
3652  if (cd->isGeoPhyCol) {
3653  // Skip in the case of "cd" being a physical cols, as they are generated
3654  // in fillGeoColumns:
3655  // * Point: coords col
3656  // * MultiPoint/LineString: coords/bounds cols
3657  // etc...
3658  continue;
3659  }
3660  auto mapped_idx = desc_id_to_column_id[col_idx];
3661  if (mapped_idx != -1) {
3662  auto& array = *batch->column(mapped_idx);
3663  import_export::ArraySliceRange row_slice(0, array.length());
3664 
3665  // col_id indexes "import_buffers"
3666  size_t col_id = cd->columnId;
3667 
3668  // When importing a buffer with "add_arrow_values", the index in
3669  // "importing_buffers" is given by the "columnId" attribute of a ColumnDescriptor.
3670  // This index will differ from "col_idx" if any of the importing columns is a
3671  // geometry column as they have physical columns for other properties (i.e. a
3672  // LineString also has "coords" and "bounds").
3673  num_rows = import_buffers[col_id - 1]->add_arrow_values(
3674  cd, array, true, row_slice, nullptr);
3675  // For geometry columns: process WKT strings and fill physical columns
3676  if (cd->columnType.is_geometry()) {
3677  fillGeoColumns(request_info.sessionId(),
3678  session_ptr->getCatalog(),
3679  import_buffers,
3680  cd,
3681  col_id,
3682  num_rows,
3683  table_name);
3684  }
3685  }
3686  // Advance to the next column in the table
3687  col_idx++;
3688  }
3689  } catch (const std::exception& e) {
3690  LOG(ERROR) << "Input exception thrown: " << e.what()
3691  << ". Issue at column : " << (col_idx + 1) << ". Import aborted";
3692  // TODO(tmostak): Go row-wise on binary columnar import to be consistent with our
3693  // other import paths
3694  THROW_DB_EXCEPTION(e.what());
3695  }
3696  fillMissingBuffers(request_info.sessionId(),
3697  session_ptr->getCatalog(),
3698  import_buffers,
3699  loader->get_column_descs(),
3700  desc_id_to_column_id,
3701  num_rows,
3702  table_name);
3703  auto insert_data_lock = lockmgr::InsertDataLockMgr::getWriteLockForTable(
3704  session_ptr->getCatalog(), table_name);
3705  if (!loader->load(import_buffers, num_rows, session_ptr.get())) {
3706  THROW_DB_EXCEPTION(loader->getErrorMessage());
3707  }
3708 }
3709 
3710 void DBHandler::load_table(const TSessionId& session_id_or_json,
3711  const std::string& table_name,
3712  const std::vector<TStringRow>& rows,
3713  const std::vector<std::string>& column_names) {
3714  try {
3715  heavyai::RequestInfo const request_info(session_id_or_json);
3716  SET_REQUEST_ID(request_info.requestId());
3717  auto stdlog =
3718  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
3719  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
3720  auto session_ptr = stdlog.getConstSessionInfo();
3721 
3722  if (rows.empty()) {
3723  THROW_DB_EXCEPTION("No rows to insert");
3724  }
3725  auto const load_tag = get_load_tag("load_table", table_name);
3726  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
3727  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
3728  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
3729  };
3730  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
3731  std::unique_ptr<import_export::Loader> loader;
3732  std::vector<std::unique_ptr<import_export::TypedImportBuffer>> import_buffers;
3733  auto schema_read_lock =
3734  prepare_loader_generic(*session_ptr,
3735  table_name,
3736  static_cast<size_t>(rows.front().cols.size()),
3737  &loader,
3738  &import_buffers,
3739  column_names,
3740  "load_table");
3741 
3742  auto col_descs = loader->get_column_descs();
3743  auto desc_id_to_column_id = column_ids_by_names(col_descs, column_names);
3744  import_export::CopyParams copy_params;
3745  size_t rows_completed = 0;
3746  for (auto const& row : rows) {
3747  size_t import_idx = 0; // index into the TStringRow being loaded
3748  size_t col_idx = 0; // index into column description vector
3749  try {
3750  size_t skip_physical_cols = 0;
3751  for (auto cd : col_descs) {
3752  if (skip_physical_cols > 0) {
3753  CHECK(cd->isGeoPhyCol);
3754  skip_physical_cols--;
3755  continue;
3756  }
3757  auto mapped_idx = desc_id_to_column_id[import_idx];
3758  if (mapped_idx != -1) {
3759  import_buffers[col_idx]->add_value(cd,
3760  row.cols[mapped_idx].str_val,
3761  row.cols[mapped_idx].is_null,
3762  copy_params);
3763  }
3764  col_idx++;
3765  if (cd->columnType.is_geometry()) {
3766  // physical geo columns will be filled separately lately
3767  skip_physical_cols = cd->columnType.get_physical_cols();
3768  col_idx += skip_physical_cols;
3769  }
3770  // Advance to the next field within the row
3771  import_idx++;
3772  }
3773  rows_completed++;
3774  } catch (const std::exception& e) {
3775  LOG(ERROR) << "Input exception thrown: " << e.what()
3776  << ". Row discarded, issue at column : " << (col_idx + 1)
3777  << " data :" << row;
3778  THROW_DB_EXCEPTION(std::string("Exception: ") + e.what());
3779  }
3780  }
3781  // do batch filling of geo columns separately
3782  if (rows.size() != 0) {
3783  const auto& row = rows[0];
3784  size_t col_idx = 0; // index into column description vector
3785  try {
3786  size_t import_idx = 0;
3787  size_t skip_physical_cols = 0;
3788  for (auto cd : col_descs) {
3789  if (skip_physical_cols > 0) {
3790  skip_physical_cols--;
3791  continue;
3792  }
3793  auto mapped_idx = desc_id_to_column_id[import_idx];
3794  col_idx++;
3795  if (cd->columnType.is_geometry()) {
3796  skip_physical_cols = cd->columnType.get_physical_cols();
3797  if (mapped_idx != -1) {
3798  fillGeoColumns(request_info.sessionId(),
3799  session_ptr->getCatalog(),
3800  import_buffers,
3801  cd,
3802  col_idx,
3803  rows_completed,
3804  table_name);
3805  } else {
3806  col_idx += skip_physical_cols;
3807  }
3808  }
3809  import_idx++;
3810  }
3811  } catch (const std::exception& e) {
3812  LOG(ERROR) << "Input exception thrown: " << e.what()
3813  << ". Row discarded, issue at column : " << (col_idx + 1)
3814  << " data :" << row;
3815  THROW_DB_EXCEPTION(e.what());
3816  }
3817  }
3818  fillMissingBuffers(request_info.sessionId(),
3819  session_ptr->getCatalog(),
3820  import_buffers,
3821  col_descs,
3822  desc_id_to_column_id,
3823  rows_completed,
3824  table_name);
3825  auto insert_data_lock = lockmgr::InsertDataLockMgr::getWriteLockForTable(
3826  session_ptr->getCatalog(), table_name);
3827  if (!loader->load(import_buffers, rows_completed, session_ptr.get())) {
3828  THROW_DB_EXCEPTION(loader->getErrorMessage());
3829  }
3830 
3831  } catch (const std::exception& e) {
3832  THROW_DB_EXCEPTION(std::string(e.what()));
3833  }
3834 }
3835 
3836 char DBHandler::unescape_char(std::string str) {
3837  char out = str[0];
3838  if (str.size() == 2 && str[0] == '\\') {
3839  if (str[1] == 't') {
3840  out = '\t';
3841  } else if (str[1] == 'n') {
3842  out = '\n';
3843  } else if (str[1] == '0') {
3844  out = '\0';
3845  } else if (str[1] == '\'') {
3846  out = '\'';
3847  } else if (str[1] == '\\') {
3848  out = '\\';
3849  }
3850  }
3851  return out;
3852 }
3853 
3855  import_export::CopyParams copy_params;
3856  switch (cp.has_header) {
3857  case TImportHeaderRow::AUTODETECT:
3859  break;
3860  case TImportHeaderRow::NO_HEADER:
3862  break;
3863  case TImportHeaderRow::HAS_HEADER:
3865  break;
3866  default:
3867  CHECK(false);
3868  }
3869  copy_params.quoted = cp.quoted;
3870  if (cp.delimiter.length() > 0) {
3871  copy_params.delimiter = unescape_char(cp.delimiter);
3872  } else {
3873  copy_params.delimiter = '\0';
3874  }
3875  if (cp.null_str.length() > 0) {
3876  copy_params.null_str = cp.null_str;
3877  }
3878  if (cp.quote.length() > 0) {
3879  copy_params.quote = unescape_char(cp.quote);
3880  }
3881  if (cp.escape.length() > 0) {
3882  copy_params.escape = unescape_char(cp.escape);
3883  }
3884  if (cp.line_delim.length() > 0) {
3885  copy_params.line_delim = unescape_char(cp.line_delim);
3886  }
3887  if (cp.array_delim.length() > 0) {
3888  copy_params.array_delim = unescape_char(cp.array_delim);
3889  }
3890  if (cp.array_begin.length() > 0) {
3891  copy_params.array_begin = unescape_char(cp.array_begin);
3892  }
3893  if (cp.array_end.length() > 0) {
3894  copy_params.array_end = unescape_char(cp.array_end);
3895  }
3896  if (cp.threads != 0) {
3897  copy_params.threads = cp.threads;
3898  }
3899  if (cp.s3_access_key.length() > 0) {
3900  copy_params.s3_access_key = cp.s3_access_key;
3901  }
3902  if (cp.s3_secret_key.length() > 0) {
3903  copy_params.s3_secret_key = cp.s3_secret_key;
3904  }
3905  if (cp.s3_session_token.length() > 0) {
3906  copy_params.s3_session_token = cp.s3_session_token;
3907  }
3908  if (cp.s3_region.length() > 0) {
3909  copy_params.s3_region = cp.s3_region;
3910  }
3911  if (cp.s3_endpoint.length() > 0) {
3912  copy_params.s3_endpoint = cp.s3_endpoint;
3913  }
3914 #ifdef HAVE_AWS_S3
3915  if (g_allow_s3_server_privileges && cp.s3_access_key.length() == 0 &&
3916  cp.s3_secret_key.length() == 0 && cp.s3_session_token.length() == 0) {
3917  const auto& server_credentials =
3918  Aws::Auth::DefaultAWSCredentialsProviderChain().GetAWSCredentials();
3919  copy_params.s3_access_key = server_credentials.GetAWSAccessKeyId();
3920  copy_params.s3_secret_key = server_credentials.GetAWSSecretKey();
3921  copy_params.s3_session_token = server_credentials.GetSessionToken();
3922  }
3923 #endif
3924 
3925  switch (cp.source_type) {
3926  case TSourceType::DELIMITED_FILE:
3928  break;
3929  case TSourceType::GEO_FILE:
3931  break;
3932  case TSourceType::PARQUET_FILE:
3933 #ifdef ENABLE_IMPORT_PARQUET
3935  break;
3936 #else
3937  THROW_DB_EXCEPTION("Parquet not supported");
3938 #endif
3939  case TSourceType::ODBC:
3940  THROW_DB_EXCEPTION("ODBC source not supported");
3941  case TSourceType::RASTER_FILE:
3943  break;
3944  default:
3945  CHECK(false);
3946  }
3947 
3948  switch (cp.geo_coords_encoding) {
3949  case TEncodingType::GEOINT:
3950  copy_params.geo_coords_encoding = kENCODING_GEOINT;
3951  break;
3952  case TEncodingType::NONE:
3953  copy_params.geo_coords_encoding = kENCODING_NONE;
3954  break;
3955  default:
3956  THROW_DB_EXCEPTION("Invalid geo_coords_encoding in TCopyParams: " +
3957  std::to_string((int)cp.geo_coords_encoding));
3958  }
3959  copy_params.geo_coords_comp_param = cp.geo_coords_comp_param;
3960  switch (cp.geo_coords_type) {
3961  case TDatumType::GEOGRAPHY:
3962  copy_params.geo_coords_type = kGEOGRAPHY;
3963  break;
3964  case TDatumType::GEOMETRY:
3965  copy_params.geo_coords_type = kGEOMETRY;
3966  break;
3967  default:
3968  THROW_DB_EXCEPTION("Invalid geo_coords_type in TCopyParams: " +
3969  std::to_string((int)cp.geo_coords_type));
3970  }
3971  switch (cp.geo_coords_srid) {
3972  case 4326:
3973  case 3857:
3974  case 900913:
3975  copy_params.geo_coords_srid = cp.geo_coords_srid;
3976  break;
3977  default:
3978  THROW_DB_EXCEPTION("Invalid geo_coords_srid in TCopyParams (" +
3979  std::to_string((int)cp.geo_coords_srid));
3980  }
3981  copy_params.sanitize_column_names = cp.sanitize_column_names;
3982  copy_params.geo_layer_name = cp.geo_layer_name;
3983  copy_params.geo_explode_collections = cp.geo_explode_collections;
3984  copy_params.source_srid = cp.source_srid;
3985  switch (cp.raster_point_type) {
3986  case TRasterPointType::NONE:
3988  break;
3989  case TRasterPointType::AUTO:
3991  break;
3992  case TRasterPointType::SMALLINT:
3994  break;
3995  case TRasterPointType::INT:
3997  break;
3998  case TRasterPointType::FLOAT:
4000  break;
4001  case TRasterPointType::DOUBLE:
4003  break;
4004  case TRasterPointType::POINT:
4006  break;
4007  default:
4008  CHECK(false);
4009  }
4010  copy_params.raster_import_bands = cp.raster_import_bands;
4011  if (cp.raster_scanlines_per_thread < 0) {
4012  THROW_DB_EXCEPTION("Invalid raster_scanlines_per_thread in TCopyParams (" +
4013  std::to_string((int)cp.raster_scanlines_per_thread));
4014  } else {
4015  copy_params.raster_scanlines_per_thread = cp.raster_scanlines_per_thread;
4016  }
4017  switch (cp.raster_point_transform) {
4018  case TRasterPointTransform::NONE:
4020  break;
4021  case TRasterPointTransform::AUTO:
4023  break;
4024  case TRasterPointTransform::FILE:
4026  break;
4027  case TRasterPointTransform::WORLD:
4029  break;
4030  default:
4031  CHECK(false);
4032  }
4033  copy_params.raster_point_compute_angle = cp.raster_point_compute_angle;
4034  copy_params.raster_import_dimensions = cp.raster_import_dimensions;
4035  copy_params.dsn = cp.odbc_dsn;
4036  copy_params.connection_string = cp.odbc_connection_string;
4037  copy_params.sql_select = cp.odbc_sql_select;
4038  copy_params.sql_order_by = cp.odbc_sql_order_by;
4039  copy_params.username = cp.odbc_username;
4040  copy_params.password = cp.odbc_password;
4041  copy_params.credential_string = cp.odbc_credential_string;
4042  copy_params.add_metadata_columns = cp.add_metadata_columns;
4043  copy_params.trim_spaces = cp.trim_spaces;
4044  copy_params.geo_validate_geometry = cp.geo_validate_geometry;
4045  copy_params.raster_drop_if_all_null = cp.raster_drop_if_all_null;
4046  return copy_params;
4047 }
4048 
4050  TCopyParams copy_params;
4051  copy_params.delimiter = cp.delimiter;
4052  copy_params.null_str = cp.null_str;
4053  switch (cp.has_header) {
4055  copy_params.has_header = TImportHeaderRow::AUTODETECT;
4056  break;
4058  copy_params.has_header = TImportHeaderRow::NO_HEADER;
4059  break;
4061  copy_params.has_header = TImportHeaderRow::HAS_HEADER;
4062  break;
4063  default:
4064  CHECK(false);
4065  }
4066  copy_params.quoted = cp.quoted;
4067  copy_params.quote = cp.quote;
4068  copy_params.escape = cp.escape;
4069  copy_params.line_delim = cp.line_delim;
4070  copy_params.array_delim = cp.array_delim;
4071  copy_params.array_begin = cp.array_begin;
4072  copy_params.array_end = cp.array_end;
4073  copy_params.threads = cp.threads;
4074  copy_params.s3_access_key = cp.s3_access_key;
4075  copy_params.s3_secret_key = cp.s3_secret_key;
4076  copy_params.s3_session_token = cp.s3_session_token;
4077  copy_params.s3_region = cp.s3_region;
4078  copy_params.s3_endpoint = cp.s3_endpoint;
4079  switch (cp.source_type) {
4081  copy_params.source_type = TSourceType::DELIMITED_FILE;
4082  break;
4084  copy_params.source_type = TSourceType::GEO_FILE;
4085  break;
4087  copy_params.source_type = TSourceType::PARQUET_FILE;
4088  break;
4090  copy_params.source_type = TSourceType::RASTER_FILE;
4091  break;
4093  copy_params.source_type = TSourceType::ODBC;
4094  break;
4095  default:
4096  CHECK(false);
4097  }
4098  switch (cp.geo_coords_encoding) {
4099  case kENCODING_GEOINT:
4100  copy_params.geo_coords_encoding = TEncodingType::GEOINT;
4101  break;
4102  default:
4103  copy_params.geo_coords_encoding = TEncodingType::NONE;
4104  break;
4105  }
4106  copy_params.geo_coords_comp_param = cp.geo_coords_comp_param;
4107  switch (cp.geo_coords_type) {
4108  case kGEOGRAPHY:
4109  copy_params.geo_coords_type = TDatumType::GEOGRAPHY;
4110  break;
4111  case kGEOMETRY:
4112  copy_params.geo_coords_type = TDatumType::GEOMETRY;
4113  break;
4114  default:
4115  CHECK(false);
4116  }
4117  copy_params.geo_coords_srid = cp.geo_coords_srid;
4118  copy_params.sanitize_column_names = cp.sanitize_column_names;
4119  copy_params.geo_layer_name = cp.geo_layer_name;
4120  copy_params.geo_assign_render_groups = false;
4121  copy_params.geo_explode_collections = cp.geo_explode_collections;
4122  copy_params.source_srid = cp.source_srid;
4123  switch (cp.raster_point_type) {
4125  copy_params.raster_point_type = TRasterPointType::NONE;
4126  break;
4128  copy_params.raster_point_type = TRasterPointType::AUTO;
4129  break;
4131  copy_params.raster_point_type = TRasterPointType::SMALLINT;
4132  break;
4134  copy_params.raster_point_type = TRasterPointType::INT;
4135  break;
4137  copy_params.raster_point_type = TRasterPointType::FLOAT;
4138  break;
4140  copy_params.raster_point_type = TRasterPointType::DOUBLE;
4141  break;
4143  copy_params.raster_point_type = TRasterPointType::POINT;
4144  break;
4145  default:
4146  CHECK(false);
4147  }
4148  copy_params.raster_import_bands = cp.raster_import_bands;
4149  copy_params.raster_scanlines_per_thread = cp.raster_scanlines_per_thread;
4150  switch (cp.raster_point_transform) {
4152  copy_params.raster_point_transform = TRasterPointTransform::NONE;
4153  break;
4155  copy_params.raster_point_transform = TRasterPointTransform::AUTO;
4156  break;
4158  copy_params.raster_point_transform = TRasterPointTransform::FILE;
4159  break;
4161  copy_params.raster_point_transform = TRasterPointTransform::WORLD;
4162  break;
4163  default:
4164  CHECK(false);
4165  }
4166  copy_params.raster_point_compute_angle = cp.raster_point_compute_angle;
4167  copy_params.raster_import_dimensions = cp.raster_import_dimensions;
4168  copy_params.odbc_dsn = cp.dsn;
4169  copy_params.odbc_connection_string = cp.connection_string;
4170  copy_params.odbc_sql_select = cp.sql_select;
4171  copy_params.odbc_sql_order_by = cp.sql_order_by;
4172  copy_params.odbc_username = cp.username;
4173  copy_params.odbc_password = cp.password;
4174  copy_params.odbc_credential_string = cp.credential_string;
4175  copy_params.add_metadata_columns = cp.add_metadata_columns;
4176  copy_params.trim_spaces = cp.trim_spaces;
4177  copy_params.geo_validate_geometry = cp.geo_validate_geometry;
4178  copy_params.raster_drop_if_all_null = cp.raster_drop_if_all_null;
4179  return copy_params;
4180 }
4181 
4182 namespace {
4183 void add_vsi_network_prefix(std::string& path) {
4184  // do we support network file access?
4185  bool gdal_network = Geospatial::GDAL::supportsNetworkFileAccess();
4186 
4187  // modify head of filename based on source location
4188  if (boost::istarts_with(path, "http://") || boost::istarts_with(path, "https://")) {
4189  if (!gdal_network) {
4191  "HTTP geo file import not supported! Update to GDAL 2.2 or later!");
4192  }
4193  // invoke GDAL CURL virtual file reader
4194  path = "/vsicurl/" + path;
4195  } else if (boost::istarts_with(path, "s3://")) {
4196  if (!gdal_network) {
4198  "S3 geo file import not supported! Update to GDAL 2.2 or later!");
4199  }
4200  // invoke GDAL S3 virtual file reader
4201  boost::replace_first(path, "s3://", "/vsis3/");
4202  }
4203 }
4204 
4205 void add_vsi_geo_prefix(std::string& path) {
4206  // single gzip'd file (not an archive)?
4207  if (boost::iends_with(path, ".gz") && !boost::iends_with(path, ".tar.gz")) {
4208  path = "/vsigzip/" + path;
4209  }
4210 }
4211 
4212 void add_vsi_archive_prefix(std::string& path) {
4213  // check for compressed file or file bundle
4214  if (boost::iends_with(path, ".zip")) {
4215  // zip archive
4216  path = "/vsizip/" + path;
4217  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4218  boost::iends_with(path, ".tar.gz")) {
4219  // tar archive (compressed or uncompressed)
4220  path = "/vsitar/" + path;
4221  }
4222 }
4223 
4224 std::string remove_vsi_prefixes(const std::string& path_in) {
4225  std::string path(path_in);
4226 
4227  // these will be first
4228  if (boost::istarts_with(path, "/vsizip/")) {
4229  boost::replace_first(path, "/vsizip/", "");
4230  } else if (boost::istarts_with(path, "/vsitar/")) {
4231  boost::replace_first(path, "/vsitar/", "");
4232  } else if (boost::istarts_with(path, "/vsigzip/")) {
4233  boost::replace_first(path, "/vsigzip/", "");
4234  }
4235 
4236  // then these
4237  if (boost::istarts_with(path, "/vsicurl/")) {
4238  boost::replace_first(path, "/vsicurl/", "");
4239  } else if (boost::istarts_with(path, "/vsis3/")) {
4240  boost::replace_first(path, "/vsis3/", "s3://");
4241  }
4242 
4243  return path;
4244 }
4245 
4246 bool path_is_relative(const std::string& path) {
4247  if (boost::istarts_with(path, "s3://") || boost::istarts_with(path, "http://") ||
4248  boost::istarts_with(path, "https://")) {
4249  return false;
4250  }
4251  return !boost::filesystem::path(path).is_absolute();
4252 }
4253 
4254 bool path_has_valid_filename(const std::string& path) {
4255  auto filename = boost::filesystem::path(path).filename().string();
4256  if (filename.size() == 0 || filename[0] == '.' || filename[0] == '/') {
4257  return false;
4258  }
4259  return true;
4260 }
4261 
4262 bool is_a_supported_geo_file(const std::string& path) {
4263  if (!path_has_valid_filename(path)) {
4264  return false;
4265  }
4266  // this is now just for files that we want to recognize
4267  // as geo when inside an archive (see below)
4268  // @TODO(se) make this more flexible?
4269  if (boost::iends_with(path, ".shp") || boost::iends_with(path, ".geojson") ||
4270  boost::iends_with(path, ".json") || boost::iends_with(path, ".kml") ||
4271  boost::iends_with(path, ".kmz") || boost::iends_with(path, ".gdb") ||
4272  boost::iends_with(path, ".gdb.zip") || boost::iends_with(path, ".fgb")) {
4273  return true;
4274  }
4275  return false;
4276 }
4277 
4278 bool is_a_supported_archive_file(const std::string& path) {
4279  if (!path_has_valid_filename(path)) {
4280  return false;
4281  }
4282  if (boost::iends_with(path, ".zip") && !boost::iends_with(path, ".gdb.zip")) {
4283  return true;
4284  } else if (boost::iends_with(path, ".tar") || boost::iends_with(path, ".tgz") ||
4285  boost::iends_with(path, ".tar.gz")) {
4286  return true;
4287  }
4288  return false;
4289 }
4290 
4291 std::string find_first_geo_file_in_archive(const std::string& archive_path,
4292  const import_export::CopyParams& copy_params) {
4293  // get the recursive list of all files in the archive
4294  std::vector<std::string> files =
4295  import_export::Importer::gdalGetAllFilesInArchive(archive_path, copy_params);
4296 
4297  // report the list
4298  LOG(INFO) << "Found " << files.size() << " files in Archive "
4299  << remove_vsi_prefixes(archive_path);
4300  for (const auto& file : files) {
4301  LOG(INFO) << " " << file;
4302  }
4303 
4304  // scan the list for the first candidate file
4305  bool found_suitable_file = false;
4306  std::string file_name;
4307  for (const auto& file : files) {
4308  if (is_a_supported_geo_file(file)) {
4309  file_name = file;
4310  found_suitable_file = true;
4311  break;
4312  }
4313  }
4314 
4315  // if we didn't find anything
4316  if (!found_suitable_file) {
4317  LOG(INFO) << "Failed to find any supported geo files in Archive: " +
4318  remove_vsi_prefixes(archive_path);
4319  file_name.clear();
4320  }
4321 
4322  // done
4323  return file_name;
4324 }
4325 
4326 bool is_local_file(const std::string& file_path) {
4327  return (!boost::istarts_with(file_path, "s3://") &&
4328  !boost::istarts_with(file_path, "http://") &&
4329  !boost::istarts_with(file_path, "https://"));
4330 }
4331 
4332 void validate_import_file_path_if_local(const std::string& file_path) {
4333  if (is_local_file(file_path)) {
4335  file_path, ddl_utils::DataTransferType::IMPORT, true);
4336  }
4337 }
4338 } // namespace
4339 
4340 void DBHandler::detect_column_types(TDetectResult& _return,
4341  const TSessionId& session_id_or_json,
4342  const std::string& file_name_in,
4343  const TCopyParams& cp) {
4344  heavyai::RequestInfo const request_info(session_id_or_json);
4345  SET_REQUEST_ID(request_info.requestId());
4346  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4347  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4348  check_read_only("detect_column_types");
4349 
4350  bool is_raster = false;
4351  boost::filesystem::path file_path;
4353  if (copy_params.source_type != import_export::SourceType::kOdbc) {
4354  std::string file_name{file_name_in};
4355  if (path_is_relative(file_name)) {
4356  // assume relative paths are relative to data_path / import / <session>
4357  auto temp_file_path = import_path_ /
4358  picosha2::hash256_hex_string(request_info.sessionId()) /
4359  boost::filesystem::path(file_name).filename();
4360  file_name = temp_file_path.string();
4361  }
4363 
4364  if ((copy_params.source_type == import_export::SourceType::kGeoFile ||
4366  is_local_file(file_name)) {
4367  const shared::FilePathOptions options{copy_params.regex_path_filter,
4368  copy_params.file_sort_order_by,
4369  copy_params.file_sort_regex};
4370  auto file_paths = shared::local_glob_filter_sort_files(file_name, options, false);
4371  // For geo and raster detect, pick the first file, if multiple files are provided
4372  // (e.g. through file globbing).
4373  CHECK(!file_paths.empty());
4374  file_name = file_paths[0];
4375  }
4376 
4377  // if it's a geo or raster import, handle alternative paths (S3, HTTP, archive etc.)
4378  if (copy_params.source_type == import_export::SourceType::kGeoFile) {
4379  if (is_a_supported_archive_file(file_name)) {
4380  // find the archive file
4381  add_vsi_network_prefix(file_name);
4382  if (!import_export::Importer::gdalFileExists(file_name, copy_params)) {
4383  THROW_DB_EXCEPTION("Archive does not exist: " + file_name_in);
4384  }
4385  // find geo file in archive
4386  add_vsi_archive_prefix(file_name);
4387  std::string geo_file = find_first_geo_file_in_archive(file_name, copy_params);
4388  // prepare to detect that geo file
4389  if (geo_file.size()) {
4390  file_name = file_name + std::string("/") + geo_file;
4391  }
4392  } else {
4393  // prepare to detect geo file directly
4394  add_vsi_network_prefix(file_name);
4395  add_vsi_geo_prefix(file_name);
4396  }
4397  } else if (copy_params.source_type == import_export::SourceType::kRasterFile) {
4398  // prepare to detect raster file directly
4399  add_vsi_network_prefix(file_name);
4400  add_vsi_geo_prefix(file_name);
4401  is_raster = true;
4402  }
4403 
4404  file_path = boost::filesystem::path(file_name);
4405  // can be a s3 url
4406  if (!boost::istarts_with(file_name, "s3://")) {
4407  if (!boost::filesystem::path(file_name).is_absolute()) {
4408  file_path = import_path_ /
4409  picosha2::hash256_hex_string(request_info.sessionId()) /
4410  boost::filesystem::path(file_name).filename();
4411  file_name = file_path.string();
4412  }
4413 
4414  if (copy_params.source_type == import_export::SourceType::kGeoFile ||
4416  // check for geo or raster file
4417  if (!import_export::Importer::gdalFileOrDirectoryExists(file_name, copy_params)) {
4418  THROW_DB_EXCEPTION("File or directory \"" + file_path.string() +
4419  "\" does not exist.")
4420  }
4421  } else {
4422  // check for regular file
4423  if (!shared::file_or_glob_path_exists(file_path.string())) {
4424  THROW_DB_EXCEPTION("File or directory \"" + file_path.string() +
4425  "\" does not exist.");
4426  }
4427  }
4428  }
4429  }
4430 
4431  try {
4433 #ifdef ENABLE_IMPORT_PARQUET
4435 #endif
4436  ) {
4437  import_export::Detector detector(file_path, copy_params);
4438  auto best_types = detector.getBestColumnTypes();
4439  std::vector<std::string> headers = detector.get_headers();
4440  copy_params = detector.get_copy_params();
4441 
4442  _return.copy_params = copyparams_to_thrift(copy_params);
4443  _return.row_set.row_desc.resize(best_types.size());
4444  for (size_t col_idx = 0; col_idx < best_types.size(); col_idx++) {
4445  TColumnType col;
4446  auto& ti = best_types[col_idx];
4447  col.col_type.precision = ti.get_precision();
4448  col.col_type.scale = ti.get_scale();
4449  col.col_type.comp_param = ti.get_comp_param();
4450  if (ti.is_geometry()) {
4451  // set this so encoding_to_thrift does the right thing
4452  ti.set_compression(copy_params.geo_coords_encoding);
4453  // fill in these directly
4454  col.col_type.precision = static_cast<int>(copy_params.geo_coords_type);
4455  col.col_type.scale = copy_params.geo_coords_srid;
4456  col.col_type.comp_param = copy_params.geo_coords_comp_param;
4457  }
4458  col.col_type.type = type_to_thrift(ti);
4459  col.col_type.encoding = encoding_to_thrift(ti);
4460  if (ti.is_array()) {
4461  col.col_type.is_array = true;
4462  }
4463  if (copy_params.sanitize_column_names) {
4464  col.col_name = ImportHelpers::sanitize_name(headers[col_idx]);
4465  } else {
4466  col.col_name = headers[col_idx];
4467  }
4468  col.is_reserved_keyword = ImportHelpers::is_reserved_name(col.col_name);
4469  _return.row_set.row_desc[col_idx] = col;
4470  }
4471  auto sample_data = detector.get_sample_rows(shared::kDefaultSampleRowsCount);
4472 
4473  TRow sample_row;
4474  for (auto row : sample_data) {
4475  sample_row.cols.clear();
4476  for (const auto& s : row) {
4477  TDatum td;
4478  td.val.str_val = s;
4479  td.is_null = s.empty();
4480  sample_row.cols.push_back(td);
4481  }
4482  _return.row_set.rows.push_back(sample_row);
4483  }
4484  } else if (copy_params.source_type == import_export::SourceType::kGeoFile ||
4486  check_geospatial_files(file_path, copy_params);
4487  std::list<ColumnDescriptor> cds = import_export::Importer::gdalToColumnDescriptors(
4488  file_path.string(), is_raster, Geospatial::kGeoColumnName, copy_params);
4489  for (auto cd : cds) {
4490  if (copy_params.sanitize_column_names) {
4491  cd.columnName = ImportHelpers::sanitize_name(cd.columnName);
4492  }
4493  _return.row_set.row_desc.push_back(populateThriftColumnType(nullptr, &cd));
4494  }
4495  if (!is_raster) {
4496  // @TODO(se) support for raster?
4497  std::map<std::string, std::vector<std::string>> sample_data;
4500  sample_data,
4502  copy_params);
4503  if (sample_data.size() > 0) {
4504  for (size_t i = 0; i < sample_data.begin()->second.size(); i++) {
4505  TRow sample_row;
4506  for (auto cd : cds) {
4507  TDatum td;
4508  td.val.str_val = sample_data[cd.sourceName].at(i);
4509  td.is_null = td.val.str_val.empty();
4510  sample_row.cols.push_back(td);
4511  }
4512  _return.row_set.rows.push_back(sample_row);
4513  }
4514  }
4515  }
4516  _return.copy_params = copyparams_to_thrift(copy_params);
4517  }
4518  } catch (const std::exception& e) {
4519  THROW_DB_EXCEPTION("detect_column_types error: " + std::string(e.what()));
4520  }
4521 }
4522 
4523 void DBHandler::render_vega(TRenderResult& _return,
4524  const TSessionId& session_id_or_json,
4525  const int64_t widget_id,
4526  const std::string& vega_json,
4527  const int compression_level,
4528  const std::string& nonce) {
4529  heavyai::RequestInfo const request_info(session_id_or_json);
4530  SET_REQUEST_ID(request_info.requestId());
4531  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()),
4532  "widget_id",
4533  widget_id,
4534  "compression_level",
4535  compression_level,
4536  "vega_json",
4537  vega_json,
4538  "nonce",
4539  nonce);
4540  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4541  stdlog.appendNameValuePairs("nonce", nonce);
4542  if (!render_handler_) {
4543  THROW_DB_EXCEPTION("Backend rendering is disabled.");
4544  }
4545 
4546  // cast away const-ness of incoming Thrift string ref
4547  // to allow it to be passed down as an r-value and
4548  // ultimately std::moved into the RenderSession
4549  auto& non_const_vega_json = const_cast<std::string&>(vega_json);
4550 
4551  _return.total_time_ms = measure<>::execution([&]() {
4552  try {
4553  render_handler_->render_vega(_return,
4554  stdlog.getSessionInfo(),
4555  widget_id,
4556  std::move(non_const_vega_json),
4557  compression_level,
4558  nonce);
4559  } catch (std::exception& e) {
4560  THROW_DB_EXCEPTION(e.what());
4561  }
4562  });
4563 }
4564 
// Checks whether the session's current user holds the requested privileges on
// one dashboard: builds a dashboard DBObject for dashboard_id, loads its key
// from the session's catalog, attaches requestedPermissions, and returns the
// result of SysCatalog::checkPrivileges for that user.
// NOTE(review): the first line of this signature (listing line 4565) is absent
// from this extract; it presumably declares the return type, the function name
// and the `session_info` parameter used below — confirm against the original.
4566  int32_t dashboard_id,
4567  AccessPrivileges requestedPermissions) {
4568  DBObject object(dashboard_id, DashboardDBObjectType);
4569  auto& catalog = session_info.getCatalog();
4570  auto& user = session_info.get_currentUser();
4571  object.loadKey(catalog);
4572  object.setPrivileges(requestedPermissions);
4573  std::vector<DBObject> privs = {object};
4574  return SysCatalog::instance().checkPrivileges(user, privs);
4575 }
4576 
4577 // custom expressions
4578 namespace {
4581 
4582 std::unique_ptr<Catalog_Namespace::CustomExpression> create_custom_expr_from_thrift_obj(
4583  const TCustomExpression& t_custom_expr,
4584  const Catalog& catalog) {
4585  if (t_custom_expr.data_source_name.empty()) {
4586  THROW_DB_EXCEPTION("Custom expression data source name cannot be empty.")
4587  }
4588  CHECK(t_custom_expr.data_source_type == TDataSourceType::type::TABLE)
4589  << "Unexpected data source type: "
4590  << static_cast<int>(t_custom_expr.data_source_type);
4591  auto td = catalog.getMetadataForTable(t_custom_expr.data_source_name, false);
4592  if (!td) {
4593  THROW_DB_EXCEPTION("Custom expression references a table \"" +
4594  t_custom_expr.data_source_name + "\" that does not exist.")
4595  }
4596  DataSourceType data_source_type = DataSourceType::TABLE;
4597  return std::make_unique<CustomExpression>(
4598  t_custom_expr.name, t_custom_expr.expression_json, data_source_type, td->tableId);
4599 }
4600 
4601 TCustomExpression create_thrift_obj_from_custom_expr(const CustomExpression& custom_expr,
4602  const Catalog& catalog) {
4603  TCustomExpression t_custom_expr;
4604  t_custom_expr.id = custom_expr.id;
4605  t_custom_expr.name = custom_expr.name;
4606  t_custom_expr.expression_json = custom_expr.expression_json;
4607  t_custom_expr.data_source_id = custom_expr.data_source_id;
4608  t_custom_expr.is_deleted = custom_expr.is_deleted;
4609  CHECK(custom_expr.data_source_type == DataSourceType::TABLE)
4610  << "Unexpected data source type: "
4611  << static_cast<int>(custom_expr.data_source_type);
4612  t_custom_expr.data_source_type = TDataSourceType::type::TABLE;
4613  auto td = catalog.getMetadataForTable(custom_expr.data_source_id, false);
4614  if (td) {
4615  t_custom_expr.data_source_name = td->tableName;
4616  } else {
4617  LOG(WARNING)
4618  << "Custom expression references a deleted data source. Custom expression id: "
4619  << custom_expr.id << ", name: " << custom_expr.name;
4620  }
4621  return t_custom_expr;
4622 }
4623 } // namespace
4624 
// Creates a custom expression from the given Thrift object and returns the value
// produced by Catalog::createCustomExpression.
// Only super users may call this; other users get a DB exception.
4625 int32_t DBHandler::create_custom_expression(const TSessionId& session_id_or_json,
4626  const TCustomExpression& t_custom_expr) {
4627  heavyai::RequestInfo const request_info(session_id_or_json);
4628  SET_REQUEST_ID(request_info.requestId());
4629  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4630  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4631  check_read_only("create_custom_expression");
4632 
4633  auto session_ptr = stdlog.getConstSessionInfo();
4634  if (!session_ptr->get_currentUser().isSuper) {
4635  THROW_DB_EXCEPTION("Custom expressions can only be created by super users.")
4636  }
4637  auto& catalog = session_ptr->getCatalog();
// NOTE(review): listing line 4638 is absent from this extract — confirm against
// the original file what statement sits between the catalog lookup and the return.
4639  return catalog.createCustomExpression(
4640  create_custom_expr_from_thrift_obj(t_custom_expr, catalog));
4641 }
4642 
// Appends to _return the Thrift form of every custom expression that
// Catalog::getCustomExpressionsForUser yields for the session's current user.
4643 void DBHandler::get_custom_expressions(std::vector<TCustomExpression>& _return,
4644  const TSessionId& session_id_or_json) {
4645  heavyai::RequestInfo const request_info(session_id_or_json);
4646  SET_REQUEST_ID(request_info.requestId());
4647  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4648  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4649 
4650  auto session_ptr = stdlog.getConstSessionInfo();
4651  auto& catalog = session_ptr->getCatalog();
// NOTE(review): listing line 4652 is absent from this extract — confirm against
// the original file.
4653  auto custom_expressions =
4654  catalog.getCustomExpressionsForUser(session_ptr->get_currentUser());
4655  for (const auto& custom_expression : custom_expressions) {
4656  _return.emplace_back(create_thrift_obj_from_custom_expr(*custom_expression, catalog));
4657  }
4658 }
4659 
// Replaces the expression JSON of the custom expression with the given id by
// delegating to Catalog::updateCustomExpression.
// Only super users may call this; other users get a DB exception.
4660 void DBHandler::update_custom_expression(const TSessionId& session_id_or_json,
4661  const int32_t id,
4662  const std::string& expression_json) {
4663  heavyai::RequestInfo const request_info(session_id_or_json);
4664  SET_REQUEST_ID(request_info.requestId());
4665  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4666  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4667  check_read_only("update_custom_expression");
4668 
4669  auto session_ptr = stdlog.getConstSessionInfo();
4670  if (!session_ptr->get_currentUser().isSuper) {
4671  THROW_DB_EXCEPTION("Custom expressions can only be updated by super users.")
4672  }
4673  auto& catalog = session_ptr->getCatalog();
// NOTE(review): listing line 4674 is absent from this extract — confirm against
// the original file.
4675  catalog.updateCustomExpression(id, expression_json);
4676 }
4677 
// Deletes the custom expressions with the given ids by delegating to
// Catalog::deleteCustomExpressions, passing do_soft_delete through unchanged.
// Only super users may call this; other users get a DB exception.
// NOTE(review): the first line of this signature (listing line 4678) is absent
// from this extract; the name is taken from the check_read_only argument below —
// confirm against the original file.
4679  const TSessionId& session_id_or_json,
4680  const std::vector<int32_t>& custom_expression_ids,
4681  const bool do_soft_delete) {
4682  heavyai::RequestInfo const request_info(session_id_or_json);
4683  SET_REQUEST_ID(request_info.requestId());
4684  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4685  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4686  check_read_only("delete_custom_expressions");
4687 
4688  auto session_ptr = stdlog.getConstSessionInfo();
4689  if (!session_ptr->get_currentUser().isSuper) {
4690  THROW_DB_EXCEPTION("Custom expressions can only be deleted by super users.")
4691  }
4692  auto& catalog = session_ptr->getCatalog();
// NOTE(review): listing line 4693 is absent from this extract — confirm against
// the original file.
4694  catalog.deleteCustomExpressions(custom_expression_ids, do_soft_delete);
4695 }
4696 
4697 // dashboards
// Fetches a single dashboard by id into `dashboard`.
// Raises a DB exception when the id is unknown or when the privilege check on
// the dashboard (VIEW_DASHBOARD) fails.
4698 void DBHandler::get_dashboard(TDashboard& dashboard,
4699  const TSessionId& session_id_or_json,
4700  const int32_t dashboard_id) {
4701  heavyai::RequestInfo const request_info(session_id_or_json);
4702  SET_REQUEST_ID(request_info.requestId());
4703  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4704  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4705  auto session_ptr = stdlog.getConstSessionInfo();
4706  auto const& cat = session_ptr->getCatalog();
// NOTE(review): listing line 4707 is absent from this extract; `user_meta`, used
// below, is presumably declared there — confirm against the original file.
4708  auto dash = cat.getMetadataForDashboard(dashboard_id);
4709  if (!dash) {
4710  THROW_DB_EXCEPTION("Dashboard with dashboard id " + std::to_string(dashboard_id) +
4711  " doesn't exist");
4712  }
// NOTE(review): listing line 4713 (the opening of this privilege-check condition)
// is absent from this extract.
4714  *session_ptr, dash->dashboardId, AccessPrivileges::VIEW_DASHBOARD)) {
4715  THROW_DB_EXCEPTION("User has no view privileges for the dashboard with id " +
4716  std::to_string(dashboard_id));
4717  }
// Clear the name before looking up the dashboard owner by user id.
4718  user_meta.userName = "";
4719  SysCatalog::instance().getMetadataForUserById(dash->userId, user_meta);
4720  dashboard = get_dashboard_impl(session_ptr, user_meta, dash);
4721 }
4722 
// Appends to `dashboards` every dashboard in the session's catalog that passes
// the VIEW_DASHBOARD privilege check. Dashboard state is not populated
// (get_dashboard_impl is called with populate_state == false).
4723 void DBHandler::get_dashboards(std::vector<TDashboard>& dashboards,
4724  const TSessionId& session_id_or_json) {
4725  heavyai::RequestInfo const request_info(session_id_or_json);
4726  SET_REQUEST_ID(request_info.requestId());
4727  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4728  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4729  auto session_ptr = stdlog.getConstSessionInfo();
4730  auto const& cat = session_ptr->getCatalog();
// NOTE(review): listing line 4731 is absent from this extract; `user_meta`, used
// below, is presumably declared there — confirm against the original file.
4732  const auto dashes = cat.getAllDashboardsMetadata();
4733  user_meta.userName = "";
4734  for (const auto dash : dashes) {
// NOTE(review): listing line 4735 (the opening of this privilege-check condition)
// is absent from this extract.
4736  *session_ptr, dash->dashboardId, AccessPrivileges::VIEW_DASHBOARD)) {
4737  // dashboardState is intentionally not populated here
4738  // for payload reasons
4739  // use get_dashboard call to get state
4740  dashboards.push_back(get_dashboard_impl(session_ptr, user_meta, dash, false));
4741  }
4742  }
4743 }
4744 
// Builds the Thrift TDashboard for a dashboard descriptor: copies the descriptor
// fields, computes the current user's permissions on the dashboard, and sets
// is_dash_shared from the object's role metadata.
// NOTE(review): listing lines 4745 (return type and function name) and 4747 (a
// parameter, presumably the `user_meta` object written below) are absent from
// this extract — confirm against the original file.
4746  const std::shared_ptr<Catalog_Namespace::SessionInfo const>& session_ptr,
4748  const DashboardDescriptor* dash,
4749  const bool populate_state) {
4750  auto const& cat = session_ptr->getCatalog();
// Look up the dashboard owner's metadata into user_meta.
4751  SysCatalog::instance().getMetadataForUserById(dash->userId, user_meta);
4752  auto objects_list = SysCatalog::instance().getMetadataForObject(
4753  cat.getCurrentDB().dbId,
4754  static_cast<int>(DBObjectType::DashboardDBObjectType),
4755  dash->dashboardId);
4756  TDashboard dashboard;
4757  dashboard.dashboard_name = dash->dashboardName;
// The state is only copied when the caller asks for it.
4758  if (populate_state) {
4759  dashboard.dashboard_state = dash->dashboardState;
4760  }
4761  dashboard.image_hash = dash->imageHash;
4762  dashboard.update_time = dash->updateTime;
4763  dashboard.dashboard_metadata = dash->dashboardMetadata;
4764  dashboard.dashboard_id = dash->dashboardId;
4765  dashboard.dashboard_owner = dash->user;
4766  TDashboardPermissions perms;
4767  // Super user has all permissions.
4768  if (session_ptr->get_currentUser().isSuper) {
4769  perms.create_ = true;
4770  perms.delete_ = true;
4771  perms.edit_ = true;
4772  perms.view_ = true;
4773  } else {
4774  // Collect all grants on current user
4775  // add them to the permissions.
4776  auto obj_to_find =
4777  DBObject(dashboard.dashboard_id, DBObjectType::DashboardDBObjectType);
4778  obj_to_find.loadKey(cat);
4779  std::vector<std::string> grantees =
4780  SysCatalog::instance().getRoles(true,
4781  session_ptr->get_currentUser().isSuper,
4782  session_ptr->get_currentUser().userName);
// OR together the privileges found on each grantee returned by getRoles.
4783  for (const auto& grantee : grantees) {
4784  DBObject* object_found;
4785  auto* gr = SysCatalog::instance().getGrantee(grantee);
4786  if (gr && (object_found = gr->findDbObject(obj_to_find.getObjectKey(), true))) {
4787  const auto obj_privs = object_found->getPrivileges();
4788  perms.create_ |= obj_privs.hasPermission(DashboardPrivileges::CREATE_DASHBOARD);
4789  perms.delete_ |= obj_privs.hasPermission(DashboardPrivileges::DELETE_DASHBOARD);
4790  perms.edit_ |= obj_privs.hasPermission(DashboardPrivileges::EDIT_DASHBOARD);
4791  perms.view_ |= obj_privs.hasPermission(DashboardPrivileges::VIEW_DASHBOARD);
4792  }
4793  }
4794  }
4795  dashboard.dashboard_permissions = perms;
// Not shared when there is no role entry, or the only entry is the owner's own.
4796  if (objects_list.empty() ||
4797  (objects_list.size() == 1 && objects_list[0]->roleName == user_meta.userName)) {
4798  dashboard.is_dash_shared = false;
4799  } else {
4800  dashboard.is_dash_shared = true;
4801  }
4802  return dashboard;
4803 }
4804 
4805 namespace dbhandler {
4806 bool is_info_schema_db(const std::string& db_name) {
4807  return (db_name == shared::kInfoSchemaDbName &&
4808  SysCatalog::instance().hasExecutedMigration(shared::kInfoSchemaMigrationName));
4809 }
4810 
4811 void check_not_info_schema_db(const std::string& db_name, bool throw_db_exception) {
4812  if (is_info_schema_db(db_name)) {
4813  std::string error_message{"Write requests/queries are not allowed in the " +
4814  shared::kInfoSchemaDbName + " database."};
4815  if (throw_db_exception) {
4816  THROW_DB_EXCEPTION(error_message)
4817  } else {
4818  throw std::runtime_error(error_message);
4819  }
4820  }
4821 }
4822 } // namespace dbhandler
4823 
// Creates a dashboard owned by the session's current user and returns the id
// produced by Catalog::createDashboard.
// Raises a DB exception when the privilege check fails, when a dashboard with the
// same name already exists for this user, or when the catalog calls throw.
4824 int32_t DBHandler::create_dashboard(const TSessionId& session_id_or_json,
4825  const std::string& dashboard_name,
4826  const std::string& dashboard_state,
4827  const std::string& image_hash,
4828  const std::string& dashboard_metadata) {
4829  heavyai::RequestInfo const request_info(session_id_or_json);
4830  SET_REQUEST_ID(request_info.requestId());
4831  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4832  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4833  auto session_ptr = stdlog.getConstSessionInfo();
4834  CHECK(session_ptr);
4835  check_read_only("create_dashboard");
4836  auto& cat = session_ptr->getCatalog();
// NOTE(review): listing lines 4837-4838 are absent from this extract; the brace
// below closes a block opened there — confirm against the original file.
4839  }
4840 
4841  if (!session_ptr->checkDBAccessPrivileges(DBObjectType::DashboardDBObjectType,
// NOTE(review): listing line 4842 (the remaining argument(s) of this call) is
// absent from this extract.
4843  THROW_DB_EXCEPTION("Not enough privileges to create a dashboard.");
4844  }
4845 
4846  if (dashboard_exists(cat, session_ptr->get_currentUser().userId, dashboard_name)) {
4847  THROW_DB_EXCEPTION("Dashboard with name: " + dashboard_name + " already exists.");
4848  }
4849 
// NOTE(review): listing line 4850 is absent from this extract; the descriptor
// `dd` populated below is presumably declared there.
4851  dd.dashboardName = dashboard_name;
4852  dd.dashboardState = dashboard_state;
4853  dd.imageHash = image_hash;
4854  dd.dashboardMetadata = dashboard_metadata;
4855  dd.userId = session_ptr->get_currentUser().userId;
4856  dd.user = session_ptr->get_currentUser().userName;
4857 
4858  try {
4859  auto id = cat.createDashboard(dd);
4860  // TODO: transactionally unsafe
4861  SysCatalog::instance().createDBObject(
4862  session_ptr->get_currentUser(), dashboard_name, DashboardDBObjectType, cat, id);
4863  return id;
4864  } catch (const std::exception& e) {
4865  THROW_DB_EXCEPTION(e.what());
4866  }
4867 }
4868 
// Replaces the contents (name, owner, state, image hash, metadata) of an existing
// dashboard identified by dashboard_id.
// Raises a DB exception when the edit-privilege check fails, when another
// dashboard of the current user already uses dashboard_name, when dashboard_owner
// is not a known user, or when Catalog::replaceDashboard throws.
4869 void DBHandler::replace_dashboard(const TSessionId& session_id_or_json,
4870  const int32_t dashboard_id,
4871  const std::string& dashboard_name,
4872  const std::string& dashboard_owner,
4873  const std::string& dashboard_state,
4874  const std::string& image_hash,
4875  const std::string& dashboard_metadata) {
4876  heavyai::RequestInfo const request_info(session_id_or_json);
4877  SET_REQUEST_ID(request_info.requestId());
4878  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4879  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4880  auto session_ptr = stdlog.getConstSessionInfo();
4881  CHECK(session_ptr);
4882  check_read_only("replace_dashboard");
4883  auto& cat = session_ptr->getCatalog();
// NOTE(review): listing lines 4884-4885 are absent from this extract; the brace
// below closes a block opened there — confirm against the original file.
4886  }
4887 
// NOTE(review): listing line 4888 (the opening of this privilege-check condition)
// is absent from this extract.
4889  *session_ptr, dashboard_id, AccessPrivileges::EDIT_DASHBOARD)) {
4890  THROW_DB_EXCEPTION("Not enough privileges to replace a dashboard.");
4891  }
4892 
// A same-named dashboard of the current user is only acceptable if it is the one
// being replaced.
4893  if (auto dash = cat.getMetadataForDashboard(
4894  std::to_string(session_ptr->get_currentUser().userId), dashboard_name)) {
4895  if (dash->dashboardId != dashboard_id) {
4896  THROW_DB_EXCEPTION("Dashboard with name: " + dashboard_name + " already exists.");
4897  }
4898  }
4899 
// NOTE(review): listing line 4900 is absent from this extract; the descriptor
// `dd` populated below is presumably declared there.
4901  dd.dashboardName = dashboard_name;
4902  dd.dashboardState = dashboard_state;
4903  dd.imageHash = image_hash;
4904  dd.dashboardMetadata = dashboard_metadata;
// NOTE(review): listing line 4905 is absent from this extract; `user`, filled by
// getMetadataForUser below, is presumably declared there.
4906  if (!SysCatalog::instance().getMetadataForUser(dashboard_owner, user)) {
4907  THROW_DB_EXCEPTION(std::string("Dashboard owner ") + dashboard_owner +
4908  " does not exist");
4909  }
4910  dd.userId = user.userId;
4911  dd.user = dashboard_owner;
4912  dd.dashboardId = dashboard_id;
4913 
4914  try {
4915  cat.replaceDashboard(dd);
4916  } catch (const std::exception& e) {
4917  THROW_DB_EXCEPTION(e.what());
4918  }
4919 }
4920 
4921 void DBHandler::delete_dashboard(const TSessionId& session_id_or_json,
4922  const int32_t dashboard_id) {
4923  delete_dashboards(session_id_or_json, {dashboard_id});
4924 }
4925 
// Deletes the dashboards with the given ids on behalf of the session's current
// user by delegating to Catalog::deleteMetadataForDashboards. Any std::exception
// from the catalog is re-raised as a DB exception.
4926 void DBHandler::delete_dashboards(const TSessionId& session_id_or_json,
4927  const std::vector<int32_t>& dashboard_ids) {
4928  heavyai::RequestInfo const request_info(session_id_or_json);
4929  SET_REQUEST_ID(request_info.requestId());
4930  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
4931  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
4932  auto session_ptr = stdlog.getConstSessionInfo();
4933  check_read_only("delete_dashboards");
4934  auto& cat = session_ptr->getCatalog();
// NOTE(review): listing lines 4935-4936 are absent from this extract; the brace
// below closes a block opened there — confirm against the original file.
4937  }
4938  // Checks will be performed in catalog
4939  try {
4940  cat.deleteMetadataForDashboards(dashboard_ids, session_ptr->get_currentUser());
4941  } catch (const std::exception& e) {
4942  THROW_DB_EXCEPTION(e.what());
4943  }
4944 }
4945 
// Validates a list of user/role names for sharing the given dashboard and returns
// the accepted names.
// Raises a DB exception when the dashboard id or any group name is unknown, and a
// std::runtime_error when the caller is neither the dashboard owner nor a super
// user.
4946 std::vector<std::string> DBHandler::get_valid_groups(const TSessionId& session_id_or_json,
4947  int32_t dashboard_id,
4948  std::vector<std::string> groups) {
4949  heavyai::RequestInfo const request_info(session_id_or_json);
4950  SET_REQUEST_ID(request_info.requestId());
4951  const auto session_info = get_session_copy(request_info.sessionId());
4952  auto& cat = session_info.getCatalog();
4953  auto dash = cat.getMetadataForDashboard(dashboard_id);
4954  if (!dash) {
4955  THROW_DB_EXCEPTION("Dashboard id " + std::to_string(dashboard_id) +
4956  " does not exist");
4957  } else if (session_info.get_currentUser().userId != dash->userId &&
4958  !session_info.get_currentUser().isSuper) {
4959  throw std::runtime_error(
4960  "User should be either owner of dashboard or super user to share/unshare it");
4961  }
4962  std::vector<std::string> valid_groups;
// NOTE(review): listing line 4963 is absent from this extract; `user_meta` is
// presumably declared there. In the visible code nothing writes user_meta.isSuper
// after it is reset to false, so the `!user_meta.isSuper` test below looks like it
// is always true — confirm against the original file.
4964  for (auto& group : groups) {
4965  user_meta.isSuper = false; // initialize default flag
4966  if (!SysCatalog::instance().getGrantee(group)) {
4967  THROW_DB_EXCEPTION("User/Role " + group + " does not exist");
4968  } else if (!user_meta.isSuper) {
4969  valid_groups.push_back(group);
4970  }
4971  }
4972  return valid_groups;
4973 }
4974 
4975 void DBHandler::validateGroups(const std::vector<std::string>& groups) {
4976  for (auto const& group : groups) {
4977  if (!SysCatalog::instance().getGrantee(group)) {
4978  THROW_DB_EXCEPTION("User/Role '" + group + "' does not exist");
4979  }
4980  }
4981 }
4982 
// Checks that every dashboard id exists and that the session's current user is
// its owner or a super user. Failures are grouped by error message and reported
// together in a single DB exception.
// NOTE(review): the first line of this signature (listing line 4983) is absent
// from this extract; the name is taken from the call site in
// shareOrUnshareDashboards — confirm against the original file.
4984  const Catalog_Namespace::SessionInfo& session_info,
4985  const std::vector<int32_t>& dashboard_ids) {
4986  auto& cat = session_info.getCatalog();
// Maps an error message to the dashboard ids that triggered it.
4987  std::map<std::string, std::list<int32_t>> errors;
4988  for (auto const& dashboard_id : dashboard_ids) {
4989  auto dashboard = cat.getMetadataForDashboard(dashboard_id);
4990  if (!dashboard) {
4991  errors["Dashboard id does not exist"].push_back(dashboard_id);
4992  } else if (session_info.get_currentUser().userId != dashboard->userId &&
4993  !session_info.get_currentUser().isSuper) {
4994  errors["User should be either owner of dashboard or super user to share/unshare it"]
4995  .push_back(dashboard_id);
4996  }
4997  }
4998  if (!errors.empty()) {
4999  std::stringstream error_stream;
5000  error_stream << "Share/Unshare dashboard(s) failed with error(s)\n";
5001  for (const auto& [error, id_list] : errors) {
5002  error_stream << "Dashboard ids " << join(id_list, ", ") << ": " << error << "\n";
5003  }
5004  THROW_DB_EXCEPTION(error_stream.str());
5005  }
5006 }
5007 
// Shared implementation behind share_dashboards / unshare_dashboards: grants
// (do_share == true) or revokes (do_share == false) the selected dashboard
// privileges on every listed dashboard for every listed user/role, in one batch.
// Raises a DB exception when no privilege flag is set; group and dashboard-id
// validation is delegated to validateGroups / validateDashboardIdsForSharing.
5008 void DBHandler::shareOrUnshareDashboards(const TSessionId& session_id,
5009  const std::vector<int32_t>& dashboard_ids,
5010  const std::vector<std::string>& groups,
5011  const TDashboardPermissions& permissions,
5012  const bool do_share) {
5013  auto stdlog = STDLOG(get_session_ptr(session_id));
5014  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5015  check_read_only(do_share ? "share_dashboards" : "unshare_dashboards");
5016  if (!permissions.create_ && !permissions.delete_ && !permissions.edit_ &&
5017  !permissions.view_) {
5018  THROW_DB_EXCEPTION("At least one privilege should be assigned for " +
5019  std::string(do_share ? "grants" : "revokes"));
5020  }
5021  auto session_ptr = stdlog.getConstSessionInfo();
5022  auto const& catalog = session_ptr->getCatalog();
5023  auto& sys_catalog = SysCatalog::instance();
5024  validateGroups(groups);
5025  validateDashboardIdsForSharing(*session_ptr, dashboard_ids);
5026  std::vector<DBObject> batch_objects;
// Build one DBObject per dashboard carrying the requested privileges.
// NOTE(review): the bodies of the four `if` blocks below (listing lines 5031,
// 5034, 5037, 5040) are absent from this extract; each presumably adds the
// matching privilege to `privs` — confirm against the original file.
5027  for (auto const& dashboard_id : dashboard_ids) {
5028  DBObject object(dashboard_id, DBObjectType::DashboardDBObjectType);
5029  AccessPrivileges privs;
5030  if (permissions.delete_) {
5032  }
5033  if (permissions.create_) {
5035  }
5036  if (permissions.edit_) {
5038  }
5039  if (permissions.view_) {
5041  }
5042  object.setPrivileges(privs);
5043  batch_objects.push_back(object);
5044  }
5045  if (do_share) {
5046  sys_catalog.grantDBObjectPrivilegesBatch(groups, batch_objects, catalog);
5047  } else {
5048  sys_catalog.revokeDBObjectPrivilegesBatch(groups, batch_objects, catalog);
5049  }
5050 }
5051 
// Thrift entry point for sharing dashboards: extracts the session id from the
// request info and forwards the arguments with a trailing `true`.
5052 void DBHandler::share_dashboards(const TSessionId& session_id_or_json,
5053  const std::vector<int32_t>& dashboard_ids,
5054  const std::vector<std::string>& groups,
5055  const TDashboardPermissions& permissions) {
5056  heavyai::RequestInfo const request_info(session_id_or_json);
5057  SET_REQUEST_ID(request_info.requestId());
// NOTE(review): listing line 5058 (the callee name of this call) is absent from
// this extract; presumably shareOrUnshareDashboards — confirm against the original.
5059  request_info.sessionId(), dashboard_ids, groups, permissions, true);
5060 }
5061 
5062 // NOOP: Grants not available for objects as of now
5063 void DBHandler::share_dashboard(const TSessionId& session_id_or_json,
5064  const int32_t dashboard_id,
5065  const std::vector<std::string>& groups,
5066  const std::vector<std::string>& objects,
5067  const TDashboardPermissions& permissions,
5068  const bool grant_role = false) {
5069  share_dashboards(session_id_or_json, {dashboard_id}, groups, permissions);
5070 }
5071 
// Thrift entry point for unsharing dashboards: extracts the session id from the
// request info and forwards the arguments with a trailing `false`.
5072 void DBHandler::unshare_dashboards(const TSessionId& session_id_or_json,
5073  const std::vector<int32_t>& dashboard_ids,
5074  const std::vector<std::string>& groups,
5075  const TDashboardPermissions& permissions) {
5076  heavyai::RequestInfo const request_info(session_id_or_json);
5077  SET_REQUEST_ID(request_info.requestId());
// NOTE(review): listing line 5078 (the callee name of this call) is absent from
// this extract; presumably shareOrUnshareDashboards — confirm against the original.
5079  request_info.sessionId(), dashboard_ids, groups, permissions, false);
5080 }
5081 
5082 void DBHandler::unshare_dashboard(const TSessionId& session_id_or_json,
5083  const int32_t dashboard_id,
5084  const std::vector<std::string>& groups,
5085  const std::vector<std::string>& objects,
5086  const TDashboardPermissions& permissions) {
5087  unshare_dashboards(session_id_or_json, {dashboard_id}, groups, permissions);
5088 }
5089 
// Appends to dashboard_grantees one entry per role/user that has an object-role
// record for the given dashboard, excluding the dashboard owner. Each entry
// carries the grantee name, its roleType flag and the four dashboard permissions.
// Raises a DB exception when the dashboard id is unknown.
// NOTE(review): the first line of this signature (listing line 5090) is absent
// from this extract — confirm the function name against the original file.
5091  std::vector<TDashboardGrantees>& dashboard_grantees,
5092  const TSessionId& session_id_or_json,
5093  const int32_t dashboard_id) {
5094  heavyai::RequestInfo const request_info(session_id_or_json);
5095  SET_REQUEST_ID(request_info.requestId());
5096  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
5097  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5098  auto session_ptr = stdlog.getConstSessionInfo();
5099  auto const& cat = session_ptr->getCatalog();
// NOTE(review): listing line 5100 is absent from this extract; `user_meta`, used
// below, is presumably declared there.
5101  auto dash = cat.getMetadataForDashboard(dashboard_id);
5102  if (!dash) {
5103  THROW_DB_EXCEPTION("Dashboard id " + std::to_string(dashboard_id) +
5104  " does not exist");
5105  } else if (session_ptr->get_currentUser().userId != dash->userId &&
5106  !session_ptr->get_currentUser().isSuper) {
// NOTE(review): listing line 5107 (the statement that raises this message) is
// absent from this extract.
5108  "User should be either owner of dashboard or super user to access grantees");
5109  }
5110  std::vector<ObjectRoleDescriptor*> objectsList;
5111  objectsList = SysCatalog::instance().getMetadataForObject(
5112  cat.getCurrentDB().dbId,
5113  static_cast<int>(DBObjectType::DashboardDBObjectType),
5114  dashboard_id); // By default the object type can only be dashboards
// Reset user_meta, then look up the dashboard owner so it can be masked below.
5115  user_meta.userId = -1;
5116  user_meta.userName = "";
5117  SysCatalog::instance().getMetadataForUserById(dash->userId, user_meta);
5118  for (auto object : objectsList) {
5119  if (user_meta.userName == object->roleName) {
5120  // Mask owner
5121  continue;
5122  }
5123  TDashboardGrantees grantee;
5124  TDashboardPermissions perm;
5125  grantee.name = object->roleName;
5126  grantee.is_user = object->roleType;
5127  perm.create_ = object->privs.hasPermission(DashboardPrivileges::CREATE_DASHBOARD);
5128  perm.delete_ = object->privs.hasPermission(DashboardPrivileges::DELETE_DASHBOARD);
5129  perm.edit_ = object->privs.hasPermission(DashboardPrivileges::EDIT_DASHBOARD);
5130  perm.view_ = object->privs.hasPermission(DashboardPrivileges::VIEW_DASHBOARD);
5131  grantee.permissions = perm;
5132  dashboard_grantees.push_back(grantee);
5133  }
5134 }
5135 
5136 void DBHandler::create_link(std::string& _return,
5137  const TSessionId& session_id_or_json,
5138  const std::string& view_state,
5139  const std::string& view_metadata) {
5140  heavyai::RequestInfo const request_info(session_id_or_json);
5141  SET_REQUEST_ID(request_info.requestId());
5142  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
5143  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5144  auto session_ptr = stdlog.getConstSessionInfo();
5145  // check_read_only("create_link");
5146  auto& cat = session_ptr->getCatalog();
5147 
5148  LinkDescriptor ld;
5149  ld.userId = session_ptr->get_currentUser().userId;
5150  ld.viewState = view_state;
5151  ld.viewMetadata = view_metadata;
5152 
5153  try {
5154  _return = cat.createLink(ld, 6);
5155  } catch (const std::exception& e) {
5156  THROW_DB_EXCEPTION(e.what());
5157  }
5158 }
5159 
// Builds a TColumnType whose column name, datum type and is_array flag are set
// from the arguments; all other fields keep their default-constructed values.
// NOTE(review): the first line of this signature (listing line 5160) is absent
// from this extract; it presumably declares the return type, the function name
// and the `type` parameter used below — confirm against the original file.
5161  const std::string& name,
5162  const bool is_array) {
5163  TColumnType ct;
5164  ct.col_name = name;
5165  ct.col_type.type = type;
5166  ct.col_type.is_array = is_array;
5167  return ct;
5168 }
5169 
// For a shapefile path (extension .shp, .shx or .dbf, compared case-insensitively),
// verifies that each of the three companion files exists, trying both the
// upper-case and the lower-case extension. Throws std::runtime_error naming the
// missing file. Paths with any other extension are accepted without checks.
5170 void DBHandler::check_geospatial_files(const boost::filesystem::path file_path,
5171  const import_export::CopyParams& copy_params) {
5172  const std::list<std::string> shp_ext{".shp", ".shx", ".dbf"};
5173  if (std::find(shp_ext.begin(),
5174  shp_ext.end(),
5175  boost::algorithm::to_lower_copy(file_path.extension().string())) !=
5176  shp_ext.end()) {
5177  for (auto ext : shp_ext) {
5178  auto aux_file = file_path;
// NOTE(review): listing lines 5179 and 5182 (the opening of the two existence
// checks combined with && below) are absent from this extract — confirm against
// the original file.
5180  aux_file.replace_extension(boost::algorithm::to_upper_copy(ext)).string(),
5181  copy_params) &&
5183  aux_file.replace_extension(ext).string(), copy_params)) {
5184  throw std::runtime_error("required file for shapefile does not exist: " +
5185  aux_file.filename().string());
5186  }
5187  }
5188  }
5189 }
5190 
5191 void DBHandler::create_table(const TSessionId& session_id_or_json,
5192  const std::string& table_name,
5193  const TRowDescriptor& rd,
5194  const TCreateParams& create_params) {
5195  heavyai::RequestInfo request_info(session_id_or_json);
5196  SET_REQUEST_ID(request_info.requestId());
5197  auto stdlog = STDLOG("table_name", table_name);
5198  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5199  check_read_only("create_table");
5200 
5201  if (ImportHelpers::is_reserved_name(table_name)) {
5202  THROW_DB_EXCEPTION("Invalid table name (reserved keyword): " + table_name);
5203  } else if (table_name != ImportHelpers::sanitize_name(table_name)) {
5204  THROW_DB_EXCEPTION("Invalid characters in table name: " + table_name);
5205  }
5206 
5207  auto rds = rd;
5208 
5209  std::string stmt{"CREATE TABLE " + table_name};
5210  std::vector<std::string> col_stmts;
5211 
5212  for (auto col : rds) {
5213  if (ImportHelpers::is_reserved_name(col.col_name)) {
5214  THROW_DB_EXCEPTION("Invalid column name (reserved keyword): " + col.col_name);
5215  } else if (col.col_name != ImportHelpers::sanitize_name(col.col_name)) {
5216  THROW_DB_EXCEPTION("Invalid characters in column name: " + col.col_name);
5217  }
5218  if (col.col_type.type == TDatumType::INTERVAL_DAY_TIME ||
5219  col.col_type.type == TDatumType::INTERVAL_YEAR_MONTH) {
5220  THROW_DB_EXCEPTION("Unsupported type: " + thrift_to_name(col.col_type) +
5221  " for column: " + col.col_name);
5222  }
5223 
5224  if (col.col_type.type == TDatumType::DECIMAL) {
5225  // if no precision or scale passed in set to default 14,7
5226  if (col.col_type.precision == 0 && col.col_type.scale == 0) {
5227  col.col_type.precision = 14;
5228  col.col_type.scale = 7;
5229  }
5230  }
5231 
5232  std::string col_stmt;
5233  col_stmt.append(col.col_name + " " + thrift_to_name(col.col_type));
5234  if (col.__isset.default_value) {
5235  col_stmt.append(" DEFAULT " + col.default_value);
5236  }
5237 
5238  // As of 2016-06-27 the Immerse v1 frontend does not explicitly set the
5239  // `nullable` argument, leading this to default to false. Uncomment for v2.
5240  // if (!col.col_type.nullable) col_stmt.append(" NOT NULL");
5241 
5242  if (thrift_to_encoding(col.col_type.encoding) != kENCODING_NONE) {
5243  col_stmt.append(" ENCODING " + thrift_to_encoding_name(col.col_type));
5244  if (thrift_to_encoding(col.col_type.encoding) == kENCODING_DICT ||
5245  thrift_to_encoding(col.col_type.encoding) == kENCODING_FIXED ||
5246  thrift_to_encoding(col.col_type.encoding) == kENCODING_GEOINT ||
5247  thrift_to_encoding(col.col_type.encoding) == kENCODING_DATE_IN_DAYS) {
5248  col_stmt.append("(" + std::to_string(col.col_type.comp_param) + ")");
5249  }
5250  } else if (col.col_type.type == TDatumType::STR) {
5251  // non DICT encoded strings
5252  col_stmt.append(" ENCODING NONE");
5253  } else if (col.col_type.type == TDatumType::POINT ||
5254  col.col_type.type == TDatumType::MULTIPOINT ||
5255  col.col_type.type == TDatumType::LINESTRING ||
5256  col.col_type.type == TDatumType::MULTILINESTRING ||
5257  col.col_type.type == TDatumType::POLYGON ||
5258  col.col_type.type == TDatumType::MULTIPOLYGON) {
5259  // non encoded compressable geo
5260  if (col.col_type.scale == 4326) {
5261  col_stmt.append(" ENCODING NONE");
5262  }
5263  }
5264  col_stmts.push_back(col_stmt);
5265  }
5266 
5267  stmt.append(" (" + boost::algorithm::join(col_stmts, ", ") + ")");
5268 
5269  if (create_params.is_replicated) {
5270  stmt.append(" WITH (PARTITIONS = 'REPLICATED')");
5271  }
5272 
5273  stmt.append(";");
5274 
5275  TQueryResult ret;
5276  request_info.setRequestId(logger::request_id());
5277  sql_execute(ret, request_info.json(), stmt, true, "", -1, -1);
5278 }
5279 
// Thrift endpoint: loads data into the existing table table_name, either from the file
// named by file_name_in or, for the ODBC source type, from copy_params.sql_select.
//
// Visible behavior:
//  - session_id_or_json is parsed by heavyai::RequestInfo for the session/request ids.
//  - The call is refused in read-only mode (check_read_only) and requires load
//    privileges on the table (check_table_load_privileges).
//  - A file name that does not start with "s3://" and is not absolute is resolved to
//    import_path_ / sha256-hex(session id) / <basename>; a non-s3 path that does not
//    exist is reported with THROW_DB_EXCEPTION.
//  - A '\0' delimiter is replaced by ',' (or by a tab when the extension is ".tsv").
//  - A TDBException propagates unchanged; any other std::exception is re-raised through
//    THROW_DB_EXCEPTION with its what() text.
//
// NOTE(review): several listing lines are absent from this extract (5296, 5298, 5302,
// 5308, 5313, 5320, 5338). Among them are the declarations of `executor` and
// `copy_params` and the conditions guarding the enroll/clear calls; confirm against the
// full file before relying on the details below.
5280 void DBHandler::import_table(const TSessionId& session_id_or_json,
5281  const std::string& table_name,
5282  const std::string& file_name_in,
5283  const TCopyParams& cp) {
5284  try {
5285  heavyai::RequestInfo const request_info(session_id_or_json);
5286  SET_REQUEST_ID(request_info.requestId());
5287  auto stdlog =
5288  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
5289  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5290  auto session_ptr = stdlog.getConstSessionInfo();
5291  check_read_only("import_table");
5292  LOG(INFO) << "import_table " << table_name << " from " << file_name_in;
5293 
5294  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
5295  auto& cat = session_ptr->getCatalog();
 // start_time is passed to both the enroll call and the later clear call
5297  auto start_time = ::toString(std::chrono::system_clock::now());
5299  executor->enrollQuerySession(request_info.sessionId(),
5300  "IMPORT_TABLE",
5301  start_time,
5303  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
5304  }
5305 
 // scope guard: the lambda body runs clearQuerySessionStatus for this session
5306  ScopeGuard clearInterruptStatus = [executor, &request_info, &start_time] {
5307  // reset the runtime query interrupt status
5309  executor->clearQuerySessionStatus(request_info.sessionId(), start_time);
5310  }
5311  };
5312  const auto td_with_lock =
5314  cat, table_name);
5315  const auto td = td_with_lock();
5316  CHECK(td);
5317  check_table_load_privileges(*session_ptr, table_name);
5318 
 // copy_from_source ends up as either the SQL select (ODBC) or the resolved file path
5319  std::string copy_from_source;
5321  if (copy_params.source_type == import_export::SourceType::kOdbc) {
5322  copy_from_source = copy_params.sql_select;
5323  } else {
5324  std::string file_name{file_name_in};
5325  auto file_path = boost::filesystem::path(file_name);
5326  if (!boost::istarts_with(file_name, "s3://")) {
 // relative local paths are looked up in the per-session import directory
5327  if (!boost::filesystem::path(file_name).is_absolute()) {
5328  file_path = import_path_ /
5329  picosha2::hash256_hex_string(request_info.sessionId()) /
5330  boost::filesystem::path(file_name).filename();
5331  file_name = file_path.string();
5332  }
5333  if (!shared::file_or_glob_path_exists(file_path.string())) {
5334  THROW_DB_EXCEPTION("File or directory \"" + file_path.string() +
5335  "\" does not exist.");
5336  }
5337  }
5339 
5340  // TODO(andrew): add delimiter detection to Importer
5341  if (copy_params.delimiter == '\0') {
5342  copy_params.delimiter = ',';
5343  if (boost::filesystem::path(file_path).extension() == ".tsv") {
5344  copy_params.delimiter = '\t';
5345  }
5346  }
5347  copy_from_source = file_path.string();
5348  }
5349  auto const load_tag = get_import_tag("import_table", table_name, copy_from_source);
5350  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
 // scope guard: logs the matching "finish_" entry
5351  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
5352  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
5353  };
5354  const auto insert_data_lock = lockmgr::InsertDataLockMgr::getWriteLockForTable(
5355  session_ptr->getCatalog(), table_name);
5356  std::unique_ptr<import_export::AbstractImporter> importer;
5357  importer = import_export::create_importer(cat, td, copy_from_source, copy_params);
 // ms is divided by 1000.0 below to report seconds
5358  auto ms = measure<>::execution([&]() { importer->import(session_ptr.get()); });
5359  LOG(INFO) << "Total Import Time: " << (double)ms / 1000.0 << " Seconds.";
5360  } catch (const TDBException& e) {
5361  throw;
5362  } catch (const std::exception& e) {
5363  THROW_DB_EXCEPTION(std::string(e.what()));
5364  }
5365 }
5366 
5367 namespace {
5368 
5369 // helper functions for error checking below
5370 // these would usefully be added as methods of TDatumType
5371 // but that's not possible as it's auto-generated by Thrift
5372 
 // Body of the geo-type predicate: true when t is any of the six geometry datum types
 // named here.
 // NOTE(review): the signature line (listing 5373) is absent from this extract. This is
 // presumably TTypeInfo_IsGeo, which the import code below calls with a column's
 // col_type.type; confirm name and parameter type against the full file.
5374  return (t == TDatumType::POLYGON || t == TDatumType::MULTIPOLYGON ||
5375  t == TDatumType::LINESTRING || t == TDatumType::MULTILINESTRING ||
5376  t == TDatumType::POINT || t == TDatumType::MULTIPOINT);
5377 }
5378 
 // Body of the type-to-text helper: streams t into a std::stringstream with operator<<
 // and returns the resulting string.
 // NOTE(review): the signature line (listing 5379) is absent from this extract. This is
 // presumably TTypeInfo_TypeToString, used below to build mismatch messages; confirm
 // against the full file.
5380  std::stringstream ss;
5381  ss << t;
5382  return ss.str();
5383 }
5384 
5385 std::string get_mismatch_attr_warning_text(const std::string& table_name,
5386  const std::string& file_path,
5387  const std::string& column_name,
5388  const std::string& attr,
5389  const std::string& got,
5390  const std::string& expected) {
5391  return "Issue encountered in geo/raster file '" + file_path +
5392  "' while appending to table '" + table_name + "'. Column '" + column_name +
5393  "' " + attr + " mismatch (got '" + got + "', expected '" + expected + "')";
5394 }
5395 
5396 } // namespace
5397 
// Reports, through THROW_DB_EXCEPTION, that a geo/raster file could not be appended
// because a column attribute differs from the existing table.
//  attr      attribute being compared (string, e.g. "type" or "name")
//  got       value found in the incoming data (string)
//  expected  value the existing table has (string)
// The expansion refers to `file_path`, `table_name` and `cd` by name, so those must be
// in scope at the point of use. It already ends in ';'.
5398 #define THROW_COLUMN_ATTR_MISMATCH_EXCEPTION(attr, got, expected) \
5399  THROW_DB_EXCEPTION("Could not append geo/raster file '" + \
5400  file_path.filename().string() + "' to table '" + table_name + \
5401  "'. Column '" + cd->columnName + "' " + attr + " mismatch (got '" + \
5402  got + "', expected '" + expected + "')");
5403 
// Direct Thrift endpoint for geo/raster import. Extracts the session and request ids
// from session_id_or_json and forwards everything to importGeoTableGlobFilterSort().
//
// NOTE(review): listing line 5418 (the argument between file_name and row_desc) is
// absent from this extract. It is presumably `cp` converted to
// import_export::CopyParams; confirm against the full file.
5404 void DBHandler::import_geo_table(const TSessionId& session_id_or_json,
5405  const std::string& table_name,
5406  const std::string& file_name,
5407  const TCopyParams& cp,
5408  const TRowDescriptor& row_desc,
5409  const TCreateParams& create_params) {
5410  // this is the direct Thrift endpoint
5411  // it does NOT support the separate FSI regex/filter/sort options
5412  // but it DOES support basic globbing specified in the filename itself
5413  heavyai::RequestInfo const request_info(session_id_or_json);
5414  SET_REQUEST_ID(request_info.requestId());
5415  importGeoTableGlobFilterSort(request_info.sessionId(),
5416  table_name,
5417  file_name,
5419  row_desc,
5420  create_params);
5421 }
5422 
// Expands file_name into a list of files using the regex-filter / sort options carried
// in copy_params, then processes every resulting file with the same table name, copy
// params, row descriptor and create params.
//
// If the expansion throws shared::FileNotFoundException, the original file_name is used
// as the only entry, so that a possibly remote path is still attempted.
//
// NOTE(review): listing lines 5437 and 5445 are absent from this extract. 5445 is
// presumably the start of the per-file call (importGeoTableSingle, judging by the
// argument list); confirm against the full file.
5423 void DBHandler::importGeoTableGlobFilterSort(const TSessionId& session_id,
5424  const std::string& table_name,
5425  const std::string& file_name,
5426  const import_export::CopyParams& copy_params,
5427  const TRowDescriptor& row_desc,
5428  const TCreateParams& create_params) {
5429  // this is called by the above direct Thrift endpoint
5430  // and also for a deferred COPY FROM for geo/raster
5431  // it DOES support the full FSI regex/filter/sort options
5432  std::vector<std::string> file_names;
5433  try {
5434  const shared::FilePathOptions options{copy_params.regex_path_filter,
5435  copy_params.file_sort_order_by,
5436  copy_params.file_sort_regex};
5438  file_names = shared::local_glob_filter_sort_files(file_name, options, false);
5439  } catch (const shared::FileNotFoundException& e) {
5440  // no files match, just try the original filename, might be remote
5441  file_names.push_back(file_name);
5442  }
5443  // import whatever we found
 // note: the loop variable deliberately or accidentally shadows the file_name parameter
5444  for (auto const& file_name : file_names) {
5446  session_id, table_name, file_name, copy_params, row_desc, create_params);
5447  }
5448 }
5449 
// Imports a single geo or raster file into table_name (or, for a multi-layer geo file,
// into one table per loadable layer named <table_name>_<sanitized layer name>).
//
// Visible flow:
//  1. Refuse in read-only mode; resolve a relative file name to
//     import_path_ / sha256-hex(session id) / <basename>.
//  2. Depending on copy_params.source_type, apply the VSI prefixes (and, for a geo
//     archive, pick the first geo file inside it); any other source type is rejected.
//  3. Check through GDAL that the file (and shapefile companions) exist.
//  4. For non-raster input, list the layers and keep the loadable ones, honoring an
//     explicit copy_params.geo_layer_name. For raster input a single placeholder layer
//     keyed by the file name is used.
//  5. Per layer: take row_desc if given, otherwise detect column types and create the
//     table when it does not exist; acquire locks; compare column count, geo-ness and
//     names with the existing table; check load privileges; run the GDAL import.
//  6. Per-layer failures are collected as text and reported together in one
//     THROW_DB_EXCEPTION at the end; otherwise success and total time are logged.
//
// NOTE(review): many listing lines are absent from this extract (among them the
// declaration of `executor`, the conditions around the enroll/clear calls, the case
// labels of the layer switch, and the first lines of several multi-line calls). The
// comments below describe only what is visible; confirm details against the full file.
5450 void DBHandler::importGeoTableSingle(const TSessionId& session_id,
5451  const std::string& table_name,
5452  const std::string& file_name_in,
5453  const import_export::CopyParams& copy_params,
5454  const TRowDescriptor& row_desc,
5455  const TCreateParams& create_params) {
5456  auto stdlog = STDLOG(get_session_ptr(session_id), "table_name", table_name);
5457  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5458  auto session_ptr = stdlog.getConstSessionInfo();
 // NOTE(review): the tag passed here is "import_table" although this is the geo import
 // path — confirm whether that is intentional
5459  check_read_only("import_table");
5460 
5461  auto& cat = session_ptr->getCatalog();
5463  auto start_time = ::toString(std::chrono::system_clock::now());
5465  executor->enrollQuerySession(session_id,
5466  "IMPORT_GEO_TABLE",
5467  start_time,
5469  QuerySessionStatus::QueryStatus::RUNNING_IMPORTER);
5470  }
5471 
5472  ScopeGuard clearInterruptStatus = [executor, &session_id, &start_time] {
5473  // reset the runtime query interrupt status
5475  executor->clearQuerySessionStatus(session_id, start_time);
5476  }
5477  };
5478 
 // file_name is rewritten step by step below; file_name_in keeps the caller's spelling
 // for messages and for column type detection
5479  std::string file_name{file_name_in};
5480 
5481  if (path_is_relative(file_name)) {
5482  // assume relative paths are relative to data_path / import / <session>
5483  auto file_path = import_path_ / picosha2::hash256_hex_string(session_id) /
5484  boost::filesystem::path(file_name).filename();
5485  file_name = file_path.string();
5486  }
5488 
5489  bool is_raster = false;
5490  if (copy_params.source_type == import_export::SourceType::kGeoFile) {
5491  if (is_a_supported_archive_file(file_name)) {
5492  // find the archive file
5493  add_vsi_network_prefix(file_name);
5494  if (!import_export::Importer::gdalFileExists(file_name, copy_params)) {
5495  THROW_DB_EXCEPTION("Archive does not exist: " + file_name_in);
5496  }
5497  // find geo file in archive
5498  add_vsi_archive_prefix(file_name);
5499  std::string geo_file = find_first_geo_file_in_archive(file_name, copy_params);
5500  // prepare to load that geo file
 // an empty result leaves file_name pointing at the archive itself
5501  if (geo_file.size()) {
5502  file_name = file_name + std::string("/") + geo_file;
5503  }
5504  } else {
5505  // prepare to load geo file directly
5506  add_vsi_network_prefix(file_name);
5507  add_vsi_geo_prefix(file_name);
5508  }
5509  } else if (copy_params.source_type == import_export::SourceType::kRasterFile) {
5510  // prepare to load geo raster file directly
5511  add_vsi_network_prefix(file_name);
5512  add_vsi_geo_prefix(file_name);
5513  is_raster = true;
5514  } else {
5515  THROW_DB_EXCEPTION("import_geo_table called with file_type other than GEO or RASTER");
5516  }
5517 
5518  // log what we're about to try to do
5519  VLOG(1) << "import_geo_table: Original filename: " << file_name_in;
5520  VLOG(1) << "import_geo_table: Actual filename: " << file_name;
5521  VLOG(1) << "import_geo_table: Raster: " << is_raster;
5522  auto const load_tag = get_import_tag("import_geo_table", table_name, file_name);
5523  log_system_cpu_memory_status("start_" + load_tag, session_ptr->getCatalog());
5524  ScopeGuard cleanup = [&load_tag, &session_ptr]() {
5525  log_system_cpu_memory_status("finish_" + load_tag, session_ptr->getCatalog());
5526  };
5527  // use GDAL to check the primary file exists (even if on S3 and/or in archive)
5528  auto file_path = boost::filesystem::path(file_name);
5529  if (!import_export::Importer::gdalFileOrDirectoryExists(file_name, copy_params)) {
5530  THROW_DB_EXCEPTION("File does not exist: " + file_path.filename().string());
5531  }
5532 
5533  // use GDAL to check any dependent files exist (ditto)
5534  try {
5535  check_geospatial_files(file_path, copy_params);
5536  } catch (const std::exception& e) {
5537  THROW_DB_EXCEPTION("import_geo_table error: " + std::string(e.what()));
5538  }
5539 
5540  // get layer info and deconstruct
5541  // in general, we will get a combination of layers of these four types:
5542  // EMPTY: no rows, report and skip
5543  // GEO: create a geo table from this
5544  // NON_GEO: create a regular table from this
5545  // UNSUPPORTED_GEO: report and skip
 // layer_info stays empty for raster input
5546  std::vector<import_export::Importer::GeoFileLayerInfo> layer_info;
5547  if (!is_raster) {
5548  try {
5549  layer_info =
5550  import_export::Importer::gdalGetLayersInGeoFile(file_name, copy_params);
5551  } catch (const std::exception& e) {
5552  THROW_DB_EXCEPTION("import_geo_table error: " + std::string(e.what()));
5553  }
5554  }
5555 
5556  // categorize the results
5557  using LayerNameToContentsMap =
5558  std::map<std::string, import_export::Importer::GeoFileLayerContents>;
5559  LayerNameToContentsMap load_layers;
5560  LOG_IF(INFO, layer_info.size() > 0)
5561  << "import_geo_table: Found the following layers in the geo file:";
5562  for (const auto& layer : layer_info) {
 // the case labels are absent from this extract; per the log texts only the first two
 // branches add the layer to load_layers
5563  switch (layer.contents) {
5565  LOG(INFO) << "import_geo_table: '" << layer.name
5566  << "' (will import as geo table)";
5567  load_layers[layer.name] = layer.contents;
5568  break;
5570  LOG(INFO) << "import_geo_table: '" << layer.name
5571  << "' (will import as regular table)";
5572  load_layers[layer.name] = layer.contents;
5573  break;
5575  LOG(WARNING) << "import_geo_table: '" << layer.name
5576  << "' (will not import, unsupported geo type)";
5577  break;
5579  LOG(INFO) << "import_geo_table: '" << layer.name << "' (ignoring, empty)";
5580  break;
5581  default:
5582  break;
5583  }
5584  }
5585 
5586  // if nothing is loadable, stop now
5587  if (!is_raster && load_layers.size() == 0) {
5588  THROW_DB_EXCEPTION("import_geo_table: No loadable layers found, aborting!");
5589  }
5590 
5591  // if we've been given an explicit layer name, check that it exists and is loadable
5592  // scan the original list, as it may exist but not have been gathered as loadable
5593  if (!is_raster && copy_params.geo_layer_name.size()) {
5594  bool found = false;
5595  for (const auto& layer : layer_info) {
5596  if (copy_params.geo_layer_name == layer.name) {
5599  // forget all the other layers and just load this one
5600  load_layers.clear();
5601  load_layers[layer.name] = layer.contents;
5602  found = true;
5603  break;
5604  } else if (layer.contents ==
5606  THROW_DB_EXCEPTION("import_geo_table: Explicit geo layer '" +
5607  copy_params.geo_layer_name + "' has unsupported geo type!");
5608  } else if (layer.contents ==
5610  THROW_DB_EXCEPTION("import_geo_table: Explicit geo layer '" +
5611  copy_params.geo_layer_name + "' is empty!");
5612  }
5613  }
5614  }
5615  if (!found) {
5616  THROW_DB_EXCEPTION("import_geo_table: Explicit geo layer '" +
5617  copy_params.geo_layer_name + "' not found!");
5618  }
5619  }
5620 
5621  // Immerse import of multiple layers is not yet supported
5622  // @TODO fix this!
5623  if (!is_raster && row_desc.size() > 0 && load_layers.size() > 1) {
5625  "import_geo_table: Multi-layer geo import not yet supported from Immerse!");
5626  }
5627 
5628  // one definition of layer table name construction
5629  // we append the layer name if we're loading more than one table
5630  auto construct_layer_table_name = [&load_layers](const std::string& table_name,
5631  const std::string& layer_name) {
5632  if (load_layers.size() > 1) {
5633  auto sanitized_layer_name = ImportHelpers::sanitize_name(layer_name);
5634  if (sanitized_layer_name != layer_name) {
5635  LOG(INFO) << "import_geo_table: Using sanitized layer name '"
5636  << sanitized_layer_name << "' for table name";
5637  }
5638  return table_name + "_" + sanitized_layer_name;
5639  }
5640  return table_name;
5641  };
5642 
5643  // if we're importing multiple tables, then NONE of them must exist already
5644  if (!is_raster && load_layers.size() > 1) {
5645  for (const auto& layer : load_layers) {
5646  // construct table name
5647  auto this_table_name = construct_layer_table_name(table_name, layer.first);
5648 
5649  // table must not exist
5650  if (cat.getMetadataForTable(this_table_name)) {
5651  THROW_DB_EXCEPTION("import_geo_table: Table '" + this_table_name +
5652  "' already exists, aborting!");
5653  }
5654  }
5655  }
5656 
5657  // prepare to gather errors that would otherwise be exceptions, as we can only throw
5658  // one
5659  std::vector<std::string> caught_exception_messages;
5660 
5661  // prepare to time multi-layer import
5662  double total_import_ms = 0.0;
5663 
5664  // for geo raster, we make a single dummy layer
5665  // the name is irrelevant, but set it to the filename so the log makes sense
5666  if (is_raster) {
5667  CHECK_EQ(load_layers.size(), 0u);
5668  load_layers.emplace(file_name, import_export::Importer::GeoFileLayerContents::GEO);
5669  }
5670 
5671  // now we're safe to start importing
5672  // we loop over the layers we're going to attempt to load
 // every failure inside this loop is recorded and followed by `continue`, so the
 // remaining layers are still attempted
5673  for (const auto& layer : load_layers) {
5674  // unpack
5675  const auto& layer_name = layer.first;
5676  const auto& layer_contents = layer.second;
5677  bool is_geo_layer =
5679 
5680  // construct table name again
5681  auto this_table_name = construct_layer_table_name(table_name, layer_name);
5682 
5683  // report
5684  LOG(INFO) << "import_geo_table: Creating table: " << this_table_name;
5685 
5686  // we need a row descriptor
5687  TRowDescriptor rd;
5688  if (row_desc.size() > 0) {
5689  // we have a valid RowDescriptor
5690  // this is the case where Immerse has already detected and created
5691  // all we need to do is import and trust that the data will match
5692  // use the provided row descriptor
5693  // table must already exist (we check this below)
5694  rd = row_desc;
5695  } else {
5696  // we don't have a RowDescriptor
5697  // we have to detect the file ourselves
5698  TDetectResult cds;
5699  TCopyParams cp_copy = copyparams_to_thrift(copy_params);
5700  cp_copy.geo_layer_name = layer_name;
5701  try {
 // detection is given the caller's original file name, not the prefixed one
5702  detect_column_types(cds, session_id, file_name_in, cp_copy);
5703  } catch (const std::exception& e) {
5704  // capture the error and abort this layer
5705  caught_exception_messages.emplace_back("Column Type Detection failed for '" +
5706  layer_name + "':" + e.what());
5707  continue;
5708  }
5709  rd = cds.row_set.row_desc;
5710 
5711  // then, if the table does NOT already exist, create it
5712  const TableDescriptor* td = cat.getMetadataForTable(this_table_name);
5713  if (!td) {
5714  try {
5715  create_table(session_id, this_table_name, rd, create_params);
5716  } catch (const std::exception& e) {
5717  // capture the error and abort this layer
5718  caught_exception_messages.emplace_back("Failed to create table for Layer '" +
5719  layer_name + "':" + e.what());
5720  continue;
5721  }
5722  }
5723  }
5724 
5725  // match locking sequence for CopyTableStmt::execute
5726  auto execute_read_lock = legacylockmgr::getExecuteReadLock();
5727 
5728  const TableDescriptor* td{nullptr};
5729  std::unique_ptr<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>> td_with_lock;
5730  std::unique_ptr<lockmgr::WriteLock> insert_data_lock;
5731 
5732  try {
5733  td_with_lock =
5734  std::make_unique<lockmgr::TableSchemaLockContainer<lockmgr::ReadLock>>(
5736  lockmgr::ReadLock>::acquireTableDescriptor(cat, this_table_name));
5737  td = (*td_with_lock)();
5738  insert_data_lock = std::make_unique<lockmgr::WriteLock>(
 // only std::runtime_error is handled here; it is reported as a missing table
5740  } catch (const std::runtime_error& e) {
5741  // capture the error and abort this layer
5742  std::string exception_message = "Could not import geo/raster file '" +
5743  file_path.filename().string() + "' to table '" +
5744  this_table_name +
5745  "'; table does not exist or failed to create.";
5746  caught_exception_messages.emplace_back(exception_message);
5747  continue;
5748  }
5749  CHECK(td);
5750 
5751  // then, we have to verify that the structure matches
5752  // get column descriptors (non-system, non-deleted, logical columns only)
5753  const auto col_descriptors =
5754  cat.getAllColumnMetadataForTable(td->tableId, false, false, false);
5755 
5756  // first, compare the column count
5757  if (col_descriptors.size() != rd.size()) {
5758  // capture the error and abort this layer
5759  std::string exception_message = "Could not append geo/raster file '" +
5760  file_path.filename().string() + "' to table '" +
5761  this_table_name + "'. Column count mismatch (got " +
5762  std::to_string(rd.size()) + ", expecting " +
5763  std::to_string(col_descriptors.size()) + ")";
5764  caught_exception_messages.emplace_back(exception_message);
5765  continue;
5766  }
5767 
5768  try {
5769  // validate column type match
5770  // also handle geo column name changes
 // rd and col_descriptors are walked in parallel; the equal-size check above keeps
 // rd[rd_index] in range
5771  int rd_index = 0;
5772  for (auto const* cd : col_descriptors) {
5773  auto const cd_col_type = populateThriftColumnType(&cat, cd);
5774 
5775  // for types, all we care about is that the got and expected types are either both
5776  // geo or both non-geo, and if they're geo that the exact geo type matches
5777  auto const gtype = rd[rd_index].col_type.type; // importer type
5778  auto const etype = cd_col_type.col_type.type; // existing table type
5779  if (TTypeInfo_IsGeo(gtype) && TTypeInfo_IsGeo(etype)) {
5780  if (gtype != etype) {
5782  "type", TTypeInfo_TypeToString(gtype), TTypeInfo_TypeToString(etype));
5783  }
5784  } else if (TTypeInfo_IsGeo(gtype) != TTypeInfo_IsGeo(etype)) {
5786  "type", TTypeInfo_TypeToString(gtype), TTypeInfo_TypeToString(etype));
5787  }
5788 
5789  // for names, we keep the existing table geo column name (for example, to handle
5790  // the case where an existing table has a geo column with a legacy name), but all
5791  // other column names must match, otherwise the import will fail
5792  auto const gname = rd[rd_index].col_name; // importer name
5793  auto const ename = cd->columnName; // existing table name
5794  if (gname != ename) {
5795  if (TTypeInfo_IsGeo(gtype)) {
5796  LOG(INFO) << "import_geo_table: Renaming incoming geo column to match "
5797  "existing table column name '"
5798  << ename << "'";
5799  rd[rd_index].col_name = ename;
5800  } else {
 // raster input takes the branch below, which appears to feed
 // get_mismatch_attr_warning_text (listing line 5802 is absent from this extract);
 // non-raster input raises the mismatch exception instead
5801  if (is_raster) {
5803  table_name,
5804  file_path.filename().string(),
5805  cd->columnName,
5806  "name",
5807  gname,
5808  ename);
5809  } else {
5810  THROW_COLUMN_ATTR_MISMATCH_EXCEPTION("name", gname, ename);
5811  }
5812  }
5813  }
5814  rd_index++;
5815  }
5816  } catch (const std::exception& e) {
5817  // capture the error and abort this layer
5818  caught_exception_messages.emplace_back(e.what());
5819  continue;
5820  }
5821 
 // NOTE(review): when src_name is empty, the fallback sanitizes that same empty
 // src_name rather than col_name — confirm this is the intended mapping
5822  std::map<std::string, std::string> colname_to_src;
5823  for (auto r : rd) {
5824  colname_to_src[r.col_name] =
5825  r.src_name.length() > 0 ? r.src_name : ImportHelpers::sanitize_name(r.src_name);
5826  }
5827 
5828  try {
5829  check_table_load_privileges(*session_ptr, this_table_name);
5830  } catch (const std::exception& e) {
5831  // capture the error and abort this layer
5832  caught_exception_messages.emplace_back(e.what());
5833  continue;
5834  }
5835 
5836  if (!is_raster && is_geo_layer) {
5837  // Final check to ensure that we have exactly one geo column
5838  // before doing the actual import, in case the user naively
5839  // overrode the types in Immerse Preview (which as of 6/17/21
5840  // it still allows you to do). We should make Immerse more
5841  // robust and disallow re-typing of columns to/from geo types
5842  // completely. Currently, if multiple columns are re-typed
5843  // such that there is still exactly one geo column (but it's
5844  // the wrong one) then this test will pass, but the import
5845  // will then reject some (or more likely all) of the rows.
5846  int num_geo_columns{0};
5847  for (auto const& col : rd) {
5848  if (TTypeInfo_IsGeo(col.col_type.type)) {
5849  num_geo_columns++;
5850  }
5851  }
5852  if (num_geo_columns != 1) {
5853  std::string exception_message =
5854  "Table '" + this_table_name +
5855  "' must have exactly one geo column. Import aborted!";
5856  caught_exception_messages.emplace_back(exception_message);
5857  continue;
5858  }
5859  }
5860 
5861  std::string layer_or_raster = is_raster ? "Raster" : "Layer";
5862 
5863  try {
5864  // import this layer only?
5865  import_export::CopyParams copy_params_copy = copy_params;
5866  copy_params_copy.geo_layer_name = layer_name;
5867 
5868  // create an importer
5869  std::unique_ptr<import_export::Importer> importer;
5870  importer.reset(
5871  new import_export::Importer(cat, td, file_path.string(), copy_params_copy));
5872 
5873  // import
5874  auto ms = measure<>::execution(
5875  [&]() { importer->importGDAL(colname_to_src, session_ptr.get(), is_raster); });
5876  LOG(INFO) << "Import of " << layer_or_raster << " '" << layer_name << "' took "
5877  << (double)ms / 1000.0 << "s";
5878  total_import_ms += ms;
5879  } catch (const std::exception& e) {
5880  std::string exception_message = "Import of " + layer_or_raster + " '" +
5881  this_table_name + "' failed: " + e.what();
5882  caught_exception_messages.emplace_back(exception_message);
5883  continue;
5884  }
5885  }
5886 
5887  // did we catch any exceptions?
5888  if (caught_exception_messages.size()) {
5889  // combine all the strings into one and throw a single Thrift exception
5890  std::string combined_exception_message = "Failed to import geo/raster file: ";
5891  bool comma{false};
5892  for (const auto& message : caught_exception_messages) {
5893  combined_exception_message += comma ? (", " + message) : message;
5894  comma = true;
5895  }
5896  THROW_DB_EXCEPTION(combined_exception_message);
5897  } else {
5898  // report success and total time
5899  LOG(INFO) << "Import Successful!";
5900  LOG(INFO) << "Total Import Time: " << total_import_ms / 1000.0 << "s";
5901  }
5902 }
5903 
5904 #undef THROW_COLUMN_ATTR_MISMATCH_EXCEPTION
5905 
5906 void DBHandler::import_table_status(TImportStatus& _return,
5907  const TSessionId& session_id_or_json,
5908  const std::string& import_id) {
5909  heavyai::RequestInfo const request_info(session_id_or_json);
5910  SET_REQUEST_ID(request_info.requestId());
5911  auto stdlog =
5912  STDLOG(get_session_ptr(request_info.sessionId()), "import_table_status", import_id);
5913  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5914  auto is = import_export::Importer::get_import_status(import_id);
5915  _return.elapsed = is.elapsed.count();
5916  _return.rows_completed = is.rows_completed;
5917  _return.rows_estimated = is.rows_estimated;
5918  _return.rows_rejected = is.rows_rejected;
5919 }
5920 
 // Thrift endpoint body: writes into _return the path of the first geo file found
 // inside the archive archive_path_in.
 //
 // - A relative path is resolved to import_path_ / sha256-hex(session id) / <basename>
 //   and then passed to validate_import_file_path_if_local().
 // - If the path is a supported archive, it must exist (else THROW_DB_EXCEPTION) and the
 //   result is archive_path_in + "/" + <geo file>, or archive_path_in alone when no geo
 //   file was found.
 // - If the path is not a supported archive, archive_path_in is returned unchanged.
 // The returned text is always built from the caller's original spelling, never from
 // the prefixed working copy.
 //
 // NOTE(review): the first signature line (listing 5921) is absent from this extract.
 // The function name is inferred from the STDLOG tag and `_return` is presumably a
 // std::string& out-parameter; confirm against the full file.
5922  const TSessionId& session_id_or_json,
5923  const std::string& archive_path_in,
5924  const TCopyParams& copy_params) {
5925  heavyai::RequestInfo const request_info(session_id_or_json);
5926  SET_REQUEST_ID(request_info.requestId());
5927  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()),
5928  "get_first_geo_file_in_archive",
5929  archive_path_in);
5930  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5931 
 // working copy that receives the resolved path and the VSI prefixes
5932  std::string archive_path(archive_path_in);
5933 
5934  if (path_is_relative(archive_path)) {
5935  // assume relative paths are relative to data_path / import / <session>
5936  auto file_path = import_path_ /
5937  picosha2::hash256_hex_string(request_info.sessionId()) /
5938  boost::filesystem::path(archive_path).filename();
5939  archive_path = file_path.string();
5940  }
5941  validate_import_file_path_if_local(archive_path);
5942 
5943  if (is_a_supported_archive_file(archive_path)) {
5944  // find the archive file
5945  add_vsi_network_prefix(archive_path);
5946  if (!import_export::Importer::gdalFileExists(archive_path,
5947  thrift_to_copyparams(copy_params))) {
5948  THROW_DB_EXCEPTION("Archive does not exist: " + archive_path_in);
5949  }
5950  // find geo file in archive
5951  add_vsi_archive_prefix(archive_path);
5952  std::string geo_file =
5953  find_first_geo_file_in_archive(archive_path, thrift_to_copyparams(copy_params));
5954  // what did we get?
5955  if (geo_file.size()) {
5956  // prepend it with the original path
5957  _return = archive_path_in + std::string("/") + geo_file;
5958  } else {
5959  // just return the original path
5960  _return = archive_path_in;
5961  }
5962  } else {
5963  // just return the original path
5964  _return = archive_path_in;
5965  }
5966 }
5967 
// Thrift endpoint: fills _return with the files contained in the archive
// archive_path_in, each prefixed with archive_path_in + '/'.
//
// - A relative path is resolved to import_path_ / sha256-hex(session id) / <basename>
//   and then passed to validate_import_file_path_if_local().
// - If the path is a supported archive it must exist, otherwise THROW_DB_EXCEPTION.
// - If the path is not a supported archive, _return is not modified here.
//
// NOTE(review): listing line 5998 (the start of the call that takes archive_path and
// the converted copy params) is absent from this extract. It presumably assigns the
// archive listing to _return; confirm against the full file.
5968 void DBHandler::get_all_files_in_archive(std::vector<std::string>& _return,
5969  const TSessionId& session_id_or_json,
5970  const std::string& archive_path_in,
5971  const TCopyParams& copy_params) {
5972  heavyai::RequestInfo const request_info(session_id_or_json);
5973  SET_REQUEST_ID(request_info.requestId());
5974  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()),
5975  "get_all_files_in_archive",
5976  archive_path_in);
5977  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
5978 
 // working copy that receives the resolved path and the VSI prefixes
5979  std::string archive_path(archive_path_in);
5980  if (path_is_relative(archive_path)) {
5981  // assume relative paths are relative to data_path / import / <session>
5982  auto file_path = import_path_ /
5983  picosha2::hash256_hex_string(request_info.sessionId()) /
5984  boost::filesystem::path(archive_path).filename();
5985  archive_path = file_path.string();
5986  }
5987  validate_import_file_path_if_local(archive_path);
5988 
5989  if (is_a_supported_archive_file(archive_path)) {
5990  // find the archive file
5991  add_vsi_network_prefix(archive_path);
5992  if (!import_export::Importer::gdalFileExists(archive_path,
5993  thrift_to_copyparams(copy_params))) {
5994  THROW_DB_EXCEPTION("Archive does not exist: " + archive_path_in);
5995  }
5996  // find all files in archive
5997  add_vsi_archive_prefix(archive_path);
5999  archive_path, thrift_to_copyparams(copy_params));
6000  // prepend them all with original path
 // the prefix is the caller's original spelling, not the VSI-prefixed working copy
6001  for (auto& s : _return) {
6002  s = archive_path_in + '/' + s;
6003  }
6004  }
6005 }
6006 
// Thrift endpoint: appends to _return one TGeoFileLayerInfo (name + contents category)
// for every layer GDAL reports in the geo file or archive file_name_in.
//
// - A relative path is resolved to import_path_ / sha256-hex(session id) / <basename>.
// - For a supported archive: the archive must exist (else THROW_DB_EXCEPTION) and the
//   first geo file inside it, if any, is the one inspected.
// - For any other path the VSI network and geo prefixes are applied.
// - A path that still does not exist after that is reported with THROW_DB_EXCEPTION.
// - A contents value that matches none of the cases hits CHECK(false).
//
// NOTE(review): several listing lines are absent from this extract: 6019 (presumably
// the declaration of `copy_params` derived from `cp`), 6029, and the four case labels
// of the switch (6065, 6068, 6071, 6074). Confirm against the full file.
6007 void DBHandler::get_layers_in_geo_file(std::vector<TGeoFileLayerInfo>& _return,
6008  const TSessionId& session_id_or_json,
6009  const std::string& file_name_in,
6010  const TCopyParams& cp) {
6011  heavyai::RequestInfo const request_info(session_id_or_json);
6012  SET_REQUEST_ID(request_info.requestId());
6013  auto stdlog = STDLOG(
6014  get_session_ptr(request_info.sessionId()), "get_layers_in_geo_file", file_name_in);
6015  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
6016 
 // working copy; file_name_in is kept for the error messages
6017  std::string file_name(file_name_in);
6018 
6020 
6021  // handle relative paths
6022  if (path_is_relative(file_name)) {
6023  // assume relative paths are relative to data_path / import / <session>
6024  auto file_path = import_path_ /
6025  picosha2::hash256_hex_string(request_info.sessionId()) /
6026  boost::filesystem::path(file_name).filename();
6027  file_name = file_path.string();
6028  }
6030 
6031  // archive or file?
6032  if (is_a_supported_archive_file(file_name)) {
6033  // find the archive file
6034  add_vsi_network_prefix(file_name);
6035  if (!import_export::Importer::gdalFileExists(file_name, copy_params)) {
6036  THROW_DB_EXCEPTION("Archive does not exist: " + file_name_in);
6037  }
6038  // find geo file in archive
6039  add_vsi_archive_prefix(file_name);
6040  std::string geo_file = find_first_geo_file_in_archive(file_name, copy_params);
6041  // prepare to load that geo file
 // an empty result leaves file_name pointing at the archive itself
6042  if (geo_file.size()) {
6043  file_name = file_name + std::string("/") + geo_file;
6044  }
6045  } else {
6046  // prepare to load geo file directly
6047  add_vsi_network_prefix(file_name);
6048  add_vsi_geo_prefix(file_name);
6049  }
6050 
6051  // check the file actually exists
6052  if (!import_export::Importer::gdalFileOrDirectoryExists(file_name, copy_params)) {
6053  THROW_DB_EXCEPTION("Geo file/archive does not exist: " + file_name_in);
6054  }
6055 
6056  // find all layers
6057  auto internal_layer_info =
6058  import_export::Importer::gdalGetLayersInGeoFile(file_name, copy_params);
6059 
6060  // convert to Thrift type
6061  for (const auto& internal_layer : internal_layer_info) {
6062  TGeoFileLayerInfo layer;
6063  layer.name = internal_layer.name;
 // maps the importer's contents value to the Thrift enum (case labels not visible here)
6064  switch (internal_layer.contents) {
6066  layer.contents = TGeoFileLayerContents::EMPTY;
6067  break;
6069  layer.contents = TGeoFileLayerContents::GEO;
6070  break;
6072  layer.contents = TGeoFileLayerContents::NON_GEO;
6073  break;
6075  layer.contents = TGeoFileLayerContents::UNSUPPORTED_GEO;
6076  break;
6077  default:
6078  CHECK(false);
6079  }
6080  _return.emplace_back(layer); // no suitable constructor to just pass parameters
6081  }
6082 }
6083 
6084 void DBHandler::start_heap_profile(const TSessionId& session_id_or_json) {
6085  heavyai::RequestInfo const request_info(session_id_or_json);
6086  SET_REQUEST_ID(request_info.requestId());
6087  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
6088 #ifdef HAVE_PROFILER
6089  if (IsHeapProfilerRunning()) {
6090  THROW_DB_EXCEPTION("Profiler already started");
6091  }
6092  HeapProfilerStart("omnisci");
6093 #else
6094  THROW_DB_EXCEPTION("Profiler not enabled");
6095 #endif // HAVE_PROFILER
6096 }
6097 
6098 void DBHandler::stop_heap_profile(const TSessionId& session_id_or_json) {
6099  heavyai::RequestInfo const request_info(session_id_or_json);
6100  SET_REQUEST_ID(request_info.requestId());
6101  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
6102 #ifdef HAVE_PROFILER
6103  if (!IsHeapProfilerRunning()) {
6104  THROW_DB_EXCEPTION("Profiler not running");
6105  }
6106  HeapProfilerStop();
6107 #else
6108  THROW_DB_EXCEPTION("Profiler not enabled");
6109 #endif // HAVE_PROFILER
6110 }
6111 
6113  TSessionId const& session_id) const {
6115  auto const itr = calcite_sessions_.find(session_id);
6116  return itr == calcite_sessions_.end() ? nullptr : itr->second;
6117 }
6118 
6119 void DBHandler::get_heap_profile(std::string& profile,
6120  const TSessionId& session_id_or_json) {
6121  heavyai::RequestInfo const request_info(session_id_or_json);
6122  SET_REQUEST_ID(request_info.requestId());
6123  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
6124 #ifdef HAVE_PROFILER
6125  if (!IsHeapProfilerRunning()) {
6126  THROW_DB_EXCEPTION("Profiler not running");
6127  }
6128  auto profile_buff = GetHeapProfile();
6129  profile = profile_buff;
6130  free(profile_buff);
6131 #else
6132  THROW_DB_EXCEPTION("Profiler not enabled");
6133 #endif // HAVE_PROFILER
6134 }
6135 
6137  if (session_id.length() == Catalog_Namespace::CALCITE_SESSION_ID_LENGTH) {
6139  if (auto it = calcite_sessions_.find(session_id); it != calcite_sessions_.end()) {
6140  return *it->second;
6141  }
6142  throw std::runtime_error("No session with id " + session_id);
6143  }
6144  return sessions_store_->getSessionCopy(session_id);
6145 }
6146 
6147 std::shared_ptr<Catalog_Namespace::SessionInfo> DBHandler::get_session_ptr(
6148  const TSessionId& session_id) {
6149  // Note(Wamsi): This method will give you a shared_ptr to master SessionInfo itself.
6150  // Should be used only when you need to make updates to original SessionInfo object.
6151  // Currently used by `update_session_last_used_duration`
6152 
6153  // 1) `session_id` will be empty during intial connect. 2)`sessionmapd iterator` will
6154  // be invalid during disconnect. SessionInfo will be erased from map by the time it
6155  // reaches here. In both the above cases, we would return `nullptr` and can skip
6156  // SessionInfo updates.
6157  if (session_id.empty()) {
6158  return nullptr;
6159  }
6160  auto ptr = session_id.length() == Catalog_Namespace::CALCITE_SESSION_ID_LENGTH
6161  ? findCalciteSession(session_id)
6162  : sessions_store_->get(session_id);
6163  if (!ptr) {
6164  THROW_DB_EXCEPTION("Session not valid or expired.");
6165  }
6166  return ptr;
6167 }
6168 
6170  const Catalog_Namespace::SessionInfo& session_info,
6171  const std::string& table_name) {
6172  auto user_metadata = session_info.get_currentUser();
6173  auto& cat = session_info.getCatalog();
6174  DBObject dbObject(table_name, TableDBObjectType);
6175  dbObject.loadKey(cat);
6177  std::vector<DBObject> privObjects;
6178  privObjects.push_back(dbObject);
6179  if (!SysCatalog::instance().checkPrivileges(user_metadata, privObjects)) {
6180  THROW_DB_EXCEPTION("Violation of access privileges: user " +
6181  user_metadata.userLoggable() +
6182  " has no insert privileges for table " + table_name + ".");
6183  }
6184 }
6185 
6187  const TExecuteMode::type mode) {
6188  const std::string user_name = session_ptr->get_currentUser().userLoggable();
6189  switch (mode) {
6190  case TExecuteMode::GPU:
6191  if (cpu_mode_only_) {
6192  TDBException e;
6193  e.error_msg = "Cannot switch to GPU mode in a server started in CPU-only mode.";
6194  throw e;
6195  }
6197  LOG(INFO) << "User " << user_name << " sets GPU mode.";
6198  break;
6199  case TExecuteMode::CPU:
6201  LOG(INFO) << "User " << user_name << " sets CPU mode.";
6202  break;
6203  }
6204 }
6205 
// Executes the relational-algebra plan `query_ra` through a RelAlgExecutor and stores
// the outcome in `_return`.
//
// Returns the pushed-down filter info reported by the execution result when it is
// non-empty; otherwise returns an empty vector. The execution time recorded on
// `_return` is the previous value plus the measured run time, minus the result set's
// queue time when rows are available.
//
// NOTE(review): many initializer lines of this function are absent from this listing
// (gaps in the embedded numbering), so the CompilationOptions / ExecutionOptions
// aggregates below are incomplete as shown.
6206 std::vector<PushedDownFilterInfo> DBHandler::execute_rel_alg(
6207  ExecutionResult& _return,
6208  QueryStateProxy query_state_proxy,
6209  const std::string& query_ra,
6210  const bool column_format,
6211  const ExecutorDeviceType executor_device_type,
6212  const int32_t first_n,
6213  const int32_t at_most_n,
6214  const bool just_validate,
6215  const bool find_push_down_candidates,
6216  const ExplainInfo& explain_info,
6217  const std::optional<size_t> executor_index) const {
6218  query_state::Timer timer = query_state_proxy.createTimer(__func__);
6219  VLOG(1) << "Table Schema Locks:\n" << lockmgr::TableSchemaLockMgr::instance();
6220  VLOG(1) << "Table Data Locks:\n" << lockmgr::TableDataLockMgr::instance();
// use the caller-selected executor when one is given, else the unitary executor
6221  auto executor = Executor::getExecutor(
6222  executor_index ? *executor_index : Executor::UNITARY_EXECUTOR_ID,
6223  jit_debug_ ? "/tmp" : "",
6224  jit_debug_ ? "mapdquery" : "",
6226  RelAlgExecutor ra_executor(
6227  executor.get(), query_ra, query_state_proxy->shared_from_this());
6228  CompilationOptions co = {executor_device_type,
6229  /*hoist_literals=*/true,
6232  /*allow_lazy_fetch=*/true,
6233  /*filter_on_deleted_column=*/true,
6234  explain_info.isOptimizedExplain()
// true for EXPLAIN-style and validate-only requests; used below to keep runtime
// query interrupt off for them
6238  auto validate_or_explain_query =
6239  explain_info.isJustExplain() || explain_info.isCalciteExplain() || just_validate;
6240  ExecutionOptions eo = {
6242  false,
6244  explain_info.isJustExplain(),
6245  allow_loop_joins_ || just_validate,
6247  jit_debug_,
6248  just_validate,
6251  find_push_down_candidates,
6252  explain_info.isCalciteExplain(),
6254  g_enable_runtime_query_interrupt && !validate_or_explain_query &&
6255  !query_state_proxy->getConstSessionInfo()->get_session_id().empty(),
// add the measured run time to whatever time `_return` already carries
6259  auto execution_time_ms =
6260  _return.getExecutionTime() + measure<>::execution([&]() {
6261  _return = ra_executor.executeRelAlgQuery(
6262  co, eo, explain_info.isPlanExplain(), explain_info.isVerbose(), nullptr);
6263  });
6264  // reduce execution time by the time spent during queue waiting
6265  const auto rs = _return.getRows();
6266  if (rs) {
6267  execution_time_ms -= rs->getQueueTime();
6268  }
6269  _return.setExecutionTime(execution_time_ms);
6270  const auto& filter_push_down_info = _return.getPushedDownFilterInfo();
6271  if (!filter_push_down_info.empty()) {
6272  return filter_push_down_info;
6273  }
// NOTE(review): the bodies of both branches below (listing lines 6275 and 6277) are
// absent from this listing.
6274  if (explain_info.isJustExplain()) {
6276  } else if (!explain_info.isCalciteExplain()) {
6278  }
6279  return {};
6280 }
6281 
6282 std::vector<TargetMetaInfo> DBHandler::getTargetMetaInfo(
6283  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const {
6284  std::vector<TargetMetaInfo> result;
6285  for (const auto& target : targets) {
6286  CHECK(target);
6287  CHECK(target->get_expr());
6288  result.emplace_back(target->get_resname(), target->get_expr()->get_type_info());
6289  }
6290  return result;
6291 }
6292 
6293 std::vector<std::string> DBHandler::getTargetNames(
6294  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const {
6295  std::vector<std::string> names;
6296  for (const auto& target : targets) {
6297  CHECK(target);
6298  CHECK(target->get_expr());
6299  names.push_back(target->get_resname());
6300  }
6301  return names;
6302 }
6303 
6304 std::vector<std::string> DBHandler::getTargetNames(
6305  const std::vector<TargetMetaInfo>& targets) const {
6306  std::vector<std::string> names;
6307  for (const auto& target : targets) {
6308  names.push_back(target.get_resname());
6309  }
6310  return names;
6311 }
6312 
6313 void DBHandler::convertRows(TQueryResult& _return,
6314  QueryStateProxy query_state_proxy,
6315  const std::vector<TargetMetaInfo>& targets,
6316  const ResultSet& results,
6317  const bool column_format,
6318  const int32_t first_n,
6319  const int32_t at_most_n) {
6320  query_state::Timer timer = query_state_proxy.createTimer(__func__);
6321  _return.row_set.row_desc = ThriftSerializers::target_meta_infos_to_thrift(targets);
6322  int32_t fetched{0};
6323  if (column_format) {
6324  _return.row_set.is_columnar = true;
6325  std::vector<TColumn> tcolumns(results.colCount());
6326  while (first_n == -1 || fetched < first_n) {
6327  const auto crt_row = results.getNextRow(true, true);
6328  if (crt_row.empty()) {
6329  break;
6330  }
6331  ++fetched;
6332  if (at_most_n >= 0 && fetched > at_most_n) {
6333  THROW_DB_EXCEPTION("The result contains more rows than the specified cap of " +
6334  std::to_string(at_most_n));
6335  }
6336  for (size_t i = 0; i < results.colCount(); ++i) {
6337  const auto agg_result = crt_row[i];
6338  value_to_thrift_column(agg_result, targets[i].get_type_info(), tcolumns[i]);
6339  }
6340  }
6341  for (size_t i = 0; i < results.colCount(); ++i) {
6342  _return.row_set.columns.push_back(tcolumns[i]);
6343  }
6344  } else {
6345  _return.row_set.is_columnar = false;
6346  while (first_n == -1 || fetched < first_n) {
6347  const auto crt_row = results.getNextRow(true, true);
6348  if (crt_row.empty()) {
6349  break;
6350  }
6351  ++fetched;
6352  if (at_most_n >= 0 && fetched > at_most_n) {
6353  THROW_DB_EXCEPTION("The result contains more rows than the specified cap of " +
6354  std::to_string(at_most_n));
6355  }
6356  TRow trow;
6357  trow.cols.reserve(results.colCount());
6358  for (size_t i = 0; i < results.colCount(); ++i) {
6359  const auto agg_result = crt_row[i];
6360  trow.cols.push_back(value_to_thrift(agg_result, targets[i].get_type_info()));
6361  }
6362  _return.row_set.rows.push_back(trow);
6363  }
6364  }
6365 }
6366 
6367 // create simple result set to return a single column result
6368 void DBHandler::createSimpleResult(TQueryResult& _return,
6369  const ResultSet& results,
6370  const bool column_format,
6371  const std::string label) {
6372  CHECK_EQ(size_t(1), results.rowCount());
6373  TColumnType proj_info;
6374  proj_info.col_name = label;
6375  proj_info.col_type.type = TDatumType::STR;
6376  proj_info.col_type.nullable = false;
6377  proj_info.col_type.is_array = false;
6378  _return.row_set.row_desc.push_back(proj_info);
6379  const auto crt_row = results.getNextRow(true, true);
6380  const auto tv = crt_row[0];
6381  CHECK(results.getNextRow(true, true).empty());
6382  const auto scalar_tv = boost::get<ScalarTargetValue>(&tv);
6383  CHECK(scalar_tv);
6384  const auto s_n = boost::get<NullableString>(scalar_tv);
6385  CHECK(s_n);
6386  const auto s = boost::get<std::string>(s_n);
6387  CHECK(s);
6388  if (column_format) {
6389  TColumn tcol;
6390  tcol.data.str_col.push_back(*s);
6391  tcol.nulls.push_back(false);
6392  _return.row_set.is_columnar = true;
6393  _return.row_set.columns.push_back(tcol);
6394  } else {
6395  TDatum explanation;
6396  explanation.val.str_val = *s;
6397  explanation.is_null = false;
6398  TRow trow;
6399  trow.cols.push_back(explanation);
6400  _return.row_set.is_columnar = false;
6401  _return.row_set.rows.push_back(trow);
6402  }
6403 }
6404 
6405 void DBHandler::convertExplain(TQueryResult& _return,
6406  const ResultSet& results,
6407  const bool column_format) {
6408  createSimpleResult(_return, results, column_format, "Explanation");
6409 }
6410 
6411 void DBHandler::convertResult(TQueryResult& _return,
6412  const ResultSet& results,
6413  const bool column_format) {
6414  createSimpleResult(_return, results, column_format, "Result");
6415 }
6416 
6417 // this all should be moved out of here to catalog
6419  const TableDescriptor* td,
6420  const AccessPrivileges access_priv) {
6421  CHECK(td);
6422  auto& cat = session_info.getCatalog();
6423  std::vector<DBObject> privObjects;
6424  DBObject dbObject(td->tableName, TableDBObjectType);
6425  dbObject.loadKey(cat);
6426  dbObject.setPrivileges(access_priv);
6427  privObjects.push_back(dbObject);
6428  return SysCatalog::instance().checkPrivileges(session_info.get_currentUser(),
6429  privObjects);
6430 }
6431 
6432 // TODO(max): usage of it was accidentally lost. Need to restore this check
6434  if (const auto drop_db_stmt = dynamic_cast<Parser::DropDBStmt*>(ddl)) {
6435  sessions_store_->eraseByDB(*drop_db_stmt->getDatabaseName());
6436  } else if (const auto rename_db_stmt = dynamic_cast<Parser::RenameDBStmt*>(ddl)) {
6437  sessions_store_->eraseByDB(*rename_db_stmt->getPreviousDatabaseName());
6438  } else if (const auto drop_user_stmt = dynamic_cast<Parser::DropUserStmt*>(ddl)) {
6439  sessions_store_->eraseByUser(*drop_user_stmt->getUserName());
6440  } else if (const auto rename_user_stmt = dynamic_cast<Parser::RenameUserStmt*>(ddl)) {
6441  sessions_store_->eraseByUser(*rename_user_stmt->getOldUserName());
6442  }
6443 }
6444 
6446  QueryStateProxy query_state_proxy,
6447  const bool column_format,
6448  const ExecutorDeviceType executor_device_type,
6449  const int32_t first_n,
6450  const int32_t at_most_n,
6451  const bool use_calcite,
6453  if (leaf_handler_) {
6454  leaf_handler_->flush_queue();
6455  }
6456  auto const query_str = strip(query_state_proxy->getQueryStr());
6457  auto session_ptr = query_state_proxy->getConstSessionInfo();
6458  // Call to DistributedValidate() below may change cat.
6459  auto& cat = session_ptr->getCatalog();
6460  legacylockmgr::ExecutorWriteLock execute_write_lock;
6461  legacylockmgr::ExecutorReadLock execute_read_lock;
6462 
6463  ParserWrapper pw{query_str};
6464  auto [query_substr, post_fix] = ::substring(query_str, g_max_log_length);
6465  std::ostringstream oss;
6466  oss << query_substr << post_fix;
6467  auto const reduced_query_str = oss.str();
6468  bool show_cpu_memory_stat_after_finishing_query = false;
6469  ScopeGuard cpu_system_memory_logging = [&show_cpu_memory_stat_after_finishing_query,
6470  &cat,
6471  &reduced_query_str]() {
6472  if (show_cpu_memory_stat_after_finishing_query) {
6473  log_system_cpu_memory_status("Finish query execution: " + reduced_query_str, cat);
6474  }
6475  };
6476  auto log_cpu_memory_status =
6477  [&reduced_query_str, &cat, &show_cpu_memory_stat_after_finishing_query]() {
6478  log_system_cpu_memory_status("Start query execution: " + reduced_query_str, cat);
6479  show_cpu_memory_stat_after_finishing_query = true;
6480  };
6481 
6482  // test to see if db/catalog is writable before execution of a writable SQL/DDL command
6483  // TODO: move to execute() (?)
6484  // instead of pre-filtering here based upon incomplete info ?
6485  if (!pw.is_refresh && pw.getQueryType() != ParserWrapper::QueryType::Read &&
6486  pw.getQueryType() != ParserWrapper::QueryType::SchemaRead &&
6487  pw.getQueryType() != ParserWrapper::QueryType::Unknown) {
6489  }
6490 
6491  if (pw.is_itas) {
6492  // itas can attempt to execute here
6493  check_read_only("insert_into_table");
6494 
6495  std::string query_ra;
6496  _return.addExecutionTime(measure<>::execution([&]() {
6497  TPlanResult result;
6498  std::tie(result, locks) =
6499  parse_to_ra(query_state_proxy, query_str, {}, false, system_parameters_);
6500  query_ra = result.plan_result;
6501  }));
6502  rapidjson::Document ddl_query;
6503  ddl_query.Parse(query_ra);
6504  CHECK(ddl_query.HasMember("payload"));
6505  CHECK(ddl_query["payload"].IsObject());
6506  auto stmt = Parser::InsertIntoTableAsSelectStmt(ddl_query["payload"].GetObject());
6507  log_cpu_memory_status();
6508  _return.addExecutionTime(
6509  measure<>::execution([&]() { stmt.execute(*session_ptr, read_only_); }));
6510  return;
6511 
6512  } else if (pw.is_ctas) {
6513  // ctas can attempt to execute here
6514  check_read_only("create_table_as");
6515 
6516  std::string query_ra;
6517  _return.addExecutionTime(measure<>::execution([&]() {
6518  TPlanResult result;
6519  std::tie(result, locks) =
6520  parse_to_ra(query_state_proxy, query_str, {}, false, system_parameters_);
6521  query_ra = result.plan_result;
6522  }));
6523  if (query_ra.size()) {
6524  rapidjson::Document ddl_query;
6525  ddl_query.Parse(query_ra);
6526  CHECK(ddl_query.HasMember("payload"));
6527  CHECK(ddl_query["payload"].IsObject());
6528  auto stmt = Parser::CreateTableAsSelectStmt(ddl_query["payload"].GetObject());
6529  log_cpu_memory_status();
6530  _return.addExecutionTime(
6531  measure<>::execution([&]() { stmt.execute(*session_ptr, read_only_); }));
6532  }
6533  return;
6534 
6535  } else if (pw.getDMLType() == ParserWrapper::DMLType::Insert) {
6536  check_read_only("insert_into_table");
6537  std::string query_ra;
6538  _return.addExecutionTime(measure<>::execution([&]() {
6539  TPlanResult result;
6540  std::tie(result, locks) =
6541  parse_to_ra(query_state_proxy, query_str, {}, false, system_parameters_);
6542  query_ra = result.plan_result;
6543  }));
6544  rapidjson::Document ddl_query;
6545  ddl_query.Parse(query_ra);
6546  CHECK(ddl_query.HasMember("payload"));
6547  CHECK(ddl_query["payload"].IsObject());
6548  auto stmt = Parser::InsertValuesStmt(cat, ddl_query["payload"].GetObject());
6549  if (stmt.get_value_lists().size() > 1) {
6550  log_cpu_memory_status();
6551  }
6552  _return.addExecutionTime(
6553  measure<>::execution([&]() { stmt.execute(*session_ptr, read_only_); }));
6554  return;
6555 
6556  } else if (pw.is_validate) {
6557  // check user is superuser
6558  if (!session_ptr->get_currentUser().isSuper) {
6559  throw std::runtime_error("Superuser is required to run VALIDATE");
6560  }
6561 
6562  std::string query_ra;
6563  _return.addExecutionTime(measure<>::execution([&]() {
6564  TPlanResult result;
6565  std::tie(result, locks) =
6566  parse_to_ra(query_state_proxy, query_str, {}, false, system_parameters_);
6567  query_ra = result.plan_result;
6568  }));
6569  rapidjson::Document ddl_query;
6570  ddl_query.Parse(query_ra);
6571  CHECK(ddl_query.HasMember("payload"));
6572  CHECK(ddl_query["payload"].IsObject());
6573  auto validate_stmt = Parser::ValidateStmt(ddl_query["payload"].GetObject());
6574  _return.addExecutionTime(measure<>::execution([&]() {
6575  // Prevent any other query from running while doing validate
6576  execute_write_lock = legacylockmgr::getExecuteWriteLock();
6577 
6578  std::string output{"Result for validate"};
6579  if (g_cluster) {
6580  THROW_DB_EXCEPTION("Validate command should be executed on the aggregator.");
6581  } else {
6582  _return.addExecutionTime(measure<>::execution([&]() {
6583  const system_validator::SingleNodeValidator validator(validate_stmt.getType(),
6584  cat);
6585  output = validator.validate();
6586  }));
6587  }
6589  }));
6590  return;
6591 
6592  } else if (pw.is_copy && !pw.is_copy_to) {
6593  std::unique_ptr<Parser::Stmt> stmt =
6594  Parser::create_stmt_for_query(query_str, *session_ptr);
6595  const auto import_stmt = dynamic_cast<Parser::CopyTableStmt*>(stmt.get());
6596  if (import_stmt) {
6597  if (g_cluster && !leaf_aggregator_.leafCount()) {
6598  // Don't allow copy from imports directly on a leaf node
6599  throw std::runtime_error(
6600  "Cannot import on an individual leaf. Please import from the Aggregator.");
6601  } else if (leaf_aggregator_.leafCount() > 0) {
6603  [&]() { execute_distributed_copy_statement(import_stmt, *session_ptr); }));
6604  } else {
6605  log_cpu_memory_status();
6607  [&]() { import_stmt->execute(*session_ptr, read_only_); }));
6608  }
6609 
6610  // Read response message
6611  _return.updateResultSet(*import_stmt->return_message.get(),
6613  import_stmt->get_success());
6614 
6615  // get deferred_copy_from info
6616  if (import_stmt->was_deferred_copy_from()) {
6617  DeferredCopyFromState deferred_copy_from_state;
6618  import_stmt->get_deferred_copy_from_payload(deferred_copy_from_state.table,
6619  deferred_copy_from_state.file_name,
6620  deferred_copy_from_state.copy_params,
6621  deferred_copy_from_state.partitions);
6622  deferred_copy_from_sessions.add(session_ptr->get_session_id(),
6623  deferred_copy_from_state);
6624  }
6625 
6626  // } else {
6627  // possibly a failure case:
6628  // CopyTableStmt failed to be created, or failed typecast
6629  // but historically just returned
6630  // }
6631  }
6632  return;
6633 
6634  } else if (pw.is_ddl) {
6635  std::string query_ra;
6636  _return.addExecutionTime(measure<>::execution([&]() {
6637  TPlanResult result;
6638  std::tie(result, locks) =
6639  parse_to_ra(query_state_proxy, query_str, {}, false, system_parameters_);
6640  query_ra = result.plan_result;
6641  }));
6642  executeDdl(_return, query_ra, session_ptr);
6643  return;
6644 
6645  } else if (pw.is_other_explain) {
6646  // does nothing
6647  throw std::runtime_error("EXPLAIN not yet supported for DDL or DML commands.");
6648  return;
6649 
6650  } else {
6651  // includes:
6652  // explain that is not 'other'
6653  // copy_to
6654  // DmlUpdate DmlDelete
6655  // anything else that failed to match
6656 
6657  if (pw.getDMLType() != ParserWrapper::DMLType::NotDML) {
6658  check_read_only("modify");
6659  }
6660 
6661  execute_read_lock = legacylockmgr::getExecuteReadLock();
6662 
6663  std::string query_ra = query_str;
6664  if (use_calcite) {
6665  _return.addExecutionTime(measure<>::execution([&]() {
6666  TPlanResult result;
6667  std::tie(result, locks) =
6668  parse_to_ra(query_state_proxy, query_str, {}, true, system_parameters_);
6669  query_ra = result.plan_result;
6670  }));
6671  }
6672  std::string query_ra_calcite_explain;
6673  ExplainInfo explain(query_str);
6674  if (explain.isCalciteExplain()) {
6676  // return the ra as the result
6677  _return.updateResultSet(query_ra, ExecutionResult::Explanation);
6678  return;
6679  }
6680  CHECK(!locks.empty());
6681  query_ra_calcite_explain =
6682  parse_to_ra(
6683  query_state_proxy, explain.ActualQuery(), {}, false, system_parameters_)
6684  .first.plan_result;
6685  }
6686  std::vector<PushedDownFilterInfo> filter_push_down_requests;
6687  auto submitted_time_str = query_state_proxy->getQuerySubmittedTime();
6688  auto query_session = session_ptr ? session_ptr->get_session_id() : "";
6689  auto execute_rel_alg_task = std::make_shared<QueryDispatchQueue::Task>(
6690  [this,
6691  &filter_push_down_requests,
6692  &_return,
6693  query_state_proxy,
6694  &explain,
6695  &query_ra_calcite_explain,
6696  &query_ra,
6697  &query_str,
6698  &locks,
6699  column_format,
6700  executor_device_type,
6701  first_n,
6702  at_most_n,
6703  parent_thread_local_ids =
6704  logger::thread_local_ids()](const size_t executor_index) {
6705  // if we find proper filters we need to "re-execute" the query
6706  // with a modified query plan (i.e., which has pushdowned filter)
6707  // otherwise this trial just executes the query and keeps corresponding query
6708  // resultset in _return object
6709  logger::LocalIdsScopeGuard lisg = parent_thread_local_ids.setNewThreadId();
6710  filter_push_down_requests = execute_rel_alg(
6711  _return,
6712  query_state_proxy,
6713  explain.isCalciteExplain() ? query_ra_calcite_explain : query_ra,
6714  column_format,
6715  executor_device_type,
6716  first_n,
6717  at_most_n,
6718  /*just_validate=*/false,
6720  explain,
6721  executor_index);
6722  if (explain.isCalciteExplain()) {
6723  if (filter_push_down_requests.empty()) {
6724  // we only reach here if filter push down was enabled, but no filter
6725  // push down candidate was found
6726  _return.updateResultSet(query_ra, ExecutionResult::Explanation);
6727  } else {
6728  CHECK(!locks.empty());
6729  std::vector<TFilterPushDownInfo> filter_push_down_info;
6730  for (const auto& req : filter_push_down_requests) {
6731  TFilterPushDownInfo filter_push_down_info_for_request;
6732  filter_push_down_info_for_request.input_prev = req.input_prev;
6733  filter_push_down_info_for_request.input_start = req.input_start;
6734  filter_push_down_info_for_request.input_next = req.input_next;
6735  filter_push_down_info.push_back(filter_push_down_info_for_request);
6736  }
6737  query_ra = parse_to_ra(query_state_proxy,
6738  query_str,
6739  filter_push_down_info,
6740  false,
6742  .first.plan_result;
6743  _return.updateResultSet(query_ra, ExecutionResult::Explanation);
6744  }
6745  } else {
6746  if (!filter_push_down_requests.empty()) {
6747  CHECK(!locks.empty());
6749  query_state_proxy,
6750  query_ra,
6751  column_format,
6752  executor_device_type,
6753  first_n,
6754  at_most_n,
6755  false,
6756  false,
6757  filter_push_down_requests);
6758  }
6759  }
6760  });
6763  if (g_enable_runtime_query_interrupt && !query_session.empty() &&
6764  !explain.isSelectExplain()) {
6765  executor->enrollQuerySession(query_session,
6766  query_str,
6767  submitted_time_str,
6769  QuerySessionStatus::QueryStatus::PENDING_QUEUE);
6770  while (!dispatch_queue_->hasIdleWorker()) {
6771  try {
6772  executor->checkPendingQueryStatus(query_session);
6773  } catch (QueryExecutionError& e) {
6774  executor->clearQuerySessionStatus(query_session, submitted_time_str);
6775  if (e.hasErrorCode(ErrorCode::INTERRUPTED)) {
6776  throw std::runtime_error(
6777  "Query execution has been interrupted (pending query).");
6778  }
6779  throw e;
6780  }
6781  std::this_thread::sleep_for(std::chrono::milliseconds(10));
6782  }
6783  }
6784  log_cpu_memory_status();
6785  dispatch_queue_->submit(execute_rel_alg_task,
6786  pw.getDMLType() == ParserWrapper::DMLType::Update ||
6787  pw.getDMLType() == ParserWrapper::DMLType::Delete);
6788  auto result_future = execute_rel_alg_task->get_future();
6789  result_future.get();
6790  return;
6791  }
6792 }
6793 
6795  ExecutionResult& _return,
6796  QueryStateProxy query_state_proxy,
6797  std::string& query_ra,
6798  const bool column_format,
6799  const ExecutorDeviceType executor_device_type,
6800  const int32_t first_n,
6801  const int32_t at_most_n,
6802  const bool just_explain,
6803  const bool is_calcite_explain,
6804  const std::vector<PushedDownFilterInfo>& filter_push_down_requests) {
6805  // collecting the selected filters' info to be sent to Calcite:
6806  std::vector<TFilterPushDownInfo> filter_push_down_info;
6807  for (const auto& req : filter_push_down_requests) {
6808  TFilterPushDownInfo filter_push_down_info_for_request;
6809  filter_push_down_info_for_request.input_prev = req.input_prev;
6810  filter_push_down_info_for_request.input_start = req.input_start;
6811  filter_push_down_info_for_request.input_next = req.input_next;
6812  filter_push_down_info.push_back(filter_push_down_info_for_request);
6813  }
6814  // deriving the new relational algebra plan with respect to the pushed down filters
6815  _return.addExecutionTime(measure<>::execution([&]() {
6816  query_ra = parse_to_ra(query_state_proxy,
6817  query_state_proxy->getQueryStr(),
6818  filter_push_down_info,
6819  false,
6821  .first.plan_result;
6822  }));
6823 
6824  // execute the new relational algebra plan:
6825  auto explain_info = ExplainInfo(ExplainInfo::ExplainType::None);
6826  execute_rel_alg(_return,
6827  query_state_proxy,
6828  query_ra,
6829  column_format,
6830  executor_device_type,
6831  first_n,
6832  at_most_n,
6833  /*just_validate=*/false,
6834  /*find_push_down_candidates=*/false,
6835  explain_info);
6836 }
6837 
6839  Parser::CopyTableStmt* copy_stmt,
6840  const Catalog_Namespace::SessionInfo& session_info) {}
6841 
// File-local helpers for in-memory system tables.
//
// NOTE(review): the signature lines of both helpers (listing lines 6843 and 6864) and
// several statements are absent from this listing; comments describe only what is
// visible.
6842 namespace {
// First helper (called below as check_and_reset_in_memory_system_table): for an
// in-memory system table, drops the table's fragmenter and re-reads its metadata, then
// returns true; throws std::runtime_error when system tables are disabled. Returns
// false for any other table.
6844  const TableDescriptor& td) {
6845  if (td.is_in_memory_system_table) {
6846  if (g_enable_system_tables) {
6847  // Reset system table fragmenter in order to force chunk metadata refetch.
6848  auto table_schema_lock =
6850  auto table_data_lock =
6852  catalog.removeFragmenterForTable(td.tableId);
6853  catalog.getMetadataForTable(td.tableId, true);
6854  return true;
6855  } else {
6856  throw std::runtime_error(
6857  "Query cannot be executed because use of system tables is currently "
6858  "disabled.");
6859  }
6860  }
6861  return false;
6862 }
6863 
// Second helper: for every selected table whose second element equals
// shared::kInfoSchemaDbName, looks the table up by its first element in the
// information-schema catalog and applies the reset above. Does nothing when that
// catalog is unavailable.
6865  const std::vector<std::vector<std::string>>& selected_tables) {
6866  const auto info_schema_catalog =
6868  if (info_schema_catalog) {
6869  for (const auto& table : selected_tables) {
// presumably each entry is {table name, database name} -- confirm against the producer
6870  if (table[1] == shared::kInfoSchemaDbName) {
6871  auto td = info_schema_catalog->getMetadataForTable(table[0], false);
6872  CHECK(td);
6873  check_and_reset_in_memory_system_table(*info_schema_catalog, *td);
6874  }
6875  }
6876  }
6877 }
6878 } // namespace
6879 
6881  QueryStateProxy query_state_proxy,
6882  const std::shared_ptr<Catalog_Namespace::Catalog>& cat,
6883  const std::string& query_str,
6884  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
6885  const SystemParameters& system_parameters,
6886  const bool check_privileges) {
6887  query_state::Timer timer = query_state_proxy.createTimer(__func__);
6888 
6889  heavyai::RequestInfo const request_info(createInMemoryCalciteSession(cat),
6890  logger::request_id());
6891  ScopeGuard cleanup = [&]() { removeInMemoryCalciteSession(request_info.sessionId()); };
6892  ExplainInfo explain(query_str);
6893  std::string const actual_query{explain.isSelectExplain() ? explain.ActualQuery()
6894  : query_str};
6895  auto query_parsing_option =
6896  calcite_->getCalciteQueryParsingOption(legacy_syntax_,
6897  explain.isCalciteExplain(),
6898  check_privileges,
6899  explain.isCalciteExplainDetail());
6900  auto optimization_option = calcite_->getCalciteOptimizationOption(
6901  system_parameters.enable_calcite_view_optimize,
6903  filter_push_down_info,
6905 
6906  return calcite_->process(timer.createQueryStateProxy(),
6907  legacy_syntax_ ? pg_shim(actual_query) : actual_query,
6908  query_parsing_option,
6909  optimization_option,
6910  request_info.json());
6911 }
6912 
// Plans `query_str` and, when `acquire_locks` is set, takes the table locks the query
// needs. Returns the plan result together with the acquired locks.
//
// Planning and locking only happen when the ParserWrapper classifies the query as DDL,
// or as neither a validate nor an "other explain" statement; otherwise a
// default-constructed TPlanResult is returned.
//
// NOTE(review): several lines are absent from this extracted listing (6923, 6934,
// 6940, 6991-6992, 6998-6999, 7005-7006, 7010-7011). In particular the declaration of
// `locks` and the call that fills `result` are not visible here.
6913 std::pair<TPlanResult, lockmgr::LockedTableDescriptors> DBHandler::parse_to_ra(
6914  QueryStateProxy query_state_proxy,
6915  const std::string& query_str,
6916  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
6917  const bool acquire_locks,
6918  const SystemParameters& system_parameters,
6919  bool check_privileges) {
6920  query_state::Timer timer = query_state_proxy.createTimer(__func__);
6921  ParserWrapper pw{query_str};
6922  TPlanResult result;
6924  if (pw.is_ddl || (!pw.is_validate && !pw.is_other_explain)) {
6925  auto cat = query_state_proxy->getConstSessionInfo()->get_catalog_ptr();
6926  // Need to read lock the catalog while determining what table names are used by this
6927  // query, confirming the tables exist, checking the user's permissions, and finally
6928  // locking the individual tables. The catalog lock can be released once the query
6929  // begins running. The table locks will protect the running query.
6930  std::shared_lock<heavyai::DistributedSharedMutex> cat_lock;
// The distributed catalog mutex is only taken in multi-instance mode; otherwise
// cat_lock stays empty.
6931  if (g_multi_instance) {
6932  cat_lock = std::shared_lock<heavyai::DistributedSharedMutex>(*cat->dcatalogMutex_);
6933  }
// NOTE(review): listing line 6934 (start of the call receiving these arguments) is
// absent; presumably it assigns the Calcite result to `result` -- confirm.
6935  cat,
6936  query_str,
6937  filter_push_down_info,
6938  system_parameters,
6939  check_privileges);
// NOTE(review): listing line 6940 (start of the call taking this argument) is absent.
6941  result.resolved_accessed_objects.tables_selected_from);
6942 
6943  if (acquire_locks) {
6944  std::set<std::vector<std::string>> write_only_tables;
6945  std::vector<std::vector<std::string>> tables;
6946 
6947  tables.insert(tables.end(),
6948  result.resolved_accessed_objects.tables_updated_in.begin(),
6949  result.resolved_accessed_objects.tables_updated_in.end());
6950  tables.insert(tables.end(),
6951  result.resolved_accessed_objects.tables_deleted_from.begin(),
6952  result.resolved_accessed_objects.tables_deleted_from.end());
6953 
6954  // Collect the tables that need a write lock
// At this point `tables` holds only the updated-in and deleted-from tables.
6955  for (const auto& table : tables) {
6956  write_only_tables.insert(table);
6957  }
6958 
6959  tables.insert(tables.end(),
6960  result.resolved_accessed_objects.tables_selected_from.begin(),
6961  result.resolved_accessed_objects.tables_selected_from.end());
6962  tables.insert(tables.end(),
6963  result.resolved_accessed_objects.tables_inserted_into.begin(),
6964  result.resolved_accessed_objects.tables_inserted_into.end());
6965 
6966  // avoid deadlocks by enforcing a deterministic locking sequence
6967  // first, obtain table schema locks
6968  // then, obtain table data locks
6969  // force sort by database id and table id order in case of name change to
6970  // guarantee fixed order of mutex access
// Each entry is used as {table name, database name}: element [1] is passed to
// getCatalog() and element [0] to getMetadataForTable().
6971  std::sort(tables.begin(),
6972  tables.end(),
6973  [](const std::vector<std::string>& a, const std::vector<std::string>& b) {
6974  if (a[1] != b[1]) {
6975  const auto cat_a = SysCatalog::instance().getCatalog(a[1]);
6976  const auto cat_b = SysCatalog::instance().getCatalog(b[1]);
6977  return cat_a->getDatabaseId() < cat_b->getDatabaseId();
6978  }
6979  const auto cat = SysCatalog::instance().getCatalog(a[1]);
6980  return cat->getMetadataForTable(a[0], false)->tableId <
6981  cat->getMetadataForTable(b[0], false)->tableId;
6982  });
6983 
6984  // In the case of self-join and possibly other cases, we will
6985  // have duplicate tables. Ensure we only take one for locking below.
6986  tables.erase(unique(tables.begin(), tables.end()), tables.end());
6987  for (const auto& table : tables) {
6988  const auto cat = SysCatalog::instance().getCatalog(table[1]);
6989  CHECK(cat);
// NOTE(review): listing lines 6991-6992 are absent; the visible tail shows a ReadLock
// table descriptor being acquired for table[0].
6990  locks.emplace_back(
6993  lockmgr::ReadLock>::acquireTableDescriptor(*cat, table[0])));
6994  if (write_only_tables.count(table)) {
6995  // Acquire an insert data lock for updates/deletes, consistent w/ insert. The
6996  // table data lock will be acquired in the fragmenter during checkpoint.
// NOTE(review): listing lines 6998-6999 (the lock type being constructed) are absent.
6997  locks.emplace_back(
7000  cat->getDatabaseId(), (*locks.back())())));
7001  } else {
7002  auto lock_td = (*locks.back())();
// In-memory system tables take a different lock than regular tables.
// NOTE(review): listing lines 7005-7006 and 7010-7011 (the two lock types) are absent.
7003  if (lock_td->is_in_memory_system_table) {
7004  locks.emplace_back(
7007  cat->getDatabaseId(), lock_td)));
7008  } else {
7009  locks.emplace_back(
7012  cat->getDatabaseId(), lock_td)));
7013  }
7014  }
7015  }
7016  }
7017  }
7018  return std::make_pair(result, std::move(locks));
7019 }
7020 
7021 int64_t DBHandler::query_get_outer_fragment_count(const TSessionId& session_id_or_json,
7022  const std::string& select_query) {
7023  heavyai::RequestInfo const request_info(session_id_or_json);
7024  SET_REQUEST_ID(request_info.requestId());
7025  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7026  if (!leaf_handler_) {
7027  THROW_DB_EXCEPTION("Distributed support is disabled.");
7028  }
7029  try {
7030  return leaf_handler_->query_get_outer_fragment_count(request_info.sessionId(),
7031  select_query);
7032  } catch (std::exception& e) {
7033  THROW_DB_EXCEPTION(e.what());
7034  }
7035 }
7036 
7037 void DBHandler::check_table_consistency(TTableMeta& _return,
7038  const TSessionId& session_id_or_json,
7039  const int32_t table_id) {
7040  heavyai::RequestInfo const request_info(session_id_or_json);
7041  SET_REQUEST_ID(request_info.requestId());
7042  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7043  if (!leaf_handler_) {
7044  THROW_DB_EXCEPTION("Distributed support is disabled.");
7045  }
7046  try {
7047  leaf_handler_->check_table_consistency(_return, request_info.sessionId(), table_id);
7048  } catch (std::exception& e) {
7049  THROW_DB_EXCEPTION(e.what());
7050  }
7051 }
7052 
7053 void DBHandler::start_query(TPendingQuery& _return,
7054  const TSessionId& leaf_session_id_or_json,
7055  const TSessionId& parent_session_id_or_json,
7056  const std::string& serialized_rel_alg_dag,
7057  const std::string& start_time_str,
7058  const bool just_explain,
7059  const std::vector<int64_t>& outer_fragment_indices) {
7060  heavyai::RequestInfo const leaf_request_info(leaf_session_id_or_json);
7061  heavyai::RequestInfo const parent_request_info(parent_session_id_or_json);
7062  SET_REQUEST_ID(leaf_request_info.requestId());
7063  auto stdlog = STDLOG(get_session_ptr(leaf_request_info.sessionId()));
7064  auto session_ptr = stdlog.getConstSessionInfo();
7065  if (!leaf_handler_) {
7066  THROW_DB_EXCEPTION("Distributed support is disabled.");
7067  }
7068  LOG(INFO) << "start_query :" << *session_ptr << " :" << just_explain;
7069  auto time_ms = measure<>::execution([&]() {
7070  try {
7071  leaf_handler_->start_query(_return,
7072  leaf_request_info.sessionId(),
7073  parent_request_info.sessionId(),
7074  serialized_rel_alg_dag,
7075  start_time_str,
7076  just_explain,
7077  outer_fragment_indices);
7078  } catch (std::exception& e) {
7079  THROW_DB_EXCEPTION(e.what());
7080  }
7081  });
7082  LOG(INFO) << "start_query-COMPLETED " << time_ms << "ms "
7083  << "id is " << _return.id;
7084 }
7085 
7086 void DBHandler::execute_query_step(TStepResult& _return,
7087  const TPendingQuery& pending_query,
7088  const TSubqueryId subquery_id,
7089  const std::string& start_time_str) {
7090  SET_REQUEST_ID(0); // No SessionID is available
7091  if (!leaf_handler_) {
7092  THROW_DB_EXCEPTION("Distributed support is disabled.");
7093  }
7094  LOG(INFO) << "execute_query_step : id:" << pending_query.id;
7095  auto time_ms = measure<>::execution([&]() {
7096  try {
7097  leaf_handler_->execute_query_step(
7098  _return, pending_query, subquery_id, start_time_str);
7099  } catch (std::exception& e) {
7100  THROW_DB_EXCEPTION(e.what());
7101  }
7102  });
7103  LOG(INFO) << "execute_query_step-COMPLETED " << time_ms << "ms";
7104 }
7105 
7106 void DBHandler::broadcast_serialized_rows(const TSerializedRows& serialized_rows,
7107  const TRowDescriptor& row_desc,
7108  const TQueryId query_id,
7109  const TSubqueryId subquery_id,
7110  const bool is_final_subquery_result) {
7111  if (!leaf_handler_) {
7112  THROW_DB_EXCEPTION("Distributed support is disabled.");
7113  }
7114  LOG(INFO) << "BROADCAST-SERIALIZED-ROWS id:" << query_id;
7115  auto time_ms = measure<>::execution([&]() {
7116  try {
7117  leaf_handler_->broadcast_serialized_rows(
7118  serialized_rows, row_desc, query_id, subquery_id, is_final_subquery_result);
7119  } catch (std::exception& e) {
7120  THROW_DB_EXCEPTION(e.what());
7121  }
7122  });
7123  LOG(INFO) << "BROADCAST-SERIALIZED-ROWS COMPLETED " << time_ms << "ms";
7124 }
7125 
// Inserts pre-built column chunks received over Thrift into a table, without
// checkpointing (insertChunksNoCheckpoint).
//
// session_id_or_json   - session id (or JSON wrapping it) decoded by RequestInfo.
// thrift_insert_chunks - table/db ids, valid row indices, row count and one data
//                        (plus optional index) buffer per column.
//
// Any std::exception raised inside is rethrown as a DB exception carrying its what()
// text.
//
// NOTE(review): listing lines 7171 and 7177 (the initializers of table_read_lock and
// insert_data_lock) are absent from this extracted listing.
7126 void DBHandler::insert_chunks(const TSessionId& session_id_or_json,
7127  const TInsertChunks& thrift_insert_chunks) {
7128  try {
7129  heavyai::RequestInfo const request_info(session_id_or_json);
7130  SET_REQUEST_ID(request_info.requestId());
7131  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7132  auto session_ptr = stdlog.getConstSessionInfo();
7133  auto const& cat = session_ptr->getCatalog();
7134  Fragmenter_Namespace::InsertChunks insert_chunks{thrift_insert_chunks.table_id,
7135  thrift_insert_chunks.db_id};
// Copy the valid row indices element by element into the fragmenter-side structure.
7136  insert_chunks.valid_row_indices.resize(thrift_insert_chunks.valid_indices.size());
7137  std::copy(thrift_insert_chunks.valid_indices.begin(),
7138  thrift_insert_chunks.valid_indices.end(),
7139  insert_chunks.valid_row_indices.begin());
7140 
7141  auto columns =
7142  cat.getAllColumnMetadataForTable(insert_chunks.table_id, false, false, true);
// The Thrift payload must carry exactly one entry per column returned above; the
// loop below walks both sequences in lockstep.
7143  CHECK_EQ(columns.size(), thrift_insert_chunks.data.size());
7144 
// Owns the PassThroughBuffer wrappers so the raw pointers taken below stay valid
// until the end of the try block.
7145  std::list<foreign_storage::PassThroughBuffer> pass_through_buffers;
7146  auto thrift_data_it = thrift_insert_chunks.data.begin();
7147  for (const auto col_desc : columns) {
7148  AbstractBuffer* data_buffer = nullptr;
7149  AbstractBuffer* index_buffer = nullptr;
// Wrap the Thrift data bytes in place (pointer + size) rather than copying them.
7150  data_buffer = &pass_through_buffers.emplace_back(
7151  reinterpret_cast<const int8_t*>(thrift_data_it->data_buffer.data()),
7152  thrift_data_it->data_buffer.size());
7153  data_buffer->initEncoder(col_desc->columnType);
7154  data_buffer->getEncoder()->setNumElems(thrift_insert_chunks.num_rows);
// Variable-length columns additionally need an index buffer, which must be non-empty
// unless there are no rows.
7155  if (col_desc->columnType.is_varlen_indeed()) {
7156  CHECK(thrift_insert_chunks.num_rows == 0 ||
7157  thrift_data_it->index_buffer.size() > 0);
7158  index_buffer = &pass_through_buffers.emplace_back(
7159  reinterpret_cast<const int8_t*>(thrift_data_it->index_buffer.data()),
7160  thrift_data_it->index_buffer.size());
7161  }
7162 
7163  insert_chunks.chunks[col_desc->columnId] =
7164  Chunk_NS::Chunk::getChunk(col_desc, data_buffer, index_buffer, false);
7165  thrift_data_it++;
7166  }
7167 
// Locks are keyed on the logical table id of the target table.
7168  const ChunkKey lock_chunk_key{cat.getDatabaseId(),
7169  cat.getLogicalTableId(insert_chunks.table_id)};
7170  auto table_read_lock =
7172  const auto td = cat.getMetadataForTable(insert_chunks.table_id);
7173  CHECK(td);
7174 
7175  // this should have the same lock sequence as COPY FROM
7176  auto insert_data_lock =
7178  td->fragmenter->insertChunksNoCheckpoint(insert_chunks);
7179 
7180  } catch (const std::exception& e) {
7181  THROW_DB_EXCEPTION(std::string(e.what()));
7182  }
7183 }
7184 
// Converts column data received over Thrift into the fragmenter's insert structure and
// inserts it without checkpointing (insertDataNoCheckpoint).
//
// session_id_or_json - session id (or JSON wrapping it) decoded by RequestInfo.
// thrift_insert_data - db/table ids, column ids, optional per-column is_default flags,
//                      row count and one data entry per column id.
//
// Any std::exception raised inside is rethrown as a DB exception carrying its what()
// text.
//
// NOTE(review): listing lines 7196 (declaration of `insert_data`), 7256, 7281 and 7287
// are absent from this extracted listing.
7185 void DBHandler::insert_data(const TSessionId& session_id_or_json,
7186  const TInsertData& thrift_insert_data) {
7187  try {
7188  heavyai::RequestInfo const request_info(session_id_or_json);
7189  SET_REQUEST_ID(request_info.requestId());
7190  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7191  auto session_ptr = stdlog.getConstSessionInfo();
// One data entry per column id; is_default is either empty or has one flag per column.
7192  CHECK_EQ(thrift_insert_data.column_ids.size(), thrift_insert_data.data.size());
7193  CHECK(thrift_insert_data.is_default.size() == 0 ||
7194  thrift_insert_data.is_default.size() == thrift_insert_data.column_ids.size());
7195  auto const& cat = session_ptr->getCatalog();
7197  insert_data.databaseId = thrift_insert_data.db_id;
7198  insert_data.tableId = thrift_insert_data.table_id;
7199  insert_data.columnIds = thrift_insert_data.column_ids;
7200  insert_data.is_default = thrift_insert_data.is_default;
7201  insert_data.numRows = thrift_insert_data.num_rows;
// These two vectors own the per-column string/array storage that the DataBlockPtr
// entries below point into.
7202  std::vector<std::unique_ptr<std::vector<std::string>>> none_encoded_string_columns;
7203  std::vector<std::unique_ptr<std::vector<ArrayDatum>>> array_columns;
7204  SQLTypeInfo geo_ti{kNULLT,
7205  false}; // will be filled with the correct info if possible
7206  for (size_t col_idx = 0; col_idx < insert_data.columnIds.size(); ++col_idx) {
7207  const int column_id = insert_data.columnIds[col_idx];
7208  DataBlockPtr p;
7209  const auto cd = cat.getMetadataForColumn(insert_data.tableId, column_id);
7210  CHECK(cd);
7211  const auto& ti = cd->columnType;
// A column flagged as default is expected to carry a single value; otherwise one
// value per row.
7212  size_t rows_expected =
7213  !insert_data.is_default.empty() && insert_data.is_default[col_idx]
7214  ? 1ul
7215  : insert_data.numRows;
// Numbers, times and booleans: point directly at the fixed-length Thrift bytes.
7216  if (ti.is_number() || ti.is_time() || ti.is_boolean()) {
7217  p.numbersPtr = (int8_t*)thrift_insert_data.data[col_idx].fixed_len_data.data();
7218  } else if (ti.is_string()) {
// Dictionary-encoded strings are also read from the fixed-length bytes;
// none-encoded strings are copied out of var_len_data payloads.
7219  if (ti.get_compression() == kENCODING_DICT) {
7220  p.numbersPtr = (int8_t*)thrift_insert_data.data[col_idx].fixed_len_data.data();
7221  } else {
7222  CHECK_EQ(kENCODING_NONE, ti.get_compression());
7223  none_encoded_string_columns.emplace_back(new std::vector<std::string>());
7224  auto& none_encoded_strings = none_encoded_string_columns.back();
7225 
7226  CHECK_EQ(rows_expected, thrift_insert_data.data[col_idx].var_len_data.size());
7227  for (const auto& varlen_str : thrift_insert_data.data[col_idx].var_len_data) {
7228  none_encoded_strings->push_back(varlen_str.payload);
7229  }
7230  p.stringsPtr = none_encoded_strings.get();
7231  }
7232  } else if (ti.is_geometry()) {
// Geometry values are handled like none-encoded strings (payloads copied).
7233  none_encoded_string_columns.emplace_back(new std::vector<std::string>());
7234  auto& none_encoded_strings = none_encoded_string_columns.back();
7235  CHECK_EQ(rows_expected, thrift_insert_data.data[col_idx].var_len_data.size());
7236  for (const auto& varlen_str : thrift_insert_data.data[col_idx].var_len_data) {
7237  none_encoded_strings->push_back(varlen_str.payload);
7238  }
7239  p.stringsPtr = none_encoded_strings.get();
7240 
7241  // point geo type needs to mark null sentinel in its physical coord column
7242  // To recognize null sentinel for point, therefore, we keep the actual geo type
7243  // and needs to use it when constructing geo null point
7244  geo_ti = ti;
7245  } else {
// Anything else must be an array column.
7246  CHECK(ti.is_array());
7247  array_columns.emplace_back(new std::vector<ArrayDatum>());
7248  auto& array_column = array_columns.back();
7249  CHECK_EQ(rows_expected, thrift_insert_data.data[col_idx].var_len_data.size());
7250  for (const auto& t_arr_datum : thrift_insert_data.data[col_idx].var_len_data) {
7251  if (t_arr_datum.is_null) {
// Null handling depends on the column: a "_coords" column following a POINT
// geometry, a fixed-size array (ti.get_size() > 0), or a plain null array.
7252  if ((cd->columnName.find("_coords") != std::string::npos) &&
7253  geo_ti.get_type() == kPOINT) {
7254  // For geo point, we manually mark its null sentinel to coord buffer
// NOTE(review): listing line 7256 (the argument of this push_back) is absent.
7255  array_column->push_back(
7257  } else if (ti.get_size() > 0) {
7258  array_column->push_back(import_export::ImporterUtils::composeNullArray(ti));
7259  } else {
7260  array_column->emplace_back(0, nullptr, true);
7261  }
7262  } else {
7263  ArrayDatum arr_datum;
7264  arr_datum.length = t_arr_datum.payload.size();
7265  int8_t* ptr = (int8_t*)(t_arr_datum.payload.data());
7266  arr_datum.pointer = ptr;
7267  // In this special case, ArrayDatum does not handle freeing the underlying
7268  // memory
// Hence the shared_ptr gets a no-op deleter: the bytes stay owned by the Thrift
// payload.
7269  arr_datum.data_ptr = std::shared_ptr<int8_t>(ptr, [](auto p) {});
7270  arr_datum.is_null = false;
7271  array_column->push_back(arr_datum);
7272  }
7273  }
7274  p.arraysPtr = array_column.get();
7275  }
7276  insert_data.data.push_back(p);
7277  }
// Locks are keyed on the logical table id of the target table.
7278  const ChunkKey lock_chunk_key{cat.getDatabaseId(),
7279  cat.getLogicalTableId(insert_data.tableId)};
7280  auto table_read_lock =
7282  const auto td = cat.getMetadataForTable(insert_data.tableId);
7283  CHECK(td);
7284 
7285  // this should have the same lock seq as COPY FROM
7286  auto insert_data_lock =
// The returned holder is kept in a local so it lives until after the insert call.
7288  auto data_memory_holder = import_export::fill_missing_columns(&cat, insert_data);
7289  td->fragmenter->insertDataNoCheckpoint(insert_data);
7290  } catch (const std::exception& e) {
7291  THROW_DB_EXCEPTION(std::string(e.what()));
7292  }
7293 }
7294 
7295 void DBHandler::start_render_query(TPendingRenderQuery& _return,
7296  const TSessionId& session_id_or_json,
7297  const int64_t widget_id,
7298  const int16_t node_idx,
7299  const std::string& vega_json) {
7300  heavyai::RequestInfo const request_info(session_id_or_json);
7301  SET_REQUEST_ID(request_info.requestId());
7302  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7303  auto session_ptr = stdlog.getConstSessionInfo();
7304  if (!render_handler_) {
7305  THROW_DB_EXCEPTION("Backend rendering is disabled.");
7306  }
7307  LOG(INFO) << "start_render_query :" << *session_ptr << " :widget_id:" << widget_id
7308  << ":vega_json:" << vega_json;
7309 
7310  // cast away const-ness of incoming Thrift string ref
7311  // to allow it to be passed down as an r-value and
7312  // ultimately std::moved into the RenderSession
7313  auto& non_const_vega_json = const_cast<std::string&>(vega_json);
7314 
7315  auto time_ms = measure<>::execution([&]() {
7316  try {
7317  render_handler_->start_render_query(_return,
7318  request_info.sessionId(),
7319  widget_id,
7320  node_idx,
7321  std::move(non_const_vega_json));
7322  } catch (std::exception& e) {
7323  THROW_DB_EXCEPTION(e.what());
7324  }
7325  });
7326  LOG(INFO) << "start_render_query-COMPLETED " << time_ms << "ms "
7327  << "id is " << _return.id;
7328 }
7329 
7330 void DBHandler::execute_next_render_step(TRenderStepResult& _return,
7331  const TPendingRenderQuery& pending_render,
7332  const TRenderAggDataMap& merged_data) {
7333  // No SessionID is available
7334  SET_REQUEST_ID(0);
7335 
7336  if (!render_handler_) {
7337  THROW_DB_EXCEPTION("Backend rendering is disabled.");
7338  }
7339 
7340  LOG(INFO) << "execute_next_render_step: id:" << pending_render.id;
7341  auto time_ms = measure<>::execution([&]() {
7342  try {
7343  render_handler_->execute_next_render_step(_return, pending_render, merged_data);
7344  } catch (std::exception& e) {
7345  THROW_DB_EXCEPTION(e.what());
7346  }
7347  });
7348  LOG(INFO) << "execute_next_render_step-COMPLETED id: " << pending_render.id
7349  << ", time: " << time_ms << "ms ";
7350 }
7351 
7352 void DBHandler::checkpoint(const TSessionId& session_id_or_json, const int32_t table_id) {
7353  heavyai::RequestInfo const request_info(session_id_or_json);
7354  SET_REQUEST_ID(request_info.requestId());
7355  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7356  auto session_ptr = stdlog.getConstSessionInfo();
7357  auto& cat = session_ptr->getCatalog();
7358  cat.checkpoint(table_id);
7359 }
7360 
7361 // check and reset epoch if a request has been made
7362 void DBHandler::set_table_epoch(const TSessionId& session_id_or_json,
7363  const int db_id,
7364  const int table_id,
7365  const int new_epoch) {
7366  heavyai::RequestInfo const request_info(session_id_or_json);
7367  SET_REQUEST_ID(request_info.requestId());
7368  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7369  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7370  auto session_ptr = stdlog.getConstSessionInfo();
7371  if (!session_ptr->get_currentUser().isSuper) {
7372  throw std::runtime_error("Only superuser can set_table_epoch");
7373  }
7374  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7375  ChunkKey table_key{db_id, table_id};
7376  auto table_write_lock = lockmgr::TableSchemaLockMgr::getWriteLockForTable(table_key);
7377  auto table_data_write_lock = lockmgr::TableDataLockMgr::getWriteLockForTable(table_key);
7378  try {
7379  auto& cat = session_ptr->getCatalog();
7380  cat.setTableEpoch(db_id, table_id, new_epoch);
7381  } catch (const std::runtime_error& e) {
7382  THROW_DB_EXCEPTION(std::string(e.what()));
7383  }
7384 }
7385 
// Sets the epoch of the table called `table_name` in the session's current database to
// `new_epoch`.
//
// Only a superuser may call this; anyone else gets a std::runtime_error. A
// std::runtime_error raised by Catalog::setTableEpoch is rethrown as a DB exception.
//
// NOTE(review): listing lines 7403 and 7405 (the initializers of table_write_lock and
// table_data_write_lock) are absent from this extracted listing.
// NOTE(review): `td` is dereferenced below without a null check -- confirm that
// getMetadataForTable cannot return null here (e.g. for an unknown table name).
7387 void DBHandler::set_table_epoch_by_name(const TSessionId& session_id_or_json,
7388  const std::string& table_name,
7389  const int new_epoch) {
7390  heavyai::RequestInfo const request_info(session_id_or_json);
7391  SET_REQUEST_ID(request_info.requestId());
7392  auto stdlog =
7393  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
7394  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7395  auto session_ptr = stdlog.getConstSessionInfo();
7396  if (!session_ptr->get_currentUser().isSuper) {
7397  throw std::runtime_error("Only superuser can set_table_epoch");
7398  }
7399 
7400  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7401  auto& cat = session_ptr->getCatalog();
7402  auto table_write_lock =
7404  auto table_data_write_lock =
7406  auto td = cat.getMetadataForTable(
7407  table_name,
7408  false); // don't populate fragmenter on this call since we only want metadata
7409  int32_t db_id = cat.getCurrentDB().dbId;
7410  try {
7411  cat.setTableEpoch(db_id, td->tableId, new_epoch);
7412  } catch (const std::runtime_error& e) {
7413  THROW_DB_EXCEPTION(std::string(e.what()));
7414  }
7415 }
7416 
7417 int32_t DBHandler::get_table_epoch(const TSessionId& session_id_or_json,
7418  const int32_t db_id,
7419  const int32_t table_id) {
7420  heavyai::RequestInfo const request_info(session_id_or_json);
7421  SET_REQUEST_ID(request_info.requestId());
7422  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7423  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7424  auto session_ptr = stdlog.getConstSessionInfo();
7425 
7426  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7427  ChunkKey table_key{db_id, table_id};
7428  auto table_read_lock = lockmgr::TableSchemaLockMgr::getReadLockForTable(table_key);
7429  auto table_data_write_lock = lockmgr::TableDataLockMgr::getReadLockForTable(table_key);
7430  try {
7431  auto const& cat = session_ptr->getCatalog();
7432  return cat.getTableEpoch(db_id, table_id);
7433  } catch (const std::runtime_error& e) {
7434  THROW_DB_EXCEPTION(std::string(e.what()));
7435  }
7436 }
7437 
// Returns the epoch of the table called `table_name` in the session's current
// database.
//
// A std::runtime_error raised by Catalog::getTableEpoch is rethrown as a DB exception.
//
// NOTE(review): listing lines 7450 and 7452 (the initializers of table_read_lock and
// table_data_read_lock) are absent from this extracted listing.
// NOTE(review): `td` is dereferenced below without a null check -- confirm that
// getMetadataForTable cannot return null here (e.g. for an unknown table name).
7438 int32_t DBHandler::get_table_epoch_by_name(const TSessionId& session_id_or_json,
7439  const std::string& table_name) {
7440  heavyai::RequestInfo const request_info(session_id_or_json);
7441  SET_REQUEST_ID(request_info.requestId());
7442  auto stdlog =
7443  STDLOG(get_session_ptr(request_info.sessionId()), "table_name", table_name);
7444  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7445  auto session_ptr = stdlog.getConstSessionInfo();
7446 
7447  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7448  auto& cat = session_ptr->getCatalog();
7449  auto table_read_lock =
7451  auto table_data_read_lock =
7453  auto td = cat.getMetadataForTable(
7454  table_name,
7455  false); // don't populate fragmenter on this call since we only want metadata
7456  int32_t db_id = cat.getCurrentDB().dbId;
7457  try {
7458  return cat.getTableEpoch(db_id, td->tableId);
7459  } catch (const std::runtime_error& e) {
7460  THROW_DB_EXCEPTION(std::string(e.what()));
7461  }
7462 }
7463 
7464 void DBHandler::get_table_epochs(std::vector<TTableEpochInfo>& _return,
7465  const TSessionId& session_id_or_json,
7466  const int32_t db_id,
7467  const int32_t table_id) {
7468  heavyai::RequestInfo const request_info(session_id_or_json);
7469  SET_REQUEST_ID(request_info.requestId());
7470  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7471  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7472  auto session_ptr = stdlog.getConstSessionInfo();
7473 
7474  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7475  ChunkKey table_key{db_id, table_id};
7476  auto table_read_lock = lockmgr::TableSchemaLockMgr::getReadLockForTable(table_key);
7477  auto table_data_read_lock = lockmgr::TableDataLockMgr::getReadLockForTable(table_key);
7478 
7479  std::vector<Catalog_Namespace::TableEpochInfo> table_epochs;
7480  auto const& cat = session_ptr->getCatalog();
7481  table_epochs = cat.getTableEpochs(db_id, table_id);
7482  CHECK(!table_epochs.empty());
7483 
7484  for (const auto& table_epoch : table_epochs) {
7485  TTableEpochInfo table_epoch_info;
7486  table_epoch_info.table_id = table_epoch.table_id;
7487  table_epoch_info.table_epoch = table_epoch.table_epoch;
7488  table_epoch_info.leaf_index = table_epoch.leaf_index;
7489  _return.emplace_back(table_epoch_info);
7490  }
7491 }
7492 
// Applies a set of per-table epochs, all of which must belong to the same logical
// table, through Catalog::setTableEpochs.
//
// session_id_or_json - session id (or JSON wrapping it) decoded by RequestInfo.
// db_id              - database id used for the lock key and the catalog call.
// table_epochs       - entries to apply; an empty list is a no-op.
//
// Raises a DB exception when the superuser check fails or when the entries reference
// more than one logical table.
//
// NOTE(review): listing line 7524 (start of the call whose arguments appear on line
// 7525) is absent from this extracted listing.
7493 void DBHandler::set_table_epochs(const TSessionId& session_id_or_json,
7494  const int32_t db_id,
7495  const std::vector<TTableEpochInfo>& table_epochs) {
7496  heavyai::RequestInfo const request_info(session_id_or_json);
7497  SET_REQUEST_ID(request_info.requestId());
7498  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7499  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7500  auto session_ptr = stdlog.getConstSessionInfo();
7501 
7502  // Only super users are allowed to call this API on a single node instance
7503  // or aggregator (for distributed mode)
7504  if (!g_cluster || leaf_aggregator_.leafCount() > 0) {
7505  if (!session_ptr->get_currentUser().isSuper) {
7506  THROW_DB_EXCEPTION("Only super users can set table epochs");
7507  }
7508  }
7509  if (table_epochs.empty()) {
7510  return;
7511  }
7512  auto& cat = session_ptr->getCatalog();
// The first entry defines the logical table; every other entry must map to it.
7513  auto logical_table_id = cat.getLogicalTableId(table_epochs[0].table_id);
7514  std::vector<Catalog_Namespace::TableEpochInfo> table_epochs_vector;
7515  for (const auto& table_epoch : table_epochs) {
7516  if (logical_table_id != cat.getLogicalTableId(table_epoch.table_id)) {
7517  THROW_DB_EXCEPTION("Table epochs do not reference the same logical table");
7518  }
7519  table_epochs_vector.emplace_back(
7520  table_epoch.table_id, table_epoch.table_epoch, table_epoch.leaf_index);
7521  }
7522 
7523  const auto execute_read_lock = legacylockmgr::getExecuteReadLock();
7525  true, cat.getMetadataForTable(logical_table_id, false), db_id);
// Schema and data write locks are keyed on the logical table.
7526  ChunkKey table_key{db_id, logical_table_id};
7527  auto table_write_lock = lockmgr::TableSchemaLockMgr::getWriteLockForTable(table_key);
7528  auto table_data_write_lock = lockmgr::TableDataLockMgr::getWriteLockForTable(table_key);
7529  cat.setTableEpochs(db_id, table_epochs_vector);
7530 }
7531 
7532 void DBHandler::set_license_key(TLicenseInfo& _return,
7533  const TSessionId& session_id_or_json,
7534  const std::string& key,
7535  const std::string& nonce) {
7536  heavyai::RequestInfo const request_info(session_id_or_json);
7537  SET_REQUEST_ID(request_info.requestId());
7538  check_read_only("set_license_key");
7539  THROW_DB_EXCEPTION(std::string("Licensing not supported."));
7540 }
7541 
7542 void DBHandler::get_license_claims(TLicenseInfo& _return,
7543  const TSessionId& session_id_or_json,
7544  const std::string& nonce) {
7545  heavyai::RequestInfo const request_info(session_id_or_json);
7546  SET_REQUEST_ID(request_info.requestId());
7547  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7548  _return.claims.emplace_back("");
7549 }
7550 
7553 
// NOTE(review): the signature of this function (listing lines 7551-7552) and listing
// line 7562 are absent from this extracted listing; presumably this is
// DBHandler::shutdown -- confirm.
//
// Visible behaviour: clears the executor's external caches, releases the query engine,
// and shuts the render handler down if one is configured.
7554  Executor::clearExternalCaches(false, nullptr, -1);
7555 
7556  query_engine_.reset();
7557 
7558  if (render_handler_) {
7559  render_handler_->shutdown();
7560  }
7561 
7563 }
7564 
// NOTE(review): the signature of this function (listing line 7565) is absent from this
// extracted listing; presumably this is DBHandler::emergency_shutdown -- confirm.
//
// Visible behaviour: if a Calcite instance exists, asks it to close the Calcite server
// (passing `false`).
7566  if (calcite_) {
7567  calcite_->close_calcite_server(false);
7568  }
7569 }
7570 
// Declaration of the non-member get_device_parameters(); no body is given here, so it
// is presumably defined in another translation unit.
7571 extern std::map<std::string, std::string> get_device_parameters(bool cpu_only);
7572 
// EXPOSE_THRIFT_MAP(TYPENAME): walks the map _<TYPENAME>_VALUES_TO_NAMES and, for each
// (value, name) entry, inserts the pair ("<TYPENAME>.<name>", std::to_string(value))
// into a variable named `_return`, which must be in scope where the macro is expanded.
7573 #define EXPOSE_THRIFT_MAP(TYPENAME) \
7574  { \
7575  std::map<int, const char*>::const_iterator it = \
7576  _##TYPENAME##_VALUES_TO_NAMES.begin(); \
7577  while (it != _##TYPENAME##_VALUES_TO_NAMES.end()) { \
7578  _return.insert(std::pair<std::string, std::string>( \
7579  #TYPENAME "." + std::string(it->second), std::to_string(it->first))); \
7580  it++; \
7581  } \
7582  }
7583 
// Fills `_return` with the entries of `params` and then with the value/name tables of
// several Thrift enums (see EXPOSE_THRIFT_MAP), keyed as "<EnumType>.<name>".
//
// NOTE(review): listing line 7590 (the declaration of `params`) is absent from this
// extracted listing; presumably it comes from the free get_device_parameters() --
// confirm.
7584 void DBHandler::get_device_parameters(std::map<std::string, std::string>& _return,
7585  const TSessionId& session_id_or_json) {
7586  heavyai::RequestInfo const request_info(session_id_or_json);
7587  SET_REQUEST_ID(request_info.requestId());
7588  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7589  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
// std::map::insert leaves existing keys untouched, so entries already present in
// `_return` are not overwritten.
7591  for (auto item : params) {
7592  _return.insert(item);
7593  }
7594  EXPOSE_THRIFT_MAP(TDeviceType);
7595  EXPOSE_THRIFT_MAP(TDatumType);
7596  EXPOSE_THRIFT_MAP(TEncodingType);
7597  EXPOSE_THRIFT_MAP(TExtArgumentType);
7598  EXPOSE_THRIFT_MAP(TOutputBufferSizeType);
7599 }
7600 
// NOTE(review): this block is heavily truncated in the extracted listing. The line
// with the function's return type and name (7601) and many body lines (7614-7615,
// 7619-7620, 7623, 7628-7629, 7631, 7635-7636, 7639-7640, 7643, 7646-7647, 7654, 7658,
// 7660, 7672, 7679-7680) are absent. The name is presumably
// DBHandler::register_runtime_extension_functions (it appears in the VLOG text below).
//
// Visible behaviour: registers runtime UDFs/UDTFs received over Thrift.
//   udfs          - user-defined functions, passed on to Calcite.
//   udtfs         - user-defined table functions, each registered by name with its
//                   sizer, argument types and annotations as runtime functions.
//   device_ir_map - looked up under the keys "cpu" and "gpu".
// Raises a DB exception when runtime registration is disabled; a superuser check is
// also visible, guarded by a condition that is not shown.
7602  const TSessionId& session_id_or_json,
7603  const std::vector<TUserDefinedFunction>& udfs,
7604  const std::vector<TUserDefinedTableFunction>& udtfs,
7605  const std::map<std::string, std::string>& device_ir_map) {
7606  heavyai::RequestInfo const request_info(session_id_or_json);
7607  SET_REQUEST_ID(request_info.requestId());
7608  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7609  stdlog.appendNameValuePairs("client", getConnectionInfo().toString());
7610 
7611  VLOG(1) << "register_runtime_extension_functions: # UDFs: " << udfs.size()
7612  << " # UDTFs: " << udtfs.size() << std::endl;
7613 
// NOTE(review): the condition guarding this throw (lines 7614-7615) is not visible.
7616  THROW_DB_EXCEPTION("Runtime UDF and UDTF function registration is disabled.");
7617  }
7618 
// NOTE(review): the enclosing condition (lines 7619-7620) and the start of the throw
// statement (line 7623) are not visible.
7621  auto session_ptr = stdlog.getConstSessionInfo();
7622  if (!session_ptr->get_currentUser().isSuper) {
7624  "Server is configured to require superuser privilege to register UDFs and "
7625  "UDTFs.");
7626  }
7627  }
7630 
// Look up the per-device entries; the assignment targets and the else-branch bodies
// are on lines missing from this listing.
7632  auto it_cpu = device_ir_map.find(std::string{"cpu"});
7633  auto it_gpu = device_ir_map.find(std::string{"gpu"});
7634  if (it_cpu != device_ir_map.end() || it_gpu != device_ir_map.end()) {
7635  if (it_cpu != device_ir_map.end()) {
7637  it_cpu->second;
7638  } else {
7641  }
7642  if (it_gpu != device_ir_map.end()) {
7644  it_gpu->second;
7645  } else {
7648  }
7649  } /* else avoid locking compilation if registration does not change
7650  the rt_udf_cpu/gpu_module instances */
7651 
7652  VLOG(1) << "Registering runtime UDTFs:\n";
7653 
7655 
// One registration call per UDTF; the callee (line 7658) is not visible.
7656  for (auto it = udtfs.begin(); it != udtfs.end(); it++) {
7657  VLOG(1) << "UDTF name=" << it->name << std::endl;
7659  it->name,
7661  ThriftSerializers::from_thrift(it->sizerType),
7662  static_cast<size_t>(it->sizerArgPos)},
7663  ThriftSerializers::from_thrift(it->inputArgTypes),
7664  ThriftSerializers::from_thrift(it->outputArgTypes),
7665  ThriftSerializers::from_thrift(it->sqlArgTypes),
7666  it->annotations,
7667  /*is_runtime =*/true);
7668  }
7669  /* Register extension functions with Calcite server */
7670  CHECK(calcite_);
// NOTE(review): the argument of to_thrift (line 7672) is not visible.
7671  auto udtfs_ = ThriftSerializers::to_thrift(
7673  calcite_->setRuntimeExtensionFunctions(udfs, udtfs_, /*is_runtime =*/true);
7674 
7675  /* Update the extension function whitelist */
7676  std::string whitelist = calcite_->getRuntimeExtensionFunctionWhitelist();
7677  VLOG(1) << "Registering runtime extension functions with CodeGen using whitelist:\n"
7678  << whitelist;
// NOTE(review): lines 7679-7680 are not visible; the `});` below closes a lambda/call
// opened on a line missing from this listing.
7681  });
7682 }
7683 
7684 void DBHandler::get_function_names(std::vector<std::string>& _return,
7685  const TSessionId& session) {
7686  for (auto udf_name :
7687  ExtensionFunctionsWhitelist::get_udfs_name(/* is_runtime */ false)) {
7688  if (std::find(_return.begin(), _return.end(), udf_name) == _return.end()) {
7689  _return.emplace_back(udf_name);
7690  }
7691  }
7692 }
7693 
7694 void DBHandler::get_runtime_function_names(std::vector<std::string>& _return,
7695  const TSessionId& session) {
7696  for (auto udf_name :
7697  ExtensionFunctionsWhitelist::get_udfs_name(/* is_runtime */ true)) {
7698  if (std::find(_return.begin(), _return.end(), udf_name) == _return.end()) {
7699  _return.emplace_back(udf_name);
7700  }
7701  }
7702 }
7703 
7704 void DBHandler::get_function_details(std::vector<TUserDefinedFunction>& _return,
7705  const TSessionId& session,
7706  const std::vector<std::string>& udf_names) {
7707  for (const std::string& udf_name : udf_names) {
7708  for (auto udf : ExtensionFunctionsWhitelist::get_ext_funcs(udf_name)) {
7709  _return.emplace_back(ThriftSerializers::to_thrift(udf));
7710  }
7711  }
7712 }
7713 
// Appends table function names to `_return`, skipping any name that `_return` already
// contains. Names are obtained with the suffix dropped and lower-cased. `session` is
// not used.
//
// NOTE(review): listing line 7716 (the loop header that introduces `tf`) is absent
// from this extracted listing, so the source of the table functions is not visible.
7714 void DBHandler::get_table_function_names(std::vector<std::string>& _return,
7715  const TSessionId& session) {
7717  const std::string& name = tf.getName(/* drop_suffix */ true, /* to_lower */ true);
7718  if (std::find(_return.begin(), _return.end(), name) == _return.end()) {
7719  _return.emplace_back(name);
7720  }
7721  }
7722 }
7723 
// Appends table function names to `_return`, skipping any name that `_return` already
// contains. Names are obtained with the suffix dropped and lower-cased. `session` is
// not used.
//
// NOTE(review): listing line 7727 (the range expression of the loop) is absent from
// this extracted listing; presumably it selects runtime table functions -- confirm.
7724 void DBHandler::get_runtime_table_function_names(std::vector<std::string>& _return,
7725  const TSessionId& session) {
7726  for (auto tf :
7728  const std::string& name = tf.getName(/* drop_suffix */ true, /* to_lower */ true);
7729  if (std::find(_return.begin(), _return.end(), name) == _return.end()) {
7730  _return.emplace_back(name);
7731  }
7732  }
7733 }
7734 
// For every name in udtf_names, looks up the matching table functions through
// TableFunctionsFactory::get_table_funcs and appends their Thrift representation
// to _return. The session argument is not consulted by the visible code.
// NOTE(review): listing line 7735 (the start of this signature, including the
// method name) is absent from this copy -- confirm against the original file.
7736  std::vector<TUserDefinedTableFunction>& _return,
7737  const TSessionId& session,
7738  const std::vector<std::string>& udtf_names) {
7739  for (const std::string& udtf_name : udtf_names) {
      // `auto tf` copies each table function before it is serialized.
7740  for (auto tf : table_functions::TableFunctionsFactory::get_table_funcs(udtf_name)) {
7741  _return.emplace_back(ThriftSerializers::to_thrift(tf));
7742  }
7743  }
7744 }
7745 
// Fills the TQueryResult `_return` from `result` by delegating to convertData in
// column format, requesting every row of the result.
// NOTE(review): listing lines 7746 (the start of the signature, which declares
// `result`) and 7756 (presumably the definition of `qsp` used below) are absent
// from this copy -- confirm against the original file.
7747  const Catalog_Namespace::SessionInfo& session_info,
7748  const std::string& query_state_str,
7749  TQueryResult& _return) {
7750  // Stuff ResultSet into _return (which is a TQueryResult)
7751  // calls convertData, but after some setup using session_info
7752 
      // Resolve the live session object for this session id; it must exist.
7753  auto session_ptr = get_session_ptr(session_info.get_session_id());
7754  CHECK(session_ptr);
7755  auto qs = create_query_state(session_ptr, query_state_str);
7757 
7758  // heavysql only accepts column format as being 'VALID',
7759  // assume that heavydb should only return column format
      // The row count is used as both first_n and at_most_n below.
7760  int32_t nRows = result.getDataPtr()->rowCount();
7761 
7762  convertData(_return,
7763  result,
7764  qsp,
7765  /*column_format=*/true,
7766  /*first_n=*/nRows,
7767  /*at_most_n=*/nRows);
7768 }
7769 
7770 static std::unique_ptr<RexLiteral> genLiteralStr(std::string val) {
7771  return std::unique_ptr<RexLiteral>(
7772  new RexLiteral(val, SQLTypes::kTEXT, SQLTypes::kTEXT, 0, 0, 0, 0));
7773 }
7774 
// Builds the result for listing user sessions: one row per session returned by
// sessions_store_->getAllSessions(), with the text columns session_id, login_name,
// client_address and db_name.
// Throws std::runtime_error when the calling session's user is not a super user.
// NOTE(review): listing line 7775 (the start of this signature) is absent from this
// copy -- confirm against the original file.
7776  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
7777  std::shared_ptr<ResultSet> rSet = nullptr;
7778  std::vector<TargetMetaInfo> label_infos;
7779 
7780  if (!session_ptr->get_currentUser().isSuper) {
7781  throw std::runtime_error(
7782  "SHOW USER SESSIONS failed, because it can only be executed by super user.");
7783  } else {
7784  // label_infos -> column labels
7785  std::vector<std::string> labels{
7786  "session_id", "login_name", "client_address", "db_name"};
7787  for (const auto& label : labels) {
7788  label_infos.emplace_back(label, SQLTypeInfo(kTEXT, true));
7789  }
7790 
7791  // logical_values -> table data
7792  std::vector<RelLogicalValues::RowValues> logical_values;
7793  auto sessions = sessions_store_->getAllSessions();
      // NOTE(review): the loop variable below shadows the `session_ptr` parameter;
      // inside the loop the name refers to the session being listed, not the caller.
7794  for (const auto& session_ptr : sessions) {
7795  logical_values.emplace_back(RelLogicalValues::RowValues{});
      // The public session id is reported, not the value of get_session_id().
7796  logical_values.back().emplace_back(
7797  genLiteralStr(session_ptr->get_public_session_id()));
7798  logical_values.back().emplace_back(
7799  genLiteralStr(session_ptr->get_currentUser().userName));
7800  logical_values.back().emplace_back(
7801  genLiteralStr(session_ptr->get_connection_info()));
7802  logical_values.back().emplace_back(
7803  genLiteralStr(session_ptr->getCatalog().getCurrentDB().dbName));
7804  }
7805 
7806  // Create ResultSet
7807  rSet = std::shared_ptr<ResultSet>(
7808  ResultSetLogicalValuesBuilder::create(label_infos, logical_values));
7809  }
7810  return ExecutionResult(rSet, label_infos);
7811 }
7812 
// Builds a nine-column text result with one row per query status entry that the
// executor reports for each inspected session. Super users see the sessions from
// getAllSessions(); other users only those from getUserSessions(<own user name>).
// NOTE(review): listing lines 7813 (start of the signature), 7834 and 7837 (parts of
// the statement that defines `executor`) and 7844 (presumably the declaration of
// `session_read_lock`) are absent from this copy -- confirm against the original.
7814  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
7815  std::shared_ptr<ResultSet> rSet = nullptr;
7816  std::vector<TargetMetaInfo> label_infos;
7817  auto current_user_name = session_ptr->get_currentUser().userName;
7818  auto is_super_user = session_ptr->get_currentUser().isSuper.load();
7819 
      // Column labels; every column is typed kTEXT below.
7820  std::vector<std::string> labels{"query_session_id",
7821  "current_status",
7822  "executor_id",
7823  "submitted",
7824  "query_str",
7825  "login_name",
7826  "client_address",
7827  "db_name",
7828  "exec_device_type"};
7829  for (const auto& label : labels) {
7830  label_infos.emplace_back(label, SQLTypeInfo(kTEXT, true));
7831  }
7832 
7833  std::vector<RelLogicalValues::RowValues> logical_values;
7835  jit_debug_ ? "/tmp" : "",
7836  jit_debug_ ? "mapdquery" : "",
7838  CHECK(executor);
7839  auto sessions = (is_super_user ? sessions_store_->getAllSessions()
7840  : sessions_store_->getUserSessions(current_user_name));
7841  for (const auto& query_session_ptr : sessions) {
7842  std::vector<QuerySessionStatus> query_infos;
      // The inner scope limits how long the session lock object is alive.
7843  {
7845  executor->getSessionLock());
7846  query_infos = executor->getQuerySessionInfo(query_session_ptr->get_session_id(),
7847  session_read_lock);
7848  }
7849  // if there exists query info fired from this session we report it to user
      // Indexed by the value of getQueryStatus(); presumably the order mirrors the
      // QueryStatus enum -- TODO confirm against its declaration.
7850  const std::string getQueryStatusStr[] = {"UNDEFINED",
7851  "PENDING_QUEUE",
7852  "PENDING_EXECUTOR",
7853  "RUNNING_QUERY_KERNEL",
7854  "RUNNING_REDUCTION",
7855  "RUNNING_IMPORTER"};
      // NOTE(review): this flag is declared once per session and never reset, so once
      // a RUNNING_IMPORTER entry is seen every later row of the same session is
      // reported as "CPU" as well.
7856  bool is_table_import_session = false;
7857  for (QuerySessionStatus& query_info : query_infos) {
7858  logical_values.emplace_back(RelLogicalValues::RowValues{});
7859  logical_values.back().emplace_back(
7860  genLiteralStr(query_session_ptr->get_public_session_id()));
7861  auto query_status = query_info.getQueryStatus();
7862  logical_values.back().emplace_back(genLiteralStr(getQueryStatusStr[query_status]));
7863  if (query_status == QuerySessionStatus::QueryStatus::RUNNING_IMPORTER) {
7864  is_table_import_session = true;
7865  }
7866  logical_values.back().emplace_back(
7867  genLiteralStr(::toString(query_info.getExecutorId())));
7868  logical_values.back().emplace_back(
7869  genLiteralStr(query_info.getQuerySubmittedTime()));
7870  logical_values.back().emplace_back(genLiteralStr(query_info.getQueryStr()));
7871  logical_values.back().emplace_back(
7872  genLiteralStr(query_session_ptr->get_currentUser().userName));
7873  logical_values.back().emplace_back(
7874  genLiteralStr(query_session_ptr->get_connection_info()));
7875  logical_values.back().emplace_back(
7876  genLiteralStr(query_session_ptr->getCatalog().getCurrentDB().dbName));
      // "GPU" is reported only for GPU sessions with no import entry seen so far.
7877  if (query_session_ptr->get_executor_device_type() == ExecutorDeviceType::GPU &&
7878  !is_table_import_session) {
7879  logical_values.back().emplace_back(genLiteralStr("GPU"));
7880  } else {
7881  logical_values.back().emplace_back(genLiteralStr("CPU"));
7882  }
7883  }
7884  }
7885 
7886  rSet = std::shared_ptr<ResultSet>(
7887  ResultSetLogicalValuesBuilder::create(label_infos, logical_values));
7888 
7889  return ExecutionResult(rSet, label_infos);
7890 }
7891 
// Appends one TQueryInfo per session in sessions_store_->getAllSessions() to
// _return, filled from the query status entries the executor reports for it.
// session_id_or_json is parsed by heavyai::RequestInfo into a request id and a
// session id.
// NOTE(review): listing lines 7897 and 7900 (parts of the statement that defines
// `executor`) and 7907 (presumably the declaration of `session_read_lock`) are
// absent from this copy -- confirm against the original file.
7892 void DBHandler::get_queries_info(std::vector<TQueryInfo>& _return,
7893  const TSessionId& session_id_or_json) {
7894  heavyai::RequestInfo const request_info(session_id_or_json);
7895  SET_REQUEST_ID(request_info.requestId());
7896  auto stdlog = STDLOG(get_session_ptr(request_info.sessionId()));
7898  jit_debug_ ? "/tmp" : "",
7899  jit_debug_ ? "mapdquery" : "",
7901  CHECK(executor);
7902  auto sessions = sessions_store_->getAllSessions();
7903  for (const auto& query_session_ptr : sessions) {
7904  const auto query_session_user_name = query_session_ptr->get_currentUser().userName;
7905  std::vector<QuerySessionStatus> query_infos;
      // The inner scope limits how long the session lock object is alive.
7906  {
7908  executor->getSessionLock());
7909  query_infos = executor->getQuerySessionInfo(query_session_ptr->get_session_id(),
7910  session_read_lock);
7911  }
7912  // if there exists query info fired from this session we report it to user
      // Indexed by the value of getQueryStatus(); presumably the order mirrors the
      // QueryStatus enum -- TODO confirm against its declaration.
7913  const std::string getQueryStatusStr[] = {"UNDEFINED",
7914  "PENDING_QUEUE",
7915  "PENDING_EXECUTOR",
7916  "RUNNING_QUERY_KERNEL",
7917  "RUNNING_REDUCTION",
7918  "RUNNING_IMPORTER"};
7919  TQueryInfo info;
      // Every iteration overwrites the same `info`, so only the last entry survives.
7920  for (QuerySessionStatus& query_info : query_infos) {
7921  info.query_session_id = query_session_ptr->get_session_id();
7922  info.query_public_session_id = query_session_ptr->get_public_session_id();
7923  info.current_status = getQueryStatusStr[query_info.getQueryStatus()];
7924  info.query_str = query_info.getQueryStr();
7925  info.executor_id = query_info.getExecutorId();
7926  info.submitted = query_info.getQuerySubmittedTime();
7927  info.login_name = query_session_user_name;
7928  info.client_address = query_session_ptr->get_connection_info();
7929  info.db_name = query_session_ptr->getCatalog().getCurrentDB().dbName;
7930  if (query_session_ptr->get_executor_device_type() == ExecutorDeviceType::GPU) {
7931  info.exec_device_type = "GPU";
7932  } else {
7933  info.exec_device_type = "CPU";
7934  }
7935  }
      // NOTE(review): this push_back sits outside the loop above. A session with no
      // query entries contributes a default-constructed TQueryInfo, and a session
      // with several entries contributes only the last one. Confirm whether one row
      // per query (push_back inside the loop) was intended.
7936  _return.push_back(info);
7937  }
7938 }
7939 
// Interrupts the queries of the session whose public id is `target_session`, on
// behalf of the requesting session `session_info`.
// Throws std::runtime_error when interruption is disabled, when no session matches
// `target_session`, or when a non-super user targets another user's session.
// NOTE(review): listing lines 7940 (start of the signature, which declares
// `session_info`), 7973 (the condition guarding the first throw), 7989 and 7992
// (parts of the statement that defines `executor`) and 8006 (presumably the
// declaration of `session_read_lock`) are absent from this copy -- confirm against
// the original file.
7941  const std::string& target_session) {
7942  // capture the interrupt request from user and then pass to corresponding Executors
7943  // that queries fired by the given session are assigned
7944  // Basic-flow that each query session gets through:
7945  // Enroll --> Update (query session info / executor) --> Running -> Cleanup
7946  // 1. We have to separate 1) "target" query session to interrupt and 2) request session
7947  // Here, we have to focus on "target" session: all interruption management is based on
7948  // the "target" session
7949  // 2. Session info and its required data structures are global to Executor, so
7950  // we can send the interrupt request from UNITARY_EXECUTOR (note that the actual query
7951  // is processed by specific Executor but can also access the global data structure)
7952  // to the Executor that the session's query has been assigned
7953  // this means each Executor should handle the interrupt request, and then update its
7954  // the latest status to the global session map for the correctness
7955  // 3. Three target session's status: PENDING_QUEUE / PENDING_EXECUTOR / RUNNING
7956  // (for now we can interrupt a query at "PENDING_EXECUTOR" and "RUNNING")
7957  // 4. each session has 1) a list of queries that the session tries to initiate and
7958  // 2) a interrupt flag map that indicates whether the session is interrupted
7959  // If a session is interrupted, we turn the flag for the session on so as to Executor
7960  // can know about the user's interrupt request on the query (after all queries are
7961  // removed then the session's query list and its flag are also deleted). And those
7962  // info is managed by Executor's global data structure
7963  // 5. To interrupt queries at "PENDING_EXECUTOR", corresponding Executor regularly
7964  // checks the interrupt flag of the session, and throws an exception if got interrupted
7965  // For the case of running query, we also turn the flag in device memory on in async
7966  // manner so as to inform the query kernel about the latest interrupt flag status
7967  // (it also checks the flag regularly during the query kernel execution and
7968  // query threads return with the error code if necessary -->
7969  // for this we inject interrupt flag checking logic in the generated query kernel)
7970  // 6. Interruption are implemented by throwing runtime_error that contains a visible
7971  // error message like "Query has been interrupted"
7972 
7974  // at least type of query interruption is enabled to allow kill query
7975  // if non-kernel query interrupt is enabled but tries to kill that type's query?
7976  // then the request is skipped
7977  // todo(yoonmin): improve kill query cmd under both types of query
7978  throw std::runtime_error(
7979  "Unable to interrupt running query. Query interrupt is disabled.");
7980  }
7981 
      // NOTE(review): this asserts that the public session id has exactly 8
      // characters. If target_session can carry unvalidated user input, a failed
      // CHECK is presumably fatal to the process -- confirm it is validated upstream.
7982  CHECK_EQ(target_session.length(), static_cast<unsigned long>(8));
7983  auto target_query_session = sessions_store_->getByPublicID(target_session);
7984  if (!target_query_session) {
7985  throw std::runtime_error(
7986  "Unable to interrupt running query. An invalid query session is given.");
7987  }
7988  auto target_session_id = target_query_session->get_session_id();
7990  jit_debug_ ? "/tmp" : "",
7991  jit_debug_ ? "mapdquery" : "",
7993  CHECK(executor);
7994 
      // A non-super user may only interrupt sessions that belong to the same user.
7995  auto non_admin_interrupt_user = !session_info.get_currentUser().isSuper.load();
7996  auto interrupt_user_name = session_info.get_currentUser().userName;
7997  if (non_admin_interrupt_user) {
7998  auto target_user_name = target_query_session->get_currentUser().userName;
7999  if (target_user_name.compare(interrupt_user_name) != 0) {
8000  throw std::runtime_error("Unable to interrupt running query.");
8001  }
8002  }
8003 
8004  auto target_executor_ids = executor->getExecutorIdsRunningQuery(target_session_id);
      // No executor reports the session as running: interrupt through `executor`
      // itself, and only if the session is enrolled.
8005  if (target_executor_ids.empty()) {
8007  executor->getSessionLock());
8008  if (executor->checkIsQuerySessionEnrolled(target_session_id, session_read_lock)) {
      // The lock is released before interrupt() is called.
8009  session_read_lock.unlock();
8010  VLOG(1) << "Received interrupt: "
8011  << "User " << session_info.get_currentUser().userLoggable()
8012  << ", LeafCount " << leaf_aggregator_.leafCount() << ", Database "
8013  << session_info.getCatalog().getCurrentDB().dbName << std::endl;
8014  executor->interrupt(target_session_id, session_info.get_session_id());
8015  }
8016  } else {
      // Otherwise forward the interrupt to every executor running the session's
      // queries.
8017  for (auto& executor_id : target_executor_ids) {
8018  VLOG(1) << "Received interrupt: "
8019  << "User " << session_info.get_currentUser().userLoggable() << ", Executor "
8020  << executor_id << ", LeafCount " << leaf_aggregator_.leafCount()
8021  << ", Database " << session_info.getCatalog().getCurrentDB().dbName
8022  << std::endl;
8023  auto target_executor = Executor::getExecutor(executor_id);
8024  target_executor->interrupt(target_session_id, session_info.get_session_id());
8025  }
8026  }
8027 }
8028 
8029 void DBHandler::alterSystemClear(const std::string& session_id,
8031  const std::string& cache_type,
8032  int64_t& execution_time_ms) {
8033  result = ExecutionResult();
8034  if (to_upper(cache_type) == "CPU") {
8035  execution_time_ms = measure<>::execution([&]() { clear_cpu_memory(session_id); });
8036  } else if (to_upper(cache_type) == "GPU") {
8037  execution_time_ms = measure<>::execution([&]() { clear_gpu_memory(session_id); });
8038  } else if (to_upper(cache_type) == "RENDER") {
8039  execution_time_ms = measure<>::execution([&]() { clearRenderMemory(session_id); });
8040  } else {
8041  throw std::runtime_error("Invalid cache type. Valid values are CPU,GPU or RENDER");
8042  }
8043 }
8044 
8045 void DBHandler::alterSession(const std::string& session_id,
8047  const std::pair<std::string, std::string>& session_parameter,
8048  int64_t& execution_time_ms) {
8049  result = ExecutionResult();
8050  if (session_parameter.first == "EXECUTOR_DEVICE") {
8051  std::string parameter_value = to_upper(session_parameter.second);
8052  TExecuteMode::type executorType;
8053  if (parameter_value == "GPU") {
8054  executorType = TExecuteMode::type::GPU;
8055  } else if (parameter_value == "CPU") {
8056  executorType = TExecuteMode::type::CPU;
8057  } else {
8058  throw std::runtime_error("Cannot set the " + session_parameter.first + " to " +
8059  session_parameter.second +
8060  ". Valid options are CPU and GPU");
8061  }
8062  execution_time_ms =
8063  measure<>::execution([&]() { set_execution_mode(session_id, executorType); });
8064  } else if (session_parameter.first == "CURRENT_DATABASE") {
8065  execution_time_ms = measure<>::execution(
8066  [&]() { switch_database(session_id, session_parameter.second); });
8067  }
8068 }
8069 
// Runs the DDL command described by query_ra and stores the outcome in the Thrift
// TQueryResult `_return`. A kill-query command is routed to interruptQuery; every
// other command is dispatched on the DdlCommandExecutor predicates, its duration is
// added to _return.execution_time_ms, and a non-empty result is converted with
// convertResultSet.
// NOTE(review): listing lines 8070 (start of the signature, including the method
// name) and 8080 (presumably the declaration of `result`) are absent from this
// copy -- confirm against the original file.
8071  TQueryResult& _return,
8072  const std::string& query_ra,
8073  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
8074  DdlCommandExecutor executor = DdlCommandExecutor(query_ra, session_ptr);
8075  std::string commandStr = executor.commandStr();
8076 
8077  if (executor.isKillQuery()) {
8078  interruptQuery(*session_ptr, executor.getTargetQuerySessionToKill());
8079  } else {
      // NOTE(review): declared without an initializer; it is only meaningful if the
      // callee it is passed to assigns it on every path -- verify.
8081  int64_t execution_time_ms;
8082  if (executor.isShowQueries()) {
8083  // getQueries still requires Thrift cannot be nested into DdlCommandExecutor
8084  _return.execution_time_ms +=
8085  measure<>::execution([&]() { result = getQueries(session_ptr); });
8086  } else if (executor.isShowUserSessions()) {
8087  // getUserSessions still requires Thrift cannot be nested into DdlCommandExecutor
8088  _return.execution_time_ms +=
8089  measure<>::execution([&]() { result = getUserSessions(session_ptr); });
8090  } else if (executor.isAlterSystemClear()) {
8091  alterSystemClear(session_ptr->get_session_id(),
8092  result,
8093  executor.returnCacheType(),
8094  execution_time_ms);
8095  _return.execution_time_ms += execution_time_ms;
8096 
8097  } else if (executor.isAlterSessionSet()) {
8098  alterSession(session_ptr->get_session_id(),
8099  result,
8100  executor.getSessionParameter(),
8101  execution_time_ms);
8102  _return.execution_time_ms += execution_time_ms;
8103  } else if (executor.isAlterSystemControlExecutorQueue()) {
8104  result = ExecutionResult();
8105  if (executor.returnQueueAction() == "PAUSE") {
8106  _return.execution_time_ms += measure<>::execution(
8107  [&]() { pause_executor_queue(session_ptr->get_session_id()); });
8108  } else if (executor.returnQueueAction() == "RESUME") {
8109  _return.execution_time_ms += measure<>::execution(
8110  [&]() { resume_executor_queue(session_ptr->get_session_id()); });
8111  } else {
8112  throw std::runtime_error("Unknown queue command.");
8113  }
8114  } else {
      // Everything else is executed by the DdlCommandExecutor itself.
8115  _return.execution_time_ms +=
8116  measure<>::execution([&]() { result = executor.execute(read_only_); });
8117  }
8118 
8119  if (!result.empty()) {
8120  // reduce execution time by the time spent during queue waiting
8121  _return.execution_time_ms -= result.getRows()->getQueueTime();
8122  convertResultSet(result, *session_ptr, commandStr, _return);
8123  }
8124  }
8125 }
8126 
// Runs the DDL command described by query_ra and stores the outcome directly in
// the ExecutionResult `_return`. A kill-query command is routed to interruptQuery;
// every other command is dispatched on the DdlCommandExecutor predicates and its
// measured duration is recorded with _return.setExecutionTime.
// NOTE(review): listing lines 8127 (start of the signature, including the method
// name) and 8176 (the statement inside the final `if`) are absent from this copy --
// confirm against the original file.
8128  ExecutionResult& _return,
8129  const std::string& query_ra,
8130  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr) {
8131  DdlCommandExecutor executor = DdlCommandExecutor(query_ra, session_ptr);
8132  std::string commandStr = executor.commandStr();
8133 
8134  if (executor.isKillQuery()) {
8135  interruptQuery(*session_ptr, executor.getTargetQuerySessionToKill());
8136  } else {
      // NOTE(review): declared without an initializer; it is only meaningful if every
      // branch below (including the callees it is passed to) assigns it -- verify.
8137  int64_t execution_time_ms;
8138  if (executor.isShowQueries()) {
8139  // getQueries still requires Thrift cannot be nested into DdlCommandExecutor
8140  execution_time_ms =
8141  measure<>::execution([&]() { _return = getQueries(session_ptr); });
8142  } else if (executor.isShowUserSessions()) {
8143  // getUserSessions still requires Thrift cannot be nested into DdlCommandExecutor
8144  execution_time_ms =
8145  measure<>::execution([&]() { _return = getUserSessions(session_ptr); });
8146  } else if (executor.isAlterSystemClear()) {
8147  alterSystemClear(session_ptr->get_session_id(),
8148  _return,
8149  executor.returnCacheType(),
8150  execution_time_ms);
8151  } else if (executor.isAlterSessionSet()) {
8152  alterSession(session_ptr->get_session_id(),
8153  _return,
8154  executor.getSessionParameter(),
8155  execution_time_ms);
8156  } else if (executor.isAlterSystemControlExecutorQueue()) {
8157  _return = ExecutionResult();
8158  if (executor.returnQueueAction() == "PAUSE") {
8159  execution_time_ms = measure<>::execution(
8160  [&]() { pause_executor_queue(session_ptr->get_session_id()); });
8161  } else if (executor.returnQueueAction() == "RESUME") {
8162  execution_time_ms = measure<>::execution(
8163  [&]() { resume_executor_queue(session_ptr->get_session_id()); });
8164  } else {
      // NOTE(review): the message below misspells "Unknown"; the TQueryResult
      // variant of this method spells it correctly.
8165  throw std::runtime_error("Unknwon queue command.");
8166  }
8167  } else {
      // Everything else is executed by the DdlCommandExecutor itself.
8168  execution_time_ms =
8169  measure<>::execution([&]() { _return = executor.execute(read_only_); });
8170  }
8171  _return.setExecutionTime(execution_time_ms);
8172  }
      // This check also runs after the kill-query branch above.
8173  if (_return.getResultType() == ExecutionResult::QueryResult) {
8174  // ResultType defaults to QueryResult => which can limit
8175  // the number of lines output via ConvertRow... use CalciteDdl instead
8177  }
8178 }
8179 
8180 void DBHandler::resizeDispatchQueue(size_t queue_size) {
8181  dispatch_queue_ = std::make_unique<QueryDispatchQueue>(queue_size);
8182 }
8183 
// Returns true when at least one of selected_table_keys belongs to the database of
// `info_schema_catalog` and check_and_reset_in_memory_system_table returns true for
// that table. Returns false when `info_schema_catalog` is null or nothing matches.
// NOTE(review): listing lines 8184 (start of the signature, including the method
// name and return type) and 8188 (the initializer of `info_schema_catalog`) are
// absent from this copy -- confirm against the original file.
8185  const std::unordered_set<shared::TableKey>& selected_table_keys) const {
8186  bool is_in_memory_system_table_query{false};
8187  const auto info_schema_catalog =
8189  if (info_schema_catalog) {
      // No early exit: the helper is called for every matching key, presumably
      // because it has a side effect (the "reset" in its name) -- TODO confirm.
8190  for (const auto& table_key : selected_table_keys) {
8191  if (table_key.db_id == info_schema_catalog->getDatabaseId()) {
8192  auto td = info_schema_catalog->getMetadataForTable(table_key.table_id, false);
8193  CHECK(td);
8194  if (check_and_reset_in_memory_system_table(*info_schema_catalog, *td)) {
8195  is_in_memory_system_table_query = true;
8196  }
8197  }
8198  }
8199  }
8200  return is_in_memory_system_table_query;
8201 }
std::lock_guard< T > lock_guard
std::pair< size_t, size_t > ArraySliceRange
Definition: Importer.h:74
void interrupt(const TSessionId &query_session, const TSessionId &interrupt_session) override
Definition: DBHandler.cpp:792
Classes used to wrap parser calls for calcite redirection.
int64_t process_deferred_copy_from(const TSessionId &session_id)
Definition: DBHandler.cpp:1430
void get_table_details_impl(TTableDetails &_return, query_state::StdLog &stdlog, const std::string &table_name, const bool get_system, const bool get_physical, const std::string &database_name={})
Definition: DBHandler.cpp:2529
std::string to_lower(const std::string &str)
std::vector< LeafHostInfo > string_leaves_
Definition: DBHandler.h:636
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
void get_tables_for_database(std::vector< std::string > &_return, const TSessionId &session, const std::string &database_name) override
Definition: DBHandler.cpp:2702
static void convertData(TQueryResult &_return, ExecutionResult &result, const QueryStateProxy &query_state_proxy, const bool column_format, const int32_t first_n, const int32_t at_most_n)
Definition: DBHandler.cpp:1250
static void addUdfs(const std::string &json_func_sigs)
const std::vector< std::string > & clang_options_
Definition: DBHandler.h:983
#define CHECK_EQ(x, y)
Definition: Logger.h:301
std::string s3_secret_key
Definition: CopyParams.h:62
boost::filesystem::path import_path_
Definition: DBHandler.h:638
RType getResultType() const
std::unique_ptr< QueryDispatchQueue > dispatch_queue_
Definition: DBHandler.h:660
void add_vsi_archive_prefix(std::string &path)
Definition: DBHandler.cpp:4212
std::vector< int > ChunkKey
Definition: types.h:36
std::vector< std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * >>> LockedTableDescriptors
Definition: LockMgr.h:272
double g_running_query_interrupt_freq
Definition: Execute.cpp:141
int32_t raster_scanlines_per_thread
Definition: CopyParams.h:90
static const int32_t SERVER_USAGE
Definition: DBObject.h:129
void importGeoTableSingle(const TSessionId &session, const std::string &table_name, const std::string &file_name, const import_export::CopyParams &copy_params, const TRowDescriptor &row_desc, const TCreateParams &create_params)
Definition: DBHandler.cpp:5450
void insert_chunks(const TSessionId &session, const TInsertChunks &insert_chunks) override
Definition: DBHandler.cpp:7126
void set_table_epoch(const TSessionId &session, const int db_id, const int table_id, const int new_epoch) override
Definition: DBHandler.cpp:7362
static const AccessPrivileges VIEW_DASHBOARD
Definition: DBObject.h:171
const std::string kDataDirectoryName
size_t g_num_tuple_threshold_switch_to_baseline
Definition: Execute.cpp:110
void resume_executor_queue(const TSessionId &session)
Definition: DBHandler.cpp:2964
static const int32_t DROP_VIEW
Definition: DBObject.h:113
#define NULL_DOUBLE
static std::vector< TableFunction > get_table_funcs()
void resetSessionsStore()
Definition: DBHandler.cpp:365
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
static TableSchemaLockMgr & instance()
Definition: LockMgr.h:40
void importGeoTableGlobFilterSort(const TSessionId &session, const std::string &table_name, const std::string &file_name, const import_export::CopyParams &copy_params, const TRowDescriptor &row_desc, const TCreateParams &create_params)
Definition: DBHandler.cpp:5423
ClientProtocol
static void convertExplain(TQueryResult &_return, const ResultSet &results, const bool column_format)
Definition: DBHandler.cpp:6405
const std::string getTargetQuerySessionToKill() const
void validate_configurations()
Definition: DBHandler.cpp:354
const bool renderer_use_parallel_executors_
Definition: DBHandler.h:965
void get_table_function_names(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7714
void insert_data(const TSessionId &session, const TInsertData &insert_data) override
Definition: DBHandler.cpp:7185
bool is_a_supported_archive_file(const std::string &path)
Definition: DBHandler.cpp:4278
const std::string & udf_filename_
Definition: DBHandler.h:981
static const int32_t ALTER_SERVER
Definition: DBObject.h:128
bool g_multi_instance
Definition: heavyai_locks.h:22
QueryStateProxy createQueryStateProxy()
Definition: QueryState.cpp:71
std::vector< PushedDownFilterInfo > execute_rel_alg(ExecutionResult &_return, QueryStateProxy, const std::string &query_ra, const bool column_format, const ExecutorDeviceType executor_device_type, const int32_t first_n, const int32_t at_most_n, const bool just_validate, const bool find_push_down_candidates, const ExplainInfo &explain_info, const std::optional< size_t > executor_index=std::nullopt) const
Definition: DBHandler.cpp:6206
std::string cat(Ts &&...args)
void set_license_key(TLicenseInfo &_return, const TSessionId &session, const std::string &key, const std::string &nonce) override
Definition: DBHandler.cpp:7532
void clearRenderMemory(const TSessionId &session)
Definition: DBHandler.cpp:2934
shared utility for globbing files, paths can be specified as either a single file, directory or wildcards
DBObjectKey getObjectKey() const
Definition: DBObject.h:221
static bool has_view_permission(const AccessPrivileges &privs, const TDBObjectPermissions &permissions)
Definition: DBHandler.cpp:2035
static const int32_t SELECT_FROM_VIEW
Definition: DBObject.h:114
void get_all_effective_roles_for_user(std::vector< std::string > &_return, const TSessionId &session, const std::string &granteeName) override
Definition: DBHandler.cpp:2293
double g_executor_resource_mgr_per_query_max_cpu_slots_ratio
Definition: Execute.cpp:182
bool isCalciteExplainDetail() const
Definition: ParserWrapper.h:75
static std::vector< ExtensionFunction > get_ext_funcs(const std::string &name)
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143
Definition: sqltypes.h:76
void get_tables(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:2692
void unshare_dashboard(const TSessionId &session, const int32_t dashboard_id, const std::vector< std::string > &groups, const std::vector< std::string > &objects, const TDashboardPermissions &permissions) override
Definition: DBHandler.cpp:5082
std::mutex handle_to_dev_ptr_mutex_
Definition: DBHandler.h:1021
TDatumType::type type_to_thrift(const SQLTypeInfo &type_info)
static const int32_t UPDATE_IN_VIEW
Definition: DBObject.h:116
const std::string commandStr() const
std::vector< std::string > * stringsPtr
Definition: sqltypes.h:234
std::string tableName
void init_executor_resource_mgr()
Definition: DBHandler.cpp:252
TRowDescriptor target_meta_infos_to_thrift(const std::vector< TargetMetaInfo > &targets)
void unshare_dashboards(const TSessionId &session, const std::vector< int32_t > &dashboard_ids, const std::vector< std::string > &groups, const TDashboardPermissions &permissions) override
Definition: DBHandler.cpp:5072
static TimeT::rep execution(F func, Args &&...args)
Definition: sample.cpp:29
std::vector< ArrayDatum > * arraysPtr
Definition: sqltypes.h:235
static void convertResult(TQueryResult &_return, const ResultSet &results, const bool column_format)
Definition: DBHandler.cpp:6411
std::shared_ptr< query_state::QueryState > create_query_state(ARGS &&...args)
Definition: DBHandler.h:663
void sql_execute_impl(ExecutionResult &_return, QueryStateProxy, const bool column_format, const ExecutorDeviceType executor_device_type, const int32_t first_n, const int32_t at_most_n, const bool use_calcite, lockmgr::LockedTableDescriptors &locks)
Definition: DBHandler.cpp:6445
static void add(const std::string &name, const TableFunctionOutputRowSizer sizer, const std::vector< ExtArgumentType > &input_args, const std::vector< ExtArgumentType > &output_args, const std::vector< ExtArgumentType > &sql_args, const std::vector< std::map< std::string, std::string >> &annotations, bool is_runtime=false)
TTableRefreshInfo get_refresh_info(const TableDescriptor *td)
Definition: DBHandler.cpp:2463
logger::RequestId requestId() const
Definition: RequestInfo.h:39
void getAllRolesForUserImpl(std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr, std::vector< std::string > &roles, const std::string &granteeName, bool effective)
Definition: DBHandler.cpp:2248
void sql_execute_gdf(TDataFrame &_return, const TSessionId &session, const std::string &query, const int32_t device_id, const int32_t first_n) override
Definition: DBHandler.cpp:1543
bool isVerbose() const
Definition: ParserWrapper.h:83
std::string const & getQueryStr() const
Definition: QueryState.h:159
DBObjectType
Definition: DBObject.h:40
static thread_local std::string client_address
Definition: DBHandler.h:154
void get_views(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:2725
void get_runtime_table_function_names(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7724
static std::string getAstFileName(const std::string &udf_file_name)
static const int32_t CREATE_VIEW
Definition: DBObject.h:112
void add(const std::string &session_id, const DeferredCopyFromState &state)
Definition: DBHandler.h:1007
#define NULL_FLOAT
bool path_is_relative(const std::string &path)
Definition: DBHandler.cpp:4246
int64_t query_get_outer_fragment_count(const TSessionId &session, const std::string &select_query) override
Definition: DBHandler.cpp:7021
void share_dashboard(const TSessionId &session, const int32_t dashboard_id, const std::vector< std::string > &groups, const std::vector< std::string > &objects, const TDashboardPermissions &permissions, const bool grant_role) override
Definition: DBHandler.cpp:5063
TCopyParams copyparams_to_thrift(const import_export::CopyParams &cp)
Definition: DBHandler.cpp:4049
auto getExecuteReadLock()
EncodingType thrift_to_encoding(const TEncodingType::type tEncodingType)
Data_Namespace::DataMgr & getDataMgr() const
Definition: Catalog.h:266
static void loadRuntimeLibs(const std::string &torch_lib_path=std::string())
void clone_session(const TSessionId session1, const TSessionId session2)
static void set_geo_physical_import_buffer_columnar(const Catalog_Namespace::Catalog &catalog, const ColumnDescriptor *cd, std::vector< std::unique_ptr< TypedImportBuffer >> &import_buffers, size_t &col_idx, std::vector< std::vector< double >> &coords_column, std::vector< std::vector< double >> &bounds_column, std::vector< std::vector< int >> &ring_sizes_column, std::vector< std::vector< int >> &poly_rings_column)
Definition: Importer.cpp:1731
#define NULL_BIGINT
void check_and_invalidate_sessions(Parser::DDLStmt *ddl)
Definition: DBHandler.cpp:6433
std::string strip(std::string_view str)
trim any whitespace from the left and right ends of a string
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:140
std::string convert_temporal_to_iso_format(const SQLTypeInfo &type_info, int64_t unix_time)
Definition: misc.cpp:111
bool has_object_privilege(const TSessionId &sessionId, const std::string &granteeName, const std::string &objectName, const TDBObjectType::type object_type, const TDBObjectPermissions &permissions) override
Definition: DBHandler.cpp:2067
#define LOG(tag)
Definition: Logger.h:285
bool enable_calcite_view_optimize
std::vector< SQLTypeInfo > getBestColumnTypes() const
Definition: Importer.cpp:3498
char unescape_char(std::string str)
Definition: DBHandler.cpp:3836
std::unordered_map< std::string, std::unordered_set< std::string > > fill_column_names_by_table(std::vector< std::string > &table_names, query_state::StdLog &stdlog)
Definition: DBHandler.cpp:1770
#define ARROW_ASSIGN_OR_THROW(lhs, rexpr)
Definition: ArrowUtil.h:60
void get_custom_expressions(std::vector< TCustomExpression > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:4643
SystemMemoryUsage getSystemMemoryUsage() const
Definition: DataMgr.cpp:131
HOST DEVICE int get_scale() const
Definition: sqltypes.h:396
static const std::string MAPD_EDITION
Definition: release.h:40
static thread_local ClientProtocol client_protocol
Definition: DBHandler.h:155
static ArrayDatum composeNullArray(const SQLTypeInfo &ti)
Definition: Importer.cpp:395
LeafAggregator leaf_aggregator_
Definition: DBHandler.h:634
void get_hardware_info(TClusterHardwareInfo &_return, const TSessionId &session) override
Definition: DBHandler.cpp:923
ArrowTransport
static bool has_server_permission(const AccessPrivileges &privs, const TDBObjectPermissions &permissions)
Definition: DBHandler.cpp:2053
void sql_execute(ExecutionResult &_return, const TSessionId &session, const std::string &query, const bool column_format, const int32_t first_n, const int32_t at_most_n, lockmgr::LockedTableDescriptors &locks)
Definition: DBHandler.cpp:1368
static const AccessPrivileges INSERT_INTO_TABLE
Definition: DBObject.h:161
void get_completion_hints(std::vector< TCompletionHint > &hints, const TSessionId &session, const std::string &sql, const int cursor) override
Definition: DBHandler.cpp:1661
std::vector< std::unique_ptr< TypedImportBuffer > > fill_missing_columns(const Catalog_Namespace::Catalog *cat, Fragmenter_Namespace::InsertData &insert_data)
Definition: Importer.cpp:6217
static const AccessPrivileges CREATE_DASHBOARD
Definition: DBObject.h:170
std::vector< TCompletionHint > just_whitelisted_keyword_hints(const std::vector< TCompletionHint > &hints)
TSessionId getInvalidSessionId() const
Definition: DBHandler.cpp:3016
void validate_sort_options(const FilePathOptions &options)
std::string join(T const &container, std::string const &delim)
void get_tables_impl(std::vector< std::string > &table_names, const Catalog_Namespace::SessionInfo &, const GetTablesType get_tables_type, const std::string &database_name={})
Definition: DBHandler.cpp:2675
std::vector< std::string > getTargetNames(const std::vector< TargetMetaInfo > &targets) const
Definition: DBHandler.cpp:6304
static void add(const std::string &json_func_sigs)
int32_t get_table_epoch(const TSessionId &session, const int32_t db_id, const int32_t table_id) override
Definition: DBHandler.cpp:7417
#define UNREACHABLE()
Definition: Logger.h:338
std::vector< std::unique_ptr< TypedImportBuffer > > setup_column_loaders(const TableDescriptor *td, Loader *loader)
Definition: Importer.cpp:6202
static void value_to_thrift_column(const TargetValue &tv, const SQLTypeInfo &ti, TColumn &column)
Definition: DBHandler.cpp:978
void sql_execute_local(TQueryResult &_return, const QueryStateProxy &query_state_proxy, const std::shared_ptr< Catalog_Namespace::SessionInfo > session_ptr, const std::string &query_str, const bool column_format, const std::string &nonce, const int32_t first_n, const int32_t at_most_n, const bool use_calcite)
Definition: DBHandler.cpp:1193
DEVICE void sort(ARGS &&...args)
Definition: gpu_enabled.h:105
void delete_custom_expressions(const TSessionId &session, const std::vector< int32_t > &custom_expression_ids, const bool do_soft_delete) override
Definition: DBHandler.cpp:4678
void get_column_hints(std::vector< TCompletionHint > &hints, const std::string &last_word, const std::unordered_map< std::string, std::unordered_set< std::string >> &column_names_by_table)
void get_table_epochs(std::vector< TTableEpochInfo > &_return, const TSessionId &session, const int32_t db_id, const int32_t table_id) override
Definition: DBHandler.cpp:7464
bool path_has_valid_filename(const std::string &path)
Definition: DBHandler.cpp:4254
void execute_query_step(TStepResult &_return, const TPendingQuery &pending_query, const TSubqueryId subquery_id, const std::string &start_time_str) override
Definition: DBHandler.cpp:7086
std::vector< bool > is_default
Definition: Fragmenter.h:75
unsigned g_cpu_threads_override
const unsigned renderer_vulkan_timeout_
Definition: DBHandler.h:964
std::string get_mismatch_attr_warning_text(const std::string &table_name, const std::string &file_path, const std::string &column_name, const std::string &attr, const std::string &got, const std::string &expected)
Definition: DBHandler.cpp:5385
int32_t objectId
Definition: DBObject.h:55
void set_execution_mode_nolock(Catalog_Namespace::SessionInfo *session_ptr, const TExecuteMode::type mode)
Definition: DBHandler.cpp:6186
const std::string base_data_path_
Definition: DBHandler.h:637
void set_execution_mode(const TSessionId &session, const TExecuteMode::type mode) override
Definition: DBHandler.cpp:3087
static const int32_t ALTER_TABLE
Definition: DBObject.h:93
ExtArgumentType from_thrift(const TExtArgumentType::type &t)
void initialize(const bool is_new_db)
Definition: DBHandler.cpp:382
const bool jit_debug_
Definition: DBHandler.h:642
const ResultSetPtr & getDataPtr() const
std::string connection_string
Definition: CopyParams.h:105
static std::vector< GeoFileLayerInfo > gdalGetLayersInGeoFile(const std::string &file_name, const CopyParams &copy_params)
Definition: Importer.cpp:5157
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
const size_t render_mem_bytes_
Definition: DBHandler.h:968
const CopyParams & get_copy_params() const
Definition: Importer.h:710
static void init_resource_mgr(const size_t num_cpu_slots, const size_t num_gpu_slots, const size_t cpu_result_mem, const size_t cpu_buffer_pool_mem, const size_t gpu_buffer_pool_mem, const double per_query_max_cpu_slots_ratio, const double per_query_max_cpu_result_mem_ratio, const bool allow_cpu_kernel_concurrency, const bool allow_cpu_gpu_kernel_concurrency, const bool allow_cpu_slot_oversubscription_concurrency, const bool allow_cpu_result_mem_oversubscription, const double max_available_resource_use_ratio)
Definition: Execute.cpp:5387
void start_render_query(TPendingRenderQuery &_return, const TSessionId &session, const int64_t widget_id, const int16_t node_idx, const std::string &vega_json) override
Definition: DBHandler.cpp:7295
void setPrivileges(const AccessPrivileges &privs)
Definition: DBObject.h:227
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:138
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
void switch_database(const TSessionId session, const std::string &dbname)
void setSessionInfo(std::shared_ptr< Catalog_Namespace::SessionInfo >)
Definition: QueryState.cpp:282
void get_db_object_privs(std::vector< TDBObject > &_return, const TSessionId &session, const std::string &objectName, const TDBObjectType::type type) override
Definition: DBHandler.cpp:2162
const std::string kInfoSchemaDbName
void krb5_connect(TKrb5Session &session, const std::string &token, const std::string &dbname) override
Definition: DBHandler.cpp:652
void get_token_based_completions(std::vector< TCompletionHint > &hints, query_state::StdLog &stdlog, std::vector< std::string > &visible_tables, const std::string &sql, const int cursor)
Definition: DBHandler.cpp:1725
Timer createTimer(char const *event_name)
Definition: QueryState.cpp:129
void check_table_load_privileges(const Catalog_Namespace::SessionInfo &session_info, const std::string &table_name)
Definition: DBHandler.cpp:6169
void disconnect(const TSessionId session)
bool isForeignTable() const
static void createSimpleResult(TQueryResult &_return, const ResultSet &results, const bool column_format, const std::string label)
Definition: DBHandler.cpp:6368
std::string raster_import_dimensions
Definition: CopyParams.h:93
double g_executor_resource_mgr_cpu_result_mem_ratio
Definition: Execute.cpp:180
void get_users(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:2847
void dispatch_query_task(std::shared_ptr< QueryDispatchQueue::Task > query_task, const bool is_update_delete)
Definition: DBHandler.cpp:1820
bool get_qualified_column_hints(std::vector< TCompletionHint > &hints, const std::string &last_word, const std::unordered_map< std::string, std::unordered_set< std::string >> &column_names_by_table)
static constexpr const char * MANUAL_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:54
#define CHECK_GT(x, y)
Definition: Logger.h:305
void detect_column_types(TDetectResult &_return, const TSessionId &session, const std::string &file_name, const TCopyParams &copy_params) override
Definition: DBHandler.cpp:4340
bool g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency
Definition: Execute.cpp:188
static const size_t auto_cpu_mem_bytes
Definition: Execute.h:1626
void initEncoder(const SQLTypeInfo &tmp_sql_type)
DeferredCopyFromSessions deferred_copy_from_sessions
Definition: DBHandler.h:1018
void execute_distributed_copy_statement(Parser::CopyTableStmt *, const Catalog_Namespace::SessionInfo &session_info)
Definition: DBHandler.cpp:6838
static bool gdalFileExists(const std::string &path, const CopyParams &copy_params)
Definition: Importer.cpp:5053
void import_table_status(TImportStatus &_return, const TSessionId &session, const std::string &import_id) override
Definition: DBHandler.cpp:5906
std::unique_ptr< AbstractImporter > create_importer(Catalog_Namespace::Catalog &catalog, const TableDescriptor *td, const std::string &copy_from_source, const import_export::CopyParams &copy_params)
Definition: Importer.cpp:6287
std::string sourceName
void fillMissingBuffers(const TSessionId &session, const Catalog_Namespace::Catalog &catalog, std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, const std::list< const ColumnDescriptor * > &cds, const std::vector< int > &desc_id_to_column_id, size_t num_rows, const std::string &table_name)
Definition: DBHandler.cpp:3275
static TableDataLockMgr & instance()
Definition: LockMgr.h:78
void get_function_names(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7684
void setNumElems(const size_t num_elems)
Definition: Encoder.h:285
DBObject * findDbObject(const DBObjectKey &objectKey, bool only_direct) const
Definition: Grantee.cpp:85
ExecutorDeviceType
void emergency_shutdown()
Definition: DBHandler.cpp:7565
std::string to_string(char const *&&v)
size_t get_column_size(const TColumn &column)
Definition: DBHandler.cpp:3454
const std::string kGeoColumnName
Definition: ColumnNames.h:23
std::string find_last_word_from_cursor(const std::string &sql, const int64_t cursor)
void set_leaf_info(const TSessionId &session, const TLeafInfo &info) override
Definition: DBHandler.cpp:973
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
Split a string apart into a vector of substrings.
int64_t start_time_
Definition: DBHandler.h:650
std::vector< std::string > getTableNamesForUser(const UserMetadata &user, const GetTablesType get_tables_type) const
Definition: Catalog.cpp:4979
void sql_validate(TRowDescriptor &_return, const TSessionId &session, const std::string &query) override
Definition: DBHandler.cpp:1594
std::vector< int > column_ids_by_names(const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
Definition: DBHandler.cpp:3131
void internal_connect(TSessionId &session, const std::string &username, const std::string &dbname)
Definition: DBHandler.cpp:620
#define LOG_IF(severity, condition)
Definition: Logger.h:384
std::pair< std::string, std::string > getSessionParameter() const
int32_t get_table_epoch_by_name(const TSessionId &session, const std::string &table_name) override
Definition: DBHandler.cpp:7438
#define NULL_INT
static void clearMemory(const Data_Namespace::MemoryLevel memory_level)
Definition: Execute.cpp:535
bool is_in_memory_system_table
bool should_suggest_column_hints(const std::string &partial_query)
std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * > > prepare_loader_generic(const Catalog_Namespace::SessionInfo &session_info, const std::string &table_name, size_t num_cols, std::unique_ptr< import_export::Loader > *loader, std::vector< std::unique_ptr< import_export::TypedImportBuffer >> *import_buffers, const std::vector< std::string > &column_names, std::string load_type)
Definition: DBHandler.cpp:3399
std::unique_lock< WrapperType< std::shared_mutex >> ExecutorWriteLock
import_export::CopyParams copy_params
Definition: DBHandler.h:990
void fillGeoColumns(const TSessionId &session, const Catalog_Namespace::Catalog &catalog, std::vector< std::unique_ptr< import_export::TypedImportBuffer >> &import_buffers, const ColumnDescriptor *cd, size_t &col_idx, size_t num_rows, const std::string &table_name)
Definition: DBHandler.cpp:3237
static void resume_executor_queue()
Definition: Execute.cpp:5429
size_t getCurrentCacheSizeForDevice(CacheItemType item_type, DeviceIdentifier device_identifier) const
Definition: DataRecycler.h:590
constexpr double a
Definition: Utm.h:32
std::unordered_map< std::string, Catalog_Namespace::SessionInfoPtr > calcite_sessions_
Definition: DBHandler.h:951
int tableId
identifies the table into which the data is being inserted
Definition: Fragmenter.h:70
void convertResultSet(ExecutionResult &result, const Catalog_Namespace::SessionInfo &session_info, const std::string &query_state_str, TQueryResult &_return)
Definition: DBHandler.cpp:7746
std::shared_lock< T > shared_lock
static std::shared_ptr< Executor > getExecutor(const ExecutorId id, const std::string &debug_dir="", const std::string &debug_file="", const SystemParameters &system_parameters=SystemParameters())
Definition: Execute.cpp:513
std::conditional_t< is_cuda_compiler(), DeviceArrayDatum, HostArrayDatum > ArrayDatum
Definition: sqltypes.h:229
TRowDescriptor validateRelAlg(const std::string &query_ra, QueryStateProxy query_state_proxy)
Definition: DBHandler.cpp:1826
void connect_impl(TSessionId &session, const std::string &passwd, const std::string &dbname, const Catalog_Namespace::UserMetadata &user_meta, std::shared_ptr< Catalog_Namespace::Catalog > cat, query_state::StdLog &stdlog)
Definition: DBHandler.cpp:694
void addExecutionTime(int64_t execution_time_ms)
std::string getDefaultValueLiteral() const
tuple rows
Definition: report.py:114
Driver for running validation on a single node.
void log_cache_size(const Catalog_Namespace::Catalog &cat)
Definition: DBHandler.cpp:3167
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178
std::string add_metadata_columns
Definition: CopyParams.h:94
size_t numRows
the number of rows being inserted
Definition: Fragmenter.h:72
void disconnect_impl(Catalog_Namespace::SessionInfoPtr &session_ptr)
Definition: DBHandler.cpp:727
This file contains the class specification and related data structures for Catalog.
ImportHeaderRow has_header
Definition: CopyParams.h:46
void connect(TSessionId &session, const std::string &username, const std::string &passwd, const std::string &dbname) override
Definition: DBHandler.cpp:658
void load_table_binary_arrow(const TSessionId &session, const std::string &table_name, const std::string &arrow_stream, const bool use_column_names) override
Definition: DBHandler.cpp:3606
void checkpoint(const TSessionId &session, const int32_t table_id) override
Definition: DBHandler.cpp:7352
std::string ActualQuery()
Definition: ParserWrapper.h:81
bool isAlterSystemClear() const
bool isAggregator() const
Definition: DBHandler.cpp:648
bool g_enable_columnar_output
Definition: Execute.cpp:106
size_t g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline
Definition: Execute.cpp:111
void delete_dashboards(const TSessionId &session, const std::vector< int32_t > &dashboard_ids) override
Definition: DBHandler.cpp:4926
void add(AccessPrivileges newprivs)
Definition: DBObject.h:145
bool is_reserved_name(const std::string &name)
static constexpr const char * REFRESH_START_DATE_TIME_KEY
Definition: ForeignTable.h:44
TRole::type getServerRole() const
Definition: DBHandler.cpp:850
std::optional< std::string > regex_path_filter
Definition: CopyParams.h:85
void validateDashboardIdsForSharing(const Catalog_Namespace::SessionInfo &session_info, const std::vector< int32_t > &dashboard_ids)
Definition: DBHandler.cpp:4983
std::shared_lock< WrapperType< std::shared_mutex >> ExecutorReadLock
const std::string kDefaultImportDirName
TColumnType populateThriftColumnType(const Catalog_Namespace::Catalog *cat, const ColumnDescriptor *cd)
Definition: DBHandler.cpp:2363
Supported runtime functions management and retrieval.
static bool has_table_permission(const AccessPrivileges &privs, const TDBObjectPermissions &permission)
Definition: DBHandler.cpp:1999
void get_layers_in_geo_file(std::vector< TGeoFileLayerInfo > &_return, const TSessionId &session, const std::string &file_name, const TCopyParams &copy_params) override
Definition: DBHandler.cpp:6007
const size_t reserved_gpu_mem_
Definition: DBHandler.h:970
TColumnType create_geo_column(const TDatumType::type type, const std::string &name, const bool is_array)
Definition: DBHandler.cpp:5160
std::string TTypeInfo_TypeToString(const TDatumType::type &t)
Definition: DBHandler.cpp:5379
static SysCatalog & instance()
Definition: SysCatalog.h:343
void get_first_geo_file_in_archive(std::string &_return, const TSessionId &session, const std::string &archive_path, const TCopyParams &copy_params) override
Definition: DBHandler.cpp:5921
void create_table(const TSessionId &session, const std::string &table_name, const TRowDescriptor &row_desc, const TCreateParams &create_params) override
Definition: DBHandler.cpp:5191
void check_geospatial_files(const boost::filesystem::path file_path, const import_export::CopyParams &copy_params)
Definition: DBHandler.cpp:5170
CONSTEXPR DEVICE bool is_null(const T &value)
RasterPointType raster_point_type
Definition: CopyParams.h:88
void update_custom_expression(const TSessionId &session, const int32_t id, const std::string &expression_json) override
Definition: DBHandler.cpp:4660
#define THROW_COLUMN_ATTR_MISMATCH_EXCEPTION(attr, got, expected)
Definition: DBHandler.cpp:5398
static constexpr const char * REFRESH_UPDATE_TYPE_KEY
Definition: ForeignTable.h:46
const bool render_compositor_use_last_gpu_
Definition: DBHandler.h:971
QueryStateProxy createQueryStateProxy()
Definition: QueryState.cpp:139
const DBMetadata & getCurrentDB() const
Definition: Catalog.h:265
void get_function_details(std::vector< TUserDefinedFunction > &_return, const TSessionId &session, const std::vector< std::string > &udf_names) override
Definition: DBHandler.cpp:7704
void get_all_files_in_archive(std::vector< std::string > &_return, const TSessionId &session, const std::string &archive_path, const TCopyParams &copy_params) override
Definition: DBHandler.cpp:5968
bool g_enable_system_tables
Definition: SysCatalog.cpp:64
std::string g_base_path
Definition: SysCatalog.cpp:62
const size_t CALCITE_SESSION_ID_LENGTH
Definition: SessionInfo.h:126
static const int32_t DROP_DATABASE
Definition: DBObject.h:79
std::string generate_random_string(const size_t len)
void setQueryState(std::shared_ptr< QueryState >)
Definition: QueryState.cpp:278
#define EXPOSE_THRIFT_MAP(TYPENAME)
Definition: DBHandler.cpp:7573
DEVICE auto copy(ARGS &&...args)
Definition: gpu_enabled.h:51
void get_tables_meta_impl(std::vector< TTableMeta > &_return, QueryStateProxy query_state_proxy, const Catalog_Namespace::SessionInfo &session_info, const bool with_table_locks=true)
Definition: DBHandler.cpp:2734
void check_not_info_schema_db(const std::string &db_name, bool throw_db_exception)
Definition: DBHandler.cpp:4811
GetTablesType
Definition: Catalog.h:63
static HashtableRecycler * getHashTableCache()
const int max_session_duration_
Definition: DBHandler.h:960
ExecutorDeviceType executor_device_type_
Definition: DBHandler.h:639
static void readMetadataSampleGDAL(const std::string &fileName, const std::string &geoColumnName, std::map< std::string, std::vector< std::string >> &metadata, int rowLimit, const CopyParams &copy_params)
Definition: Importer.cpp:4625
static constexpr const char * REFRESH_INTERVAL_KEY
Definition: ForeignTable.h:45
std::shared_ptr< Catalog_Namespace::SessionInfo > get_session_ptr(const TSessionId &session_id)
Definition: DBHandler.cpp:6147
string version
Definition: setup.in.py:73
ProjectionTokensForCompletion extract_projection_tokens_for_completion(const std::string &sql)
Definition: DBHandler.cpp:1637
static std::shared_ptr< QueryEngine > createInstance(CudaMgr_Namespace::CudaMgr *cuda_mgr, bool cpu_only)
Definition: QueryEngine.h:97
std::shared_ptr< Catalog_Namespace::SessionInfo > getSessionInfo() const
Definition: QueryState.cpp:155
std::vector< LeafHostInfo > db_leaves_
Definition: DBHandler.h:635
void shutdown()
Definition: DBHandler.cpp:7551
static std::unordered_set< std::string > get_udfs_name(const bool is_runtime)
const File_Namespace::DiskCacheConfig & disk_cache_config_
Definition: DBHandler.h:980
void removeInMemoryCalciteSession(const std::string &session_id)
Definition: DBHandler.cpp:613
RequestId set_new_request_id()
Definition: Logger.cpp:891
#define INVALID_SESSION_ID
Definition: DBHandler.cpp:131
static bool has_database_permission(const AccessPrivileges &privs, const TDBObjectPermissions &permissions)
Definition: DBHandler.cpp:1982
static const int32_t DELETE_FROM_TABLE
Definition: DBObject.h:91
const std::vector< TargetMetaInfo > & getTargetsMeta() const
void validate_import_file_path_if_local(const std::string &file_path)
Definition: DBHandler.cpp:4332
const std::string & clang_path_
Definition: DBHandler.h:982
const std::shared_ptr< ResultSet > & getRows() const
bool hasErrorCode(ErrorCode const ec) const
Definition: ErrorHandling.h:65
bool isShowUserSessions() const
bool g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency
Definition: Execute.cpp:194
std::unique_lock< T > unique_lock
std::unique_ptr< RenderHandler > render_handler_
Definition: DBHandler.h:654
void get_completion_hints_unsorted(std::vector< TCompletionHint > &hints, std::vector< std::string > &visible_tables, query_state::StdLog &stdlog, const std::string &sql, const int cursor)
Definition: DBHandler.cpp:1696
void alterSession(const std::string &sesson_id, ExecutionResult &result, const std::pair< std::string, std::string > &session_parameter, int64_t &execution_time_ms)
Definition: DBHandler.cpp:8045
static const int32_t TRUNCATE_TABLE
Definition: DBObject.h:92
std::string sql_order_by
Definition: CopyParams.h:98
Checked JSON field retrieval.
bool g_enable_watchdog
static bool getGeoColumns(const std::string &wkt_or_wkb_hex, SQLTypeInfo &ti, std::vector< double > &coords, std::vector< double > &bounds, std::vector< int > &ring_sizes, std::vector< int > &poly_rings, const bool validate_with_geos_if_available)
Definition: Types.cpp:1121
const DashboardDescriptor * getMetadataForDashboard(const std::string &userId, const std::string &dashName) const
void set_cur_session(const TSessionId &parent_session, const TSessionId &leaf_session, const std::string &start_time_str, const std::string &label, bool for_running_query_kernel) override
Definition: DBHandler.cpp:2978
std::string get_import_tag(const std::string &import_tag, const std::string &table_name, const std::string &file_path)
Definition: DBHandler.cpp:3315
~DBHandler() override
Definition: DBHandler.cpp:576
void updateResultSet(const std::string &query_ra, RType type, bool success=true)
std::shared_ptr< QueryEngine > query_engine_
Definition: DBHandler.h:653
SystemParameters & system_parameters_
Definition: DBHandler.h:652
const size_t num_reader_threads_
Definition: DBHandler.h:973
An AbstractBuffer is a unit of data management for a data manager.
import_export::SourceType source_type
Definition: CopyParams.h:57
std::string get_load_tag(const std::string &load_tag, const std::string &table_name)
Definition: DBHandler.cpp:3309
#define SET_REQUEST_ID(parent_request_id)
Definition: DBHandler.cpp:133
size_t getTotalMemorySizeForDictionariesForDatabase() const
Definition: Catalog.cpp:2380
TDashboard get_dashboard_impl(const std::shared_ptr< Catalog_Namespace::SessionInfo const > &session_ptr, Catalog_Namespace::UserMetadata &user_meta, const DashboardDescriptor *dash, const bool populate_state=true)
Definition: DBHandler.cpp:4745
const DictDescriptor * getMetadataForDict(int dict_ref, bool loadDict=true) const
Definition: Catalog.cpp:1907
Specifies the in-memory content of a row in the column metadata table.
int32_t max_num_sessions_
Definition: DBHandler.h:984
size_t g_max_log_length
Definition: Execute.cpp:176
void delete_dashboard(const TSessionId &session, const int32_t dashboard_id) override
Definition: DBHandler.cpp:4921
std::string getName() const
Definition: DBObject.h:219
static std::unique_ptr< SessionsStore > create(const std::string &base_path, size_t n_workers, int idle_session_duration, int max_session_duration, int capacity, DisconnectCallback disconnect_callback)
void get_session_info(TSessionInfo &_return, const TSessionId &session) override
Definition: DBHandler.cpp:958
size_t leafCount() const
std::string toString(const Executor::ExtModuleKinds &kind)
Definition: Execute.h:1703
static const int32_t EDIT_DASHBOARD
Definition: DBObject.h:104
static const int32_t DELETE_DASHBOARD
Definition: DBObject.h:102
std::pair< TPlanResult, lockmgr::LockedTableDescriptors > parse_to_ra(QueryStateProxy, const std::string &query_str, const std::vector< TFilterPushDownInfo > &filter_push_down_info, const bool acquire_locks, const SystemParameters &system_parameters, bool check_privileges=true)
Definition: DBHandler.cpp:6913
bool isOptimizedExplain() const
Definition: ParserWrapper.h:70
void create_link(std::string &_return, const TSessionId &session, const std::string &view_state, const std::string &view_metadata) override
Definition: DBHandler.cpp:5136
#define NULL_BOOLEAN
static const int32_t INSERT_INTO_TABLE
Definition: DBObject.h:89
void get_version(std::string &_return) override
Definition: DBHandler.cpp:2867
RecordBatchVector loadArrowStream(const std::string &stream)
Definition: DBHandler.cpp:3576
static bool supportsNetworkFileAccess()
Definition: GDAL.cpp:123
bool g_optimize_cuda_block_and_grid_sizes
Definition: Execute.cpp:169
void get_tables_meta(std::vector< TTableMeta > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:2828
bool g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency
Definition: Execute.cpp:191
int get_precision() const
Definition: sqltypes.h:394
void validate_allowed_file_path(const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
Definition: DdlUtils.cpp:822
const bool renderer_prefer_igpu_
Definition: DBHandler.h:963
heavyai::shared_mutex calcite_sessions_mtx_
Definition: DBHandler.h:952
DBObjectType getType() const
Definition: DBObject.h:220
std::string to_upper(const std::string &str)
void get_server_status(TServerStatus &_return, const TSessionId &session) override
Definition: DBHandler.cpp:859
void setResultType(RType type)
static bool gdalFileOrDirectoryExists(const std::string &path, const CopyParams &copy_params)
Definition: Importer.cpp:5058
static ResultSet * create(std::vector< TargetMetaInfo > &label_infos, std::vector< RelLogicalValues::RowValues > &logical_values)
std::vector< std::shared_ptr< arrow::RecordBatch >> RecordBatchVector
Definition: DBHandler.cpp:3561
void load_table_binary_columnar(const TSessionId &session, const std::string &table_name, const std::vector< TColumn > &cols, const std::vector< std::string > &column_names) override
Definition: DBHandler.cpp:3468
bool is_info_schema_db(const std::string &db_name)
Definition: DBHandler.cpp:4806
bool isSelectExplain() const
Definition: ParserWrapper.h:58
static TDBObject serialize_db_object(const std::string &roleName, const DBObject &inObject)
Definition: DBHandler.cpp:1919
bool is_column() const
Definition: sqltypes.h:600
std::string thrift_to_encoding_name(const TTypeInfo &ti)
void fixup_geo_column_descriptor(TColumnType &col_type, const SQLTypes subtype, const int output_srid)
static const int32_t CREATE_SERVER
Definition: DBObject.h:126
std::string thrift_to_name(const TTypeInfo &ti)
RuntimeUdfRegistrationPolicy runtime_udf_registration_policy
std::vector< TargetMetaInfo > getTargetMetaInfo(const std::vector< std::shared_ptr< Analyzer::TargetEntry >> &targets) const
Definition: DBHandler.cpp:6282
std::string get_session_id() const
Definition: SessionInfo.h:93
bool isJustExplain() const
Definition: ParserWrapper.h:52
static void deallocateArrowResultBuffer(const ArrowResult &result, const ExecutorDeviceType device_type, const size_t device_id, std::shared_ptr< Data_Namespace::DataMgr > &data_mgr)
std::string geo_layer_name
Definition: CopyParams.h:81
const bool allow_loop_joins_
Definition: DBHandler.h:646
void loadKey()
Definition: DBObject.cpp:190
void start_query(TPendingQuery &_return, const TSessionId &leaf_session, const TSessionId &parent_session, const std::string &serialized_rel_alg_dag, const std::string &start_time_str, const bool just_explain, const std::vector< int64_t > &outer_fragment_indices) override
Definition: DBHandler.cpp:7053
const AccessPrivileges & getPrivileges() const
Definition: DBObject.h:226
std::shared_ptr< Catalog > getCatalog(const std::string &dbName)
bool isAlterSessionSet() const
bool file_or_glob_path_exists(const std::string &path)
std::unique_ptr< HeavyDBAggHandler > agg_handler_
Definition: DBHandler.h:655
std::optional< std::string > default_value
void load_table(const TSessionId &session, const std::string &table_name, const std::vector< TStringRow > &rows, const std::vector< std::string > &column_names) override
Definition: DBHandler.cpp:3710
Definition: sqltypes.h:79
Definition: sqltypes.h:80
void broadcast_serialized_rows(const TSerializedRows &serialized_rows, const TRowDescriptor &row_desc, const TQueryId query_id, const TSubqueryId subquery_id, const bool is_final_subquery_result) override
Definition: DBHandler.cpp:7106
std::vector< std::string > get_valid_groups(const TSessionId &session, int32_t dashboard_id, std::vector< std::string > groups)
Definition: DBHandler.cpp:4946
heavyai::shared_mutex custom_expressions_mutex_
Definition: DBHandler.h:1073
TExecuteMode::type getExecutionMode(const TSessionId &session)
Definition: DBHandler.cpp:3074
void clone_session(TSessionId &session2, const TSessionId &session1) override
Definition: DBHandler.cpp:768
TExtArgumentType::type to_thrift(const ExtArgumentType &t)
int32_t g_distributed_leaf_idx
Definition: Catalog.cpp:98
std::unordered_set< std::string > get_uc_compatible_table_names_by_column(const std::unordered_set< std::string > &uc_column_names, std::vector< std::string > &table_names, query_state::StdLog &stdlog)
Definition: DBHandler.cpp:1795
void get_dashboard(TDashboard &_return, const TSessionId &session, const int32_t dashboard_id) override
Definition: DBHandler.cpp:4698
const bool enable_rendering_
Definition: DBHandler.h:962
static void addUdfIrToModule(const std::string &udf_ir_filename, const bool is_cuda_ir)
#define ARROW_THRIFT_THROW_NOT_OK(s)
Definition: DBHandler.cpp:3563
std::string json() const
Definition: RequestInfo.cpp:22
void get_table_details(TTableDetails &_return, const TSessionId &session, const std::string &table_name) override
Definition: DBHandler.cpp:2435
void share_dashboards(const TSessionId &session, const std::vector< int32_t > &dashboard_ids, const std::vector< std::string > &groups, const TDashboardPermissions &permissions) override
Definition: DBHandler.cpp:5052
TPlanResult processCalciteRequest(QueryStateProxy, const std::shared_ptr< Catalog_Namespace::Catalog > &cat, const std::string &query_str, const std::vector< TFilterPushDownInfo > &filter_push_down_info, const SystemParameters &system_parameters, const bool check_privileges)
Definition: DBHandler.cpp:6880
void get_db_objects_for_grantee(std::vector< TDBObject > &_return, const TSessionId &session, const std::string &roleName) override
Definition: DBHandler.cpp:2133
ExecutionResult execute(bool read_only_mode)
void appendNameValuePairs(Pairs &&...pairs)
Definition: QueryState.h:312
void get_link_view(TFrontendView &_return, const TSessionId &session, const std::string &link) override
Definition: DBHandler.cpp:2639
const bool intel_jit_profile_
Definition: DBHandler.h:643
bool super_user_rights_
Definition: DBHandler.h:956
std::string format_num_bytes(const size_t bytes)
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
const bool renderer_enable_slab_allocation_
Definition: DBHandler.h:972
std::string returnQueueAction() const
bool is_date_in_days() const
Definition: sqltypes.h:1018
bool g_executor_resource_mgr_allow_cpu_kernel_concurrency
Definition: Execute.cpp:187
std::vector< DataBlockPtr > data
the number of rows being inserted
Definition: Fragmenter.h:73
static WriteLock getWriteLockForTable(const Catalog_Namespace::Catalog &cat, const std::string &table_name)
Definition: LockMgr.cpp:137
void disconnect(const TSessionId &session) override
Definition: DBHandler.cpp:719
void check_in_memory_system_table_query(const std::vector< std::vector< std::string >> &selected_tables)
Definition: DBHandler.cpp:6864
static constexpr const char * ALL_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:51
bool table_is_replicated(const TableDescriptor *td)
TCustomExpression create_thrift_obj_from_custom_expr(const CustomExpression &custom_expr, const Catalog &catalog)
Definition: DBHandler.cpp:4601
void pause_executor_queue(const TSessionId &session)
Definition: DBHandler.cpp:2950
Catalog & getCatalog() const
Definition: SessionInfo.h:75
static void convertRows(TQueryResult &_return, QueryStateProxy query_state_proxy, const std::vector< TargetMetaInfo > &targets, const ResultSet &results, const bool column_format, const int32_t first_n, const int32_t at_most_n)
Definition: DBHandler.cpp:6313
std::string sanitize_name(const std::string &name, const bool underscore=false)
void import_table(const TSessionId &session, const std::string &table_name, const std::string &file_name, const TCopyParams &copy_params) override
Definition: DBHandler.cpp:5280
std::unique_ptr< Catalog_Namespace::SessionsStore > sessions_store_
Definition: DBHandler.h:985
void register_runtime_extension_functions(const TSessionId &session, const std::vector< TUserDefinedFunction > &udfs, const std::vector< TUserDefinedTableFunction > &udtfs, const std::map< std::string, std::string > &device_ir_map) override
Definition: DBHandler.cpp:7601
std::shared_ptr< Calcite > calcite_
Definition: DBHandler.h:657
static ArrayDatum composeNullPointCoords(const SQLTypeInfo &coords_ti, const SQLTypeInfo &geo_ti)
Definition: Importer.cpp:399
T & instance()
Definition: LockMgr.cpp:101
Basic constructors and methods of the row set interface.
void get_table_details_for_database(TTableDetails &_return, const TSessionId &session, const std::string &table_name, const std::string &database_name) override
Definition: DBHandler.cpp:2448
void get_status(std::vector< TServerStatus > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:878
const std::vector< PushedDownFilterInfo > & getPushedDownFilterInfo() const
static const int32_t ACCESS
Definition: DBObject.h:81
void switch_database(const TSessionId &session, const std::string &dbname) override
Definition: DBHandler.cpp:747
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
Definition: DBHandler.h:632
void validateGroups(const std::vector< std::string > &groups)
Definition: DBHandler.cpp:4975
const bool read_only_
Definition: DBHandler.h:645
std::string s3_session_token
Definition: CopyParams.h:63
static const int32_t CREATE_DATABASE
Definition: DBObject.h:78
void check_table_not_sharded(const TableDescriptor *td)
Definition: DBHandler.cpp:3099
void executeDdl(TQueryResult &_return, const std::string &query_ra, std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
Definition: DBHandler.cpp:8070
void shareOrUnshareDashboards(const TSessionId &session, const std::vector< int32_t > &dashboard_ids, const std::vector< std::string > &groups, const TDashboardPermissions &permissions, const bool do_share)
Definition: DBHandler.cpp:5008
void get_queries_info(std::vector< TQueryInfo > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7892
bool checkInMemorySystemTableQuery(const std::unordered_set< shared::TableKey > &tables_selected_from) const
Definition: DBHandler.cpp:8184
void removeFragmenterForTable(const int table_id) const
Definition: Catalog.cpp:4270
void get_runtime_function_names(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7694
static std::map< ExtModuleKinds, std::string > extension_module_sources
Definition: Execute.h:528
static const AccessPrivileges ACCESS
Definition: DBObject.h:153
void get_databases(std::vector< TDBInfo > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:3056
static const int32_t VIEW_DASHBOARD
Definition: DBObject.h:103
bool user_can_access_table(const Catalog_Namespace::SessionInfo &, const TableDescriptor *td, const AccessPrivileges acess_priv)
Definition: DBHandler.cpp:6418
static const AccessPrivileges ALL_TABLE
Definition: DBObject.h:157
std::vector< std::vector< std::string > > get_sample_rows(size_t n)
Definition: Importer.cpp:3464
void replace_dashboard(const TSessionId &session, const int32_t dashboard_id, const std::string &dashboard_name, const std::string &dashboard_owner, const std::string &dashboard_state, const std::string &image_hash, const std::string &dashboard_metadata) override
Definition: DBHandler.cpp:4869
static const int32_t VIEW_SQL_EDITOR
Definition: DBObject.h:80
static constexpr const char * APPEND_REFRESH_UPDATE_TYPE
Definition: ForeignTable.h:52
void get_result_row_for_pixel(TPixelTableRowResult &_return, const TSessionId &session, const int64_t widget_id, const TPixel &pixel, const std::map< std::string, std::vector< std::string >> &table_col_names, const bool column_format, const int32_t pixel_radius, const std::string &nonce) override
Definition: DBHandler.cpp:2317
const bool legacy_syntax_
Definition: DBHandler.h:658
void import_geo_table(const TSessionId &session, const std::string &table_name, const std::string &file_name, const TCopyParams &copy_params, const TRowDescriptor &row_desc, const TCreateParams &create_params) override
Definition: DBHandler.cpp:5404
void check_read_only(const std::string &str)
Definition: DBHandler.cpp:580
HOST DEVICE int get_comp_param() const
Definition: sqltypes.h:402
static std::unique_ptr< RexLiteral > genLiteralStr(std::string val)
Definition: DBHandler.cpp:7770
void set_table_epoch_by_name(const TSessionId &session, const std::string &table_name, const int new_epoch) override
Definition: DBHandler.cpp:7387
int32_t g_distributed_num_leaves
Definition: Catalog.cpp:99
void set_table_epochs(const TSessionId &session, const int32_t db_id, const std::vector< TTableEpochInfo > &table_epochs) override
Definition: DBHandler.cpp:7493
bool g_allow_system_dashboard_update
Definition: DBHandler.cpp:124
double g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio
Definition: Execute.cpp:183
std::string returnCacheType() const
bool g_uniform_request_ids_per_thrift_call
Definition: DBHandler.cpp:125
static const int32_t DROP_TABLE
Definition: DBObject.h:87
#define NULL_TINYINT
size_t g_executor_resource_mgr_cpu_result_mem_bytes
Definition: Execute.cpp:181
RequestId request_id()
Definition: Logger.cpp:876
dictionary params
Definition: report.py:27
bool g_enable_filter_push_down
Definition: Execute.cpp:102
std::string find_first_geo_file_in_archive(const std::string &archive_path, const import_export::CopyParams &copy_params)
Definition: DBHandler.cpp:4291
std::vector< std::string > get_headers()
Definition: Importer.cpp:3479
void deallocate_df(const TSessionId &session, const TDataFrame &df, const TDeviceType::type device_type, const int32_t device_id) override
Definition: DBHandler.cpp:1562
std::string pg_shim(std::string const &query)
static const int32_t INSERT_INTO_VIEW
Definition: DBObject.h:115
std::string raster_import_bands
Definition: CopyParams.h:89
static void registerExtensionFunctions(F register_extension_functions)
Definition: Execute.h:470
Catalog_Namespace::SessionInfo get_session_copy(const TSessionId &session_id)
Definition: DBHandler.cpp:6136
bool dashboard_exists(const Catalog_Namespace::Catalog &cat, const int32_t user_id, const std::string &dashboard_name)
Definition: DBHandler.cpp:152
bool has_role(const TSessionId &sessionId, const std::string &granteeName, const std::string &roleName) override
Definition: DBHandler.cpp:1897
static constexpr const char * REFRESH_TIMING_TYPE_KEY
Definition: ForeignTable.h:43
int32_t create_custom_expression(const TSessionId &session, const TCustomExpression &custom_expression) override
Definition: DBHandler.cpp:4625
void render_vega(TRenderResult &_return, const TSessionId &session, const int64_t widget_id, const std::string &vega_json, const int32_t compression_level, const std::string &nonce) override
Definition: DBHandler.cpp:4523
std::map< const std::string, const PermissionFuncPtr > permissionFuncMap_
Definition: DBHandler.h:1032
bool g_allow_memory_status_log
Definition: Execute.cpp:200
std::unique_ptr< Catalog_Namespace::CustomExpression > create_custom_expr_from_thrift_obj(const TCustomExpression &t_custom_expr, const Catalog &catalog)
Definition: DBHandler.cpp:4582
std::unique_ptr< HeavyDBLeafHandler > leaf_handler_
Definition: DBHandler.h:656
std::string remove_vsi_prefixes(const std::string &path_in)
Definition: DBHandler.cpp:4224
void invalidate_cur_session(const TSessionId &parent_session, const TSessionId &leaf_session, const std::string &start_time_str, const std::string &label, bool for_running_query_kernel) override
Definition: DBHandler.cpp:3001
void add_vsi_network_prefix(std::string &path)
Definition: DBHandler.cpp:4183
#define CHECK(condition)
Definition: Logger.h:291
static const int32_t DELETE_FROM_VIEW
Definition: DBObject.h:117
bool is_geometry() const
Definition: sqltypes.h:597
#define DEBUG_TIMER(name)
Definition: Logger.h:412
static HashtableRecycler * getHashTableCache()
static const int32_t CREATE_TABLE
Definition: DBObject.h:86
static ImportStatus get_import_status(const std::string &id)
Definition: Importer.cpp:231
void check_table_consistency(TTableMeta &_return, const TSessionId &session, const int32_t table_id) override
Definition: DBHandler.cpp:7037
void execute_rel_alg_with_filter_push_down(ExecutionResult &_return, QueryStateProxy, std::string &query_ra, const bool column_format, const ExecutorDeviceType executor_device_type, const int32_t first_n, const int32_t at_most_n, const bool just_explain, const bool just_calcite_explain, const std::vector< PushedDownFilterInfo > &filter_push_down_requests)
Definition: DBHandler.cpp:6794
const int idle_session_duration_
Definition: DBHandler.h:959
double g_executor_resource_mgr_max_available_resource_use_ratio
Definition: Execute.cpp:195
void setExecutionTime(int64_t execution_time_ms)
static TDatum value_to_thrift(const TargetValue &tv, const SQLTypeInfo &ti)
Definition: DBHandler.cpp:1103
static bool is_allowed_on_dashboard(const Catalog_Namespace::SessionInfo &session_info, int32_t dashboard_id, AccessPrivileges requestedPermissions)
Definition: DBHandler.cpp:4565
void remove(const std::string &session_id)
Definition: DBHandler.h:1013
static const std::list< ColumnDescriptor > gdalToColumnDescriptors(const std::string &fileName, const bool is_raster, const std::string &geoColumnName, const CopyParams &copy_params)
Definition: Importer.cpp:4820
static void clearExternalCaches(bool for_update, const TableDescriptor *td, const int current_db_id)
Definition: Execute.h:438
static const int32_t CREATE_DASHBOARD
Definition: DBObject.h:101
#define NULL_SMALLINT
static constexpr int NULL_REFRESH_TIME
Definition: ForeignTable.h:55
void start_heap_profile(const TSessionId &session) override
Definition: DBHandler.cpp:6084
bool g_cluster
void get_table_function_details(std::vector< TUserDefinedTableFunction > &_return, const TSessionId &session, const std::vector< std::string > &udtf_names) override
Definition: DBHandler.cpp:7735
static std::vector< std::string > gdalGetAllFilesInArchive(const std::string &archive_path, const CopyParams &copy_params)
Definition: Importer.cpp:5130
static void pause_executor_queue()
Definition: Execute.cpp:5420
The data to be inserted using the fragment manager.
Definition: Fragmenter.h:68
void get_device_parameters(std::map< std::string, std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:7584
std::unordered_map< std::string, std::string > ipc_handle_to_dev_ptr_
Definition: DBHandler.h:1022
auto getExecuteWriteLock()
void interruptQuery(const Catalog_Namespace::SessionInfo &session_info, const std::string &target_session)
Definition: DBHandler.cpp:7940
static std::shared_ptr< QueryEngine > getInstance()
Definition: QueryEngine.h:89
void set_executor_device_type(ExecutorDeviceType t)
Definition: SessionInfo.h:92
bool hasTableAccessPrivileges(const TableDescriptor *td, const Catalog_Namespace::SessionInfo &session_info)
Definition: DBHandler.cpp:2658
Serializers for query engine types to/from thrift.
bool isCalciteExplain() const
Definition: ParserWrapper.h:71
std::list< DBSummary > DBSummaryList
Definition: SysCatalog.h:145
const size_t max_concurrent_render_sessions_
Definition: DBHandler.h:969
int32_t permissionType
Definition: DBObject.h:53
void log_system_cpu_memory_status(std::string const &query, const Catalog_Namespace::Catalog &cat)
Definition: DBHandler.cpp:3226
std::vector< std::string > local_glob_filter_sort_files(const std::string &file_path, const FilePathOptions &options, const bool recurse)
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
void check_valid_column_names(const std::list< const ColumnDescriptor * > &descs, const std::vector< std::string > &column_names)
Definition: DBHandler.cpp:3105
Catalog_Namespace::SessionInfoPtr findCalciteSession(TSessionId const &) const
Definition: DBHandler.cpp:6112
bool allow_multifrag_
Definition: DBHandler.h:644
static const AccessPrivileges DELETE_DASHBOARD
Definition: DBObject.h:173
void get_internal_table_details(TTableDetails &_return, const TSessionId &session, const std::string &table_name, const bool include_system_columns) override
Definition: DBHandler.cpp:2410
void get_roles(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:1879
Definition: sqltypes.h:72
static const int32_t SELECT_FROM_TABLE
Definition: DBObject.h:88
static const std::string MAPD_RELEASE
Definition: release.h:42
SQLTypeInfo columnType
const TableDescriptor * getMetadataForTable(const std::string &tableName, const bool populateFragmenter=true) const
Returns a pointer to a const TableDescriptor struct matching the provided tableName.
static constexpr ExecutorId UNITARY_EXECUTOR_ID
Definition: Execute.h:423
std::vector< std::unique_ptr< const RexScalar >> RowValues
Definition: RelAlgDag.h:2656
static std::shared_ptr< Chunk > getChunk(const ColumnDescriptor *cd, DataMgr *data_mgr, const ChunkKey &key, const MemoryLevel mem_level, const int deviceId, const size_t num_bytes, const size_t num_elems, const bool pinnable=true)
Definition: Chunk.cpp:31
void load_table_binary(const TSessionId &session, const std::string &table_name, const std::vector< TRow > &rows, const std::vector< std::string > &column_names) override
Definition: DBHandler.cpp:3324
bool isAlterSystemControlExecutorQueue() const
string name
Definition: setup.in.py:72
std::vector< TServerStatus > getLeafStatus(TSessionId session)
void clear_gpu_memory(const TSessionId &session) override
Definition: DBHandler.cpp:2887
size_t g_leaf_count
Definition: ParserNode.cpp:79
bool check_and_reset_in_memory_system_table(const Catalog &catalog, const TableDescriptor &td)
Definition: DBHandler.cpp:6843
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
void init_table_functions()
int8_t * numbersPtr
Definition: sqltypes.h:233
void get_physical_tables(std::vector< std::string > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:2716
void get_dashboards(std::vector< TDashboard > &_return, const TSessionId &session) override
Definition: DBHandler.cpp:4723
std::pair< std::string, std::string > compileUdf(const std::string &udf_file_name) const
const AuthMetadata & authMetadata_
Definition: DBHandler.h:651
import_export::CopyParams thrift_to_copyparams(const TCopyParams &cp)
Definition: DBHandler.cpp:3854
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:92
static bool has_dashboard_permission(const AccessPrivileges &privs, const TDBObjectPermissions &permissions)
Definition: DBHandler.cpp:2019
TTypeInfo type_info_to_thrift(const SQLTypeInfo &ti)
static constexpr DeviceIdentifier CPU_DEVICE_IDENTIFIER
Definition: DataRecycler.h:136
static const AccessPrivileges EDIT_DASHBOARD
Definition: DBObject.h:172
std::shared_ptr< Catalog_Namespace::SessionInfo const > getConstSessionInfo() const
Definition: QueryState.cpp:150
int cpu_threads()
Definition: thread_count.h:25
static const int32_t UPDATE_IN_TABLE
Definition: DBObject.h:90
static constexpr size_t kDefaultSampleRowsCount
std::string userLoggable() const
Definition: SysCatalog.cpp:158
DBHandler(const std::vector< LeafHostInfo > &db_leaves, const std::vector< LeafHostInfo > &string_leaves, const std::string &base_data_path, const bool allow_multifrag, const bool jit_debug, const bool intel_jit_profile, const bool read_only, const bool allow_loop_joins, const bool enable_rendering, const bool renderer_prefer_igpu, const unsigned renderer_vulkan_timeout_ms, const bool renderer_use_parallel_executors, const bool enable_auto_clear_render_mem, const int render_oom_retry_threshold, const size_t render_mem_bytes, const size_t max_concurrent_render_sessions, const size_t reserved_gpu_mem, const bool render_compositor_use_last_gpu, const bool renderer_enable_slab_allocation, const size_t num_reader_threads, const AuthMetadata &authMetadata, SystemParameters &system_parameters, const bool legacy_syntax, const int idle_session_duration, const int max_session_duration, const std::string &udf_filename, const std::string &clang_path, const std::vector< std::string > &clang_options, const File_Namespace::DiskCacheConfig &disk_cache_config, const bool is_new_db)
Definition: DBHandler.cpp:169
std::string s3_access_key
Definition: CopyParams.h:61
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977
bool is_decimal() const
Definition: sqltypes.h:570
void sql_execute_df(TDataFrame &_return, const TSessionId &session, const std::string &query, const TDeviceType::type device_type, const int32_t device_id, const int32_t first_n, const TArrowTransport::type transport_method) override
Definition: DBHandler.cpp:1461
std::vector< int > columnIds
identifies the table into which the data is being inserted
Definition: Fragmenter.h:71
bool g_allow_s3_server_privileges
Definition: S3Archive.cpp:34
std::string columnName
std::shared_ptr< Catalog_Namespace::SessionInfo const > getConstSessionInfo() const
Definition: QueryState.cpp:84
int64_t getExecutionTime() const
void get_heap_profile(std::string &_return, const TSessionId &session) override
Definition: DBHandler.cpp:6119
RasterPointTransform raster_point_transform
Definition: CopyParams.h:91
bool hasPermission(int permission) const
Definition: DBObject.h:141
ExecutionResult getQueries(std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
Definition: DBHandler.cpp:7813
void get_all_roles_for_user(std::vector< std::string > &_return, const TSessionId &session, const std::string &granteeName) override
Definition: DBHandler.cpp:2281
const UserMetadata & get_currentUser() const
Definition: SessionInfo.h:88
std::string const & sessionId() const
Definition: RequestInfo.h:40
std::optional< std::string > file_sort_order_by
Definition: CopyParams.h:86
bool is_a_supported_geo_file(const std::string &path)
Definition: DBHandler.cpp:4262
#define IS_GEO(T)
Definition: sqltypes.h:310
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:137
bool cpu_mode_only_
Definition: DBHandler.h:647
const std::string kInfoSchemaMigrationName
std::unique_ptr< Parser::Stmt > create_stmt_for_query(const std::string &queryStr, const Catalog_Namespace::SessionInfo &session_info)
std::string dump_table_col_names(const std::map< std::string, std::vector< std::string >> &table_col_names)
Definition: DBHandler.cpp:2304
static BoundingBoxIntersectTuningParamRecycler * getBoundingBoxIntersectTuningParamCache()
void get_dashboard_grantees(std::vector< TDashboardGrantees > &_return, const TSessionId &session, const int32_t dashboard_id) override
Definition: DBHandler.cpp:5090
void get_license_claims(TLicenseInfo &_return, const TSessionId &session, const std::string &nonce) override
Definition: DBHandler.cpp:7542
ExecutionResult getUserSessions(std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
Definition: DBHandler.cpp:7775
ThreadLocalIds thread_local_ids()
Definition: Logger.cpp:882
void get_memory(std::vector< TNodeMemoryInfo > &_return, const TSessionId &session, const std::string &memory_level) override
Definition: DBHandler.cpp:3020
void add_vsi_geo_prefix(std::string &path)
Definition: DBHandler.cpp:4205
std::string credential_string
Definition: CopyParams.h:102
static ReadLock getReadLockForTable(Catalog_Namespace::Catalog &cat, const std::string &table_name)
ConnectionInfo getConnectionInfo() const
Definition: DBHandler.cpp:1790
bool is_array() const
Definition: sqltypes.h:585
#define STDLOG(...)
Definition: QueryState.h:234
const std::string getQuerySubmittedTime() const
Definition: QueryState.cpp:101
#define VLOG(n)
Definition: Logger.h:388
void resizeDispatchQueue(size_t queue_size)
Definition: DBHandler.cpp:8180
static constexpr const char * SCHEDULE_REFRESH_TIMING_TYPE
Definition: ForeignTable.h:53
void execute_next_render_step(TRenderStepResult &_return, const TPendingRenderQuery &pending_render, const TRenderAggDataMap &merged_data) override
Definition: DBHandler.cpp:7330
void get_internal_table_details_for_database(TTableDetails &_return, const TSessionId &session, const std::string &table_name, const std::string &database_name) override
Definition: DBHandler.cpp:2422
std::atomic< bool > isSuper
Definition: SysCatalog.h:107
std::string const createInMemoryCalciteSession(const std::shared_ptr< Catalog_Namespace::Catalog > &catalog_ptr)
Definition: DBHandler.cpp:586
bool isPlanExplain() const
Definition: ParserWrapper.h:78
void setRequestId(logger::RequestId const request_id)
Definition: RequestInfo.h:42
static const int32_t DROP_SERVER
Definition: DBObject.h:127
void alterSystemClear(const std::string &sesson_id, ExecutionResult &result, const std::string &cache_type, int64_t &execution_time_ms)
Definition: DBHandler.cpp:8029
void stop_heap_profile(const TSessionId &session) override
Definition: DBHandler.cpp:6098
std::pair< std::string_view, const char * > substring(const std::string &str, size_t substr_length)
return substring of str with postfix if str.size() > substr_length
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:397
std::optional< std::string > file_sort_regex
Definition: CopyParams.h:87
std::atomic< bool > initialized_
Definition: DBHandler.h:686
static void addRTUdfs(const std::string &json_func_sigs)
bool TTypeInfo_IsGeo(const TDatumType::type &t)
Definition: DBHandler.cpp:5373
int32_t create_dashboard(const TSessionId &session, const std::string &dashboard_name, const std::string &dashboard_state, const std::string &image_hash, const std::string &dashboard_metadata) override
Definition: DBHandler.cpp:4824
std::string get_hostname()
void interrupt(const TSessionId query_session, const TSessionId interrupt_session)
TEncodingType::type encoding_to_thrift(const SQLTypeInfo &type_info)
std::shared_ptr< SessionInfo > SessionInfoPtr
Definition: SessionsStore.h:27
#define THROW_DB_EXCEPTION(errstr)
Definition: DBHandler.cpp:139
EncodingType geo_coords_encoding
Definition: CopyParams.h:76
bool is_local_file(const std::string &file_path)
Definition: DBHandler.cpp:4326
void clear_cpu_memory(const TSessionId &session) override
Definition: DBHandler.cpp:2911