OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
DBHandler.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
23 #pragma once
24 
25 #include "LeafAggregator.h"
26 
27 #ifdef HAVE_PROFILER
28 #include <gperftools/heap-profiler.h>
29 #endif // HAVE_PROFILER
30 
31 #include "Calcite/Calcite.h"
32 #include "Catalog/Catalog.h"
33 #include "Catalog/SessionsStore.h"
35 #include "Geospatial/Transforms.h"
36 #include "ImportExport/Importer.h"
37 #include "LockMgr/LockMgr.h"
38 #include "Logger/Logger.h"
39 #include "Parser/ParserNode.h"
40 #include "Parser/ParserWrapper.h"
44 #include "QueryEngine/Execute.h"
51 #include "Shared/StringTransform.h"
55 #include "Shared/measure.h"
56 #include "Shared/scope.h"
62 
63 #include <sys/types.h>
64 #include <thrift/server/TServer.h>
65 #include <thrift/transport/THttpClient.h>
66 #include <thrift/transport/TSocket.h>
67 #include <thrift/transport/TTransport.h>
68 #include <atomic>
69 #include <boost/algorithm/string.hpp>
70 #include <boost/algorithm/string/replace.hpp>
71 #include <boost/algorithm/string/trim.hpp>
72 #include <boost/filesystem.hpp>
73 #include <boost/make_shared.hpp>
74 #include <boost/noncopyable.hpp>
75 #include <boost/none_t.hpp>
76 #include <boost/optional.hpp>
77 #include <boost/program_options.hpp>
78 #include <boost/tokenizer.hpp>
79 #include <cmath>
80 #include <csignal>
81 #include <fstream>
82 #include <list>
83 #include <map>
84 #include <memory>
85 #include <mutex>
86 #include <random>
87 #include <string>
88 #include <thread>
89 #include <typeinfo>
90 #include <unordered_map>
91 
92 #include "gen-cpp/Heavy.h"
93 #include "gen-cpp/extension_functions_types.h"
94 
95 using namespace std::string_literals;
96 
97 class HeavyDBAggHandler;
98 class HeavyDBLeafHandler;
99 
100 // Multiple concurrent requests for the same session can occur. For that reason, each
101 // request briefly takes a lock to make a copy of the appropriate SessionInfo object. Then
102 // it releases the lock and uses the copy for the remainder of the request.
103 using SessionMap = std::map<TSessionId, std::shared_ptr<Catalog_Namespace::SessionInfo>>;
104 using PermissionFuncPtr = bool (*)(const AccessPrivileges&, const TDBObjectPermissions&);
106 
107 namespace dbhandler {
108 bool is_info_schema_db(const std::string& db_name);
109 
110 void check_not_info_schema_db(const std::string& db_name,
111  bool throw_db_exception = false);
112 } // namespace dbhandler
113 
// Thrift processor that, before delegating each request to HeavyProcessor, optionally
// inspects the inbound transport and records the caller's origin in the thread-local
// client_address member.
//
// NOTE(review): this listing skips source lines 132, 141, 143, 145 and 148, so the
// statements inside several branches of process() are not visible here. Those branches
// appear empty below only because of the gaps; consult the real header before relying
// on what they do (presumably they assign client_address / client_protocol - confirm).
114 class TrackingProcessor : public HeavyProcessor {
115  public:
 // Passes handler to the HeavyProcessor base and stores check_origin, which gates the
 // origin inspection performed in process().
116  TrackingProcessor(std::shared_ptr<HeavyIf> handler, const bool check_origin)
117  : HeavyProcessor(handler), check_origin_(check_origin) {}
118 
 // Handles one request: optionally captures the client origin from the input protocol's
 // transport, then forwards in/out/connectionContext unchanged to
 // HeavyProcessor::process and returns its result.
119  bool process(std::shared_ptr<::apache::thrift::protocol::TProtocol> in,
120  std::shared_ptr<::apache::thrift::protocol::TProtocol> out,
121  void* connectionContext) override {
122  using namespace ::apache::thrift;
123 
124  auto transport = in->getTransport();
 // Origin inspection only runs when a transport exists and check_origin_ is set.
125  if (transport && check_origin_) {
 // Function-local static mutex: one instance shared by every TrackingProcessor and
 // every thread. The guard is held until the end of this if-block.
126  static std::mutex processor_mutex;
127  std::lock_guard lock(processor_mutex);
 // The origin string is treated as a comma-separated list.
128  const auto origin_str = transport->getOrigin();
129  std::vector<std::string> origins;
130  boost::split(origins, origin_str, boost::is_any_of(","));
 // Body of the empty-list branch (source line 132) is missing from this listing.
131  if (origins.empty()) {
133  } else {
134  // Take the first origin, which should be the client IP before any intermediate
135  // servers (e.g. the web server)
136  auto trimmed_origin = origins.front();
137  boost::algorithm::trim(trimmed_origin);
138  TrackingProcessor::client_address = trimmed_origin;
139  }
 // Distinguishes the concrete transport type (HTTP, buffered, or anything else). The
 // statements executed in each case (source lines 141, 143, 145) are missing here.
140  if (dynamic_cast<transport::THttpTransport*>(transport.get())) {
142  } else if (dynamic_cast<transport::TBufferedTransport*>(transport.get())) {
144  } else {
146  }
 // No transport, or origin checking disabled; the statement on source line 148 is
 // missing from this listing.
147  } else {
149  }
150 
151  return HeavyProcessor::process(in, out, connectionContext);
152  }
153 
 // Per-thread state: each thread has its own copy of these statics. client_address is
 // assigned in process(); no assignment to client_protocol is visible in this extract.
154  static thread_local std::string client_address;
155  static thread_local ClientProtocol client_protocol;
156 
157  private:
 // When false, process() skips the origin inspection branch.
158  const bool check_origin_;
159 };
160 
161 namespace File_Namespace {
162 struct DiskCacheConfig;
163 }
164 
165 class DBHandler : public HeavyIf {
166  public:
167  DBHandler(const std::vector<LeafHostInfo>& db_leaves,
168  const std::vector<LeafHostInfo>& string_leaves,
169  const std::string& base_data_path,
170  const bool allow_multifrag,
171  const bool jit_debug,
172  const bool intel_jit_profile,
173  const bool read_only,
174  const bool allow_loop_joins,
175  const bool enable_rendering,
176  const bool renderer_prefer_igpu,
177  const unsigned renderer_vulkan_timeout_ms,
178  const bool renderer_use_parallel_executors,
179  const bool enable_auto_clear_render_mem,
180  const int render_oom_retry_threshold,
181  const size_t render_mem_bytes,
182  const size_t max_concurrent_render_sessions,
183  const size_t reserved_gpu_mem,
184  const bool render_compositor_use_last_gpu,
185  const bool renderer_enable_slab_allocation,
186  const size_t num_reader_threads,
187  const AuthMetadata& authMetadata,
188  SystemParameters& system_parameters,
189  const bool legacy_syntax,
190  const int idle_session_duration,
191  const int max_session_duration,
192  const std::string& udf_filename,
193  const std::string& clang_path,
194  const std::vector<std::string>& clang_options,
195 #ifdef ENABLE_GEOS
196  const std::string& libgeos_so_filename,
197 #endif
198 #ifdef HAVE_TORCH_TFS
199  const std::string& torch_lib_path,
200 #endif
201  const File_Namespace::DiskCacheConfig& disk_cache_config,
202  const bool is_new_db);
203  void initialize(const bool is_new_db);
204  ~DBHandler() override;
205 
// Returns the constant 2 * 1000 * 1000 * 1000 (2,000,000,000) as a size_t. The LL suffix
// on the last factor makes the final multiplication a long long operation, so the
// product is not computed in plain int.
// NOTE(review): presumably the upper bound on bytes sent in one Thrift message - confirm
// at the call sites, which are not visible here.
206  static inline size_t max_bytes_for_thrift() {
207  return 2 * 1000 * 1000 * 1000LL;
208  }
209 
210  // Important ****
211  // This block must be kept in sync with mapd.thrift and HAHandler.h
212  // Please keep it in the same order for easy checking and cut and paste
213  // Important ****
214 
215  void krb5_connect(TKrb5Session& session,
216  const std::string& token,
217  const std::string& dbname) override;
218  // connection, admin
219  void connect(TSessionId& session,
220  const std::string& username,
221  const std::string& passwd,
222  const std::string& dbname) override;
223  void disconnect(const TSessionId& session) override;
224  void switch_database(const TSessionId& session, const std::string& dbname) override;
225  void clone_session(TSessionId& session2, const TSessionId& session1) override;
226  void get_server_status(TServerStatus& _return, const TSessionId& session) override;
227  void get_status(std::vector<TServerStatus>& _return,
228  const TSessionId& session) override;
229  void get_hardware_info(TClusterHardwareInfo& _return,
230  const TSessionId& session) override;
231 
232  bool hasTableAccessPrivileges(const TableDescriptor* td,
233  const Catalog_Namespace::SessionInfo& session_info);
234  void get_tables(std::vector<std::string>& _return, const TSessionId& session) override;
235  void get_tables_for_database(std::vector<std::string>& _return,
236  const TSessionId& session,
237  const std::string& database_name) override;
238  void get_physical_tables(std::vector<std::string>& _return,
239  const TSessionId& session) override;
240  void get_views(std::vector<std::string>& _return, const TSessionId& session) override;
241  void get_tables_meta(std::vector<TTableMeta>& _return,
242  const TSessionId& session) override;
243  void get_table_details(TTableDetails& _return,
244  const TSessionId& session,
245  const std::string& table_name) override;
246  void get_table_details_for_database(TTableDetails& _return,
247  const TSessionId& session,
248  const std::string& table_name,
249  const std::string& database_name) override;
250  void get_internal_table_details(TTableDetails& _return,
251  const TSessionId& session,
252  const std::string& table_name,
253  const bool include_system_columns) override;
254  void get_internal_table_details_for_database(TTableDetails& _return,
255  const TSessionId& session,
256  const std::string& table_name,
257  const std::string& database_name) override;
258  void get_users(std::vector<std::string>& _return, const TSessionId& session) override;
259  void get_databases(std::vector<TDBInfo>& _return, const TSessionId& session) override;
260 
261  void get_version(std::string& _return) override;
262  void start_heap_profile(const TSessionId& session) override;
263  void stop_heap_profile(const TSessionId& session) override;
264  void get_heap_profile(std::string& _return, const TSessionId& session) override;
265  void get_memory(std::vector<TNodeMemoryInfo>& _return,
266  const TSessionId& session,
267  const std::string& memory_level) override;
268  void clear_cpu_memory(const TSessionId& session) override;
269  void clear_gpu_memory(const TSessionId& session) override;
270  void clearRenderMemory(const TSessionId& session); // it's not declared in thrift
271  // nor on the persistent leaf client
272 
273  void pause_executor_queue(
274  const TSessionId& session); // Not implemented for persistent leaf client
275  void resume_executor_queue(
276  const TSessionId& session); // Not implemented for persistent leaf client
277 
278  void set_cur_session(const TSessionId& parent_session,
279  const TSessionId& leaf_session,
280  const std::string& start_time_str,
281  const std::string& label,
282  bool for_running_query_kernel) override;
283  void invalidate_cur_session(const TSessionId& parent_session,
284  const TSessionId& leaf_session,
285  const std::string& start_time_str,
286  const std::string& label,
287  bool for_running_query_kernel) override;
288  void set_table_epoch(const TSessionId& session,
289  const int db_id,
290  const int table_id,
291  const int new_epoch) override;
292  void set_table_epoch_by_name(const TSessionId& session,
293  const std::string& table_name,
294  const int new_epoch) override;
295  int32_t get_table_epoch(const TSessionId& session,
296  const int32_t db_id,
297  const int32_t table_id) override;
298  int32_t get_table_epoch_by_name(const TSessionId& session,
299  const std::string& table_name) override;
300  void get_table_epochs(std::vector<TTableEpochInfo>& _return,
301  const TSessionId& session,
302  const int32_t db_id,
303  const int32_t table_id) override;
304  void set_table_epochs(const TSessionId& session,
305  const int32_t db_id,
306  const std::vector<TTableEpochInfo>& table_epochs) override;
307 
308  void get_session_info(TSessionInfo& _return, const TSessionId& session) override;
309 
310  void set_leaf_info(const TSessionId& session, const TLeafInfo& info) override;
311 
312  void sql_execute(ExecutionResult& _return,
313  const TSessionId& session,
314  const std::string& query,
315  const bool column_format,
316  const int32_t first_n,
317  const int32_t at_most_n,
319  // query, render
320  void sql_execute(TQueryResult& _return,
321  const TSessionId& session,
322  const std::string& query,
323  const bool column_format,
324  const std::string& nonce,
325  const int32_t first_n,
326  const int32_t at_most_n) override;
327  void get_completion_hints(std::vector<TCompletionHint>& hints,
328  const TSessionId& session,
329  const std::string& sql,
330  const int cursor) override;
331  // TODO(miyu): merge the following two data frame APIs.
332  void sql_execute_df(TDataFrame& _return,
333  const TSessionId& session,
334  const std::string& query,
335  const TDeviceType::type device_type,
336  const int32_t device_id,
337  const int32_t first_n,
338  const TArrowTransport::type transport_method) override;
339  void sql_execute_gdf(TDataFrame& _return,
340  const TSessionId& session,
341  const std::string& query,
342  const int32_t device_id,
343  const int32_t first_n) override;
344  void deallocate_df(const TSessionId& session,
345  const TDataFrame& df,
346  const TDeviceType::type device_type,
347  const int32_t device_id) override;
348  void interrupt(const TSessionId& query_session,
349  const TSessionId& interrupt_session) override;
350  void sql_validate(TRowDescriptor& _return,
351  const TSessionId& session,
352  const std::string& query) override;
353  TExecuteMode::type getExecutionMode(const TSessionId& session);
354  void set_execution_mode(const TSessionId& session,
355  const TExecuteMode::type mode) override;
356  void render_vega(TRenderResult& _return,
357  const TSessionId& session,
358  const int64_t widget_id,
359  const std::string& vega_json,
360  const int32_t compression_level,
361  const std::string& nonce) override;
362  void get_result_row_for_pixel(
363  TPixelTableRowResult& _return,
364  const TSessionId& session,
365  const int64_t widget_id,
366  const TPixel& pixel,
367  const std::map<std::string, std::vector<std::string>>& table_col_names,
368  const bool column_format,
369  const int32_t pixel_radius,
370  const std::string& nonce) override;
371 
372  // custom expressions
373  int32_t create_custom_expression(const TSessionId& session,
374  const TCustomExpression& custom_expression) override;
375  void get_custom_expressions(std::vector<TCustomExpression>& _return,
376  const TSessionId& session) override;
377  void update_custom_expression(const TSessionId& session,
378  const int32_t id,
379  const std::string& expression_json) override;
380  void delete_custom_expressions(const TSessionId& session,
381  const std::vector<int32_t>& custom_expression_ids,
382  const bool do_soft_delete) override;
383 
384  // dashboards
385  void get_dashboard(TDashboard& _return,
386  const TSessionId& session,
387  const int32_t dashboard_id) override;
388  void get_dashboards(std::vector<TDashboard>& _return,
389  const TSessionId& session) override;
390  int32_t create_dashboard(const TSessionId& session,
391  const std::string& dashboard_name,
392  const std::string& dashboard_state,
393  const std::string& image_hash,
394  const std::string& dashboard_metadata) override;
395  void replace_dashboard(const TSessionId& session,
396  const int32_t dashboard_id,
397  const std::string& dashboard_name,
398  const std::string& dashboard_owner,
399  const std::string& dashboard_state,
400  const std::string& image_hash,
401  const std::string& dashboard_metadata) override;
402  void delete_dashboard(const TSessionId& session, const int32_t dashboard_id) override;
403  void share_dashboards(const TSessionId& session,
404  const std::vector<int32_t>& dashboard_ids,
405  const std::vector<std::string>& groups,
406  const TDashboardPermissions& permissions) override;
407  void delete_dashboards(const TSessionId& session,
408  const std::vector<int32_t>& dashboard_ids) override;
409  void share_dashboard(const TSessionId& session,
410  const int32_t dashboard_id,
411  const std::vector<std::string>& groups,
412  const std::vector<std::string>& objects,
413  const TDashboardPermissions& permissions,
414  const bool grant_role) override;
415  void unshare_dashboards(const TSessionId& session,
416  const std::vector<int32_t>& dashboard_ids,
417  const std::vector<std::string>& groups,
418  const TDashboardPermissions& permissions) override;
419  void unshare_dashboard(const TSessionId& session,
420  const int32_t dashboard_id,
421  const std::vector<std::string>& groups,
422  const std::vector<std::string>& objects,
423  const TDashboardPermissions& permissions) override;
424  void get_dashboard_grantees(std::vector<TDashboardGrantees>& _return,
425  const TSessionId& session,
426  const int32_t dashboard_id) override;
427 
428  void get_link_view(TFrontendView& _return,
429  const TSessionId& session,
430  const std::string& link) override;
431  void create_link(std::string& _return,
432  const TSessionId& session,
433  const std::string& view_state,
434  const std::string& view_metadata) override;
435  // import
436  void load_table_binary(const TSessionId& session,
437  const std::string& table_name,
438  const std::vector<TRow>& rows,
439  const std::vector<std::string>& column_names) override;
440 
441  void load_table_binary_columnar(const TSessionId& session,
442  const std::string& table_name,
443  const std::vector<TColumn>& cols,
444  const std::vector<std::string>& column_names) override;
445  void load_table_binary_arrow(const TSessionId& session,
446  const std::string& table_name,
447  const std::string& arrow_stream,
448  const bool use_column_names) override;
449 
450  void load_table(const TSessionId& session,
451  const std::string& table_name,
452  const std::vector<TStringRow>& rows,
453  const std::vector<std::string>& column_names) override;
454  void detect_column_types(TDetectResult& _return,
455  const TSessionId& session,
456  const std::string& file_name,
457  const TCopyParams& copy_params) override;
458  void create_table(const TSessionId& session,
459  const std::string& table_name,
460  const TRowDescriptor& row_desc,
461  const TCreateParams& create_params) override;
462  void import_table(const TSessionId& session,
463  const std::string& table_name,
464  const std::string& file_name,
465  const TCopyParams& copy_params) override;
466  void import_geo_table(const TSessionId& session,
467  const std::string& table_name,
468  const std::string& file_name,
469  const TCopyParams& copy_params,
470  const TRowDescriptor& row_desc,
471  const TCreateParams& create_params) override;
472  void import_table_status(TImportStatus& _return,
473  const TSessionId& session,
474  const std::string& import_id) override;
475  void get_first_geo_file_in_archive(std::string& _return,
476  const TSessionId& session,
477  const std::string& archive_path,
478  const TCopyParams& copy_params) override;
479  void get_all_files_in_archive(std::vector<std::string>& _return,
480  const TSessionId& session,
481  const std::string& archive_path,
482  const TCopyParams& copy_params) override;
483  void get_layers_in_geo_file(std::vector<TGeoFileLayerInfo>& _return,
484  const TSessionId& session,
485  const std::string& file_name,
486  const TCopyParams& copy_params) override;
487  // distributed
488  int64_t query_get_outer_fragment_count(const TSessionId& session,
489  const std::string& select_query) override;
490 
491  void check_table_consistency(TTableMeta& _return,
492  const TSessionId& session,
493  const int32_t table_id) override;
494  void start_query(TPendingQuery& _return,
495  const TSessionId& leaf_session,
496  const TSessionId& parent_session,
497  const std::string& serialized_rel_alg_dag,
498  const std::string& start_time_str,
499  const bool just_explain,
500  const std::vector<int64_t>& outer_fragment_indices) override;
501  void execute_query_step(TStepResult& _return,
502  const TPendingQuery& pending_query,
503  const TSubqueryId subquery_id,
504  const std::string& start_time_str) override;
505  void broadcast_serialized_rows(const TSerializedRows& serialized_rows,
506  const TRowDescriptor& row_desc,
507  const TQueryId query_id,
508  const TSubqueryId subquery_id,
509  const bool is_final_subquery_result) override;
510 
511  void start_render_query(TPendingRenderQuery& _return,
512  const TSessionId& session,
513  const int64_t widget_id,
514  const int16_t node_idx,
515  const std::string& vega_json) override;
516  void execute_next_render_step(TRenderStepResult& _return,
517  const TPendingRenderQuery& pending_render,
518  const TRenderAggDataMap& merged_data) override;
519 
520  void insert_data(const TSessionId& session, const TInsertData& insert_data) override;
521  void insert_chunks(const TSessionId& session,
522  const TInsertChunks& insert_chunks) override;
523  void checkpoint(const TSessionId& session, const int32_t table_id) override;
524  // DB Object Privileges
525  void get_roles(std::vector<std::string>& _return, const TSessionId& session) override;
526  bool has_role(const TSessionId& sessionId,
527  const std::string& granteeName,
528  const std::string& roleName) override;
529  bool has_object_privilege(const TSessionId& sessionId,
530  const std::string& granteeName,
531  const std::string& objectName,
532  const TDBObjectType::type object_type,
533  const TDBObjectPermissions& permissions) override;
534  void get_db_objects_for_grantee(std::vector<TDBObject>& _return,
535  const TSessionId& session,
536  const std::string& roleName) override;
537  void get_db_object_privs(std::vector<TDBObject>& _return,
538  const TSessionId& session,
539  const std::string& objectName,
540  const TDBObjectType::type type) override;
541  void get_all_roles_for_user(std::vector<std::string>& _return,
542  const TSessionId& session,
543  const std::string& granteeName) override;
544  void get_all_effective_roles_for_user(std::vector<std::string>& _return,
545  const TSessionId& session,
546  const std::string& granteeName) override;
547  std::vector<std::string> get_valid_groups(const TSessionId& session,
548  int32_t dashboard_id,
549  std::vector<std::string> groups);
550  // licensing
551  void set_license_key(TLicenseInfo& _return,
552  const TSessionId& session,
553  const std::string& key,
554  const std::string& nonce) override;
555  void get_license_claims(TLicenseInfo& _return,
556  const TSessionId& session,
557  const std::string& nonce) override;
558  // user-defined functions
559  /*
560  Returns a mapping of device (CPU, GPU) parameters (name, LLVM IR
561  triplet, features, etc)
562  */
563  void get_device_parameters(std::map<std::string, std::string>& _return,
564  const TSessionId& session) override;
565 
566  /*
567  Register Runtime Extension Functions (UDFs, UDTFs) with given
568  signatures. The extension functions implementations are given in a
569  mapping of a device and the corresponding LLVM/NVVM IR string.
570  */
571 
572  void register_runtime_extension_functions(
573  const TSessionId& session,
574  const std::vector<TUserDefinedFunction>& udfs,
575  const std::vector<TUserDefinedTableFunction>& udtfs,
576  const std::map<std::string, std::string>& device_ir_map) override;
577 
578  /*
579  Returns a list of User-Defined Function names available
580  */
581  void get_function_names(std::vector<std::string>& _return,
582  const TSessionId& session) override;
583 
584  /*
585  Returns a list of runtime User-Defined Function names available
586  */
587  void get_runtime_function_names(std::vector<std::string>& _return,
588  const TSessionId& session) override;
589 
590  /*
591  Returns a list of User-Defined Function details
592  */
593  void get_function_details(std::vector<TUserDefinedFunction>& _return,
594  const TSessionId& session,
595  const std::vector<std::string>& udf_names) override;
596 
597  /*
598  Returns a list of User-Defined Table Function names available
599  */
600  void get_table_function_names(std::vector<std::string>& _return,
601  const TSessionId& session) override;
602 
603  /*
604  Returns a list of runtime User-Defined Table Function names available
605  */
606  void get_runtime_table_function_names(std::vector<std::string>& _return,
607  const TSessionId& session) override;
608 
609  /*
610  Returns a list of User-Defined Table Function details
611  */
612  void get_table_function_details(std::vector<TUserDefinedTableFunction>& _return,
613  const TSessionId& session,
614  const std::vector<std::string>& udtf_names) override;
615 
616  // end of sync block for HAHandler and mapd.thrift
617 
618  void shutdown();
619  void emergency_shutdown();
620 
621  TSessionId getInvalidSessionId() const;
622 
623  void internal_connect(TSessionId& session,
624  const std::string& username,
625  const std::string& dbname);
626 
627  bool isAggregator() const;
628 
629  bool checkInMemorySystemTableQuery(
630  const std::unordered_set<shared::TableKey>& tables_selected_from) const;
631 
632  std::shared_ptr<Data_Namespace::DataMgr> data_mgr_;
633 
635  std::vector<LeafHostInfo> db_leaves_;
636  std::vector<LeafHostInfo> string_leaves_;
637  const std::string base_data_path_;
638  boost::filesystem::path import_path_;
640  std::default_random_engine random_gen_;
641  std::uniform_int_distribution<int64_t> session_id_dist_;
642  const bool jit_debug_;
643  const bool intel_jit_profile_;
645  const bool read_only_;
646  const bool allow_loop_joins_;
649  std::mutex render_mutex_;
650  int64_t start_time_;
653  std::shared_ptr<QueryEngine> query_engine_;
654  std::unique_ptr<RenderHandler> render_handler_;
655  std::unique_ptr<HeavyDBAggHandler> agg_handler_;
656  std::unique_ptr<HeavyDBLeafHandler> leaf_handler_;
657  std::shared_ptr<Calcite> calcite_;
658  const bool legacy_syntax_;
659 
660  std::unique_ptr<QueryDispatchQueue> dispatch_queue_;
661 
// Forwards every argument, preserving its value category via std::forward, to
// query_states_.create() and returns the std::shared_ptr<query_state::QueryState> it
// produces. query_states_ is a member declared outside this extract.
662  template <typename... ARGS>
663  std::shared_ptr<query_state::QueryState> create_query_state(ARGS&&... args) {
664  return query_states_.create(std::forward<ARGS>(args)...);
665  }
666 
667  // Exactly one immutable SessionInfo copy should be taken by a typical request.
668  Catalog_Namespace::SessionInfo get_session_copy(const TSessionId& session_id);
669 
670  void get_tables_meta_impl(std::vector<TTableMeta>& _return,
671  QueryStateProxy query_state_proxy,
672  const Catalog_Namespace::SessionInfo& session_info,
673  const bool with_table_locks = true);
674 
675  // Visible for use in tests.
676  void resizeDispatchQueue(size_t queue_size);
677 
678  protected:
679  // Returns empty std::shared_ptr if session.empty().
680  std::shared_ptr<Catalog_Namespace::SessionInfo> get_session_ptr(
681  const TSessionId& session_id);
682 
683  ConnectionInfo getConnectionInfo() const;
684 
685  private:
686  std::atomic<bool> initialized_{false};
687  void init_executor_resource_mgr();
688  void validate_configurations();
689  std::shared_ptr<Catalog_Namespace::SessionInfo> create_new_session(
690  TSessionId& session,
691  const std::string& dbname,
692  const Catalog_Namespace::UserMetadata& user_meta,
693  std::shared_ptr<Catalog_Namespace::Catalog> cat);
694  void connect_impl(TSessionId& session,
695  const std::string& passwd,
696  const std::string& dbname,
697  const Catalog_Namespace::UserMetadata& user_meta,
698  std::shared_ptr<Catalog_Namespace::Catalog> cat,
699  query_state::StdLog& stdlog);
700  void disconnect_impl(Catalog_Namespace::SessionInfoPtr& session_ptr);
701  void check_table_load_privileges(const Catalog_Namespace::SessionInfo& session_info,
702  const std::string& table_name);
703  void get_tables_impl(std::vector<std::string>& table_names,
705  const GetTablesType get_tables_type,
706  const std::string& database_name = {});
707  void get_table_details_impl(TTableDetails& _return,
708  query_state::StdLog& stdlog,
709  const std::string& table_name,
710  const bool get_system,
711  const bool get_physical,
712  const std::string& database_name = {});
713  void getAllRolesForUserImpl(
714  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr,
715  std::vector<std::string>& roles,
716  const std::string& granteeName,
717  bool effective);
718  void check_read_only(const std::string& str);
719  void validateGroups(const std::vector<std::string>& groups);
720  void validateDashboardIdsForSharing(const Catalog_Namespace::SessionInfo& session_info,
721  const std::vector<int32_t>& dashboard_ids);
722  void shareOrUnshareDashboards(const TSessionId& session,
723  const std::vector<int32_t>& dashboard_ids,
724  const std::vector<std::string>& groups,
725  const TDashboardPermissions& permissions,
726  const bool do_share);
727 
728  static void value_to_thrift_column(const TargetValue& tv,
729  const SQLTypeInfo& ti,
730  TColumn& column);
731  static TDatum value_to_thrift(const TargetValue& tv, const SQLTypeInfo& ti);
732 
733  std::pair<TPlanResult, lockmgr::LockedTableDescriptors> parse_to_ra(
735  const std::string& query_str,
736  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
737  const bool acquire_locks,
738  const SystemParameters& system_parameters,
739  bool check_privileges = true);
740 
741  void sql_execute_local(
742  TQueryResult& _return,
743  const QueryStateProxy& query_state_proxy,
744  const std::shared_ptr<Catalog_Namespace::SessionInfo> session_ptr,
745  const std::string& query_str,
746  const bool column_format,
747  const std::string& nonce,
748  const int32_t first_n,
749  const int32_t at_most_n,
750  const bool use_calcite);
751 
752  int64_t process_deferred_copy_from(const TSessionId& session_id);
753 
754  static void convertData(TQueryResult& _return,
756  const QueryStateProxy& query_state_proxy,
757  const bool column_format,
758  const int32_t first_n,
759  const int32_t at_most_n);
760 
761  void sql_execute_impl(ExecutionResult& _return,
763  const bool column_format,
764  const ExecutorDeviceType executor_device_type,
765  const int32_t first_n,
766  const int32_t at_most_n,
767  const bool use_calcite,
769 
771  const TableDescriptor* td,
772  const AccessPrivileges acess_priv);
773 
774  void execute_distributed_copy_statement(
776  const Catalog_Namespace::SessionInfo& session_info);
777 
778  TPlanResult processCalciteRequest(
780  const std::shared_ptr<Catalog_Namespace::Catalog>& cat,
781  const std::string& query_str,
782  const std::vector<TFilterPushDownInfo>& filter_push_down_info,
783  const SystemParameters& system_parameters,
784  const bool check_privileges);
785 
786  TRowDescriptor validateRelAlg(const std::string& query_ra,
787  QueryStateProxy query_state_proxy);
788 
789  void dispatch_query_task(std::shared_ptr<QueryDispatchQueue::Task> query_task,
790  const bool is_update_delete);
791 
792  std::vector<PushedDownFilterInfo> execute_rel_alg(
793  ExecutionResult& _return,
795  const std::string& query_ra,
796  const bool column_format,
797  const ExecutorDeviceType executor_device_type,
798  const int32_t first_n,
799  const int32_t at_most_n,
800  const bool just_validate,
801  const bool find_push_down_candidates,
802  const ExplainInfo& explain_info,
803  const std::optional<size_t> executor_index = std::nullopt) const;
804 
805  void execute_rel_alg_with_filter_push_down(
806  ExecutionResult& _return,
808  std::string& query_ra,
809  const bool column_format,
810  const ExecutorDeviceType executor_device_type,
811  const int32_t first_n,
812  const int32_t at_most_n,
813  const bool just_explain,
814  const bool just_calcite_explain,
815  const std::vector<PushedDownFilterInfo>& filter_push_down_requests);
816 
817  void executeDdl(TQueryResult& _return,
818  const std::string& query_ra,
819  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
820 
821  void executeDdl(ExecutionResult& _return,
822  const std::string& query_ra,
823  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
824 
825  TColumnType populateThriftColumnType(const Catalog_Namespace::Catalog* cat,
826  const ColumnDescriptor* cd);
827 
828  void set_execution_mode_nolock(Catalog_Namespace::SessionInfo* session_ptr,
829  const TExecuteMode::type mode);
830  char unescape_char(std::string str);
831  import_export::CopyParams thrift_to_copyparams(const TCopyParams& cp);
832  TCopyParams copyparams_to_thrift(const import_export::CopyParams& cp);
833  void check_geospatial_files(const boost::filesystem::path file_path,
834  const import_export::CopyParams& copy_params);
835  void render_rel_alg(TRenderResult& _return,
836  const std::string& query_ra,
837  const std::string& query_str,
838  const Catalog_Namespace::SessionInfo& session_info,
839  const std::string& render_type,
840  const bool is_projection_query);
841 
// Returns a TColumnType by value from a TDatumType::type, a column name and
// an is-array flag. Declaration only.
842  TColumnType create_geo_column(const TDatumType::type type,
843  const std::string& name,
844  const bool is_array);
845 
// Static member: needs no DBHandler instance. Takes the TQueryResult
// `_return` by non-const reference (presumably the output), a read-only
// ResultSet and a column-format flag. Declaration only.
846  static void convertExplain(TQueryResult& _return,
847  const ResultSet& results,
848  const bool column_format);
// Static member with the same parameter list as convertExplain: a
// TQueryResult by non-const reference, a read-only ResultSet and a
// column-format flag. Declaration only.
849  static void convertResult(TQueryResult& _return,
850  const ResultSet& results,
851  const bool column_format);
852 
// Static member. Takes the TQueryResult `_return` by non-const reference
// (presumably the output), a QueryStateProxy by value, the target metadata, a
// read-only ResultSet, a column-format flag and the `first_n` / `at_most_n`
// limits. Declaration only; the meaning of the two limits is not visible here.
853  static void convertRows(TQueryResult& _return,
854  QueryStateProxy query_state_proxy,
855  const std::vector<TargetMetaInfo>& targets,
856  const ResultSet& results,
857  const bool column_format,
858  const int32_t first_n,
859  const int32_t at_most_n);
860 
861  // Uses an ExecutionResult to populate a TQueryResult.
862  // Calls convertRows, but only after some setup that uses session_info.
// `result` is taken by non-const reference and `_return` is the last
// parameter here, unlike the convert* helpers where it comes first.
863  void convertResultSet(ExecutionResult& result,
864  const Catalog_Namespace::SessionInfo& session_info,
865  const std::string& query_state_str,
866  TQueryResult& _return);
867 
// Static member. Takes the TQueryResult `_return` by non-const reference
// (presumably the output), a read-only ResultSet, a column-format flag and a
// label. Declaration only.
// NOTE(review): `label` is a const std::string passed by value, so each call
// copies the string.
868  static void createSimpleResult(TQueryResult& _return,
869  const ResultSet& results,
870  const bool column_format,
871  const std::string label);
872 
// Const member function: maps a list of shared Analyzer::TargetEntry pointers
// to a vector of TargetMetaInfo returned by value. Declaration only.
873  std::vector<TargetMetaInfo> getTargetMetaInfo(
874  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const;
875 
// Const overload that takes TargetMetaInfo entries and returns a vector of
// strings by value. Declaration only.
876  std::vector<std::string> getTargetNames(
877  const std::vector<TargetMetaInfo>& targets) const;
878 
// Const overload that takes shared Analyzer::TargetEntry pointers and returns
// a vector of strings by value. Declaration only.
879  std::vector<std::string> getTargetNames(
880  const std::vector<std::shared_ptr<Analyzer::TargetEntry>>& targets) const;
881 
// Takes `hints`, `visible_tables` and `stdlog` by non-const reference, so the
// callee may modify all three; `hints` is presumably the output. `sql` and
// `cursor` are read-only inputs (presumably the statement text and a cursor
// position within it -- confirm). Declaration only.
882  void get_completion_hints_unsorted(std::vector<TCompletionHint>& hints,
883  std::vector<std::string>& visible_tables,
884  query_state::StdLog& stdlog,
885  const std::string& sql,
886  const int cursor);
// Takes `hints`, `stdlog` and `visible_tables` by non-const reference, plus
// the read-only `sql` and `cursor`. Declaration only.
// NOTE(review): `stdlog` precedes `visible_tables` here, the reverse of
// get_completion_hints_unsorted; the types differ, so a swapped call would
// not compile silently.
887  void get_token_based_completions(std::vector<TCompletionHint>& hints,
888  query_state::StdLog& stdlog,
889  std::vector<std::string>& visible_tables,
890  const std::string& sql,
891  const int cursor);
892 
// Returns, by value, a map from string to a set of strings (presumably table
// name -> column names, going by the function name -- confirm).
// `table_names` and `stdlog` are taken by non-const reference. Declaration only.
893  std::unordered_map<std::string, std::unordered_set<std::string>>
894  fill_column_names_by_table(std::vector<std::string>& table_names,
895  query_state::StdLog& stdlog);
896 
// Returns a TDashboard by value for the dashboard descriptor `dash`.
// `populate_state` defaults to true. Declaration only.
// NOTE(review): listing line 899 is absent, so a parameter between
// `session_ptr` and `dash` is presumably not shown here.
897  TDashboard get_dashboard_impl(
898  const std::shared_ptr<Catalog_Namespace::SessionInfo const>& session_ptr,
900  const DashboardDescriptor* dash,
901  const bool populate_state = true);
902 
// Family of static predicates sharing one signature:
// (const AccessPrivileges&, const TDBObjectPermissions&) -> bool.
// These are the functions stored as values in permissionFuncMap_ under the
// keys "database", "table", "dashboard", "view" and "server".
// Declarations only; which TDBObjectPermissions field each one inspects is
// not visible here.
// NOTE(review): has_table_permission names its second parameter `permission`
// (singular) while the other four use `permissions`.
903  static bool has_database_permission(const AccessPrivileges& privs,
904  const TDBObjectPermissions& permissions);
905  static bool has_table_permission(const AccessPrivileges& privs,
906  const TDBObjectPermissions& permission);
907  static bool has_dashboard_permission(const AccessPrivileges& privs,
908  const TDBObjectPermissions& permissions);
909  static bool has_view_permission(const AccessPrivileges& privs,
910  const TDBObjectPermissions& permissions);
911  static bool has_server_permission(const AccessPrivileges& privs,
912  const TDBObjectPermissions& permissions);
913  // For the provided upper-case column names `uc_column_names`, returns
914  // the tables from `table_names` which contain at least one of them.
915  // Used to rank the TABLE auto-completion hints by the columns
916  // specified in the projection.
// `table_names` and `stdlog` are taken by non-const reference; the result is
// returned by value as an unordered set of strings.
917  std::unordered_set<std::string> get_uc_compatible_table_names_by_column(
918  const std::unordered_set<std::string>& uc_column_names,
919  std::vector<std::string>& table_names,
920  query_state::StdLog& stdlog);
921 
// Returns a uniquely-owned lock container over a `const TableDescriptor*`.
// `loader` and `import_buffers` are raw pointers to caller-owned objects and
// are presumably filled in by this call -- confirm against the definition.
// `load_type` is a std::string passed by value. Declaration only.
922  std::unique_ptr<lockmgr::AbstractLockContainer<const TableDescriptor*>>
923  prepare_loader_generic(
924  const Catalog_Namespace::SessionInfo& session_info,
925  const std::string& table_name,
926  size_t num_cols,
927  std::unique_ptr<import_export::Loader>* loader,
928  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>* import_buffers,
929  const std::vector<std::string>& column_names,
930  std::string load_type);
931 
// Takes `import_buffers` and `col_idx` by non-const reference, so the callee
// may modify the buffers and change the caller's column index (presumably it
// advances the index past the columns it fills -- confirm). Declaration only.
932  void fillGeoColumns(
933  const TSessionId& session,
934  const Catalog_Namespace::Catalog& catalog,
935  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
936  const ColumnDescriptor* cd,
937  size_t& col_idx,
938  size_t num_rows,
939  const std::string& table_name);
940 
// Takes `import_buffers` by non-const reference, so the callee may modify
// them. The column descriptors `cds` and the `desc_id_to_column_id` vector
// are read-only inputs. Declaration only.
941  void fillMissingBuffers(
942  const TSessionId& session,
943  const Catalog_Namespace::Catalog& catalog,
944  std::vector<std::unique_ptr<import_export::TypedImportBuffer>>& import_buffers,
945  const std::list<const ColumnDescriptor*>& cds,
946  const std::vector<int>& desc_id_to_column_id,
947  size_t num_rows,
948  const std::string& table_name);
949 
// Map from a string key (presumably a session id -- confirm) to a shared
// SessionInfo pointer.
// NOTE(review): listing lines 950 and 952 are absent here; the cross-reference
// list at the end of this page names `query_states_` and the shared_mutex
// `calcite_sessions_mtx_` on those lines. The mutex presumably guards this map.
951  std::unordered_map<std::string, Catalog_Namespace::SessionInfoPtr> calcite_sessions_;
953 
// Const member function: takes a session id and returns a SessionInfoPtr by
// value. Declaration only; the result for an unknown id is not visible here.
954  Catalog_Namespace::SessionInfoPtr findCalciteSession(TSessionId const&) const;
955 
956  bool super_user_rights_; // default is "false"; setting to "true"
957  // ignores password checks in the "connect(..)"
958  // method
// NOTE(review): the time unit of the two durations below is not stated in
// this header -- confirm where they are initialized.
959  const int idle_session_duration_; // max duration of an idle session
960  const int max_session_duration_; // max duration of a session
961 
// Configuration members. The `const` ones can only be set at construction.
// NOTE(review): the `const T&` members alias objects owned elsewhere, so those
// objects must outlive this handler -- confirm at the construction site.
// NOTE(review): listing lines 963, 965-967, 969, 971-972 and 980 are absent
// here; the cross-reference list at the end of this page names further members
// on those lines (e.g. renderer_prefer_igpu_, disk_cache_config_).
962  const bool enable_rendering_;
964  const unsigned renderer_vulkan_timeout_;
968  const size_t render_mem_bytes_;
970  const size_t reserved_gpu_mem_;
973  const size_t num_reader_threads_;
// Only present when built with ENABLE_GEOS.
974 #ifdef ENABLE_GEOS
975  const std::string& libgeos_so_filename_;
976 #endif
// Only present when built with HAVE_TORCH_TFS.
977 #ifdef HAVE_TORCH_TFS
978  const std::string& torch_lib_path_;
979 #endif
981  const std::string& udf_filename_;
982  const std::string& clang_path_;
983  const std::vector<std::string>& clang_options_;
// Initialized to -1 in-class; what -1 means is not visible here.
984  int32_t max_num_sessions_{-1};
985  std::unique_ptr<Catalog_Namespace::SessionsStore> sessions_store_;
986 
// Fields of a small aggregate, presumably DeferredCopyFromState (the value
// type stored in was_deferred_copy_from).
// NOTE(review): the opening line of the aggregate (listing line 987) and the
// member on listing line 990 are absent from this listing; the cross-reference
// list at the end of this page names the latter as
// `import_export::CopyParams copy_params`.
988  std::string table;
989  std::string file_name;
991  std::string partitions;
992  };
993 
// Session id -> deferred COPY FROM state. Every accessor below takes
// deferred_copy_from_mutex before touching this map.
// NOTE(review): the enclosing struct's opening line (listing line 994) and the
// mutex declaration (presumably listing line 996) are absent from this listing.
995  std::unordered_map<std::string, DeferredCopyFromState> was_deferred_copy_from;
997 
// Looks up the deferred state recorded for `session_id`.
// Returns std::nullopt when the session has no entry; otherwise returns a copy
// of the stored state (the optional holds a value, not a reference).
998  std::optional<DeferredCopyFromState> operator()(const std::string& session_id) {
// Hold the map mutex for the whole lookup, including the copy on return.
999  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
1000  auto itr = was_deferred_copy_from.find(session_id);
1001  if (itr == was_deferred_copy_from.end()) {
1002  return std::nullopt;
1003  }
1004  return itr->second;
1005  }
1006 
// Records `state` for `session_id` under the map mutex.
// insert() does not overwrite an existing key, so `ret.second` is false when
// the session already has an entry; the CHECK asserts that never happens
// (CHECK comes from Logger.h and presumably aborts on failure -- confirm).
1007  void add(const std::string& session_id, const DeferredCopyFromState& state) {
1008  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
1009  const auto ret = was_deferred_copy_from.insert(std::make_pair(session_id, state));
1010  CHECK(ret.second);
1011  }
1012 
// Drops the entry for `session_id` under the map mutex. Erasing by key is a
// no-op when the key is absent, so removing an unknown session is harmless.
1013  void remove(const std::string& session_id) {
1014  std::lock_guard<std::mutex> map_lock(deferred_copy_from_mutex);
1015  was_deferred_copy_from.erase(session_id);
1016  }
1017  };
1019 
1020  // Only for IPC device memory deallocation
// Both members are `mutable`, so const member functions may lock the mutex
// and modify the map. The mutex presumably guards the map -- confirm.
1021  mutable std::mutex handle_to_dev_ptr_mutex_;
1022  mutable std::unordered_map<std::string, std::string> ipc_handle_to_dev_ptr_;
1023 
// Friend declaration: gives the free function run_warmup_queries access to
// this class's non-public members. All three arguments are passed by value.
1024  friend void run_warmup_queries(std::shared_ptr<DBHandler> handler,
1025  std::string base_path,
1026  std::string query_file_path);
1027 
// Friend classes: these three types may access this class's non-public members.
1028  friend class RenderHandler::Impl;
1029  friend class HeavyDBAggHandler;
1030  friend class HeavyDBLeafHandler;
1031 
// Per-instance lookup table from an object-type name to the static predicate
// that checks permissions for that object type. It is a non-static member, so
// every DBHandler instance holds its own copy of these five entries.
// The `"..."s` keys use the std::string literal suffix, which needs a
// string-literals using-directive in scope (not visible in this listing).
// NOTE(review): `const` on the key type is redundant, as std::map keys are
// already const inside value_type.
1032  std::map<const std::string, const PermissionFuncPtr> permissionFuncMap_ = {
1033  {"database"s, has_database_permission},
1034  {"dashboard"s, has_dashboard_permission},
1035  {"table"s, has_table_permission},
1036  {"view"s, has_view_permission},
1037  {"server"s, has_server_permission}};
1038 
// Takes a raw pointer to a parsed DDL statement. Declaration only; which
// statements lead to session invalidation is not visible here.
1039  void check_and_invalidate_sessions(Parser::DDLStmt* ddl);
1040 
// Returns a std::string by value (presumably the new session's id, matching
// the `session_id` taken by removeInMemoryCalciteSession -- confirm).
// NOTE(review): the return type is a const value, which stops callers from
// moving out of the returned string.
1041  std::string const createInMemoryCalciteSession(
1042  const std::shared_ptr<Catalog_Namespace::Catalog>& catalog_ptr);
// Takes a session id by const reference. Declaration only; the behavior for
// an unknown id is not visible here.
1043  void removeInMemoryCalciteSession(const std::string& session_id);
1044 
// Returns an ExecutionResult by value. The session is passed by value as a
// shared_ptr to a const SessionInfo. Declaration only.
1045  ExecutionResult getUserSessions(
1046  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
1047 
1048  // getQueries returns the set of queries queued in the DB
1049  // that belong to the same DB as the caller's session
1050 
// Returns an ExecutionResult by value. The session is passed by value as a
// shared_ptr to a const SessionInfo. Declaration only.
1051  ExecutionResult getQueries(
1052  std::shared_ptr<Catalog_Namespace::SessionInfo const> session_ptr);
1053 
// Overrides a virtual function of a base class (marked `override`). Takes
// `_return` by non-const reference, presumably as the output list of
// TQueryInfo, for the given session id.
1054  void get_queries_info(std::vector<TQueryInfo>& _return,
1055  const TSessionId& session) override;
1056 
1057  // Passes the interrupt request to the DB executor.
// `session_info` is the session taken by const reference and `target_session`
// is a string, presumably the id of the session to interrupt -- confirm.
1058  void interruptQuery(const Catalog_Namespace::SessionInfo& session_info,
1059  const std::string& target_session);
1060 
// `execution_time_ms` is taken by non-const reference, presumably so the
// elapsed time can be reported back to the caller. Declaration only.
// NOTE(review): listing line 1062 is absent, so a parameter between
// `sesson_id` and `cache_type` is presumably not shown here.
// NOTE(review): `sesson_id` looks like a misspelling of `session_id`.
1061  void alterSystemClear(const std::string& sesson_id,
1063  const std::string& cache_type,
1064  int64_t& execution_time_ms);
1065 
// `session_parameter` is a pair of strings (presumably name and value --
// confirm); `execution_time_ms` is taken by non-const reference, presumably so
// the elapsed time can be reported back to the caller. Declaration only.
// NOTE(review): listing line 1067 is absent, so a parameter between
// `sesson_id` and `session_parameter` is presumably not shown here.
// NOTE(review): `sesson_id` looks like a misspelling of `session_id`.
1066  void alterSession(const std::string& sesson_id,
1068  const std::pair<std::string, std::string>& session_parameter,
1069  int64_t& execution_time_ms);
1070 
// Const member function returning a TRole::type by value. Declaration only;
// how the role is determined is not visible here.
1071  TRole::type getServerRole() const;
1072 
1074 
// Takes the session id, target table name, source file name, copy parameters,
// row descriptor and create parameters, all by const reference. Same
// parameter list as importGeoTableSingle. Declaration only; the glob, filter
// and sort handling implied by the name is not visible here.
1075  void importGeoTableGlobFilterSort(const TSessionId& session,
1076  const std::string& table_name,
1077  const std::string& file_name,
1078  const import_export::CopyParams& copy_params,
1079  const TRowDescriptor& row_desc,
1080  const TCreateParams& create_params);
1081 
// Takes the session id, target table name, source file name, copy parameters,
// row descriptor and create parameters, all by const reference. Same
// parameter list as importGeoTableGlobFilterSort. Declaration only.
1082  void importGeoTableSingle(const TSessionId& session,
1083  const std::string& table_name,
1084  const std::string& file_name,
1085  const import_export::CopyParams& copy_params,
1086  const TRowDescriptor& row_desc,
1087  const TCreateParams& create_params);
1088 
// No parameters and no return value. Presumably re-creates or clears
// `sessions_store_` -- confirm against the definition.
1089  void resetSessionsStore();
1090 };
std::lock_guard< T > lock_guard
Classes used to wrap parser calls for calcite redirection.
std::vector< LeafHostInfo > string_leaves_
Definition: DBHandler.h:636
auto get_users(SysCatalog &syscat, std::unique_ptr< SqliteConnector > &sqliteConnector, const int32_t dbId=-1)
const std::vector< std::string > & clang_options_
Definition: DBHandler.h:983
boost::filesystem::path import_path_
Definition: DBHandler.h:638
std::unique_ptr< QueryDispatchQueue > dispatch_queue_
Definition: DBHandler.h:660
std::vector< std::unique_ptr< lockmgr::AbstractLockContainer< const TableDescriptor * >>> LockedTableDescriptors
Definition: LockMgr.h:272
ClientProtocol
const bool renderer_use_parallel_executors_
Definition: DBHandler.h:965
const std::string & udf_filename_
Definition: DBHandler.h:981
std::string cat(Ts &&...args)
const int render_oom_retry_threshold_
Definition: DBHandler.h:967
class for a per-database catalog. also includes metadata for the current database and the current use...
Definition: Catalog.h:143
void run_warmup_queries(std::shared_ptr< DBHandler > handler, std::string base_path, std::string query_file_path)
Definition: HeavyDB.cpp:215
std::mutex handle_to_dev_ptr_mutex_
Definition: DBHandler.h:1021
std::shared_ptr< query_state::QueryState > create_query_state(ARGS &&...args)
Definition: DBHandler.h:663
static thread_local std::string client_address
Definition: DBHandler.h:154
void add(const std::string &session_id, const DeferredCopyFromState &state)
Definition: DBHandler.h:1007
bool user_can_access_table(const Catalog_Namespace::SessionInfo &session_info, const TableDescriptor *td, const AccessPrivileges access_priv)
bool(*)(const AccessPrivileges &, const TDBObjectPermissions &) PermissionFuncPtr
Definition: DBHandler.h:104
static thread_local ClientProtocol client_protocol
Definition: DBHandler.h:155
LeafAggregator leaf_aggregator_
Definition: DBHandler.h:634
const unsigned renderer_vulkan_timeout_
Definition: DBHandler.h:964
const std::string base_data_path_
Definition: DBHandler.h:637
const bool jit_debug_
Definition: DBHandler.h:642
const bool check_origin_
Definition: DBHandler.h:158
const size_t render_mem_bytes_
Definition: DBHandler.h:968
std::map< TSessionId, std::shared_ptr< Catalog_Namespace::SessionInfo >> SessionMap
Definition: DBHandler.h:103
DeferredCopyFromSessions deferred_copy_from_sessions
Definition: DBHandler.h:1018
ExecutorDeviceType
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
int64_t start_time_
Definition: DBHandler.h:650
import_export::CopyParams copy_params
Definition: DBHandler.h:990
std::unordered_map< std::string, Catalog_Namespace::SessionInfoPtr > calcite_sessions_
Definition: DBHandler.h:951
tuple rows
Definition: report.py:114
This file contains the class specification and related data structures for Catalog.
std::mutex render_mutex_
Definition: DBHandler.h:649
static size_t max_bytes_for_thrift()
Definition: DBHandler.h:206
query_state::QueryStates query_states_
Definition: DBHandler.h:950
Supported runtime functions management and retrieval.
const size_t reserved_gpu_mem_
Definition: DBHandler.h:970
std::optional< DeferredCopyFromState > operator()(const std::string &session_id)
Definition: DBHandler.h:998
Classes representing a parse tree.
const bool render_compositor_use_last_gpu_
Definition: DBHandler.h:971
void check_not_info_schema_db(const std::string &db_name, bool throw_db_exception)
Definition: DBHandler.cpp:4811
GetTablesType
Definition: Catalog.h:63
const int max_session_duration_
Definition: DBHandler.h:960
ExecutorDeviceType executor_device_type_
Definition: DBHandler.h:639
std::vector< LeafHostInfo > db_leaves_
Definition: DBHandler.h:635
const File_Namespace::DiskCacheConfig & disk_cache_config_
Definition: DBHandler.h:980
const std::string & clang_path_
Definition: DBHandler.h:982
std::unique_ptr< RenderHandler > render_handler_
Definition: DBHandler.h:654
Checked json field retrieval.
std::shared_ptr< QueryEngine > query_engine_
Definition: DBHandler.h:653
SystemParameters & system_parameters_
Definition: DBHandler.h:652
const size_t num_reader_threads_
Definition: DBHandler.h:973
specifies the content in-memory of a row in the column metadata table
const bool enable_auto_clear_render_mem_
Definition: DBHandler.h:966
const bool renderer_prefer_igpu_
Definition: DBHandler.h:963
heavyai::shared_mutex calcite_sessions_mtx_
Definition: DBHandler.h:952
std::map< std::string, std::string > get_device_parameters(bool cpu_only)
bool is_info_schema_db(const std::string &db_name)
Definition: DBHandler.cpp:4806
const bool allow_loop_joins_
Definition: DBHandler.h:646
heavyai::shared_mutex sessions_mutex_
Definition: DBHandler.h:648
std::unique_ptr< HeavyDBAggHandler > agg_handler_
Definition: DBHandler.h:655
heavyai::shared_mutex custom_expressions_mutex_
Definition: DBHandler.h:1073
const bool enable_rendering_
Definition: DBHandler.h:962
std::unordered_map< std::string, DeferredCopyFromState > was_deferred_copy_from
Definition: DBHandler.h:995
const bool intel_jit_profile_
Definition: DBHandler.h:643
bool super_user_rights_
Definition: DBHandler.h:956
const bool renderer_enable_slab_allocation_
Definition: DBHandler.h:972
std::unique_ptr< Catalog_Namespace::SessionsStore > sessions_store_
Definition: DBHandler.h:985
std::shared_ptr< Calcite > calcite_
Definition: DBHandler.h:657
void shutdown()
Definition: Logger.cpp:401
std::shared_ptr< Data_Namespace::DataMgr > data_mgr_
Definition: DBHandler.h:632
const bool read_only_
Definition: DBHandler.h:645
bool process(std::shared_ptr<::apache::thrift::protocol::TProtocol > in, std::shared_ptr<::apache::thrift::protocol::TProtocol > out, void *connectionContext) override
Definition: DBHandler.h:119
const bool legacy_syntax_
Definition: DBHandler.h:658
std::unique_ptr< HeavyDBLeafHandler > leaf_handler_
Definition: DBHandler.h:656
#define CHECK(condition)
Definition: Logger.h:291
const int idle_session_duration_
Definition: DBHandler.h:959
std::unordered_map< std::string, std::string > ipc_handle_to_dev_ptr_
Definition: DBHandler.h:1022
const size_t max_concurrent_render_sessions_
Definition: DBHandler.h:969
boost::variant< ScalarTargetValue, ArrayTargetValue, GeoTargetValue, GeoTargetValuePtr > TargetValue
Definition: TargetValue.h:195
bool allow_multifrag_
Definition: DBHandler.h:644
string name
Definition: setup.in.py:72
std::shared_timed_mutex shared_mutex
const AuthMetadata & authMetadata_
Definition: DBHandler.h:651
TrackingProcessor(std::shared_ptr< HeavyIf > handler, const bool check_origin)
Definition: DBHandler.h:116
bool cpu_mode_only_
Definition: DBHandler.h:647
std::default_random_engine random_gen_
Definition: DBHandler.h:640
std::uniform_int_distribution< int64_t > session_id_dist_
Definition: DBHandler.h:641
std::shared_ptr< SessionInfo > SessionInfoPtr
Definition: SessionsStore.h:27