OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CommandLineOptions.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <fcntl.h>
18 #include <sys/stat.h>
19 #include <sys/types.h>
20 
21 #include <filesystem>
22 #include <iostream>
23 #include <string>
24 
25 using namespace std::string_literals;
26 
27 #include "CommandLineOptions.h"
29 #include "LeafHostInfo.h"
30 #include "MapDRelease.h"
36 #include "Shared/Compressor.h"
37 #include "Shared/SysDefinitions.h"
39 #include "Utils/DdlUtils.h"
40 
41 #ifdef _WIN32
42 #include <io.h>
43 #include <process.h>
44 #endif
45 
// Out-of-class definitions of two CommandLineOptions string constants,
// initialized to "node_id" and "cluster_topology" respectively.
46 const std::string CommandLineOptions::nodeIds_token = {"node_id"};
47 const std::string CommandLineOptions::cluster_command_line_arg{"cluster_topology"};
48 
50 
// Declarations of global settings that are defined in other translation units.
// Some of them are bound directly to command-line options in this file, e.g.
// g_cache_string_hash ("enable-string-dict-hash-cache"), g_enable_system_tables
// ("enable-system-tables"), g_enable_logs_system_tables
// ("enable-logs-system-tables") and g_logs_system_tables_refresh_interval
// ("logs-system-tables-refresh-interval").
51 extern bool g_use_table_device_offset;
53 extern bool g_cache_string_hash;
56 extern int64_t g_large_ndv_threshold;
57 extern size_t g_large_ndv_multiplier;
58 extern int64_t g_bitmap_memory_limit;
59 extern bool g_enable_seconds_refresh;
61 extern size_t g_approx_quantile_buffer;
62 extern size_t g_approx_quantile_centroids;
63 extern size_t g_parallel_top_min;
64 extern size_t g_parallel_top_max;
65 extern size_t g_streaming_topn_max;
70 extern bool g_enable_system_tables;
72 extern bool g_allow_memory_status_log;
73 extern bool g_enable_logs_system_tables;
75 extern std::string g_logs_system_tables_refresh_interval;
81 
// g_pmem_path is only declared when ENABLE_MEMKIND is defined; in that
// configuration it is bound to the "pmem-path" command-line option.
82 #ifdef ENABLE_MEMKIND
83 extern std::string g_pmem_path;
84 #endif
85 
// Declaration of Catalog_Namespace::g_log_user_id (defined elsewhere); it is
// bound to the "log-user-id" command-line option in this file.
86 namespace Catalog_Namespace {
87 extern bool g_log_user_id;
88 }
89 
// File-scope variables with their initial values.
// NOTE(review): the unit of the three timeouts is not visible here
// (presumably milliseconds) -- confirm at the point of use.
90 unsigned connect_timeout{20000};
91 unsigned recv_timeout{300000};
92 unsigned send_timeout{300000};
93 bool with_keepalive{false};
95 
97  if (verbose_logging && logger::Severity::DEBUG1 < log_options_.severity_) {
98  log_options_.severity_ = logger::Severity::DEBUG1;
99  }
100  validate_base_path();
101  log_options_.set_base_path(base_path);
102  logger::init(log_options_);
103 }
105  po::options_description& desc = help_desc_;
106 
107  desc.add_options()("help,h", "Show available options.");
108  desc.add_options()(
109  "allow-cpu-retry",
110  po::value<bool>(&g_allow_cpu_retry)
111  ->default_value(g_allow_cpu_retry)
112  ->implicit_value(true),
113  R"(Allow the queries which failed on GPU to retry on CPU, even when watchdog is enabled.)");
114  desc.add_options()("allow-loop-joins",
115  po::value<bool>(&allow_loop_joins)
116  ->default_value(allow_loop_joins)
117  ->implicit_value(true),
118  "Enable loop joins.");
119  desc.add_options()("bigint-count",
120  po::value<bool>(&g_bigint_count)
121  ->default_value(g_bigint_count)
122  ->implicit_value(true),
123  "Use 64-bit count.");
124 
125  desc.add_options()(
126  "enable-executor-resource-mgr",
127  po::value<bool>(&g_enable_executor_resource_mgr)
128  ->default_value(g_enable_executor_resource_mgr)
129  ->implicit_value(true),
130  "Enable executor resource manager to track execution resources and selectively "
131  "gate concurrency based on resource availability.");
132 
133  // Note we allow executor-cpu-result-mem-ratio to have values > 1 to allow
134  // oversubscription of memory when warranted, but user should be careful with this as
135  // too high a value can cause OOM errors.
136  desc.add_options()(
137  "executor-cpu-result-mem-ratio",
140  "Set executor resource manager reserved memory for query result sets as a ratio "
141  "greater than 0, representing the fraction of the system memory not allocated for "
142  "the CPU buffer pool. Values of 1.0 are permitted to allow oversubscription when "
143  "warranted, but too high a value can cause out-of-memory errors. Requires "
144  "--executor-resource-mgr to be set");
145 
146  desc.add_options()(
147  "executor-cpu-result-mem-bytes",
150  "Set executor resource manager reserved memory for query result sets in bytes, "
151  "this overrides the default reservation of 80% the size of the system memory that "
152  "is not allocated for the CPU buffer pool. Use 0 for auto. Requires "
153  "--enable-executor-resource-mgr to be set.");
154 
155  // Note we allow executor-per-query-max-cpu-threads-ratio to have values > 1 to allow
156  // oversubscription of threads when warranted, given we may be overly pessimistic about
157  // kernel core occupation for some classes of queries. Care should be taken however with
158  // setting this value too high as thrashing and thread starvation can result.
159  desc.add_options()(
160  "executor-per-query-max-cpu-threads-ratio",
163  "Set max fraction of executor resource manager total CPU slots/threads that can be "
164  "allocated for a single query. Requires --enable-executor-resource-mgr to be set.");
165 
166  // Note we allow executor-per-query-max-cpu-result-mem-ratio to have values > 1 to allow
167  // oversubscription of memory when warranted, but user should be careful with this as
168  // too high a value can cause OOM errors.
169  desc.add_options()(
170  "executor-per-query-max-cpu-result-mem-ratio",
173  "Set max fraction of executor resource manager total CPU result memory reservation "
174  "that can be "
175  "allocated for a single query. Requires --enable-executor-resource-mgr to be set.");
176 
177  desc.add_options()(
178  "allow-cpu-kernel-concurrency",
181  ->implicit_value(true),
182  "Allow for multiple queries to run execution kernels concurrently on CPU. Requires "
183  "--enable-executor-resource-mgr to be set.");
184 
185  desc.add_options()(
186  "allow-cpu-gpu-kernel-concurrency",
189  ->implicit_value(true),
190  "Allow multiple queries to run execution kernels concurrently on CPU while a "
191  "GPU query is executing. Requires --enable-executor-resource-mgr to be set.");
192 
193  // Below controls whether multiple concurrent queries in conjunction can oversubscribe
194  // CPU slots/threads. Single query CPU slot oversubscription should be controlled with
195  // --executor-per-query-max-cpu-threads-ratio (i.e. by setting it to > 1.0)
196 
197  desc.add_options()(
198  "allow-cpu-thread-oversubscription-concurrency",
199  po::value<bool>(
201  ->default_value(
203  ->implicit_value(true),
204  "Allow for concurrent query kernel execution even if it results in "
205  "oversubscription of CPU threads. Caution should be used when turning this on as "
206  "it can lead to thread exhaustion. Requires --enable-executor-resource-mgr to be "
207  "set.");
208 
209  // Below controls whether multiple concurrent queries in conjunction can oversubscribe
210  // CPU result memory. Single query CPU result memory oversubscription should be
211  // controlled with
212  // --executor-per-query-max-cpu-result-mem-ratio (i.e. by setting it to > 1.0)
213 
214  desc.add_options()(
215  "allow-cpu-result-mem-oversubscription-concurrency",
216  po::value<bool>(
218  ->default_value(
220  ->implicit_value(true),
221  "Allow for concurrent query kernel execution even if it results in "
222  "oversubscription of CPU memory. Caution should be used when turning this on as it "
223  "can lead to out-of-memory errors. Requires --enable-executor-resource-mgr to be "
224  "set.");
225 
226  desc.add_options()(
227  "executor-max-available-resource-use-ratio",
230  "Set max proportion (0 < ratio <= 1.0) of available resources that should be "
231  "granted to a query. Requires --executor-resource-mgr to be set");
232 
233  desc.add_options()("calcite-max-mem",
234  po::value<size_t>(&system_parameters.calcite_max_mem)
235  ->default_value(system_parameters.calcite_max_mem),
236  "Max memory available to calcite JVM.");
237  if (!dist_v5_) {
238  desc.add_options()("calcite-port",
239  po::value<int>(&system_parameters.calcite_port)
240  ->default_value(system_parameters.calcite_port),
241  "Calcite port number.");
242  }
243  desc.add_options()("config",
244  po::value<std::string>(&system_parameters.config_file),
245  "Path to server configuration file.");
246  desc.add_options()("cpu-buffer-mem-bytes",
247  po::value<size_t>(&system_parameters.cpu_buffer_mem_bytes)
248  ->default_value(system_parameters.cpu_buffer_mem_bytes),
249  "Size of memory reserved for CPU buffers, in bytes.");
250 
251  desc.add_options()("cpu-only",
252  po::value<bool>(&system_parameters.cpu_only)
253  ->default_value(system_parameters.cpu_only)
254  ->implicit_value(true),
255  "Run on CPU only, even if GPUs are available.");
256  desc.add_options()("cuda-block-size",
257  po::value<size_t>(&system_parameters.cuda_block_size)
258  ->default_value(system_parameters.cuda_block_size),
259  "Size of block to use on NVIDIA GPU.");
260  desc.add_options()("cuda-grid-size",
261  po::value<size_t>(&system_parameters.cuda_grid_size)
262  ->default_value(system_parameters.cuda_grid_size),
263  "Size of grid to use on NVIDIA GPU.");
264  desc.add_options()("optimize-cuda-block-and-grid-sizes",
265  po::value<bool>(&optimize_cuda_block_and_grid_sizes)
266  ->default_value(false)
267  ->implicit_value(true));
268 
269  if (!dist_v5_) {
270  desc.add_options()(
271  "data",
272  po::value<std::string>(&base_path)->required()->default_value("storage"),
273  "Directory path to HeavyDB data storage (catalogs, raw data, log files, etc).");
274  positional_options.add("data", 1);
275  }
276  desc.add_options()("db-query-list",
277  po::value<std::string>(&db_query_file),
278  "Path to file containing HeavyDB warmup queries.");
279  desc.add_options()(
280  "exit-after-warmup",
281  po::value<bool>(&exit_after_warmup)->default_value(false)->implicit_value(true),
282  "Exit after HeavyDB warmup queries.");
283  desc.add_options()("dynamic-watchdog-time-limit",
284  po::value<unsigned>(&dynamic_watchdog_time_limit)
285  ->default_value(dynamic_watchdog_time_limit)
286  ->implicit_value(10000),
287  "Dynamic watchdog time limit, in milliseconds.");
288  desc.add_options()("enable-data-recycler",
289  po::value<bool>(&enable_data_recycler)
290  ->default_value(enable_data_recycler)
291  ->implicit_value(true),
292  "Use data recycler.");
293  desc.add_options()("use-hashtable-cache",
294  po::value<bool>(&use_hashtable_cache)
295  ->default_value(use_hashtable_cache)
296  ->implicit_value(true),
297  "Use hashtable cache.");
298  desc.add_options()("use-query-resultset-cache",
299  po::value<bool>(&g_use_query_resultset_cache)
300  ->default_value(g_use_query_resultset_cache)
301  ->implicit_value(true),
302  "Use query resultset cache.");
303  desc.add_options()("use-chunk-metadata-cache",
304  po::value<bool>(&g_use_chunk_metadata_cache)
305  ->default_value(g_use_chunk_metadata_cache)
306  ->implicit_value(true),
307  "Use chunk metadata cache.");
308  desc.add_options()(
309  "hashtable-cache-total-bytes",
310  po::value<size_t>(&hashtable_cache_total_bytes)
311  ->default_value(hashtable_cache_total_bytes)
312  ->implicit_value(4294967296),
313  "Size of total memory space for hashtable cache, in bytes (default: 4GB).");
314  desc.add_options()("max-cacheable-hashtable-size-bytes",
315  po::value<size_t>(&max_cacheable_hashtable_size_bytes)
316  ->default_value(max_cacheable_hashtable_size_bytes)
317  ->implicit_value(2147483648),
318  "The maximum size of hashtable that is available to cache, in "
319  "bytes (default: 2GB).");
320  desc.add_options()(
321  "query-resultset-cache-total-bytes",
322  po::value<size_t>(&g_query_resultset_cache_total_bytes)
323  ->default_value(g_query_resultset_cache_total_bytes),
324  "Size of total memory space for query resultset cache, in bytes (default: 4GB).");
325  desc.add_options()("max-query-resultset-size-bytes",
328  "The maximum size of query resultset that is available to cache, in "
329  "bytes (default: 2GB).");
330  desc.add_options()("allow-auto-query-resultset-caching",
331  po::value<bool>(&g_allow_auto_resultset_caching)
332  ->default_value(g_allow_auto_resultset_caching)
333  ->implicit_value(true),
334  "Allow automatic query resultset caching when the size of "
335  "query resultset is smaller or equal to the threshold defined "
336  "by `auto-resultset-caching-threshold-bytes`, in bytes (to "
337  "enable this, query resultset recycler "
338  "should be enabled, default: 1048576 bytes (or 1MB)).");
339  desc.add_options()(
340  "auto-resultset-caching-threshold-bytes",
341  po::value<size_t>(&g_auto_resultset_caching_threshold)
342  ->default_value(g_auto_resultset_caching_threshold),
343  "A threshold that allows caching query resultset automatically if the size of "
344  "resultset is less than it, in bytes (default: 1MB).");
345  desc.add_options()("allow-query-step-skipping",
346  po::value<bool>(&g_allow_query_step_skipping)
347  ->default_value(g_allow_query_step_skipping)
348  ->implicit_value(true),
349  "Allow query step skipping when multi-step query has at least "
350  "one cached query resultset.");
351  desc.add_options()("enable-debug-timer",
352  po::value<bool>(&g_enable_debug_timer)
353  ->default_value(g_enable_debug_timer)
354  ->implicit_value(true),
355  "Enable debug timer logging.");
356  desc.add_options()("enable-dynamic-watchdog",
357  po::value<bool>(&enable_dynamic_watchdog)
358  ->default_value(enable_dynamic_watchdog)
359  ->implicit_value(true),
360  "Enable dynamic watchdog.");
361  desc.add_options()("enable-filter-push-down",
362  po::value<bool>(&g_enable_filter_push_down)
363  ->default_value(g_enable_filter_push_down)
364  ->implicit_value(true),
365  "Enable filter push down through joins.");
366  desc.add_options()(
367  "enable-bbox-intersect-hashjoin",
368  po::value<bool>(&g_enable_bbox_intersect_hashjoin)
369  ->default_value(g_enable_bbox_intersect_hashjoin)
370  ->implicit_value(true),
371  "Enable the bounding box intersect hash join framework to enable post-filtering of "
372  "pairs of geometries before actually comptuing geometry function.");
373  desc.add_options()("enable-hashjoin-many-to-many",
374  po::value<bool>(&g_enable_hashjoin_many_to_many)
375  ->default_value(g_enable_hashjoin_many_to_many)
376  ->implicit_value(true),
377  "Enable the bounding box intersect hash join framework to more "
378  "spatial join operators for pairs of geometry types corresponding "
379  "to many-to-many relationship.");
380  desc.add_options()("enable-distance-rangejoin",
381  po::value<bool>(&g_enable_distance_rangejoin)
382  ->default_value(g_enable_distance_rangejoin)
383  ->implicit_value(true),
384  "Enable accelerating point distance joins with a hash table. "
385  "This rewrites ST_Distance when using an upperbound (<= X).");
386  desc.add_options()("enable-runtime-query-interrupt",
387  po::value<bool>(&enable_runtime_query_interrupt)
388  ->default_value(enable_runtime_query_interrupt)
389  ->implicit_value(true),
390  "Enable runtime query interrupt.");
391  desc.add_options()("enable-non-kernel-time-query-interrupt",
392  po::value<bool>(&enable_non_kernel_time_query_interrupt)
393  ->default_value(enable_non_kernel_time_query_interrupt)
394  ->implicit_value(true),
395  "Enable non-kernel time query interrupt.");
396  desc.add_options()("pending-query-interrupt-freq",
397  po::value<unsigned>(&pending_query_interrupt_freq)
398  ->default_value(pending_query_interrupt_freq)
399  ->implicit_value(1000),
400  "A frequency of checking the request of pending query "
401  "interrupt from user (in millisecond).");
402  desc.add_options()("running-query-interrupt-freq",
403  po::value<double>(&running_query_interrupt_freq)
404  ->default_value(running_query_interrupt_freq)
405  ->implicit_value(0.5),
406  "A frequency of checking the request of running query "
407  "interrupt from user (0.0 (less frequent) ~ (more frequent) 1.0).");
408  desc.add_options()("use-estimator-result-cache",
409  po::value<bool>(&use_estimator_result_cache)
410  ->default_value(use_estimator_result_cache)
411  ->implicit_value(true),
412  "Use estimator result cache.");
413  if (!dist_v5_) {
414  desc.add_options()(
415  "enable-string-dict-hash-cache",
416  po::value<bool>(&g_cache_string_hash)
417  ->default_value(g_cache_string_hash)
418  ->implicit_value(true),
419  "Cache string hash values in the string dictionary server during import.");
420  }
421  desc.add_options()("enable-thrift-logs",
422  po::value<bool>(&g_enable_thrift_logs)
423  ->default_value(g_enable_thrift_logs)
424  ->implicit_value(true),
425  "Enable writing messages directly from thrift to stdout/stderr.");
426  desc.add_options()("enable-watchdog",
427  po::value<bool>(&enable_watchdog)
428  ->default_value(enable_watchdog)
429  ->implicit_value(true),
430  "Enable watchdog.");
431  desc.add_options()("watchdog-max-projected-rows-per-device",
432  po::value<size_t>(&g_watchdog_max_projected_rows_per_device)
434  "Max number of rows allowed to be projected when running a query "
435  "with watchdog enabled.");
436  desc.add_options()(
437  "preflight-count-query-threshold",
438  po::value<size_t>(&preflight_count_query_threshold)
439  ->default_value(preflight_count_query_threshold),
440  "Threshold to run pre-flight count query which computes # output rows accurately.");
441  desc.add_options()(
442  "watchdog-none-encoded-string-translation-limit",
445  "Max number of none-encoded strings allowed to be translated "
446  "to dictionary-encoded with watchdog enabled");
447  desc.add_options()("filter-push-down-low-frac",
448  po::value<float>(&g_filter_push_down_low_frac)
449  ->default_value(g_filter_push_down_low_frac)
450  ->implicit_value(g_filter_push_down_low_frac),
451  "Lower threshold for selectivity of filters that are pushed down.");
452  desc.add_options()("filter-push-down-high-frac",
453  po::value<float>(&g_filter_push_down_high_frac)
454  ->default_value(g_filter_push_down_high_frac)
455  ->implicit_value(g_filter_push_down_high_frac),
456  "Higher threshold for selectivity of filters that are pushed down.");
457  desc.add_options()("filter-push-down-passing-row-ubound",
458  po::value<size_t>(&g_filter_push_down_passing_row_ubound)
460  ->implicit_value(g_filter_push_down_passing_row_ubound),
461  "Upperbound on the number of rows that should pass the filter "
462  "if the selectivity is less than "
463  "the high fraction threshold.");
464  desc.add_options()("from-table-reordering",
465  po::value<bool>(&g_from_table_reordering)
466  ->default_value(g_from_table_reordering)
467  ->implicit_value(true),
468  "Enable automatic table reordering in FROM clause.");
469  desc.add_options()("gpu-buffer-mem-bytes",
470  po::value<size_t>(&system_parameters.gpu_buffer_mem_bytes)
471  ->default_value(system_parameters.gpu_buffer_mem_bytes),
472  "Size of memory reserved for GPU buffers, in bytes, per GPU.");
473  desc.add_options()("gpu-input-mem-limit",
474  po::value<double>(&system_parameters.gpu_input_mem_limit)
475  ->default_value(system_parameters.gpu_input_mem_limit),
476  "Force query to CPU when input data memory usage exceeds this "
477  "percentage of available GPU memory.");
478  desc.add_options()("watchdog-in-clause-max-num-elem-non-bitmap",
481  "Max number of unique values allowed to process IN-clause without "
482  "using a bitmap when watchdog is enabled.");
483  desc.add_options()("watchdog-in-clause-max-num-elem-bitmap",
484  po::value<size_t>(&g_watchdog_in_clause_max_num_elem_bitmap)
486  "Max number of unique values allowed to "
487  "process IN-clause using a bitmap when watchdog is enabled.");
488  desc.add_options()(
489  "watchdog-in-clause-max-num-input-rows",
490  po::value<size_t>(&g_watchdog_in_clause_max_num_input_rows)
492  "Max number of input rows allowed to process IN-clause when watchdog is enabled");
493  desc.add_options()("in-clause-num-elem-skip-bitmap",
494  po::value<size_t>(&g_in_clause_num_elem_skip_bitmap)
495  ->default_value(g_in_clause_num_elem_skip_bitmap),
496  "# values to skip constructing a bitmap to process IN-clause");
497 
498  desc.add_options()(
499  "hll-precision-bits",
500  po::value<int>(&g_hll_precision_bits)
501  ->default_value(g_hll_precision_bits)
502  ->implicit_value(g_hll_precision_bits),
503  "Number of bits used from the hash value used to specify the bucket number.");
504  if (!dist_v5_) {
505  desc.add_options()("http-port",
506  po::value<int>(&http_port)->default_value(http_port),
507  "HTTP port number.");
508  desc.add_options()("http-binary-port",
509  po::value<int>(&http_binary_port)->default_value(http_binary_port),
510  "HTTP binary port number.");
511  }
512  desc.add_options()(
513  "idle-session-duration",
514  po::value<int>(&idle_session_duration)->default_value(idle_session_duration),
515  "Maximum duration of idle session.");
516  desc.add_options()("inner-join-fragment-skipping",
517  po::value<bool>(&g_inner_join_fragment_skipping)
518  ->default_value(g_inner_join_fragment_skipping)
519  ->implicit_value(true),
520  "Enable/disable inner join fragment skipping. This feature is "
521  "considered stable and is enabled by default. This "
522  "parameter will be removed in a future release.");
523  desc.add_options()(
524  "max-session-duration",
525  po::value<int>(&max_session_duration)->default_value(max_session_duration),
526  "Maximum duration of active session.");
527  desc.add_options()("num-sessions",
528  po::value<int>(&system_parameters.num_sessions)
529  ->default_value(system_parameters.num_sessions),
530  "Maximum number of active session.");
531  desc.add_options()("null-div-by-zero",
532  po::value<bool>(&g_null_div_by_zero)
533  ->default_value(g_null_div_by_zero)
534  ->implicit_value(true),
535  "Return null on division by zero instead of throwing an exception.");
536  desc.add_options()(
537  "num-reader-threads",
538  po::value<size_t>(&num_reader_threads)->default_value(num_reader_threads),
539  "Number of reader threads to use.");
540  desc.add_options()(
541  "max-import-threads",
542  po::value<size_t>(&g_max_import_threads)->default_value(g_max_import_threads),
543  "Max number of default import threads to use (num hardware threads will be used "
544  "instead if lower). Can be overriden with copy statement threads option).");
545  desc.add_options()(
546  "bbox-intersect-max-table-size-bytes",
547  po::value<size_t>(&g_bbox_intersect_max_table_size_bytes)
548  ->default_value(g_bbox_intersect_max_table_size_bytes),
549  "The maximum size in bytes of the hash table for bounding box intersect.");
550  desc.add_options()("bbox-intersect-target-entries-per-bin",
551  po::value<double>(&g_bbox_intersect_target_entries_per_bin)
553  "The target number of entries per bin for bounding box intersect");
554  if (!dist_v5_) {
555  desc.add_options()("port,p",
556  po::value<int>(&system_parameters.omnisci_server_port)
557  ->default_value(system_parameters.omnisci_server_port),
558  "TCP Port number.");
559  }
560  desc.add_options()("num-gpus",
561  po::value<int>(&system_parameters.num_gpus)
562  ->default_value(system_parameters.num_gpus),
563  "Number of gpus to use.");
564  desc.add_options()(
565  "read-only",
566  po::value<bool>(&read_only)->default_value(read_only)->implicit_value(true),
567  "Enable read-only mode.");
568 
569  desc.add_options()(
570  "res-gpu-mem",
571  po::value<size_t>(&reserved_gpu_mem)->default_value(reserved_gpu_mem),
572  "Reduces GPU memory available to the HeavyDB allocator by this amount. Used for "
573  "compiled code cache and ancillary GPU functions and other processes that may also "
574  "be using the GPU concurrent with HeavyDB.");
575 
576  desc.add_options()("start-gpu",
577  po::value<int>(&system_parameters.start_gpu)
578  ->default_value(system_parameters.start_gpu),
579  "First gpu to use.");
580  desc.add_options()("trivial-loop-join-threshold",
581  po::value<unsigned>(&g_trivial_loop_join_threshold)
582  ->default_value(g_trivial_loop_join_threshold)
583  ->implicit_value(1000),
584  "The maximum number of rows in the inner table of a loop join "
585  "considered to be trivially small.");
586  desc.add_options()(
587  "uniform-request-ids-per-thrift-call",
588  po::value<bool>(&g_uniform_request_ids_per_thrift_call)
590  ->implicit_value(true),
591  "If true (default) then assign the same request_id to thrift calls that were "
592  "initiated by the same external thrift call. If false then assign different "
593  "request_ids and log the parent/child relationships.");
594  desc.add_options()("verbose",
595  po::value<bool>(&verbose_logging)
596  ->default_value(verbose_logging)
597  ->implicit_value(true),
598  "Write additional debug log messages to server logs.");
599  desc.add_options()(
600  "enable-runtime-udf",
601  po::value<bool>(&enable_runtime_udf)
602  ->default_value(enable_runtime_udf)
603  ->implicit_value(true),
604  "DEPRECATED. Please use `enable-runtime-udfs` instead as this flag will be removed "
605  "in the near future.");
606  desc.add_options()(
607  "enable-runtime-udfs",
608  po::value<bool>(&enable_runtime_udfs)
609  ->default_value(enable_runtime_udfs)
610  ->implicit_value(true),
611  "Enable runtime UDF registration by passing signatures and corresponding LLVM IR "
612  "to the `register_runtime_udf` endpoint. For use with the Python Remote Backend "
613  "Compiler server, packaged separately.");
614  desc.add_options()("enable-udf-registration-for-all-users",
615  po::value<bool>(&enable_udf_registration_for_all_users)
616  ->default_value(enable_udf_registration_for_all_users)
617  ->implicit_value(true),
618  "Allow all users, not just superusers, to register runtime "
619  "UDFs/UDTFs. Option only valid if "
620  "`--enable-runtime-udfs` is set to true.");
621  desc.add_options()("version,v", "Print Version Number.");
622  desc.add_options()("enable-string-functions",
623  po::value<bool>(&g_enable_string_functions)
624  ->default_value(g_enable_string_functions)
625  ->implicit_value(true),
626  "Enable experimental string functions.");
627  desc.add_options()("enable-experimental-string-functions",
628  po::value<bool>(&g_enable_string_functions)
629  ->default_value(g_enable_string_functions)
630  ->implicit_value(true),
631  "DEPRECATED. String functions are now enabled by default, "
632  "but can still be controlled with --enable-string-functions.");
633  desc.add_options()(
634  "enable-fsi",
635  po::value<bool>(&g_enable_fsi)->default_value(g_enable_fsi)->implicit_value(true),
636  "Enable foreign storage interface.");
637 
638  desc.add_options()("enable-legacy-delimited-import",
639  po::value<bool>(&g_enable_legacy_delimited_import)
640  ->default_value(g_enable_legacy_delimited_import)
641  ->implicit_value(true),
642  "Use legacy importer for delimited sources.");
643 #ifdef ENABLE_IMPORT_PARQUET
644  desc.add_options()("enable-legacy-parquet-import",
645  po::value<bool>(&g_enable_legacy_parquet_import)
646  ->default_value(g_enable_legacy_parquet_import)
647  ->implicit_value(true),
648  "Use legacy importer for parquet sources.");
649 #endif
650  desc.add_options()("enable-fsi-regex-import",
651  po::value<bool>(&g_enable_fsi_regex_import)
652  ->default_value(g_enable_fsi_regex_import)
653  ->implicit_value(true),
654  "Use FSI importer for regex parsed sources.");
655 
656  desc.add_options()("enable-add-metadata-columns",
657  po::value<bool>(&g_enable_add_metadata_columns)
658  ->default_value(g_enable_add_metadata_columns)
659  ->implicit_value(true),
660  "Enable add_metadata_columns COPY FROM WITH option (Beta).");
661 
662  desc.add_options()("disk-cache-path",
663  po::value<std::string>(&disk_cache_config.path),
664  "Specify the path for the disk cache.");
665 
666  desc.add_options()(
667  "disk-cache-level",
668  po::value<std::string>(&(disk_cache_level))->default_value("foreign_tables"),
669  "Specify level of disk cache. Valid options are 'foreign_tables', "
670  "'local_tables', 'none', and 'all'.");
671 
672  desc.add_options()("disk-cache-size",
673  po::value<size_t>(&(disk_cache_config.size_limit)),
674  "Specify a maximum size for the disk cache in bytes.");
675 
676  desc.add_options()(
677  "enable-interoperability",
678  po::value<bool>(&g_enable_interop)
679  ->default_value(g_enable_interop)
680  ->implicit_value(true),
681  "Enable offloading of query portions to an external execution engine.");
682  desc.add_options()("enable-union",
683  po::value<bool>(&g_enable_union)
684  ->default_value(g_enable_union)
685  ->implicit_value(true),
686  "DEPRECATED. UNION ALL is enabled by default. Please remove "
687  "use of this option, as it may be disabled in the future.");
688  desc.add_options()(
689  "calcite-service-timeout",
690  po::value<size_t>(&system_parameters.calcite_timeout)
691  ->default_value(system_parameters.calcite_timeout),
692  "Calcite server timeout (milliseconds). Increase this on systems with frequent "
693  "schema changes or when running large numbers of parallel queries.");
694  desc.add_options()("calcite-service-keepalive",
695  po::value<size_t>(&system_parameters.calcite_keepalive)
696  ->default_value(system_parameters.calcite_keepalive)
697  ->implicit_value(true),
698  "Enable keepalive on Calcite connections.");
699  desc.add_options()(
700  "stringdict-parallelizm",
701  po::value<bool>(&g_enable_stringdict_parallel)
702  ->default_value(g_enable_stringdict_parallel)
703  ->implicit_value(true),
704  "Allow StringDictionary to parallelize loads using multiple threads");
705  desc.add_options()("log-user-id",
706  po::value<bool>(&Catalog_Namespace::g_log_user_id)
707  ->default_value(Catalog_Namespace::g_log_user_id)
708  ->implicit_value(true),
709  "Log userId integer in place of the userName (when available).");
710  desc.add_options()("log-user-origin",
711  po::value<bool>(&log_user_origin)
712  ->default_value(log_user_origin)
713  ->implicit_value(true),
714  "Lookup the origin of inbound connections by IP address/DNS "
715  "name, and print this information as part of stdlog.");
716  desc.add_options()("allowed-import-paths",
717  po::value<std::string>(&allowed_import_paths),
718  "List of allowed root paths that can be used in import operations.");
719  desc.add_options()("allowed-export-paths",
720  po::value<std::string>(&allowed_export_paths),
721  "List of allowed root paths that can be used in export operations.");
722  desc.add_options()("enable-system-tables",
723  po::value<bool>(&g_enable_system_tables)
724  ->default_value(g_enable_system_tables)
725  ->implicit_value(true),
726  "Enable use of system tables.");
727  desc.add_options()("enable-table-functions",
728  po::value<bool>(&g_enable_table_functions)
729  ->default_value(g_enable_table_functions)
730  ->implicit_value(true),
731  "Enable system table functions support.");
732  desc.add_options()("enable-ml-functions",
733  po::value<bool>(&g_enable_ml_functions)
734  ->default_value(g_enable_ml_functions)
735  ->implicit_value(true),
736  "Enable ML support.");
737  desc.add_options()("restrict-ml-model-metadata-to-superusers",
740  ->implicit_value(true),
741  "RESTRICT SHOW MODEL and SHOW MODEL DETAILS to superusers only.");
742  desc.add_options()("enable-logs-system-tables",
743  po::value<bool>(&g_enable_logs_system_tables)
744  ->default_value(g_enable_logs_system_tables)
745  ->implicit_value(true),
746  "Enable use of logs system tables.");
747  desc.add_options()("enable-logs-system-tables-auto-refresh",
750  ->implicit_value(true),
751  "Enable automatic refreshes of logs system tables.");
752  desc.add_options()("logs-system-tables-refresh-interval",
753  po::value<std::string>(&g_logs_system_tables_refresh_interval)
754  ->default_value(g_logs_system_tables_refresh_interval),
755  "Refresh interval for logs system tables. Interval should have the "
756  "following format: nS, nH, or nD");
757  desc.add_options()(
758  "logs-system-tables-max-files-count",
759  po::value<size_t>(&g_logs_system_tables_max_files_count)
760  ->default_value(g_logs_system_tables_max_files_count),
761  "Maximum number of log files that will be processed by each logs system table.");
762 #ifdef ENABLE_MEMKIND
763  desc.add_options()("enable-tiered-cpu-mem",
764  po::value<bool>(&g_enable_tiered_cpu_mem)
765  ->default_value(g_enable_tiered_cpu_mem)
766  ->implicit_value(true),
767  "Enable additional tiers of CPU memory (PMEM, etc...)");
768  desc.add_options()("pmem-size", po::value<size_t>(&g_pmem_size)->default_value(0));
769  desc.add_options()("pmem-path", po::value<std::string>(&g_pmem_path));
770 #endif
771 
772  desc.add(log_options_.get_options());
773 }
774 
776  po::options_description& desc = developer_desc_;
777 
778  desc.add_options()("dev-options", "Print internal developer options.");
779  desc.add_options()(
780  "enable-calcite-view-optimize",
781  po::value<bool>(&system_parameters.enable_calcite_view_optimize)
782  ->default_value(system_parameters.enable_calcite_view_optimize)
783  ->implicit_value(true),
784  "Enable additional calcite (query plan) optimizations when a view is part of the "
785  "query.");
786  desc.add_options()("enable-columnar-output",
787  po::value<bool>(&g_enable_columnar_output)
788  ->default_value(g_enable_columnar_output)
789  ->implicit_value(true),
790  "Enable columnar output for intermediate/final query steps.");
791  desc.add_options()("enable-left-join-filter-hoisting",
792  po::value<bool>(&g_enable_left_join_filter_hoisting)
793  ->default_value(g_enable_left_join_filter_hoisting)
794  ->implicit_value(true),
795  "Enable hoisting left hand side filters through left joins.");
796  desc.add_options()("optimize-row-init",
797  po::value<bool>(&g_optimize_row_initialization)
798  ->default_value(g_optimize_row_initialization)
799  ->implicit_value(true),
800  "Optimize row initialization.");
801  desc.add_options()("enable-legacy-syntax",
802  po::value<bool>(&enable_legacy_syntax)
803  ->default_value(enable_legacy_syntax)
804  ->implicit_value(true),
805  "Enable legacy syntax.");
806  desc.add_options()(
807  "enable-multifrag",
808  po::value<bool>(&allow_multifrag)
809  ->default_value(allow_multifrag)
810  ->implicit_value(true),
811  "Enable execution over multiple fragments in a single round-trip to GPU.");
812  desc.add_options()("enable-lazy-fetch",
813  po::value<bool>(&g_enable_lazy_fetch)
814  ->default_value(g_enable_lazy_fetch)
815  ->implicit_value(true),
816  "Enable lazy fetch columns in query results.");
817  desc.add_options()("enable-shared-mem-group-by",
818  po::value<bool>(&g_enable_smem_group_by)
819  ->default_value(g_enable_smem_group_by)
820  ->implicit_value(true),
821  "Enable using GPU shared memory for some GROUP BY queries.");
822  desc.add_options()(
823  "use-cpu-mem-pool-for-output-buffers",
824  po::value<bool>(&g_use_cpu_mem_pool_for_output_buffers)
826  ->implicit_value(true),
827  "Use the CPU memory buffer pool (whose capacity is determined by the "
828  "cpu-buffer-mem-bytes configuration parameter) for output buffer allocations. "
829  "When this configuration parameter is set to false, output (e.g. result set) "
830  "buffer allocations will use heap memory outside the cpu-buffer-mem-bytes based "
831  "memory buffer pool.");
832  desc.add_options()("num-executors",
833  po::value<int>(&system_parameters.num_executors)
834  ->default_value(system_parameters.num_executors),
835  "Number of executors to run in parallel.");
836  desc.add_options()(
837  "num-tuple-threshold-switch-to-baseline",
838  po::value<size_t>(&g_num_tuple_threshold_switch_to_baseline)
840  ->implicit_value(100000),
841  "Control a threshold to switch perfect hash join to baseline hash join by "
842  "comparing a hash entry range of the join column to the input table cardinality."
843  "This condition checks the following: |INPUT_TABLE| < {THIS_THRESHOLD}"
844  "We switch hash table layout when this condition and the condition related to "
845  "\'col-range-to-num-hash-entries-threshold-switch-to-baseline\' are satisfied "
846  "together.");
847  desc.add_options()(
848  "ratio-num-hash-entry-to-num-tuple-switch-to-baseline",
851  ->implicit_value(100),
852  "Control a threshold to switch perfect hash join to baseline hash join by "
853  "comparing a hash entry range of the join column to the input table cardinality."
854  "This condition checks the following: HASH_ENTRY_RANGE / |INPUT_TABLE| < "
855  "{THIS_THRESHOLD}"
856  "We switch hash table layout when this condition and the condition related to "
857  "\'num-tuple-threshold-switch-to-baseline\' are satisfied together.");
858  desc.add_options()(
859  "gpu-shared-mem-threshold",
860  po::value<size_t>(&g_gpu_smem_threshold)->default_value(g_gpu_smem_threshold),
861  "GPU shared memory threshold (in bytes). If query requires larger buffers than "
862  "this threshold, we disable those optimizations. 0 (default) means no static cap.");
863  desc.add_options()(
864  "enable-shared-mem-grouped-non-count-agg",
865  po::value<bool>(&g_enable_smem_grouped_non_count_agg)
866  ->default_value(g_enable_smem_grouped_non_count_agg)
867  ->implicit_value(true),
868  "Enable using GPU shared memory for grouped non-count aggregate queries.");
869  desc.add_options()("enable-shared-mem-non-grouped-agg",
870  po::value<bool>(&g_enable_smem_non_grouped_agg)
871  ->default_value(g_enable_smem_non_grouped_agg)
872  ->implicit_value(true),
873  "Enable using GPU shared memory for non-grouped aggregate queries.");
874  desc.add_options()("enable-direct-columnarization",
875  po::value<bool>(&g_enable_direct_columnarization)
876  ->default_value(g_enable_direct_columnarization)
877  ->implicit_value(true),
878  "Enables/disables a more optimized columnarization method "
879  "for intermediate steps in multi-step queries.");
880  desc.add_options()(
881  "offset-device-by-table-id",
882  po::value<bool>(&g_use_table_device_offset)
883  ->default_value(g_use_table_device_offset)
884  ->implicit_value(true),
885  "Enables/disables offseting the chosen device ID by the table ID for a given "
886  "fragment. This improves balance of fragments across GPUs.");
887  desc.add_options()("enable-window-functions",
888  po::value<bool>(&g_enable_window_functions)
889  ->default_value(g_enable_window_functions)
890  ->implicit_value(true),
891  "Enable window function support.");
892  desc.add_options()("enable-parallel-window-partition-compute",
895  ->implicit_value(true),
896  "Enable parallel window function partition computation.");
897  desc.add_options()("enable-parallel-window-partition-sort",
900  ->implicit_value(true),
901  "Enable parallel window function partition sorting.");
902  desc.add_options()(
903  "window-function-frame-aggregation-tree-fanout",
904  po::value<size_t>(&g_window_function_aggregation_tree_fanout)->default_value(8),
905  "A tree fanout for aggregation tree used to compute aggregation over "
906  "window frame");
907  desc.add_options()("enable-dev-table-functions",
908  po::value<bool>(&g_enable_dev_table_functions)
909  ->default_value(g_enable_dev_table_functions)
910  ->implicit_value(true),
911  "Enable dev (test or alpha) table functions. Also "
912  "requires --enable-table-functions to be turned on");
913 
914  desc.add_options()("enable-geo-ops-on-uncompressed-coords",
917  ->implicit_value(true),
918  "Enable faster geo operations on uncompressed coords");
919  desc.add_options()(
920  "jit-debug-ir",
921  po::value<bool>(&jit_debug)->default_value(jit_debug)->implicit_value(true),
922  "Enable runtime debugger support for the JIT. Note that this flag is "
923  "incompatible "
924  "with the `ENABLE_JIT_DEBUG` build flag. The generated code can be found at "
925  "`/tmp/mapdquery`.");
926  desc.add_options()(
927  "intel-jit-profile",
928  po::value<bool>(&intel_jit_profile)
929  ->default_value(intel_jit_profile)
930  ->implicit_value(true),
931  "Enable runtime support for the JIT code profiling using Intel VTune.");
932  desc.add_options()(
933  "enable-cpu-sub-tasks",
934  po::value<bool>(&g_enable_cpu_sub_tasks)
935  ->default_value(g_enable_cpu_sub_tasks)
936  ->implicit_value(true),
937  "Enable parallel processing of a single data fragment on CPU. This can improve CPU "
938  "load balance and decrease reduction overhead.");
939  desc.add_options()(
940  "cpu-sub-task-size",
941  po::value<size_t>(&g_cpu_sub_task_size)->default_value(g_cpu_sub_task_size),
942  "Set CPU sub-task size in rows.");
943  desc.add_options()(
944  "cpu-threads",
945  po::value<unsigned>(&g_cpu_threads_override)->default_value(g_cpu_threads_override),
946  "Set max CPU concurrent threads. Values <= 0 will use default of 2X the number of "
947  "hardware threads.");
948  desc.add_options()(
949  "skip-intermediate-count",
950  po::value<bool>(&g_skip_intermediate_count)
951  ->default_value(g_skip_intermediate_count)
952  ->implicit_value(true),
953  "Skip pre-flight counts for intermediate projections with no filters.");
954  desc.add_options()("strip-join-covered-quals",
955  po::value<bool>(&g_strip_join_covered_quals)
956  ->default_value(g_strip_join_covered_quals)
957  ->implicit_value(true),
958  "Remove quals from the filtered count if they are covered by a "
959  "join condition (currently only ST_Contains).");
960 
961  desc.add_options()("min-cpu-slab-size",
962  po::value<size_t>(&system_parameters.min_cpu_slab_size)
963  ->default_value(system_parameters.min_cpu_slab_size),
964  "Min slab size (size of memory allocations) for CPU buffer pool.");
965  desc.add_options()(
966  "max-cpu-slab-size",
967  po::value<size_t>(&system_parameters.max_cpu_slab_size)
968  ->default_value(system_parameters.max_cpu_slab_size),
969  "Max CPU buffer pool slab size (size of memory allocations). Note if "
970  "there is not enough free memory to accomodate the target slab size, smaller "
971  "slabs will be allocated, down to the minimum size specified by "
972  "min-cpu-slab-size.");
973  desc.add_options()("default-cpu-slab-size",
974  po::value<size_t>(&system_parameters.default_cpu_slab_size)
975  ->default_value(system_parameters.default_cpu_slab_size),
976  "Default CPU buffer pool slab size (size of memory allocations). "
977  "Note that allocations above this size are allowed up to the size "
978  "specified by max-cpu-slab-size.");
979 
980  desc.add_options()("min-gpu-slab-size",
981  po::value<size_t>(&system_parameters.min_gpu_slab_size)
982  ->default_value(system_parameters.min_gpu_slab_size),
983  "Min slab size (size of memory allocations) for GPU buffer pools.");
984  desc.add_options()(
985  "max-gpu-slab-size",
986  po::value<size_t>(&system_parameters.max_gpu_slab_size)
987  ->default_value(system_parameters.max_gpu_slab_size),
988  "Max GPU buffer pool slab size (size of memory allocations). Note if "
989  "there is not enough free memory to accomodate the target slab size, smaller "
990  "slabs will be allocated, down to the minimum size speified by "
991  "min-gpu-slab-size.");
992  desc.add_options()("default-gpu-slab-size",
993  po::value<size_t>(&system_parameters.default_gpu_slab_size)
994  ->default_value(system_parameters.default_gpu_slab_size),
995  "Default GPU buffer pool slab size (size of memory allocations). "
996  "Note that allocations above this size are allowed up to the size "
997  "specified by max-gpu-slab-size.");
998 
999  desc.add_options()(
1000  "max-output-projection-allocation-bytes",
1001  po::value<size_t>(&g_max_memory_allocation_size)
1002  ->default_value(g_max_memory_allocation_size),
1003  "Maximum allocation size for a fixed output buffer allocation for projection "
1004  "queries with no pre-flight count. Default is the maximum slab size (sizes "
1005  "greater "
1006  "than the maximum slab size have no affect). Requires bump allocator.");
1007  desc.add_options()(
1008  "min-output-projection-allocation-bytes",
1009  po::value<size_t>(&g_min_memory_allocation_size)
1010  ->default_value(g_min_memory_allocation_size),
1011  "Minimum allocation size for a fixed output buffer allocation for projection "
1012  "queries with no pre-flight count. If an allocation of this size cannot be "
1013  "obtained, the query will be retried with different execution parameters and/or "
1014  "on "
1015  "CPU (if allow-cpu-retry is enabled). Requires bump allocator.");
1016  desc.add_options()("enable-bump-allocator",
1017  po::value<bool>(&g_enable_bump_allocator)
1018  ->default_value(g_enable_bump_allocator)
1019  ->implicit_value(true),
1020  "Enable the bump allocator for projection queries on "
1021  "GPU. The bump allocator will "
1022  "allocate a fixed size buffer for each query, track the "
1023  "number of rows passing the "
1024  "kernel during query execution, and copy back only the "
1025  "rows that passed the kernel "
1026  "to CPU after execution. When disabled, pre-flight "
1027  "count queries are used to size "
1028  "the output buffer for projection queries.");
1029  desc.add_options()(
1030  "code-cache-eviction-percent",
1031  po::value<float>(&g_fraction_code_cache_to_evict)
1032  ->default_value(g_fraction_code_cache_to_evict),
1033  "Percentage of the GPU code cache to evict if an out of memory error is "
1034  "encountered while attempting to place generated code on the GPU.");
1035 
1036  desc.add_options()("ssl-cert",
1037  po::value<std::string>(&system_parameters.ssl_cert_file)
1038  ->default_value(std::string("")),
1039  "SSL Validated public certficate.");
1040 
1041  desc.add_options()(
1042  "gpu-code-cache-max-size-in-bytes",
1043  po::value<size_t>(&g_gpu_code_cache_max_size_in_bytes)
1044  ->default_value(g_gpu_code_cache_max_size_in_bytes),
1045  "The maximum size of cached compiled codes for the gpu code cache in bytes.");
1046 
1047  desc.add_options()("ssl-private-key",
1048  po::value<std::string>(&system_parameters.ssl_key_file)
1049  ->default_value(std::string("")),
1050  "SSL private key file.");
1051  // Note ssl_trust_store is passed through to Calcite via system_parameters
1052  // todo(jack): add ensure ssl-trust-store exists if cert and private key in use
1053  desc.add_options()("ssl-trust-store",
1054  po::value<std::string>(&system_parameters.ssl_trust_store)
1055  ->default_value(std::string("")),
1056  "SSL public CA certifcates (java trust store) to validate "
1057  "TLS connections (passed through to the Calcite server).");
1058 
1059  desc.add_options()(
1060  "ssl-trust-password",
1061  po::value<std::string>(&system_parameters.ssl_trust_password)
1062  ->default_value(std::string("")),
1063  "SSL password for java trust store provided via --ssl-trust-store parameter.");
1064 
1065  desc.add_options()(
1066  "ssl-trust-ca",
1067  po::value<std::string>(&system_parameters.ssl_trust_ca_file)
1068  ->default_value(std::string("")),
1069  "SSL public CA certificates to validate TLS connection(as a client).");
1070 
1071  desc.add_options()(
1072  "ssl-trust-ca-server",
1073  po::value<std::string>(&authMetadata.ca_file_name)->default_value(std::string("")),
1074  "SSL public CA certificates to validate TLS connection(as a server).");
1075 
1076  desc.add_options()("ssl-keystore",
1077  po::value<std::string>(&system_parameters.ssl_keystore)
1078  ->default_value(std::string("")),
1079  "SSL server credentials as a java key store (passed "
1080  "through to the Calcite server).");
1081 
1082  desc.add_options()("ssl-keystore-password",
1083  po::value<std::string>(&system_parameters.ssl_keystore_password)
1084  ->default_value(std::string("")),
1085  "SSL password for java keystore, provide by via --ssl-keystore.");
1086 
1087  desc.add_options()(
1088  "udf",
1089  po::value<std::string>(&udf_file_name),
1090  "Load user defined extension functions from this file at startup. The file is "
1091  "expected to be a C/C++ file with extension .cpp.");
1092 
1093  desc.add_options()("udf-compiler-path",
1094  po::value<std::string>(&udf_compiler_path),
1095  "Provide absolute path to clang++ used in udf compilation.");
1096 
1097  desc.add_options()("udf-compiler-options",
1098  po::value<std::vector<std::string>>(&udf_compiler_options),
1099  "Specify compiler options to tailor udf compilation.");
1100 
1101 #ifdef ENABLE_GEOS
1102  desc.add_options()("libgeos-so-filename",
1103  po::value<std::string>(&libgeos_so_filename),
1104  "Specify libgeos shared object filename to be used for "
1105  "geos-backed geo opertations.");
1106 #endif
1107  desc.add_options()(
1108  "large-ndv-threshold",
1109  po::value<int64_t>(&g_large_ndv_threshold)->default_value(g_large_ndv_threshold));
1110  desc.add_options()(
1111  "large-ndv-multiplier",
1112  po::value<size_t>(&g_large_ndv_multiplier)->default_value(g_large_ndv_multiplier));
1113  desc.add_options()("approx_quantile_buffer",
1114  po::value<size_t>(&g_approx_quantile_buffer)
1115  ->default_value(g_approx_quantile_buffer));
1116  desc.add_options()("approx_quantile_centroids",
1117  po::value<size_t>(&g_approx_quantile_centroids)
1118  ->default_value(g_approx_quantile_centroids));
1119  desc.add_options()(
1120  "bitmap-memory-limit",
1121  po::value<int64_t>(&g_bitmap_memory_limit)->default_value(g_bitmap_memory_limit),
1122  "Limit for count distinct bitmap memory use. The limit is computed by taking the "
1123  "size of the group by buffer (entry count in Query Memory Descriptor) and "
1124  "multiplying it by the number of count distinct expression and the size of bitmap "
1125  "required for each. For approx_count_distinct this is typically 8192 bytes.");
1126  desc.add_options()(
1127  "enable-filter-function",
1128  po::value<bool>(&g_enable_filter_function)
1129  ->default_value(g_enable_filter_function)
1130  ->implicit_value(true),
1131  "Enable the filter function protection feature for the SQL JIT compiler. "
1132  "Normally should be on but techs might want to disable for troubleshooting.");
1133  desc.add_options()(
1134  "enable-idp-temporary-users",
1135  po::value<bool>(&g_enable_idp_temporary_users)
1136  ->default_value(g_enable_idp_temporary_users)
1137  ->implicit_value(true),
1138  "Enable temporary users for SAML and LDAP logins on read-only servers. "
1139  "Normally should be on but techs might want to disable for troubleshooting.");
1140  desc.add_options()("enable-foreign-table-scheduled-refresh",
1143  ->implicit_value(true),
1144  "Enable scheduled foreign table refresh.");
1145  desc.add_options()(
1146  "enable-seconds-refresh-interval",
1147  po::value<bool>(&g_enable_seconds_refresh)
1148  ->default_value(g_enable_seconds_refresh)
1149  ->implicit_value(true),
1150  "Enable foreign table seconds refresh interval for testing purposes.");
1151  desc.add_options()("enable-auto-metadata-update",
1152  po::value<bool>(&g_enable_auto_metadata_update)
1153  ->default_value(g_enable_auto_metadata_update)
1154  ->implicit_value(true),
1155  "Enable automatic metadata update.");
1156  desc.add_options()(
1157  "parallel-top-min",
1158  po::value<size_t>(&g_parallel_top_min)->default_value(g_parallel_top_min),
1159  "For ResultSets requiring a heap sort, the number of rows necessary to trigger "
1160  "parallelTop() to sort.");
1161  desc.add_options()(
1162  "parallel-top-max",
1163  po::value<size_t>(&g_parallel_top_max)->default_value(g_parallel_top_max),
1164  "For ResultSets requiring a heap sort, the maximum number of rows allowed by "
1165  "watchdog.");
1166  desc.add_options()(
1167  "streaming-top-n-max",
1168  po::value<size_t>(&g_streaming_topn_max)->default_value(g_streaming_topn_max),
1169  "The maximum number of rows allowing streaming top-N sorting.");
1170  desc.add_options()("vacuum-min-selectivity",
1171  po::value<float>(&g_vacuum_min_selectivity)
1172  ->default_value(g_vacuum_min_selectivity),
1173  "Minimum selectivity for automatic vacuuming. "
1174  "This specifies the percentage (with a value of 0 "
1175  "implying 0% and a value of 1 implying 100%) of "
1176  "deleted rows in a fragment at which to perform "
1177  "automatic vacuuming. A number greater than 1 can "
1178  "be used to disable automatic vacuuming.");
1179  desc.add_options()("enable-automatic-ir-metadata",
1180  po::value<bool>(&g_enable_automatic_ir_metadata)
1181  ->default_value(g_enable_automatic_ir_metadata)
1182  ->implicit_value(true),
1183  "Enable automatic IR metadata (debug builds only).");
1184  desc.add_options()(
1185  "max-log-length",
1186  po::value<size_t>(&g_max_log_length)->default_value(g_max_log_length),
1187  "The maximum number of characters that a log message can has. If the log message "
1188  "is longer than this, we only record \'g_max_log_message_length\' characters.");
1189  desc.add_options()(
1190  "estimator-failure-max-groupby-size",
1191  po::value<size_t>(&g_estimator_failure_max_groupby_size)
1192  ->default_value(g_estimator_failure_max_groupby_size),
1193  "Maximum size of the groupby buffer if the estimator fails. By default we use the "
1194  "number of tuples in the table up to this value.");
1195  desc.add_options()("ndv-group-estimator-multiplier",
1196  po::value<double>(&g_ndv_groups_estimator_multiplier)
1197  ->default_value(g_ndv_groups_estimator_multiplier),
1198  "A non-negative threshold to control the result of ndv group "
1199  "estimator (default: 2.0). The value must be between 1.0 and 2.0");
1200 
1201  desc.add_options()("columnar-large-projections",
1202  po::value<bool>(&g_columnar_large_projections)
1203  ->default_value(g_columnar_large_projections)
1204  ->implicit_value(true),
1205  "Prefer columnar output if projection size is >= "
1206  "threshold set by --columnar-large-projections-threshold "
1207  "(default 1,000,000 rows).");
1208  desc.add_options()(
1209  "columnar-large-projections-threshold",
1210  po::value<size_t>(&g_columnar_large_projections_threshold)
1211  ->default_value(g_columnar_large_projections_threshold),
1212  "Threshold (in minimum number of rows) to prefer columnar output for projections. "
1213  "Requires --columnar-large-projections to be set.");
1214 
1215  desc.add_options()(
1216  "allow-memory-status-log",
1217  po::value<bool>(&g_allow_memory_status_log)
1218  ->default_value(g_allow_memory_status_log),
1219  "Allow CPU (and GPU if necessary) memory status before/after the query execution.");
1220 
1221  desc.add_options()(
1222  "allow-query-step-cpu-retry",
1223  po::value<bool>(&g_allow_query_step_cpu_retry)
1224  ->default_value(g_allow_query_step_cpu_retry)
1225  ->implicit_value(true),
1226  R"(Allow certain query steps to retry on CPU, even when allow-cpu-retry is disabled)");
1227  desc.add_options()("enable-http-binary-server",
1228  po::value<bool>(&g_enable_http_binary_server)
1229  ->default_value(g_enable_http_binary_server)
1230  ->implicit_value(true),
1231  "Enable binary over HTTP Thrift server");
1232 
1233  desc.add_options()("enable-query-engine-cuda-streams",
1234  po::value<bool>(&g_query_engine_cuda_streams)
1235  ->default_value(g_query_engine_cuda_streams)
1236  ->implicit_value(true),
1237  "Enable Query Engine CUDA streams");
1238 
1239  desc.add_options()(
1240  "allow-invalid-literal-buffer-reads",
1241  po::value<bool>(&g_allow_invalid_literal_buffer_reads)
1242  ->default_value(g_allow_invalid_literal_buffer_reads)
1243  ->implicit_value(true),
1244  "For backwards compatibility. Enabling may cause invalid query results.");
1245 
1246 #ifdef HAVE_TORCH_TFS
1247  desc.add_options()("torch-lib-path",
1248  po::value<std::string>(&torch_lib_path),
1249  "Absolute path to custom LibTorch shared library location to be "
1250  "loaded at runtime. (If not provided, the library will be searched "
1251  "for in the system's default library path.)");
1252 #endif
1253 }
1254 
1255 namespace {
1256 
/**
 * Copies a config file into an in-memory stream, stopping at the first "[web]" or
 * "[iq]" section header so that boost can validate the remaining program options
 * without seeing the entries that belong to those sections.
 *
 * The matched header line itself is still copied (it is written before the loop
 * stops), and every copied line is written back unchanged followed by "\n".
 *
 * A header is recognized after ignoring surrounding spaces, tabs and carriage
 * returns, so an indented header or a file with CRLF line endings is handled the
 * same way as an exact "[web]" / "[iq]" line.
 *
 * @param in Config file stream; it is consumed from its current position.
 * @return Stream holding every line up to and including the first matched header,
 *         or all remaining lines when no such header is present.
 */
std::stringstream sanitize_config_file(std::ifstream& in) {
  // Characters that may surround a section header without changing its meaning.
  constexpr char kPadding[] = " \t\r";

  std::stringstream ss;
  std::string line;
  while (std::getline(in, line)) {
    // The line is always forwarded verbatim; trimming is used for matching only.
    ss << line << "\n";

    const auto first = line.find_first_not_of(kPadding);
    if (first == std::string::npos) {
      continue;  // Blank (or padding-only) line can not be a section header.
    }
    const auto last = line.find_last_not_of(kPadding);
    const auto length = last - first + 1;
    if (line.compare(first, length, "[web]") == 0 ||
        line.compare(first, length, "[iq]") == 0) {
      break;
    }
  }
  return ss;
}
1269 
1270 bool trim_and_check_file_exists(std::string& filename, const std::string desc) {
1271  if (!filename.empty()) {
1272  boost::algorithm::trim_if(filename, boost::is_any_of("\"'"));
1273  if (!boost::filesystem::exists(filename)) {
1274  std::cerr << desc << " " << filename << " does not exist." << std::endl;
1275  return false;
1276  }
1277  }
1278  return true;
1279 }
1280 
1282  if (!filename.empty()) {
1284  }
1285 }
1286 
1287 } // namespace
1288 
1290  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1291  if (!boost::filesystem::exists(base_path)) {
1292  throw std::runtime_error("HeavyDB base directory does not exist at " + base_path);
1293  }
1294 }
1295 
1297  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1298  const auto data_path = boost::filesystem::path(base_path) / shared::kDataDirectoryName;
1299  if (!boost::filesystem::exists(data_path)) {
1300  throw std::runtime_error("HeavyDB data directory does not exist at '" + base_path +
1301  "'");
1302  }
1303 
1304 // TODO: support lock on Windows
1305 #ifndef _WIN32
1306  {
1307  // If we aren't sharing the data directory, take and hold a write lock on
1308  // heavydb_pid.lck to prevent other processes from trying to share our dir.
1309  // TODO(sy): Probably need to get rid of this PID file because it doesn't make much
1310  // sense to store only one server's PID when we have the --multi-instance option.
1311  auto exe_filename = boost::filesystem::path(exe_name).filename().string();
1312  const std::string lock_file =
1313  (boost::filesystem::path(base_path) / std::string(exe_filename + "_pid.lck"))
1314  .string();
1315  auto pid = std::to_string(getpid());
1316  if (!g_multi_instance) {
1317  VLOG(1) << "taking [" << lock_file << "] read+write lock until process exit";
1318  } else {
1319  VLOG(1) << "taking [" << lock_file << "] read-only lock until process exit";
1320  }
1321 
1322  int fd;
1323  fd = heavyai::safe_open(lock_file.c_str(), O_RDWR | O_CREAT, 0664);
1324  if (fd == -1) {
1325  throw std::runtime_error("failed to open lockfile: " + lock_file + ": " +
1326  std::string(strerror(errno)) + " (" +
1327  std::to_string(errno) + ")");
1328  }
1329 
1330  struct flock fl;
1331  memset(&fl, 0, sizeof(fl));
1332  fl.l_type = !g_multi_instance ? F_WRLCK : F_RDLCK;
1333  fl.l_whence = SEEK_SET;
1334  int cmd;
1335 #ifdef __linux__
1336  // cmd = F_OFD_SETLK; // TODO(sy): broken on centos
1337  cmd = F_SETLK;
1338 #else
1339  cmd = F_SETLK;
1340 #endif // __linux__
1341  int ret = heavyai::safe_fcntl(fd, cmd, &fl);
1342  if (ret == -1 && (errno == EACCES || errno == EAGAIN)) { // locked by someone else
1343  heavyai::safe_close(fd);
1344  throw std::runtime_error(
1345  "another HeavyDB server instance is already using data directory: " +
1346  base_path);
1347  } else if (ret == -1) {
1348  auto errno0 = errno;
1349  heavyai::safe_close(fd);
1350  throw std::runtime_error("failed to lock lockfile: " + lock_file + ": " +
1351  std::string(strerror(errno0)) + " (" +
1352  std::to_string(errno0) + ")");
1353  }
1354 
1355  if (!g_multi_instance) {
1356  if (heavyai::ftruncate(fd, 0) == -1) {
1357  auto errno0 = errno;
1358  heavyai::safe_close(fd);
1359  throw std::runtime_error("failed to truncate lockfile: " + lock_file + ": " +
1360  std::string(strerror(errno0)) + " (" +
1361  std::to_string(errno0) + ")");
1362  }
1363  if (heavyai::safe_write(fd, pid.c_str(), pid.length()) == -1) {
1364  auto errno0 = errno;
1365  heavyai::safe_close(fd);
1366  throw std::runtime_error("failed to write lockfile: " + lock_file + ": " +
1367  std::string(strerror(errno0)) + " (" +
1368  std::to_string(errno0) + ")");
1369  }
1370  }
1371 
1372  // Intentionally leak the file descriptor. Lock will be held until process exit.
1373  }
1374 #endif // _WIN32
1375 
1376  boost::algorithm::trim_if(db_query_file, boost::is_any_of("\"'"));
1377  if (db_query_file.length() > 0 && !boost::filesystem::exists(db_query_file)) {
1378  throw std::runtime_error("File containing DB queries " + db_query_file +
1379  " does not exist.");
1380  }
1381  const auto db_file = boost::filesystem::path(base_path) /
1383  if (!boost::filesystem::exists(db_file)) {
1384  { // check old system catalog existsense
1385  const auto db_file =
1386  boost::filesystem::path(base_path) / shared::kCatalogDirectoryName / "mapd";
1387  if (!boost::filesystem::exists(db_file)) {
1388  throw std::runtime_error("System catalog " + shared::kSystemCatalogName +
1389  " does not exist.");
1390  }
1391  }
1392  }
1393  if (license_path.length() == 0) {
1394  license_path = base_path + "/" + shared::kDefaultLicenseFileName;
1395  }
1396 
1397  // add all parameters to be displayed on startup
1398  LOG(INFO) << "HeavyDB started with data directory at '" << base_path << "'";
1399  if (vm.count("license-path")) {
1400  LOG(INFO) << "License key path set to '" << license_path << "'";
1401  }
1402  g_read_only = read_only;
1403  LOG(INFO) << " Server read-only mode is " << read_only << " (--read-only)";
1404  if (g_multi_instance) {
1405  LOG(INFO) << " Multiple servers per --data directory is " << g_multi_instance
1406  << " (--multi-instance)";
1407  }
1408  if (g_read_only && g_multi_instance) {
1409  throw std::runtime_error(
1410  "You may not use the --read-only and --multi-instance configuration flags "
1411  "simultaneously.");
1412  }
1414  LOG(WARNING) << " Allowing invalid reads from the literal buffer. May cause invalid "
1415  "query results! (--allow-invalid-literal-buffer-reads)";
1416  }
1417 #if DISABLE_CONCURRENCY
1418  LOG(INFO) << " Threading layer: serial";
1419 #elif ENABLE_TBB
1420  LOG(INFO) << " Threading layer: TBB";
1421 #else
1422  LOG(INFO) << " Threading layer: std";
1423 #endif
1424  LOG(INFO) << " Watchdog is set to " << enable_watchdog;
1425  LOG(INFO) << " Dynamic Watchdog is set to " << enable_dynamic_watchdog;
1426  if (enable_dynamic_watchdog) {
1427  LOG(INFO) << " Dynamic Watchdog timeout is set to " << dynamic_watchdog_time_limit;
1428  }
1429  LOG(INFO) << " Runtime query interrupt is set to " << enable_runtime_query_interrupt;
1430  if (enable_runtime_query_interrupt) {
1431  LOG(INFO) << " A frequency of checking pending query interrupt request is set to "
1432  << pending_query_interrupt_freq << " (in ms.)";
1433  LOG(INFO) << " A frequency of checking running query interrupt request is set to "
1434  << running_query_interrupt_freq << " (0.0 ~ 1.0)";
1435  }
1436  LOG(INFO) << " Non-kernel time query interrupt is set to "
1437  << enable_non_kernel_time_query_interrupt;
1438 
1439  LOG(INFO) << " Debug Timer is set to " << g_enable_debug_timer;
1440  LOG(INFO) << " LogUserId is set to " << Catalog_Namespace::g_log_user_id;
1441  LOG(INFO) << " Maximum idle session duration " << idle_session_duration;
1442  LOG(INFO) << " Maximum active session duration " << max_session_duration;
1443  LOG(INFO) << " Maximum number of sessions " << system_parameters.num_sessions;
1444 
1445  LOG(INFO) << "Legacy delimited import is set to " << g_enable_legacy_delimited_import;
1446 #ifdef ENABLE_IMPORT_PARQUET
1447  LOG(INFO) << "Legacy parquet import is set to " << g_enable_legacy_parquet_import;
1448 #endif
1449  LOG(INFO) << "FSI regex parsed import is set to " << g_enable_fsi_regex_import;
1450 
1451  LOG(INFO) << "Allowed import paths is set to " << allowed_import_paths;
1452  LOG(INFO) << "Allowed export paths is set to " << allowed_export_paths;
1454  base_path, allowed_import_paths, allowed_export_paths);
1455 
1458  ddl_utils::FilePathBlacklist::addToBlacklist(base_path + "/temporary/" +
1465  g_enable_s3_fsi = false;
1466 
1468 #ifdef ENABLE_IMPORT_PARQUET
1469  !g_enable_legacy_parquet_import ||
1470 #endif
1472  g_enable_fsi =
1473  true; // a requirement for FSI import code-paths is for FSI to be enabled
1474  LOG(INFO) << "FSI has been enabled as a side effect of enabling non-legacy import.";
1475  }
1476 
1477  const bool executor_resource_mgr_cpu_result_mem_ratio_flag_set =
1478  vm["executor-cpu-result-mem-ratio"].defaulted() ? false : true;
1479  const bool executor_resource_mgr_cpu_result_mem_bytes_flag_set =
1480  vm["executor-cpu-result-mem-bytes"].defaulted() ? false : true;
1481  const bool executor_resource_mgr_per_query_max_cpu_thread_ratio_flag_set =
1482  vm["executor-per-query-max-cpu-threads-ratio"].defaulted() ? false : true;
1483  const bool executor_resource_mgr_per_query_max_cpu_result_mem_ratio_flag_set =
1484  vm["executor-per-query-max-cpu-result-mem-ratio"].defaulted() ? false : true;
1485  const bool executor_resource_mgr_cpu_kernel_concurrency_flag_set =
1486  vm["allow-cpu-kernel-concurrency"].defaulted() ? false : true;
1487  const bool executor_resource_mgr_cpu_gpu_kernel_concurrency_flag_set =
1488  vm["allow-cpu-gpu-kernel-concurrency"].defaulted() ? false : true;
1489  const bool executor_resource_mgr_cpu_thread_oversubscription_concurrency_flag_set =
1490  vm["allow-cpu-thread-oversubscription-concurrency"].defaulted() ? false : true;
1491  const bool executor_resource_mgr_cpu_result_mem_oversubscription_concurrency_flag_set =
1492  vm["allow-cpu-result-mem-oversubscription-concurrency"].defaulted() ? false : true;
1493 
1495  if (executor_resource_mgr_cpu_result_mem_bytes_flag_set) {
1496  throw std::runtime_error(
1497  "Cannot set executor-cpu-result-mem-bytes without enable-executor-resource-mgr "
1498  "option enabled");
1499  }
1500  if (executor_resource_mgr_cpu_result_mem_ratio_flag_set) {
1501  throw std::runtime_error(
1502  "Cannot set executor-cpu-result-mem-ratio without enable-executor-resource-mgr "
1503  "option enabled");
1504  }
1505  if (executor_resource_mgr_per_query_max_cpu_thread_ratio_flag_set) {
1506  throw std::runtime_error(
1507  "Cannot set executor-per-query-max-cpu-slots-ratio without "
1508  "enable-executor-resource-mgr option enabled");
1509  }
1510  if (executor_resource_mgr_per_query_max_cpu_result_mem_ratio_flag_set) {
1511  throw std::runtime_error(
1512  "Cannot set executor-per-query-max-cpu-result-mem-ratio without "
1513  "enable-executor-resource-mgr option enabled");
1514  }
1515  if (executor_resource_mgr_cpu_kernel_concurrency_flag_set) {
1516  throw std::runtime_error(
1517  "Cannot set allow-cpu-kernel-concurrency without "
1518  "enable-executor-resource-mgr option enabled");
1519  }
1520  if (executor_resource_mgr_cpu_gpu_kernel_concurrency_flag_set) {
1521  throw std::runtime_error(
1522  "Cannot set allow-cpu-gpu-kernel-concurrency without "
1523  "enable-executor-resource-mgr option enabled");
1524  }
1525  if (executor_resource_mgr_cpu_thread_oversubscription_concurrency_flag_set) {
1526  throw std::runtime_error(
1527  "Cannot set allow-cpu-thread-oversubscription-concurrency without "
1528  "enable-executor-resource-mgr option enabled");
1529  }
1530  if (executor_resource_mgr_cpu_result_mem_oversubscription_concurrency_flag_set) {
1531  throw std::runtime_error(
1532  "Cannot set allow-cpu-thread-result-mem-concurrency without "
1533  "enable-executor-resource-mgr option enabled");
1534  }
1535  }
1536  if (executor_resource_mgr_cpu_result_mem_bytes_flag_set &&
1537  executor_resource_mgr_cpu_result_mem_ratio_flag_set) {
1538  throw std::runtime_error(
1539  "Setting both executor-cpu-result-mem-bytes and executor-cpu-result-mem-ratio is "
1540  "not allowed as the flags are mutually exclusive.");
1541  }
1545  throw std::runtime_error(
1546  "allow-cpu-thread-oversubscription-concurrency cannot be set without at least "
1547  "one of allow-cpu-kernel-concurrency or allow-cpu-gpu-kernel-concurrency being "
1548  "set.");
1549  }
1551  throw std::runtime_error(
1552  "allow-cpu-result-mem-oversubscription-concurrency cannot be set without at "
1553  "least one of allow-cpu-kernel-concurrency or allow-cpu-gpu-kernel-concurrency "
1554  "being set.");
1555  }
1556  }
1557 
1559  throw std::runtime_error(
1560  "Invalid value for executor-cpu-result-mem-ratio, must be greater than 0.");
1561  }
1563  throw std::runtime_error(
1564  "Invalid value for executor-per-query-max-cpu-slots-ratio, must be greater than "
1565  "0.");
1566  }
1568  throw std::runtime_error(
1569  "Invalid value for executor-per-query-max-cpu-result-mem-ratio, must be greater "
1570  "than "
1571  "0.");
1572  }
1575  throw std::runtime_error(
1576  "Invalid value for executor-max-available-resource-use-ratio, must be greater "
1577  "than "
1578  "0. and less than or equal to 1.0");
1579  }
1580 
1581 #ifndef HAVE_SYSTEM_TFS
1583  g_enable_table_functions = false;
1584  LOG(INFO) << "System table functions turned off due to HeavyDB being built without "
1585  "table function support.";
1586  }
1587 #endif // HAVE_SYSTEM_TFS
1589  g_enable_ml_functions = false;
1590  LOG(INFO) << "ML functions turned off due to `--enable-table-functions` being set to "
1591  "false. Please enable table functions to use ML functionality.";
1592  }
1593 
1594  if (disk_cache_level == "foreign_tables") {
1595  if (g_enable_fsi) {
1596  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::fsi;
1597  LOG(INFO) << "Disk cache enabled for foreign tables only";
1598  } else {
1599  LOG(INFO) << "Cannot enable disk cache for fsi when fsi is disabled. Defaulted to "
1600  "disk cache disabled";
1601  }
1602  } else if (disk_cache_level == "all") {
1603  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::all;
1604  LOG(INFO) << "Disk cache enabled for all tables";
1605  } else if (disk_cache_level == "local_tables") {
1606  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::non_fsi;
1607  LOG(INFO) << "Disk cache enabled for non-FSI tables";
1608  } else if (disk_cache_level == "none") {
1609  disk_cache_config.enabled_level = File_Namespace::DiskCacheLevel::none;
1610  LOG(INFO) << "Disk cache disabled";
1611  } else {
1612  throw std::runtime_error{
1613  "Unexpected \"disk-cache-level\" value: " + disk_cache_level +
1614  ". Valid options are 'foreign_tables', "
1615  "'local_tables', 'none', and 'all'."};
1616  }
1617 
1618  if (disk_cache_config.size_limit < File_Namespace::CachingFileMgr::getMinimumSize()) {
1619  throw std::runtime_error{"disk-cache-size must be at least " +
1621  }
1622 
1623  if (disk_cache_config.path.empty()) {
1624  disk_cache_config.path = base_path + "/" + shared::kDefaultDiskCacheDirName;
1625  }
1626  ddl_utils::FilePathBlacklist::addToBlacklist(disk_cache_config.path);
1627 
1630 
1631  // If passed in, blacklist all security config files
1632  addOptionalFileToBlacklist(license_path);
1633  addOptionalFileToBlacklist(system_parameters.ssl_cert_file);
1634  addOptionalFileToBlacklist(authMetadata.ca_file_name);
1635  addOptionalFileToBlacklist(system_parameters.ssl_trust_store);
1636  addOptionalFileToBlacklist(system_parameters.ssl_keystore);
1637  addOptionalFileToBlacklist(system_parameters.ssl_key_file);
1638  addOptionalFileToBlacklist(system_parameters.ssl_trust_ca_file);
1639  addOptionalFileToBlacklist(cluster_file);
1640 
1641  if (g_vacuum_min_selectivity < 0) {
1642  throw std::runtime_error{"vacuum-min-selectivity cannot be less than 0."};
1643  }
1644  LOG(INFO) << "Vacuum Min Selectivity: " << g_vacuum_min_selectivity;
1645 
1646  LOG(INFO) << "Enable system tables is set to " << g_enable_system_tables;
1647  if (g_enable_system_tables) {
1648  // System tables currently reuse FSI infrastructure and therefore, require FSI to be
1649  // enabled
1650  if (!g_enable_fsi) {
1651  g_enable_fsi = true;
1652  LOG(INFO) << "FSI has been enabled as a side effect of enabling system tables";
1653  }
1654  }
1655  LOG(INFO) << "Enable FSI is set to " << g_enable_fsi;
1656  LOG(INFO) << "Enable logs system tables set to " << g_enable_logs_system_tables;
1657 
1659  LOG(INFO) << "Enable logs system tables auto refresh set to "
1661  } else {
1663  LOG(INFO) << "Logs system tables auto refresh has been disabled as a side effect of "
1664  "disabling foreign table scheduled refresh";
1665  }
1666 
1667  static const boost::regex interval_regex{"^\\d{1,}[SHD]$",
1668  boost::regex::extended | boost::regex::icase};
1669  if (!boost::regex_match(g_logs_system_tables_refresh_interval, interval_regex)) {
1670  throw std::runtime_error{
1671  "Invalid interval value provided for the \"logs-system-tables-refresh-interval\" "
1672  "option. Interval should have the following format: nS, nH, or nD"};
1673  }
1674  LOG(INFO) << "Logs system tables refresh interval set to "
1676 
1678  throw std::runtime_error{
1679  "Invalid value provided for the \"logs-system-tables-max-files-count\" "
1680  "option. Value must be greater than 0."};
1681  }
1682  LOG(INFO) << "Maximum number of logs system table files set to "
1684 
1685 #ifdef ENABLE_MEMKIND
1686  if (g_enable_tiered_cpu_mem) {
1687  if (g_pmem_path == "") {
1688  throw std::runtime_error{"pmem-path must be set to use tiered cpu memory"};
1689  }
1690  if (g_pmem_size == 0) {
1691  throw std::runtime_error{"pmem-size must be set to use tiered cpu memory"};
1692  }
1693  if (!std::filesystem::exists(g_pmem_path.c_str())) {
1694  throw std::runtime_error{"path to PMem directory (" + g_pmem_path +
1695  ") does not exist."};
1696  }
1697  }
1698 #endif
1699 
1702  throw std::runtime_error(
1703  "Invalid value provided for the \"ndv-groups-estimator-correction\" option. "
1704  "Value must be between 1.0 and 2.0");
1705  }
1706 
1707  // Check for the g_use_cpu_mem_pool_size_for_max_cpu_slab_size flag, since DataMgr
1708  // ensures that min_cpu_slab_size cannot be greater than the buffer pool size.
1710  system_parameters.max_cpu_slab_size < system_parameters.min_cpu_slab_size) {
1711  throw std::runtime_error("max-cpu-slab-size (" +
1712  std::to_string(system_parameters.max_cpu_slab_size) +
1713  ") cannot be less than min-cpu-slab-size (" +
1714  std::to_string(system_parameters.min_cpu_slab_size) + ").");
1715  }
1716  if (system_parameters.default_cpu_slab_size < system_parameters.min_cpu_slab_size) {
1717  throw std::runtime_error("default-cpu-slab-size (" +
1718  std::to_string(system_parameters.default_cpu_slab_size) +
1719  ") cannot be less than min-cpu-slab-size (" +
1720  std::to_string(system_parameters.min_cpu_slab_size) + ").");
1721  }
1722  // Check for the g_use_cpu_mem_pool_size_for_max_cpu_slab_size flag, since DataMgr
1723  // ensures that default_cpu_slab_size cannot be greater than the buffer pool size.
1725  system_parameters.default_cpu_slab_size > system_parameters.max_cpu_slab_size) {
1726  throw std::runtime_error("default-cpu-slab-size (" +
1727  std::to_string(system_parameters.default_cpu_slab_size) +
1728  ") cannot be greater than max-cpu-slab-size (" +
1729  std::to_string(system_parameters.max_cpu_slab_size) + ").");
1730  }
1731  if (system_parameters.max_gpu_slab_size < system_parameters.min_gpu_slab_size) {
1732  throw std::runtime_error("max-gpu-slab-size (" +
1733  std::to_string(system_parameters.max_gpu_slab_size) +
1734  ") cannot be less than min-gpu-slab-size (" +
1735  std::to_string(system_parameters.min_gpu_slab_size) + ").");
1736  }
1737  if (system_parameters.default_gpu_slab_size < system_parameters.min_gpu_slab_size) {
1738  throw std::runtime_error("default-gpu-slab-size (" +
1739  std::to_string(system_parameters.default_gpu_slab_size) +
1740  ") cannot be less than min-gpu-slab-size (" +
1741  std::to_string(system_parameters.min_gpu_slab_size) + ").");
1742  }
1743  if (system_parameters.default_gpu_slab_size > system_parameters.max_gpu_slab_size) {
1744  throw std::runtime_error("default-gpu-slab-size (" +
1745  std::to_string(system_parameters.default_gpu_slab_size) +
1746  ") cannot be greater than max-gpu-slab-size (" +
1747  std::to_string(system_parameters.max_gpu_slab_size) + ").");
1748  }
1749 }
1750 
1752  const bool enable_runtime_udfs,
1753  const bool enable_udf_registration_for_all_users) {
1754  return enable_runtime_udfs
1755  ? (enable_udf_registration_for_all_users
1760 }
1761 
1763  int argc,
1764  char const* const* argv,
1765  const bool should_init_logging) {
1766  po::options_description all_desc("All options");
1767  all_desc.add(help_desc_).add(developer_desc_);
1768 
1769  try {
1770  po::store(po::command_line_parser(argc, argv)
1771  .options(all_desc)
1772  .positional(positional_options)
1773  .run(),
1774  vm);
1775  po::notify(vm);
1776 
1777  if (vm.count("help")) {
1778  std::cerr << "Usage: heavydb <data directory path> [-p <port number>] "
1779  "[--http-port <http port number>] [--flush-log] [--version|-v]"
1780  << std::endl
1781  << std::endl;
1782  std::cout << help_desc_ << std::endl;
1783  return 0;
1784  }
1785  if (vm.count("dev-options")) {
1786  std::cout << "Usage: heavydb <data directory path> [-p <port number>] "
1787  "[--http-port <http port number>] [--flush-log] [--version|-v]"
1788  << std::endl
1789  << std::endl;
1790  std::cout << developer_desc_ << std::endl;
1791  return 0;
1792  }
1793  if (vm.count("version")) {
1794  std::cout << "HeavyDB Version: " << MAPD_RELEASE << std::endl;
1795  return 0;
1796  }
1797 
1798  if (vm.count("config")) {
1799  std::ifstream settings_file(system_parameters.config_file);
1800 
1801  auto sanitized_settings = sanitize_config_file(settings_file);
1802 
1803  po::store(po::parse_config_file(sanitized_settings, all_desc, false), vm);
1804  po::notify(vm);
1805  settings_file.close();
1806  }
1807 
1808  if (!g_enable_union) {
1809  std::cerr
1810  << "The enable-union option is DEPRECATED and is now enabled by default. "
1811  "Please remove use of this option, as it may be disabled in the future."
1812  << std::endl;
1813  }
1814 
1815  // Trim base path before executing migration
1816  boost::algorithm::trim_if(base_path, boost::is_any_of("\"'"));
1817  if (!boost::filesystem::exists(base_path)) {
1818  std::cerr << "Storage folder (--data) not found: " << base_path << std::endl;
1819  std::cerr << "Need to run initheavy before heavydb." << std::endl;
1820  return 1;
1821  }
1822 
1823  // Execute rebrand migration before accessing any system files.
1824  std::string lockfiles_path = base_path + "/" + shared::kLockfilesDirectoryName;
1825  if (!boost::filesystem::exists(lockfiles_path)) {
1826  if (!boost::filesystem::create_directory(lockfiles_path)) {
1827  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName +
1828  " subdirectory under "
1829  << base_path << std::endl;
1830  return 1;
1831  }
1832  }
1833  std::string lockfiles_path2 = lockfiles_path + "/" + shared::kCatalogDirectoryName;
1834  if (!boost::filesystem::exists(lockfiles_path2)) {
1835  if (!boost::filesystem::create_directory(lockfiles_path2)) {
1836  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
1837  shared::kCatalogDirectoryName + " subdirectory under "
1838  << base_path << std::endl;
1839  return 1;
1840  }
1841  }
1842  std::string lockfiles_path3 = lockfiles_path + "/" + shared::kDataDirectoryName;
1843  if (!boost::filesystem::exists(lockfiles_path3)) {
1844  if (!boost::filesystem::create_directory(lockfiles_path3)) {
1845  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
1846  shared::kDataDirectoryName + " subdirectory under "
1847  << base_path << std::endl;
1848  return 1;
1849  }
1850  }
1854  }
1855 
1856  if (!vm["enable-runtime-udf"].defaulted()) {
1857  if (!vm["enable-runtime-udfs"].defaulted()) {
1858  std::cerr << "Usage Error: Both enable-runtime-udf and enable-runtime-udfs "
1859  "specified. Please remove use of the enable-runtime-udfs flag, "
1860  "as it will be deprecated in the future."
1861  << std::endl;
1862  return 1;
1863  } else {
1864  enable_runtime_udfs = enable_runtime_udf;
1865  std::cerr << "The enable-runtime-udf flag has been deprecated and replaced "
1866  "with enable-runtime-udfs. Please remove use of this option "
1867  "as it will be disabled in the future."
1868  << std::endl;
1869  }
1870  }
1871  system_parameters.runtime_udf_registration_policy =
1872  construct_runtime_udf_registration_policy(enable_runtime_udfs,
1873  enable_udf_registration_for_all_users);
1874 
1875  if (should_init_logging) {
1876  init_logging();
1877  }
1878 
1879  if (!trim_and_check_file_exists(system_parameters.ssl_cert_file, "ssl cert file")) {
1880  return 1;
1881  }
1882  if (!trim_and_check_file_exists(authMetadata.ca_file_name, "ca file name")) {
1883  return 1;
1884  }
1885  if (!trim_and_check_file_exists(system_parameters.ssl_trust_store,
1886  "ssl trust store")) {
1887  return 1;
1888  }
1889  if (!trim_and_check_file_exists(system_parameters.ssl_keystore, "ssl key store")) {
1890  return 1;
1891  }
1892  if (!trim_and_check_file_exists(system_parameters.ssl_key_file, "ssl key file")) {
1893  return 1;
1894  }
1895  if (!trim_and_check_file_exists(system_parameters.ssl_trust_ca_file, "ssl ca file")) {
1896  return 1;
1897  }
1898 
1899  g_enable_watchdog = enable_watchdog;
1900  g_watchdog_max_projected_rows_per_device = watchdog_max_projected_rows_per_device;
1901  g_preflight_count_query_threshold = preflight_count_query_threshold;
1902  g_enable_dynamic_watchdog = enable_dynamic_watchdog;
1903  g_dynamic_watchdog_time_limit = dynamic_watchdog_time_limit;
1904  g_enable_runtime_query_interrupt = enable_runtime_query_interrupt;
1905  g_enable_non_kernel_time_query_interrupt = enable_non_kernel_time_query_interrupt;
1906  g_pending_query_interrupt_freq = pending_query_interrupt_freq;
1907  g_running_query_interrupt_freq = running_query_interrupt_freq;
1908  g_use_estimator_result_cache = use_estimator_result_cache;
1909  g_enable_data_recycler = enable_data_recycler;
1910  g_use_hashtable_cache = use_hashtable_cache;
1911  g_max_cacheable_hashtable_size_bytes = max_cacheable_hashtable_size_bytes;
1912  g_hashtable_cache_total_bytes = hashtable_cache_total_bytes;
1913  if (g_use_hashtable_cache) {
1926  }
1927  g_optimize_cuda_block_and_grid_sizes = optimize_cuda_block_and_grid_sizes;
1928  } catch (po::error& e) {
1929  std::cerr << "Usage Error: " << e.what() << std::endl;
1930  return 1;
1931  }
1932 
1933  if (g_hll_precision_bits < 1 || g_hll_precision_bits > 16) {
1934  std::cerr << "hll-precision-bits must be between 1 and 16." << std::endl;
1935  return 1;
1936  }
1937 
1938  if (!g_from_table_reordering) {
1939  LOG(INFO) << " From clause table reordering is disabled";
1940  }
1941 
1943  LOG(INFO) << " Filter push down for JOIN is enabled";
1944  }
1945 
1946  if (vm.count("udf")) {
1947  boost::algorithm::trim_if(udf_file_name, boost::is_any_of("\"'"));
1948 
1949  if (!boost::filesystem::exists(udf_file_name)) {
1950  LOG(ERROR) << " User defined function file " << udf_file_name << " does not exist.";
1951  return 1;
1952  }
1953 
1954  LOG(INFO) << " User provided extension functions loaded from " << udf_file_name;
1955  }
1956 
1957  if (vm.count("udf-compiler-path")) {
1958  boost::algorithm::trim_if(udf_compiler_path, boost::is_any_of("\"'"));
1959  }
1960 
1961 #ifdef HAVE_TORCH_TFS
1962  if (vm.count("torch-lib-path")) {
1963  boost::algorithm::trim_if(torch_lib_path, boost::is_any_of("\"'"));
1964  }
1965 #endif
1966 
1967  auto trim_string = [](std::string& s) {
1968  boost::algorithm::trim_if(s, boost::is_any_of("\"'"));
1969  };
1970 
1971  if (vm.count("udf-compiler-options")) {
1972  std::for_each(udf_compiler_options.begin(), udf_compiler_options.end(), trim_string);
1973  }
1974 
1975  boost::algorithm::trim_if(system_parameters.ha_brokers, boost::is_any_of("\"'"));
1976  boost::algorithm::trim_if(system_parameters.ha_group_id, boost::is_any_of("\"'"));
1977  boost::algorithm::trim_if(system_parameters.ha_shared_data, boost::is_any_of("\"'"));
1978  boost::algorithm::trim_if(system_parameters.ha_unique_server_id,
1979  boost::is_any_of("\"'"));
1980 
1981  if (!system_parameters.ha_group_id.empty()) {
1982  LOG(INFO) << " HA group id " << system_parameters.ha_group_id;
1983  if (system_parameters.ha_unique_server_id.empty()) {
1984  LOG(ERROR) << "Starting server in HA mode --ha-unique-server-id must be set ";
1985  return 5;
1986  } else {
1987  LOG(INFO) << " HA unique server id " << system_parameters.ha_unique_server_id;
1988  }
1989  if (system_parameters.ha_brokers.empty()) {
1990  LOG(ERROR) << "Starting server in HA mode --ha-brokers must be set ";
1991  return 6;
1992  } else {
1993  LOG(INFO) << " HA brokers " << system_parameters.ha_brokers;
1994  }
1995  if (system_parameters.ha_shared_data.empty()) {
1996  LOG(ERROR) << "Starting server in HA mode --ha-shared-data must be set ";
1997  return 7;
1998  } else {
1999  LOG(INFO) << " HA shared data is " << system_parameters.ha_shared_data;
2000  }
2001  }
2002 
2003  boost::algorithm::trim_if(system_parameters.master_address, boost::is_any_of("\"'"));
2004  if (!system_parameters.master_address.empty()) {
2005  if (!read_only) {
2006  LOG(ERROR) << "The master-address setting is only allowed in read-only mode";
2007  return 9;
2008  }
2009  LOG(INFO) << " Master Address is " << system_parameters.master_address;
2010  LOG(INFO) << " Master Port is " << system_parameters.master_port;
2011  }
2012 
2013  if (g_max_import_threads < 1) {
2014  std::cerr << "max-import-threads must be >= 1 (was set to " << g_max_import_threads
2015  << ")." << std::endl;
2016  return 8;
2017  } else {
2018  LOG(INFO) << " Max import threads " << g_max_import_threads;
2019  }
2020 
2021  if (system_parameters.cuda_block_size) {
2022  LOG(INFO) << " cuda block size " << system_parameters.cuda_block_size;
2023  }
2024  if (system_parameters.cuda_grid_size) {
2025  LOG(INFO) << " cuda grid size " << system_parameters.cuda_grid_size;
2026  }
2027 
2029  if (vm["max-cpu-slab-size"].defaulted()) {
2030  LOG(INFO)
2031  << "max-cpu-slab-size is not set while use-cpu-mem-pool-for-output-buffers is "
2032  "true. Using the CPU memory buffer pool size for the max CPU slab size.";
2034  }
2035  } else {
2036  if (!vm["max-cpu-slab-size"].defaulted() && vm["default-cpu-slab-size"].defaulted()) {
2037  LOG(INFO)
2038  << "default-cpu-slab-size is not set while max-cpu-slab-size is set. "
2039  "Setting default-cpu-slab-size to the same value as max-cpu-slab-size ("
2040  << system_parameters.max_cpu_slab_size << " bytes)";
2041  system_parameters.default_cpu_slab_size = system_parameters.max_cpu_slab_size;
2042  }
2043  }
2044 
2045  if (!vm["max-gpu-slab-size"].defaulted() && vm["default-gpu-slab-size"].defaulted()) {
2046  LOG(INFO) << "default-gpu-slab-size is not set while max-gpu-slab-size is set. "
2047  "Setting default-gpu-slab-size to the same value as max-gpu-slab-size ("
2048  << system_parameters.max_gpu_slab_size << " bytes)";
2049  system_parameters.default_gpu_slab_size = system_parameters.max_gpu_slab_size;
2050  }
2051 
2052  LOG(INFO) << " Min CPU buffer pool slab size (in bytes) "
2053  << system_parameters.min_cpu_slab_size;
2055  LOG(INFO) << " Max CPU buffer pool slab size is set to the CPU buffer pool size";
2056  } else {
2057  LOG(INFO) << " Max CPU buffer pool slab size (in bytes) "
2058  << system_parameters.max_cpu_slab_size;
2059  }
2060  LOG(INFO) << " Default CPU buffer pool slab size (in bytes) "
2061  << system_parameters.default_cpu_slab_size;
2062  LOG(INFO) << " Min GPU buffer pool slab size (in bytes) "
2063  << system_parameters.min_gpu_slab_size;
2064  LOG(INFO) << " Max GPU buffer pool slab size (in bytes) "
2065  << system_parameters.max_gpu_slab_size;
2066  LOG(INFO) << " Default GPU buffer pool slab size (in bytes) "
2067  << system_parameters.default_gpu_slab_size;
2068  LOG(INFO) << " calcite JVM max memory (in MB) " << system_parameters.calcite_max_mem;
2069  LOG(INFO) << " HeavyDB Server Port " << system_parameters.omnisci_server_port;
2070  LOG(INFO) << " HeavyDB Calcite Port " << system_parameters.calcite_port;
2071  LOG(INFO) << " Enable Calcite view optimize "
2072  << system_parameters.enable_calcite_view_optimize;
2073  LOG(INFO) << " Allow Local Auth Fallback: "
2074  << (authMetadata.allowLocalAuthFallback ? "enabled" : "disabled");
2075  LOG(INFO) << " ParallelTop min threshold: " << g_parallel_top_min;
2076  LOG(INFO) << " ParallelTop watchdog max: " << g_parallel_top_max;
2077 
2078  LOG(INFO) << " Enable Data Recycler: "
2079  << (g_enable_data_recycler ? "enabled" : "disabled");
2080  if (g_enable_data_recycler) {
2081  LOG(INFO) << " \t Use hashtable cache: "
2082  << (g_use_hashtable_cache ? "enabled" : "disabled");
2083  if (g_use_hashtable_cache) {
2084  LOG(INFO) << " \t\t Total amount of bytes that hashtable cache keeps: "
2085  << g_hashtable_cache_total_bytes / (1024 * 1024) << " MB.";
2086  LOG(INFO) << " \t\t Per-hashtable size limit: "
2087  << g_max_cacheable_hashtable_size_bytes / (1024 * 1024) << " MB.";
2088  }
2089  LOG(INFO) << " \t Use query resultset cache: "
2090  << (g_use_query_resultset_cache ? "enabled" : "disabled");
2092  LOG(INFO) << " \t\t Total amount of bytes that query resultset cache keeps: "
2093  << g_query_resultset_cache_total_bytes / (1024 * 1024) << " MB.";
2094  LOG(INFO) << " \t\t Per-query resultset size limit: "
2095  << g_max_cacheable_query_resultset_size_bytes / (1024 * 1024) << " MB.";
2096  }
2097  LOG(INFO) << " \t\t Use auto query resultset caching: "
2098  << (g_allow_auto_resultset_caching ? "enabled" : "disabled");
2100  LOG(INFO) << " \t\t\t The maximum bytes of a query resultset which is "
2101  "automatically cached: "
2102  << g_auto_resultset_caching_threshold << " Bytes.";
2103  }
2104  LOG(INFO) << " \t\t Use query step skipping: "
2105  << (g_allow_query_step_skipping ? "enabled" : "disabled");
2106  LOG(INFO) << " \t Use chunk metadata cache: "
2107  << (g_use_chunk_metadata_cache ? "enabled" : "disabled");
2108  }
2109  LOG(INFO) << "Number of executors is set to " << system_parameters.num_executors;
2110 
2111  LOG(INFO) << "Use CPU memory pool for output buffers is set to "
2113 
2114  LOG(INFO) << "Executor Resource Manager: "
2115  << (g_enable_executor_resource_mgr ? "enabled" : "disabled");
2117  LOG(INFO) << "\tCPU kernel concurrency: "
2119  : "disabled");
2120  LOG(INFO) << "\tCPU-GPU kernel concurrency: "
2122  : "disabled");
2124  LOG(INFO) << "\tCPU result set reserved allocation: "
2125  << g_executor_resource_mgr_cpu_result_mem_bytes / (1024 * 1024) << " MB";
2126  } else {
2127  LOG(INFO) << "\tCPU result set reserved ratio of CPU buffer pool size: "
2129  }
2130  LOG(INFO) << "\tPer-query max CPU threads ratio: "
2132  LOG(INFO) << "\tPer-query max CPU result memory ratio of allocated total: "
2134  LOG(INFO) << "\tAllow concurrent CPU thread/slot oversubscription: "
2136  ? "enabled"
2137  : "disabled");
2138  LOG(INFO)
2139  << "\tAllow concurrent CPU result memory oversubscription: "
2141  ? "enabled"
2142  : "disabled");
2143  LOG(INFO) << "\tPer-query Max available resource utilization ratio: "
2145  }
2146 
2147  const std::string udf_reg_policy_log_prefix{"Runtime UDF/UDTF Registration Policy: "};
2148  switch (system_parameters.runtime_udf_registration_policy) {
2150  LOG(INFO) << udf_reg_policy_log_prefix << " DISALLOWED";
2151  break;
2152  }
2154  LOG(INFO) << udf_reg_policy_log_prefix << " ALLOWED for superusers only";
2155  break;
2156  }
2158  LOG(INFO) << udf_reg_policy_log_prefix << " ALLOWED for all users";
2159  break;
2160  }
2161  default: {
2162  UNREACHABLE() << "Unrecognized option for Runtime UDF/UDTF registration policy.";
2163  }
2164  }
2165 
2166  boost::algorithm::trim_if(authMetadata.distinguishedName, boost::is_any_of("\"'"));
2167  boost::algorithm::trim_if(authMetadata.uri, boost::is_any_of("\"'"));
2168  boost::algorithm::trim_if(authMetadata.ldapQueryUrl, boost::is_any_of("\"'"));
2169  boost::algorithm::trim_if(authMetadata.ldapRoleRegex, boost::is_any_of("\"'"));
2170  boost::algorithm::trim_if(authMetadata.ldapSuperUserRole, boost::is_any_of("\"'"));
2171 
2172  return boost::none;
2173 }
size_t g_watchdog_in_clause_max_num_elem_non_bitmap
Definition: Execute.cpp:85
int64_t g_large_ndv_threshold
bool g_use_table_device_offset
bool g_enable_parallel_window_partition_sort
unsigned connect_timeout
bool g_enable_left_join_filter_hoisting
Definition: Execute.cpp:107
double g_running_query_interrupt_freq
Definition: Execute.cpp:141
bool g_enable_smem_group_by
size_t g_pmem_size
size_t g_parallel_top_max
Definition: ResultSet.cpp:50
int safe_open(const char *path, int flags, mode_t mode) noexcept
Definition: heavyai_fs.cpp:90
float g_filter_push_down_low_frac
Definition: Execute.cpp:103
const std::string kDataDirectoryName
size_t g_num_tuple_threshold_switch_to_baseline
Definition: Execute.cpp:110
bool g_use_query_resultset_cache
Definition: Execute.cpp:160
bool g_multi_instance
Definition: heavyai_locks.h:22
size_t g_cpu_sub_task_size
Definition: Execute.cpp:90
double g_executor_resource_mgr_per_query_max_cpu_slots_ratio
Definition: Execute.cpp:182
SystemParameters::RuntimeUdfRegistrationPolicy construct_runtime_udf_registration_policy(const bool enable_runtime_udfs, const bool enable_udf_registration_for_all_users)
bool trim_and_check_file_exists(std::string &filename, const std::string desc)
bool g_strip_join_covered_quals
Definition: Execute.cpp:116
bool g_enable_logs_system_tables
Definition: Catalog.cpp:100
size_t g_gpu_code_cache_max_size_in_bytes
Definition: QueryEngine.h:12
bool g_enable_direct_columnarization
Definition: Execute.cpp:134
static void initialize(const std::string &data_dir, const std::string &allowed_import_paths, const std::string &allowed_export_paths)
Definition: DdlUtils.cpp:878
bool g_enable_lazy_fetch
Definition: Execute.cpp:136
const std::string kDefaultDiskCacheDirName
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:84
bool g_skip_intermediate_count
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:140
#define LOG(tag)
Definition: Logger.h:285
bool g_allow_query_step_skipping
Definition: Execute.cpp:163
size_t g_logs_system_tables_max_files_count
bool g_enable_debug_timer
Definition: Logger.cpp:17
const std::string kDefaultLogDirName
bool g_use_cpu_mem_pool_size_for_max_cpu_slab_size
Definition: DataMgr.cpp:56
#define UNREACHABLE()
Definition: Logger.h:338
size_t g_preflight_count_query_threshold
Definition: Execute.cpp:84
const std::string kSystemCatalogName
double g_bbox_intersect_target_entries_per_bin
Definition: Execute.cpp:115
unsigned g_cpu_threads_override
bool g_enable_auto_metadata_update
size_t g_filter_push_down_passing_row_ubound
Definition: Execute.cpp:105
bool g_enable_logs_system_tables_auto_refresh
Definition: Catalog.cpp:101
boost::optional< int > parse_command_line(int argc, char const *const *argv, const bool should_init_logging=false)
unsigned send_timeout
void addOptionalFileToBlacklist(std::string &filename)
size_t g_streaming_topn_max
Definition: ResultSet.cpp:51
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
size_t g_hashtable_cache_total_bytes
Definition: Execute.cpp:164
unsigned g_trivial_loop_join_threshold
Definition: Execute.cpp:96
bool g_enable_geo_ops_on_uncompressed_coords
Definition: Execute.cpp:125
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:138
void setMaxCacheItemSize(CacheItemType item_type, size_t new_max_cache_item_size)
Definition: DataRecycler.h:613
int g_hll_precision_bits
double g_executor_resource_mgr_cpu_result_mem_ratio
Definition: Execute.cpp:180
bool g_enable_data_recycler
Definition: Execute.cpp:158
bool g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency
Definition: Execute.cpp:188
bool g_use_cpu_mem_pool_for_output_buffers
Definition: Execute.cpp:197
std::string to_string(char const *&&v)
bool g_from_table_reordering
Definition: Execute.cpp:97
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:98
bool g_use_chunk_metadata_cache
Definition: Execute.cpp:161
size_t g_max_cacheable_hashtable_size_bytes
Definition: Execute.cpp:165
bool g_enable_string_functions
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:150
size_t g_watchdog_none_encoded_string_translation_limit
Definition: Execute.cpp:82
bool g_null_div_by_zero
Definition: Execute.cpp:95
bool g_enable_interop
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178
bool g_restrict_ml_model_metadata_to_superusers
Definition: Execute.cpp:123
size_t g_parallel_top_min
Definition: ResultSet.cpp:49
bool g_enable_columnar_output
Definition: Execute.cpp:106
size_t g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline
Definition: Execute.cpp:111
ssize_t safe_write(const int fd, const void *buffer, const size_t buffer_size) noexcept
Definition: heavyai_fs.cpp:144
bool g_enable_s3_fsi
Definition: Catalog.cpp:97
bool g_enable_idp_temporary_users
Definition: SysCatalog.cpp:63
size_t g_window_function_aggregation_tree_fanout
static void setDefaultImportPath(const std::string &base_path)
singleton class to handle concurrency and state for blosc library. A C++ wrapper over a pure C librar...
bool g_enable_hashjoin_many_to_many
Definition: Execute.cpp:113
bool g_enable_system_tables
Definition: SysCatalog.cpp:64
void init(LogOptions const &log_opts)
Definition: Logger.cpp:364
std::string g_logs_system_tables_refresh_interval
Definition: Catalog.cpp:103
static HashtableRecycler * getHashTableCache()
bool g_enable_http_binary_server
std::string g_pmem_path
static bool migrationEnabled()
Definition: MigrationMgr.h:47
size_t g_watchdog_max_projected_rows_per_device
Definition: Execute.cpp:83
float g_filter_push_down_high_frac
Definition: Execute.cpp:104
bool g_enable_distance_rangejoin
Definition: Execute.cpp:112
bool g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency
Definition: Execute.cpp:194
bool g_bigint_count
bool g_enable_watchdog
int64_t g_bitmap_memory_limit
size_t g_max_memory_allocation_size
Definition: Execute.cpp:128
size_t g_approx_quantile_buffer
Definition: Execute.cpp:171
bool g_allow_auto_resultset_caching
Definition: Execute.cpp:162
size_t g_max_log_length
Definition: Execute.cpp:176
bool g_enable_dev_table_functions
Definition: Execute.cpp:124
void setTotalCacheSize(CacheItemType item_type, size_t new_total_cache_size)
Definition: DataRecycler.h:606
static const std::string cluster_command_line_arg
bool g_optimize_cuda_block_and_grid_sizes
Definition: Execute.cpp:169
bool g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency
Definition: Execute.cpp:191
size_t g_query_resultset_cache_total_bytes
Definition: Execute.cpp:166
bool g_enable_window_functions
Definition: Execute.cpp:120
size_t g_max_cacheable_query_resultset_size_bytes
Definition: Execute.cpp:167
size_t g_min_memory_allocation_size
Definition: Execute.cpp:129
bool with_keepalive
size_t g_watchdog_in_clause_max_num_input_rows
Definition: Execute.cpp:87
static void executeRebrandMigration(const std::string &base_path)
static void takeMigrationLock(const std::string &base_path)
bool g_read_only
Definition: heavyai_locks.h:21
bool g_executor_resource_mgr_allow_cpu_kernel_concurrency
Definition: Execute.cpp:187
bool g_enable_seconds_refresh
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:88
size_t g_estimator_failure_max_groupby_size
tuple line
Definition: parse_ast.py:10
bool g_enable_bbox_intersect_hashjoin
Definition: Execute.cpp:109
unsigned recv_timeout
std::stringstream sanitize_config_file(std::ifstream &in)
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:147
bool g_enable_automatic_ir_metadata
Definition: Execute.cpp:174
bool g_enable_foreign_table_scheduled_refresh
float g_vacuum_min_selectivity
static const std::string nodeIds_token
bool g_enable_filter_function
Definition: Execute.cpp:91
bool g_cache_string_hash
const std::string kCatalogDirectoryName
bool g_enable_ml_functions
Definition: Execute.cpp:122
float g_fraction_code_cache_to_evict
size_t g_in_clause_num_elem_skip_bitmap
Definition: Execute.cpp:88
double g_ndv_groups_estimator_multiplier
bool g_allow_invalid_literal_buffer_reads
Definition: ConstantIR.cpp:140
bool g_allow_system_dashboard_update
Definition: DBHandler.cpp:124
double g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio
Definition: Execute.cpp:183
bool g_uniform_request_ids_per_thrift_call
Definition: DBHandler.cpp:125
size_t g_executor_resource_mgr_cpu_result_mem_bytes
Definition: Execute.cpp:181
bool g_enable_filter_push_down
Definition: Execute.cpp:102
bool g_use_estimator_result_cache
Definition: Execute.cpp:139
const std::string kDefaultLicenseFileName
bool g_enable_bump_allocator
Definition: Execute.cpp:132
bool g_enable_parallel_window_partition_compute
bool g_allow_memory_status_log
Definition: Execute.cpp:200
static HashtableRecycler * getHashTableCache()
double g_executor_resource_mgr_max_available_resource_use_ratio
Definition: Execute.cpp:195
bool g_enable_union
bool g_enable_cpu_sub_tasks
Definition: Execute.cpp:89
bool g_allow_query_step_cpu_retry
Definition: Execute.cpp:94
bool g_allow_cpu_retry
Definition: Execute.cpp:93
int32_t ftruncate(const int32_t fd, int64_t length)
Definition: heavyai_fs.cpp:86
size_t g_approx_quantile_centroids
Definition: Execute.cpp:172
const std::string kLockfilesDirectoryName
static void addToBlacklist(const std::string &path)
Definition: DdlUtils.cpp:925
bool g_enable_stringdict_parallel
static const std::string MAPD_RELEASE
Definition: release.h:42
bool g_optimize_row_initialization
Definition: Execute.cpp:108
static bool run
int safe_fcntl(int fd, int cmd, struct flock *fl) noexcept
Definition: heavyai_fs.cpp:112
bool g_columnar_large_projections
size_t g_watchdog_in_clause_max_num_elem_bitmap
Definition: Execute.cpp:86
int safe_close(int fd) noexcept
Definition: heavyai_fs.cpp:101
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:92
bool g_enable_fsi
Definition: Catalog.cpp:96
size_t g_columnar_large_projections_threshold
bool g_query_engine_cuda_streams
Definition: QueryEngine.h:10
bool g_enable_thrift_logs
Definition: HeavyDB.cpp:298
bool g_enable_add_metadata_columns
Definition: ParserNode.cpp:90
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:137
size_t g_max_import_threads
Definition: Importer.cpp:105
bool g_use_hashtable_cache
Definition: Execute.cpp:159
#define VLOG(n)
Definition: Logger.h:388
size_t g_auto_resultset_caching_threshold
Definition: Execute.cpp:168
size_t g_bbox_intersect_max_table_size_bytes
Definition: Execute.cpp:114
size_t g_large_ndv_multiplier
bool g_enable_table_functions
Definition: Execute.cpp:121
size_t g_gpu_smem_threshold
Definition: Execute.cpp:142