OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CommandLineOptions.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
19 #include <blosc.h>
20 #include <cstddef>
21 
22 #include <boost/algorithm/string.hpp>
23 #include <boost/algorithm/string/trim.hpp>
24 #include <boost/filesystem.hpp>
25 #include <boost/locale/generator.hpp>
26 #include <boost/make_shared.hpp>
27 #include <boost/program_options.hpp>
28 
29 #include "Catalog/AuthMetadata.h"
35 
36 namespace po = boost::program_options;
37 
38 class LeafHostInfo;
39 
46 
48  public:
49  CommandLineOptions(char const* argv0, bool dist_v5_ = false)
50  : log_options_(argv0), exe_name(argv0), dist_v5_(dist_v5_) {
51  fillOptions();
53  }
54  int http_port = 6278;
55  int http_binary_port = 6276;
56  size_t reserved_gpu_mem = 384 * 1024 * 1024;
57  std::string base_path;
59  std::string cluster_file = {"cluster.conf"};
60  std::string cluster_topology_file = {"cluster_topology.conf"};
61  std::string license_path = {""};
62  std::string encryption_key_store_path = {};
63  bool verbose_logging = false;
64  bool jit_debug = false;
65  bool intel_jit_profile = false;
66  bool allow_multifrag = true;
67  bool read_only = false;
68  bool allow_loop_joins = false;
69  bool enable_legacy_syntax = true;
70  bool log_user_origin = true;
72 
74  bool enable_rendering = false;
76  int render_oom_retry_threshold = 0; // in milliseconds
77  size_t render_mem_bytes = 1000000000;
80  bool renderer_prefer_igpu = false;
81  unsigned renderer_vulkan_timeout_ms = 60000; // in milliseconds
83  bool renderer_enable_slab_allocation = false; // until proven fully working
84  bool enable_watchdog = true;
93  double running_query_interrupt_freq = 0.1; // 0.0 ~ 1.0
94  unsigned pending_query_interrupt_freq = 1000; // in milliseconds
95  unsigned dynamic_watchdog_time_limit = 10000;
96  std::string disk_cache_level = "";
97 
98  bool enable_data_recycler = true;
99  bool use_hashtable_cache = true;
100  size_t hashtable_cache_total_bytes = 4294967296; // 4GB
101  size_t max_cacheable_hashtable_size_bytes = 2147483648; // 2GB
103 
107  size_t num_reader_threads = 0;
111  std::string db_query_file = {""};
115  bool exit_after_warmup = false;
125  std::string udf_file_name = {""};
126  std::string udf_compiler_path = {""};
127  std::vector<std::string> udf_compiler_options;
128 
129  std::string allowed_import_paths{};
130  std::string allowed_export_paths{};
131 
132 #ifdef ENABLE_GEOS
133  std::string libgeos_so_filename = {"libgeos_c.so"};
134 #endif
135 
136 #ifdef HAVE_TORCH_TFS
137  std::string torch_lib_path = {""};
138 #endif
139 
140  void fillOptions();
141  void fillDeveloperOptions();
142 
143  std::string compressor = std::string(BLOSC_LZ4HC_COMPNAME);
144 
145  po::options_description help_desc_;
146  po::options_description developer_desc_;
148  std::string exe_name;
149  po::positional_options_description positional_options;
150 
151  public:
152  std::vector<LeafHostInfo> db_leaves;
153  std::vector<LeafHostInfo> string_leaves;
154  po::variables_map vm;
155  std::string clusterIds_arg;
156 
157  std::string getNodeIds();
158  std::vector<std::string> getNodeIdsArray();
159  static const std::string nodeIds_token, cluster_command_line_arg;
160 
161  boost::optional<int> parse_command_line(int argc,
162  char const* const* argv,
163  const bool should_init_logging = false);
164  void validate();
165  void validate_base_path();
166  void init_logging();
167  const bool dist_v5_;
168 
169  private:
170  bool enable_runtime_udfs = true;
171  // Holds the value of the deprecated --enable-runtime-udf flag, which has been
172  // replaced by --enable-runtime-udfs. If the --enable-runtime-udf flag is specified,
173  // the contents of enable_runtime_udf are transferred to enable_runtime_udfs.
174  bool enable_runtime_udf = true;
176 };
177 
178 extern bool g_enable_watchdog;
179 extern bool g_enable_dynamic_watchdog;
180 extern unsigned g_dynamic_watchdog_time_limit;
181 extern unsigned g_trivial_loop_join_threshold;
186 extern bool g_from_table_reordering;
187 extern bool g_enable_filter_push_down;
188 extern bool g_allow_cpu_retry;
189 extern bool g_allow_query_step_cpu_retry;
190 extern bool g_null_div_by_zero;
191 extern bool g_bigint_count;
192 
193 // The following options are specific to ExecutorResourceMgr; they require
194 // g_enable_executor_resource_mgr to be true in order to apply or to be allowed as flags.
205 
207 extern float g_filter_push_down_low_frac;
208 extern float g_filter_push_down_high_frac;
210 extern bool g_enable_columnar_output;
216 extern bool g_enable_distance_rangejoin;
219 extern bool g_strip_join_covered_quals;
220 extern size_t g_constrained_by_in_threshold;
221 extern size_t g_big_group_threshold;
222 extern bool g_enable_window_functions;
226 extern bool g_enable_table_functions;
227 extern bool g_enable_ml_functions;
229 extern bool g_enable_dev_table_functions;
231 extern bool g_allow_memory_status_log;
232 
233 extern size_t g_max_memory_allocation_size;
234 extern double g_bump_allocator_step_reduction;
237 extern unsigned g_pending_query_interrupt_freq;
238 extern double g_running_query_interrupt_freq;
240 extern size_t g_gpu_smem_threshold;
243 extern bool g_use_estimator_result_cache;
244 extern bool g_enable_lazy_fetch;
245 
246 extern int64_t g_omni_kafka_seek;
247 extern size_t g_leaf_count;
248 extern size_t g_compression_limit_bytes;
249 extern bool g_skip_intermediate_count;
250 extern bool g_enable_bump_allocator;
252 extern size_t g_min_memory_allocation_size;
253 extern bool g_enable_string_functions;
254 extern bool g_enable_fsi;
255 extern bool g_enable_s3_fsi;
257 #ifdef ENABLE_IMPORT_PARQUET
258 extern bool g_enable_legacy_parquet_import;
259 #endif
260 extern bool g_enable_fsi_regex_import;
262 extern bool g_enable_interop;
263 extern bool g_enable_union;
264 extern bool g_enable_cpu_sub_tasks;
265 extern size_t g_cpu_sub_task_size;
266 extern unsigned g_cpu_threads_override;
267 extern bool g_enable_filter_function;
268 extern size_t g_max_import_threads;
270 extern bool g_allow_s3_server_privileges;
271 extern float g_vacuum_min_selectivity;
272 extern bool g_read_only;
274 extern size_t g_enable_parallel_linearization;
275 extern size_t g_max_log_length;
276 #ifdef ENABLE_MEMKIND
277 extern bool g_enable_tiered_cpu_mem;
278 extern size_t g_pmem_size;
279 extern std::string g_pmem_path;
280 #endif
281 extern bool g_enable_data_recycler;
282 extern bool g_use_hashtable_cache;
283 extern size_t g_hashtable_cache_total_bytes;
285 extern bool g_use_query_resultset_cache;
288 extern bool g_use_chunk_metadata_cache;
291 extern bool g_allow_query_step_skipping;
292 extern bool g_query_engine_cuda_streams;
293 extern bool g_multi_instance;
size_t g_watchdog_in_clause_max_num_elem_non_bitmap
Definition: Execute.cpp:85
CommandLineOptions(char const *argv0, bool dist_v5_=false)
bool g_enable_parallel_window_partition_sort
double g_running_query_interrupt_freq
Definition: Execute.cpp:141
size_t g_pmem_size
float g_filter_push_down_low_frac
Definition: Execute.cpp:103
size_t g_num_tuple_threshold_switch_to_baseline
Definition: Execute.cpp:110
size_t g_constrained_by_in_threshold
Definition: Execute.cpp:117
bool g_use_query_resultset_cache
Definition: Execute.cpp:160
bool g_multi_instance
Definition: heavyai_locks.h:22
double g_bump_allocator_step_reduction
Definition: Execute.cpp:133
size_t g_cpu_sub_task_size
Definition: Execute.cpp:90
double g_executor_resource_mgr_per_query_max_cpu_slots_ratio
Definition: Execute.cpp:182
logger::LogOptions log_options_
bool g_strip_join_covered_quals
Definition: Execute.cpp:116
unsigned renderer_vulkan_timeout_ms
std::vector< LeafHostInfo > string_leaves
bool g_enable_direct_columnarization
Definition: Execute.cpp:134
bool g_enable_lazy_fetch
Definition: Execute.cpp:136
std::string udf_compiler_path
bool g_enable_legacy_delimited_import
Definition: ParserNode.cpp:84
bool g_skip_intermediate_count
unsigned g_pending_query_interrupt_freq
Definition: Execute.cpp:140
bool g_allow_query_step_skipping
Definition: Execute.cpp:163
size_t preflight_count_query_threshold
size_t g_preflight_count_query_threshold
Definition: Execute.cpp:84
static constexpr int64_t kMinsPerHour
double g_bbox_intersect_target_entries_per_bin
Definition: Execute.cpp:115
size_t g_lockfile_lock_extension_milliseconds
Definition: heavyai_locks.h:24
unsigned g_cpu_threads_override
bool g_enable_auto_metadata_update
size_t g_filter_push_down_passing_row_ubound
Definition: Execute.cpp:105
boost::optional< int > parse_command_line(int argc, char const *const *argv, const bool should_init_logging=false)
bool g_enable_dynamic_watchdog
Definition: Execute.cpp:81
size_t g_hashtable_cache_total_bytes
Definition: Execute.cpp:164
unsigned g_trivial_loop_join_threshold
Definition: Execute.cpp:96
bool g_enable_geo_ops_on_uncompressed_coords
Definition: Execute.cpp:125
bool g_enable_non_kernel_time_query_interrupt
Definition: Execute.cpp:138
bool enable_non_kernel_time_query_interrupt
This file includes the class specification for the cache used by the Foreign Storage Interface (FSI)...
double g_executor_resource_mgr_cpu_result_mem_ratio
Definition: Execute.cpp:180
bool g_enable_data_recycler
Definition: Execute.cpp:158
po::options_description help_desc_
size_t g_compression_limit_bytes
Definition: Compressor.cpp:35
bool g_executor_resource_mgr_allow_cpu_gpu_kernel_concurrency
Definition: Execute.cpp:188
std::string encryption_key_store_path
bool g_from_table_reordering
Definition: Execute.cpp:97
bool g_inner_join_fragment_skipping
Definition: Execute.cpp:98
bool g_use_chunk_metadata_cache
Definition: Execute.cpp:161
size_t g_max_cacheable_hashtable_size_bytes
Definition: Execute.cpp:165
bool g_enable_string_functions
bool g_enable_smem_non_grouped_agg
Definition: Execute.cpp:150
static constexpr int64_t kMinsPerMonth
size_t g_watchdog_none_encoded_string_translation_limit
Definition: Execute.cpp:82
bool g_null_div_by_zero
Definition: Execute.cpp:95
bool g_enable_interop
bool g_enable_executor_resource_mgr
Definition: Execute.cpp:178
bool g_restrict_ml_model_metadata_to_superusers
Definition: Execute.cpp:123
bool g_enable_columnar_output
Definition: Execute.cpp:106
size_t g_ratio_num_hash_entry_to_num_tuple_switch_to_baseline
Definition: Execute.cpp:111
size_t max_concurrent_render_sessions
bool g_enable_s3_fsi
Definition: Catalog.cpp:97
size_t g_window_function_aggregation_tree_fanout
std::string getNodeIds()
bool g_enable_hashjoin_many_to_many
Definition: Execute.cpp:113
std::vector< LeafHostInfo > db_leaves
std::vector< std::string > getNodeIdsArray()
std::string g_pmem_path
size_t g_big_group_threshold
Definition: Execute.cpp:119
size_t g_watchdog_max_projected_rows_per_device
Definition: Execute.cpp:83
float g_filter_push_down_high_frac
Definition: Execute.cpp:104
bool g_enable_distance_rangejoin
Definition: Execute.cpp:112
bool g_executor_resource_mgr_allow_cpu_result_mem_oversubscription_concurrency
Definition: Execute.cpp:194
bool g_bigint_count
bool g_enable_watchdog
size_t g_max_memory_allocation_size
Definition: Execute.cpp:128
bool g_allow_auto_resultset_caching
Definition: Execute.cpp:162
std::string allowed_export_paths
size_t g_max_log_length
Definition: Execute.cpp:176
bool g_enable_dev_table_functions
Definition: Execute.cpp:124
static const std::string cluster_command_line_arg
AuthMetadata authMetadata
bool g_optimize_cuda_block_and_grid_sizes
Definition: Execute.cpp:169
bool g_executor_resource_mgr_allow_cpu_slot_oversubscription_concurrency
Definition: Execute.cpp:191
size_t g_query_resultset_cache_total_bytes
Definition: Execute.cpp:166
bool g_enable_window_functions
Definition: Execute.cpp:120
size_t g_max_cacheable_query_resultset_size_bytes
Definition: Execute.cpp:167
size_t g_min_memory_allocation_size
Definition: Execute.cpp:129
size_t g_watchdog_in_clause_max_num_input_rows
Definition: Execute.cpp:87
bool g_read_only
Definition: heavyai_locks.h:21
bool g_executor_resource_mgr_allow_cpu_kernel_concurrency
Definition: Execute.cpp:187
unsigned pending_query_interrupt_freq
bool g_enable_fsi_regex_import
Definition: ParserNode.cpp:88
bool g_enable_bbox_intersect_hashjoin
Definition: Execute.cpp:109
bool g_enable_smem_grouped_non_count_agg
Definition: Execute.cpp:147
bool g_enable_automatic_ir_metadata
Definition: Execute.cpp:174
std::vector< std::string > udf_compiler_options
float g_vacuum_min_selectivity
static const std::string nodeIds_token
bool g_enable_filter_function
Definition: Execute.cpp:91
bool g_enable_ml_functions
Definition: Execute.cpp:122
size_t g_in_clause_num_elem_skip_bitmap
Definition: Execute.cpp:88
bool g_allow_invalid_literal_buffer_reads
Definition: ConstantIR.cpp:140
double g_executor_resource_mgr_per_query_max_cpu_result_mem_ratio
Definition: Execute.cpp:183
size_t g_executor_resource_mgr_cpu_result_mem_bytes
Definition: Execute.cpp:181
bool g_enable_filter_push_down
Definition: Execute.cpp:102
int64_t g_omni_kafka_seek
bool g_use_estimator_result_cache
Definition: Execute.cpp:139
bool g_enable_bump_allocator
Definition: Execute.cpp:132
bool g_enable_parallel_window_partition_compute
bool g_allow_memory_status_log
Definition: Execute.cpp:200
po::positional_options_description positional_options
size_t watchdog_max_projected_rows_per_device
double g_executor_resource_mgr_max_available_resource_use_ratio
Definition: Execute.cpp:195
bool g_enable_union
bool g_enable_cpu_sub_tasks
Definition: Execute.cpp:89
Functions used to work with HyperLogLog records.
bool g_allow_query_step_cpu_retry
Definition: Execute.cpp:94
bool g_allow_cpu_retry
Definition: Execute.cpp:93
File_Namespace::DiskCacheConfig disk_cache_config
bool enable_udf_registration_for_all_users
std::string allowed_import_paths
std::string disk_cache_level
bool g_optimize_row_initialization
Definition: Execute.cpp:108
po::variables_map vm
size_t max_cacheable_hashtable_size_bytes
size_t watchdog_none_encoded_string_translation_limit
size_t g_leaf_count
Definition: ParserNode.cpp:79
size_t g_watchdog_in_clause_max_num_elem_bitmap
Definition: Execute.cpp:86
unsigned g_dynamic_watchdog_time_limit
Definition: Execute.cpp:92
size_t g_watchdog_in_subquery_max_in_values
bool g_enable_fsi
Definition: Catalog.cpp:96
bool g_query_engine_cuda_streams
Definition: QueryEngine.h:10
bool g_allow_s3_server_privileges
Definition: S3Archive.cpp:34
bool g_enable_add_metadata_columns
Definition: ParserNode.cpp:90
po::options_description developer_desc_
bool g_enable_runtime_query_interrupt
Definition: Execute.cpp:137
size_t g_max_import_threads
Definition: Importer.cpp:105
size_t g_enable_parallel_linearization
Definition: Execute.cpp:156
std::string cluster_topology_file
bool g_use_hashtable_cache
Definition: Execute.cpp:159
size_t g_auto_resultset_caching_threshold
Definition: Execute.cpp:168
size_t g_bbox_intersect_max_table_size_bytes
Definition: Execute.cpp:114
bool g_enable_table_functions
Definition: Execute.cpp:121
unsigned dynamic_watchdog_time_limit
SystemParameters system_parameters
size_t g_gpu_smem_threshold
Definition: Execute.cpp:142