OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
/home/jenkins-slave/workspace/core-os-doxygen/initdb.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <thrift/Thrift.h>
18 #include <array>
19 #include <boost/filesystem.hpp>
20 #include <boost/program_options.hpp>
21 #include <exception>
22 #include <iostream>
23 #include <memory>
24 #include <string>
25 
26 #include "Catalog/Catalog.h"
27 #include "Logger/Logger.h"
29 #include "Shared/SysDefinitions.h"
31 
// Port value passed as the second argument when main() constructs Calcite.
// NOTE(review): loadGeo() sets system_parameters.calcite_port to 3280 rather than
// using this macro - confirm the difference is intentional.
32 #define CALCITEPORT 3279
33 
// File names of the sample geo data sets. loadGeo() resolves each one under
// <heavyai::get_root_abs_path()>/ThirdParty/geo_samples.
34 static const std::array<std::string, 3> SampleGeoFileNames{"us-states.json",
35  "us-counties.json",
36  "countries.json"};
// Destination table names for the sample geo data. loadGeo() pairs entry i of this
// array with entry i of SampleGeoFileNames, so the two arrays must stay in the same
// order and have the same length.
37 static const std::array<std::string, 3> SampleGeoTableNames{"heavyai_us_states",
38  "heavyai_us_counties",
39  "heavyai_countries"};
40 
// Defined in another translation unit. main() binds it to the --enable-thrift-logs
// option and, when it is false, installs a no-op thrift output function.
41 extern bool g_enable_thrift_logs;
42 
// Loads the sample geo files into their tables.
//
// Builds a DBHandler over base_path using the fixed settings declared below,
// connects as shared::kRootUsername to shared::kDefaultDbName, switches the session
// to CPU execution, and then issues one "COPY <table> FROM '<file>' WITH (GEO='true');"
// statement per entry of SampleGeoFileNames / SampleGeoTableNames (paired by index).
//
// base_path: directory handed to FilePathWhitelist::initialize and to DBHandler.
//
// Throws std::runtime_error when a sample file is not found under
// <heavyai::get_root_abs_path()>/ThirdParty/geo_samples. Nothing is caught in this
// function, so any exception raised by the calls below propagates to the caller.
//
// NOTE(review): this listing is an extract; listing lines 60, 85, 93 and 95 are absent,
// so some statements below are visibly incomplete.
43 static void loadGeo(std::string base_path) {
    // Default-initialized inputs for the DBHandler constructor.
    // NOTE(review): session_id is presumably populated by internal_connect() below -
    // confirm against DBHandler.
44  TSessionId session_id{};
45  SystemParameters system_parameters{};
46  AuthMetadata auth_metadata{};
47  std::string udf_filename{};
48  std::string udf_compiler_path{};
49  std::vector<std::string> udf_compiler_options{};
50 #ifdef ENABLE_GEOS
51  std::string libgeos_so_filename{};
52 #endif
53 #ifdef HAVE_TORCH_TFS
54  std::string torch_lib_path{};
55 #endif
56  std::vector<LeafHostInfo> db_leaves{};
57  std::vector<LeafHostInfo> string_leaves{};
58 
59  // Whitelist root path for tests by default
    // NOTE(review): listing line 60 is absent from this extract.
    // Both the import and export path lists passed here are the JSON array ["/"].
61  ddl_utils::FilePathWhitelist::initialize(base_path, "[\"/\"]", "[\"/\"]");
62 
63  // Based on default values observed from starting up an OmniSci DB server.
64  const bool allow_multifrag{true};
65  const bool jit_debug{false};
66  const bool intel_jit_profile{false};
67  const bool read_only{false};
68  const bool allow_loop_joins{false};
69  const bool enable_rendering{false};
70  const bool renderer_prefer_igpu{false};
71  const unsigned renderer_vulkan_timeout_ms{300000};
72  const bool renderer_use_parallel_executors{false};
73  const bool enable_auto_clear_render_mem{false};
74  const int render_oom_retry_threshold{0};
75  const size_t render_mem_bytes{500000000};
76  const size_t max_concurrent_render_sessions{500};
77  const bool render_compositor_use_last_gpu{false};
78  const bool renderer_enable_slab_allocation{true};
79  const size_t reserved_gpu_mem{134217728};
80  const size_t num_reader_threads{0};
81  const bool legacy_syntax{true};
82  const int idle_session_duration{60};
83  const int max_session_duration{43200};
    // NOTE(review): the right-hand side of this assignment is on listing line 85,
    // which is absent from this extract.
84  system_parameters.runtime_udf_registration_policy =
86  system_parameters.omnisci_server_port = -1;
    // NOTE(review): 3280 here vs. CALCITEPORT (3279) used by main() - confirm intentional.
87  system_parameters.calcite_port = 3280;
88 
    // Configure a non-aggregator, non-cluster setup with no leaves.
    // g_leaf_count and g_cluster are globals, so these writes are visible outside
    // this function.
89  system_parameters.aggregator = false;
90  g_leaf_count = 0;
91  g_cluster = false;
92 
    // NOTE(review): listing lines 93 and 95 are absent; the declaration of cache_level
    // and the first initializer of disk_cache_config are not visible here.
94  File_Namespace::DiskCacheConfig disk_cache_config{
96  cache_level};
97 
    // Note the argument order: reserved_gpu_mem is passed before
    // render_compositor_use_last_gpu, which differs from the declaration order above.
    // NOTE(review): the meaning of the trailing `false` argument is not visible here.
98  auto db_handler = std::make_unique<DBHandler>(db_leaves,
99  string_leaves,
100  base_path,
101  allow_multifrag,
102  jit_debug,
103  intel_jit_profile,
104  read_only,
105  allow_loop_joins,
106  enable_rendering,
107  renderer_prefer_igpu,
108  renderer_vulkan_timeout_ms,
109  renderer_use_parallel_executors,
110  enable_auto_clear_render_mem,
111  render_oom_retry_threshold,
112  render_mem_bytes,
113  max_concurrent_render_sessions,
114  reserved_gpu_mem,
115  render_compositor_use_last_gpu,
116  renderer_enable_slab_allocation,
117  num_reader_threads,
118  auth_metadata,
119  system_parameters,
120  legacy_syntax,
121  idle_session_duration,
122  max_session_duration,
123  udf_filename,
124  udf_compiler_path,
125  udf_compiler_options,
126 #ifdef ENABLE_GEOS
127  libgeos_so_filename,
128 #endif
129 #ifdef HAVE_TORCH_TFS
130  torch_lib_path,
131 #endif
132  disk_cache_config,
133  false);
134  db_handler->internal_connect(session_id, shared::kRootUsername, shared::kDefaultDbName);
135 
136  // Execute on CPU by default
137  db_handler->set_execution_mode(session_id, TExecuteMode::CPU);
    // Reused for every COPY below; its contents are never inspected in this function.
138  TQueryResult res;
139 
140  const size_t num_samples = SampleGeoFileNames.size();
141  for (size_t i = 0; i < num_samples; i++) {
142  const std::string table_name = SampleGeoTableNames[i];
143  const std::string file_name = SampleGeoFileNames[i];
144 
145  auto file_path = boost::filesystem::path(heavyai::get_root_abs_path()) /
146  "ThirdParty" / "geo_samples" / file_name;
147 
    // Fail on the first missing file; later samples are not attempted.
148  if (!boost::filesystem::exists(file_path)) {
149  throw std::runtime_error(
150  "Unable to populate geo sample data. File does not exist: " +
151  file_path.string());
152  }
    // On Windows the path is embedded in generic (forward-slash) format; elsewhere
    // the native string is used.
153 #ifdef _WIN32
154  std::string sql_string = "COPY " + table_name + " FROM '" +
155  file_path.generic_string() + "' WITH (GEO='true');";
156 #else
157  std::string sql_string =
158  "COPY " + table_name + " FROM '" + file_path.string() + "' WITH (GEO='true');";
159 #endif
    // NOTE(review): the meaning of the trailing (true, "", -1, -1) arguments is not
    // visible in this file - confirm against DBHandler::sql_execute.
160  db_handler->sql_execute(res, session_id, sql_string, true, "", -1, -1);
161  }
162 }
163 
// Entry point of the initdb tool.
//
// Steps, in order:
//   1. Parse command-line options ("data" is required and may be given positionally).
//   2. Optionally silence thrift output.
//   3. Require that base_path exists.
//   4. For each known subdirectory of base_path: if it exists, remove it when --force
//      was given, otherwise print an error and return 1.
//   5. Create the catalogs, lockfiles (plus its two subdirectories) and export
//      directories.
//   6. Initialize logging, then the system catalog.
//   7. Unless --skip-geo was given, call loadGeo(base_path).
//
// Returns 0 after --help and on reaching the end of the function; returns 1 on an
// option-parsing error, a missing base_path, or a pre-existing directory without
// --force.
//
// NOTE(review): this listing is an extract; listing line 338 (the body of the final
// else branch) is absent.
164 int main(int argc, char* argv[]) {
165  std::string base_path;
166  bool force = false;
167  bool skip_geo = false;
168  namespace po = boost::program_options;
169 
170  po::options_description desc("Options");
171  desc.add_options()("help,h", "Print help messages ")(
172  "data",
173  po::value<std::string>(&base_path)->required(),
174  "Directory path to HeavyDB catalogs")("force,f",
175  "Force overwriting of existing HeavyDB "
176  "instance")("skip-geo",
177  "Skip inserting sample geo data");
178 
    // Writes directly into the global g_enable_thrift_logs; its current value is the
    // default, and giving the flag without a value sets it to true.
179  desc.add_options()("enable-thrift-logs",
180  po::value<bool>(&g_enable_thrift_logs)
181  ->default_value(g_enable_thrift_logs)
182  ->implicit_value(true),
183  "Enable writing messages directly from thrift to stdout/stderr.");
184 
    // Logger options are merged into the same description so they are parsed together.
185  logger::LogOptions log_options(argv[0]);
186  desc.add(log_options.get_options());
187 
    // The first positional argument is treated as the "data" option.
188  po::positional_options_description positionalOptions;
189  positionalOptions.add("data", 1);
190 
191  po::variables_map vm;
192 
193  try {
194  po::store(po::command_line_parser(argc, argv)
195  .options(desc)
196  .positional(positionalOptions)
197  .run(),
198  vm);
    // "help" is checked before po::notify() so that --help succeeds even when the
    // required "data" option is missing (notify is where required options are enforced).
199  if (vm.count("help")) {
200  std::cout << desc;
201  return 0;
202  }
203  if (vm.count("force")) {
204  force = true;
205  }
206  if (vm.count("skip-geo")) {
207  skip_geo = true;
208  }
209  po::notify(vm);
210  } catch (boost::program_options::error& e) {
211  std::cerr << "Usage Error: " << e.what() << std::endl;
212  return 1;
213  }
214 
    // With thrift logs disabled, replace thrift's output function with an empty lambda
    // so its messages are discarded.
215  if (!g_enable_thrift_logs) {
216  apache::thrift::GlobalOutput.setOutputFunction([](const char* msg) {});
217  }
218 
219  if (!boost::filesystem::exists(base_path)) {
220  std::cerr << "Catalog basepath " + base_path + " does not exist.\n";
221  return 1;
222  }
    // Each of the following seven sections applies the same rule to one path: if it
    // already exists, delete it recursively under --force, otherwise report and return 1.
    // NOTE(review): the boost::filesystem calls used here are the throwing overloads and
    // are not wrapped in try/catch.
223  std::string catalogs_path = base_path + "/" + shared::kCatalogDirectoryName;
224  if (boost::filesystem::exists(catalogs_path)) {
225  if (force) {
226  boost::filesystem::remove_all(catalogs_path);
227  } else {
228  std::cerr << "HeavyDB catalogs directory already exists at " + catalogs_path +
229  ". Use -f to force reinitialization.\n";
230  return 1;
231  }
232  }
233  std::string data_path = base_path + "/" + shared::kDataDirectoryName;
234  if (boost::filesystem::exists(data_path)) {
235  if (force) {
236  boost::filesystem::remove_all(data_path);
237  } else {
238  std::cerr << "HeavyDB data directory already exists at " + data_path +
239  ". Use -f to force reinitialization.\n";
240  return 1;
241  }
242  }
243  std::string lockfiles_path = base_path + "/" + shared::kLockfilesDirectoryName;
244  if (boost::filesystem::exists(lockfiles_path)) {
245  if (force) {
246  boost::filesystem::remove_all(lockfiles_path);
247  } else {
248  std::cerr << "HeavyDB lockfiles directory already exists at " + lockfiles_path +
249  ". Use -f to force reinitialization.\n";
250  return 1;
251  }
252  }
    // lockfiles_path2 and lockfiles_path3 live under lockfiles_path, so under --force
    // they have already been removed by the remove_all() above.
253  std::string lockfiles_path2 = lockfiles_path + "/" + shared::kCatalogDirectoryName;
254  if (boost::filesystem::exists(lockfiles_path2)) {
255  if (force) {
256  boost::filesystem::remove_all(lockfiles_path2);
257  } else {
258  std::cerr << "HeavyDB lockfiles catalogs directory already exists at " +
259  lockfiles_path2 + ". Use -f to force reinitialization.\n";
260  return 1;
261  }
262  }
263  std::string lockfiles_path3 = lockfiles_path + "/" + shared::kDataDirectoryName;
264  if (boost::filesystem::exists(lockfiles_path3)) {
265  if (force) {
266  boost::filesystem::remove_all(lockfiles_path3);
267  } else {
268  std::cerr << "HeavyDB lockfiles data directory already exists at " +
269  lockfiles_path3 + ". Use -f to force reinitialization.\n";
270  return 1;
271  }
272  }
273  std::string export_path = base_path + "/" + shared::kDefaultExportDirName;
274  if (boost::filesystem::exists(export_path)) {
275  if (force) {
276  boost::filesystem::remove_all(export_path);
277  } else {
278  std::cerr << "HeavyDB export directory already exists at " + export_path +
279  ". Use -f to force reinitialization.\n";
280  return 1;
281  }
282  }
283  std::string disk_cache_path = base_path + "/" + shared::kDefaultDiskCacheDirName;
284  if (boost::filesystem::exists(disk_cache_path)) {
285  if (force) {
286  boost::filesystem::remove_all(disk_cache_path);
287  } else {
288  std::cerr << "HeavyDB disk cache already exists at " + disk_cache_path +
289  ". Use -f to force reinitialization.\n";
290  return 1;
291  }
292  }
293 
    // Create the directory layout. lockfiles_path is created before its two children.
    // NOTE(review): a failed create_directory() is only reported; execution continues
    // and the exit status is unaffected - confirm whether this should return 1.
    // NOTE(review): data_path and disk_cache_path are checked/removed above but are not
    // created here; presumably other components create them - verify.
294  if (!boost::filesystem::create_directory(catalogs_path)) {
295  std::cerr << "Cannot create " + shared::kCatalogDirectoryName + " subdirectory under "
296  << base_path << std::endl;
297  }
298  if (!boost::filesystem::create_directory(lockfiles_path)) {
299  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName +
300  " subdirectory under "
301  << base_path << std::endl;
302  }
303  if (!boost::filesystem::create_directory(lockfiles_path2)) {
304  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
305  shared::kCatalogDirectoryName + " subdirectory under "
306  << base_path << std::endl;
307  }
308  if (!boost::filesystem::create_directory(lockfiles_path3)) {
309  std::cerr << "Cannot create " + shared::kLockfilesDirectoryName + "/" +
310  shared::kDataDirectoryName + " subdirectory under "
311  << base_path << std::endl;
312  }
313  if (!boost::filesystem::create_directory(export_path)) {
314  std::cerr << "Cannot create " + shared::kDefaultExportDirName + " subdirectory under "
315  << base_path << std::endl;
316  }
317 
    // Logging is initialized only after the directory layout has been set up.
318  log_options.set_base_path(base_path);
319  logger::init(log_options);
320 
    // Initialize the system catalog with a DataMgr rooted at data_path and a Calcite
    // instance constructed with CALCITEPORT. Also publishes base_path through the
    // global g_base_path.
321  try {
322  SystemParameters sys_parms;
323  auto dummy = std::make_shared<Data_Namespace::DataMgr>(
324  data_path, sys_parms, nullptr, false, 0);
325  auto calcite =
326  std::make_shared<Calcite>(-1, CALCITEPORT, base_path, 1024, 5000, true, "");
327  g_base_path = base_path;
328  auto& sys_cat = Catalog_Namespace::SysCatalog::instance();
329  sys_cat.init(base_path, dummy, {}, calcite, true, false, {});
330 
    // NOTE(review): a failure here is printed but not treated as fatal - execution still
    // reaches loadGeo() (unless --skip-geo) and the function still returns 0.
331  } catch (std::exception& e) {
332  std::cerr << "Exception: " << e.what() << "\n";
333  }
334 
    // loadGeo() is called outside any try block, so an exception it throws propagates
    // out of main().
335  if (!skip_geo) {
336  loadGeo(base_path);
337  } else {
    // NOTE(review): listing line 338 (the body of this branch) is absent from this extract.
339  }
340 
341  return 0;
342 }
const std::string kDataDirectoryName
static void initialize(const std::string &data_dir, const std::string &allowed_import_paths, const std::string &allowed_export_paths)
Definition: DdlUtils.cpp:878
std::string get_root_abs_path()
const std::string kDefaultDiskCacheDirName
const std::string kDefaultExportDirName
This file contains the class specification and related data structures for Catalog.
static const std::array< std::string, 3 > SampleGeoFileNames
Definition: initdb.cpp:34
static SysCatalog & instance()
Definition: SysCatalog.h:343
const std::string kDefaultDbName
std::string g_base_path
Definition: SysCatalog.cpp:62
void init(LogOptions const &log_opts)
Definition: Logger.cpp:364
static void loadGeo(std::string base_path)
Definition: initdb.cpp:43
static const std::array< std::string, 3 > SampleGeoTableNames
Definition: initdb.cpp:37
const std::string kRootUsername
#define CALCITEPORT
Definition: initdb.cpp:32
const std::string kCatalogDirectoryName
boost::program_options::options_description const & get_options() const
void set_base_path(std::string const &base_path)
bool g_cluster
const std::string kLockfilesDirectoryName
static std::string getDefaultPath(const std::string &base_path)
static bool run
size_t g_leaf_count
Definition: ParserNode.cpp:79
bool g_enable_thrift_logs
Definition: HeavyDB.cpp:298