OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
CopyParams.h
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 /*
18  * @file CopyParams.h
19  * @brief CopyParams struct
20  *
21  */
22 
23 #pragma once
24 
25 #include <optional>
26 #include <string>
27 
29 #include "Shared/sqltypes.h"
30 
31 namespace import_export {
32 
33 // Import file buffer size, 1 << 23 = 8,388,608 (presumably bytes, i.e. 8 MiB): a trade-off, since a larger buffer needs more memory and a smaller one causes many thread forks
34 constexpr static size_t kImportFileBufferSize = (1 << 23);
35 
36 // Upper limit, 1024 * 1024 * 1024 bytes (1 GiB), up to which import buffers may grow if necessary
37 constexpr static size_t max_import_buffer_resize_byte_size = 1024 * 1024 * 1024;
38 
42 
// Plain struct of option fields, grouped by the section comments below
// (delimiters/quoting, s3/parquet, kafka, geospatial, odbc, regex), plus two
// constructors that fill in default values.
// NOTE(review): the embedded line numbers in this listing skip values (e.g. 45-46,
// 51-52, 57, 59, 66, 110, 113, 128-129), so some declarations and initializers are
// not visible here. Members such as array_delim, array_begin, source_type,
// trim_spaces, geo_validate_geometry and raster_drop_if_all_null are initialized
// below although their declarations are not shown - confirm against the real header.
43 struct CopyParams {
44  char delimiter; // ',' in the first constructor below; argument d in the second
45  std::string null_str; // "\\N" in the first constructor below; argument n in the second
47  bool quoted; // whether the input has quoted fields; both constructors below initialize this to true
48  char quote; // '"' in both constructors
49  char escape; // '"' in both constructors
50  char line_delim; // '\n' in the first constructor below; argument l in the second
53  char array_end; // '}' in both constructors
54  int threads; // 0 in both constructors
55  size_t
56  max_reject; // maximum number of records that may be rejected before the copy fails (100000 in both constructors)
58  bool plain_text = false;
60  // s3/parquet related params
61  std::string s3_access_key; // per-query credentials to override the
62  std::string s3_secret_key; // settings in ~/.aws/credentials or environment
63  std::string s3_session_token = "";
64  std::string s3_region;
65  std::string s3_endpoint;
67  8; // maximum number of concurrent file downloads from S3 (default value of the declaration started on line 66, which is not shown; the cross-reference list names it s3_max_concurrent_downloads)
68  // kafka related params
69  size_t retry_count; // 100 in the first constructor below; argument retries in the second
70  size_t retry_wait; // 5 in the first constructor below; argument wait in the second
71  size_t batch_size; // 1000 in the first constructor below; argument b in the second
72  size_t buffer_size; // initializer not visible in this listing
74  // geospatial params
75  bool lonlat; // true in both constructors
79  int32_t geo_coords_srid; // 4326 in both constructors
81  std::string geo_layer_name;
84  int32_t source_srid; // 0 in both constructors
85  std::optional<std::string> regex_path_filter;
86  std::optional<std::string> file_sort_order_by;
87  std::optional<std::string> file_sort_regex;
89  std::string raster_import_bands;
94  std::string add_metadata_columns;
96  // odbc parameters
97  std::string sql_select;
98  std::string sql_order_by;
99  // odbc user mapping parameters
100  std::string username;
101  std::string password;
102  std::string credential_string;
103  // odbc server parameters
104  std::string dsn;
105  std::string connection_string;
106  // regex parameters
107  std::string line_start_regex;
108  std::string line_regex;
109 
// Initializer list of the first constructor. Its declaration line (110) is not
// shown in this listing; presumably it is the zero-argument constructor - confirm.
// Every visible member is given a fixed default value.
111  : delimiter(',')
112  , null_str("\\N")
114  , quoted(true)
115  , quote('"')
116  , escape('"')
117  , line_delim('\n')
118  , array_delim(',')
119  , array_begin('{')
120  , array_end('}')
121  , threads(0)
122  , max_reject(100000)
123  , source_type(import_export::SourceType::kDelimitedFile)
124  , trim_spaces(true)
125  , retry_count(100)
126  , retry_wait(5)
127  , batch_size(1000)
130  , lonlat(true)
134  , geo_coords_srid(4326)
137  , geo_validate_geometry{false}
138  , source_srid(0)
143  , raster_drop_if_all_null{false} {}
144 
// Constructor taking explicit values for six fields:
//   d -> delimiter, n -> null_str, l -> line_delim,
//   b -> batch_size, retries -> retry_count, wait -> retry_wait.
// All other visible initializers use the same values as the list above.
145  CopyParams(char d, const std::string& n, char l, size_t b, size_t retries, size_t wait)
146  : delimiter(d)
147  , null_str(n)
149  , quoted(true)
150  , quote('"')
151  , escape('"')
152  , line_delim(l)
153  , array_delim(',')
154  , array_begin('{')
155  , array_end('}')
156  , threads(0)
157  , max_reject(100000)
158  , source_type(import_export::SourceType::kDelimitedFile)
159  , trim_spaces(true)
160  , retry_count(retries)
161  , retry_wait(wait)
162  , batch_size(b)
165  , lonlat(true)
169  , geo_coords_srid(4326)
172  , geo_validate_geometry{false}
173  , source_srid(0)
178  , raster_drop_if_all_null{false} {}
179 };
180 
181 } // namespace import_export
std::string s3_secret_key
Definition: CopyParams.h:62
int32_t raster_scanlines_per_thread
Definition: CopyParams.h:90
SQLTypes
Definition: sqltypes.h:65
Constants for Builtin SQL Types supported by HEAVY.AI.
std::string connection_string
Definition: CopyParams.h:105
std::string raster_import_dimensions
Definition: CopyParams.h:93
std::string add_metadata_columns
Definition: CopyParams.h:94
ImportHeaderRow has_header
Definition: CopyParams.h:46
EncodingType
Definition: sqltypes.h:240
std::optional< std::string > regex_path_filter
Definition: CopyParams.h:85
RasterPointType raster_point_type
Definition: CopyParams.h:88
int32_t s3_max_concurrent_downloads
Definition: CopyParams.h:66
std::string sql_order_by
Definition: CopyParams.h:98
import_export::SourceType source_type
Definition: CopyParams.h:57
bool g_enable_smem_group_by true
std::string geo_layer_name
Definition: CopyParams.h:81
std::string line_start_regex
Definition: CopyParams.h:107
std::string s3_session_token
Definition: CopyParams.h:63
CopyParams(char d, const std::string &n, char l, size_t b, size_t retries, size_t wait)
Definition: CopyParams.h:145
std::string raster_import_bands
Definition: CopyParams.h:89
bool g_enable_watchdog false
Definition: Execute.cpp:80
static constexpr size_t max_import_buffer_resize_byte_size
Definition: CopyParams.h:37
static constexpr size_t kImportFileBufferSize
Definition: CopyParams.h:34
constexpr double n
Definition: Utm.h:38
std::string s3_access_key
Definition: CopyParams.h:61
RasterPointTransform raster_point_transform
Definition: CopyParams.h:91
std::optional< std::string > file_sort_order_by
Definition: CopyParams.h:86
Shared Enum.
std::string credential_string
Definition: CopyParams.h:102
std::optional< std::string > file_sort_regex
Definition: CopyParams.h:87
EncodingType geo_coords_encoding
Definition: CopyParams.h:76