OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ddl_utils Namespace Reference

Namespaces

 alter_column_utils
 
 anonymous_namespace{DdlUtils.cpp}
 

Classes

class  SqlType
 
class  Encoding
 
class  FilePathWhitelist
 
class  FilePathBlacklist
 

Enumerations

enum  DataTransferType { DataTransferType::IMPORT = 1, DataTransferType::EXPORT }
 
enum  TableType { TableType::TABLE = 1, TableType::VIEW, TableType::FOREIGN_TABLE }
 

Functions

void set_default_encoding (ColumnDescriptor &cd)
 
void validate_and_set_fixed_encoding (ColumnDescriptor &cd, int encoding_size, const SqlType *column_type)
 
void validate_and_set_dictionary_encoding (ColumnDescriptor &cd, int encoding_size)
 
void validate_and_set_none_encoding (ColumnDescriptor &cd)
 
void validate_and_set_sparse_encoding (ColumnDescriptor &cd, int encoding_size)
 
void validate_and_set_compressed_encoding (ColumnDescriptor &cd, int encoding_size)
 
void validate_and_set_date_encoding (ColumnDescriptor &cd, int encoding_size)
 
void validate_and_set_encoding (ColumnDescriptor &cd, const Encoding *encoding, const SqlType *column_type)
 
void validate_and_set_type (ColumnDescriptor &cd, SqlType *column_type)
 
void validate_and_set_array_size (ColumnDescriptor &cd, const SqlType *column_type)
 
void validate_and_set_default_value (ColumnDescriptor &cd, const std::string *default_value, bool not_null)
 
void set_column_descriptor (const std::string &column_name, ColumnDescriptor &cd, SqlType *column_type, const bool not_null, const Encoding *encoding, const std::string *default_value)
 
void set_default_table_attributes (const std::string &table_name, TableDescriptor &td, const int32_t column_count)
 
void validate_non_duplicate_column (const std::string &column_name, std::unordered_set< std::string > &upper_column_names)
 
void validate_non_reserved_keyword (const std::string &column_name)
 
void validate_table_type (const TableDescriptor *td, const TableType expected_table_type, const std::string &command)
 
std::string table_type_enum_to_string (const TableType table_type)
 
std::string get_malformed_config_error_message (const std::string &config_key)
 
void validate_expanded_file_path (const std::string &file_path, const std::vector< std::string > &whitelisted_root_paths)
 
std::vector< std::string > get_expanded_file_paths (const std::string &file_path, const DataTransferType data_transfer_type)
 
void validate_allowed_file_path (const std::string &file_path, const DataTransferType data_transfer_type, const bool allow_wildcards)
 
void set_whitelisted_paths (const std::string &config_key, const std::string &config_value, std::vector< std::string > &whitelisted_paths)
 

Enumeration Type Documentation

enum ddl_utils::DataTransferType
strong
Enumerator
IMPORT 
EXPORT 

Definition at line 80 of file DdlUtils.h.

enum ddl_utils::TableType
strong
Enumerator
TABLE 
VIEW 
FOREIGN_TABLE 

Definition at line 107 of file DdlUtils.h.

Function Documentation

std::vector<std::string> ddl_utils::get_expanded_file_paths ( const std::string &  file_path,
const DataTransferType  data_transfer_type 
)

Definition at line 798 of file DdlUtils.cpp.

References IMPORT, and shared::local_glob_filter_sort_files().

Referenced by validate_allowed_file_path().

800  {
801  std::vector<std::string> file_paths;
802  if (data_transfer_type == DataTransferType::IMPORT) {
803  file_paths = shared::local_glob_filter_sort_files(file_path, {});
804  } else {
805  std::string path;
806  if (!boost::filesystem::exists(file_path)) {
807  // For exports, it is possible to provide a path to a new (nonexistent) file. In
808  // this case, validate using the parent path.
809  path = boost::filesystem::path(file_path).parent_path().string();
810  if (!boost::filesystem::exists(path)) {
811  throw std::runtime_error{"File or directory \"" + file_path +
812  "\" does not exist."};
813  }
814  } else {
815  path = file_path;
816  }
817  file_paths = {path};
818  }
819  return file_paths;
820 }
std::vector< std::string > local_glob_filter_sort_files(const std::string &file_path, const FilePathOptions &options, const bool recurse)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string ddl_utils::get_malformed_config_error_message ( const std::string &  config_key)

Definition at line 775 of file DdlUtils.cpp.

Referenced by set_whitelisted_paths().

775  {
776  return "Configuration value for \"" + config_key +
777  "\" is malformed. Value should be a list of paths with format: [ "
778  "\"root-path-1\", \"root-path-2\", ... ]";
779 }

+ Here is the caller graph for this function:

void ddl_utils::set_column_descriptor ( const std::string &  column_name,
ColumnDescriptor &  cd,
SqlType *  column_type,
const bool  not_null,
const Encoding *  encoding,
const std::string *  default_value 
)

Definition at line 698 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, ColumnDescriptor::isSystemCol, ColumnDescriptor::isVirtualCol, SQLTypeInfo::set_notnull(), validate_and_set_array_size(), validate_and_set_default_value(), validate_and_set_encoding(), and validate_and_set_type().

Referenced by Parser::set_column_descriptor(), and CreateForeignTableCommand::setColumnDetails().

703  {
704  cd.columnName = column_name;
705  validate_and_set_type(cd, column_type);
706  cd.columnType.set_notnull(not_null);
707  validate_and_set_encoding(cd, encoding, column_type);
708  validate_and_set_array_size(cd, column_type);
709  cd.isSystemCol = false;
710  cd.isVirtualCol = false;
711  validate_and_set_default_value(cd, default_value, not_null);
712 }
void validate_and_set_array_size(ColumnDescriptor &cd, const SqlType *column_type)
Definition: DdlUtils.cpp:531
void validate_and_set_encoding(ColumnDescriptor &cd, const Encoding *encoding, const SqlType *column_type)
Definition: DdlUtils.cpp:472
void set_notnull(bool n)
Definition: sqltypes.h:477
void validate_and_set_default_value(ColumnDescriptor &cd, const std::string *default_value, bool not_null)
Definition: DdlUtils.cpp:677
SQLTypeInfo columnType
std::string columnName
void validate_and_set_type(ColumnDescriptor &cd, SqlType *column_type)
Definition: DdlUtils.cpp:508

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::set_default_encoding ( ColumnDescriptor &  cd)

Definition at line 251 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, g_use_date_in_days_default_encoding, SQLTypeInfo::get_output_srid(), SQLTypeInfo::get_precision(), SQLTypeInfo::get_type(), SQLTypeInfo::is_decimal(), SQLTypeInfo::is_geometry(), SQLTypeInfo::is_string(), SQLTypeInfo::is_string_array(), kDATE, kENCODING_DATE_IN_DAYS, kENCODING_DICT, kENCODING_FIXED, kENCODING_GEOINT, kENCODING_NONE, sql_constants::kMaxNumericPrecision, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), and to_string().

Referenced by validate_and_set_encoding().

251  {
252  // Change default TEXT column behaviour to be DICT encoded
253  if (cd.columnType.is_string() || cd.columnType.is_string_array()) {
254  // default to 32-bits
256  cd.columnType.set_comp_param(32);
257  } else if (cd.columnType.is_decimal() && cd.columnType.get_precision() <= 4) {
259  cd.columnType.set_comp_param(16);
260  } else if (cd.columnType.is_decimal() && cd.columnType.get_precision() <= 9) {
262  cd.columnType.set_comp_param(32);
263  } else if (cd.columnType.is_decimal() &&
265  throw std::runtime_error(cd.columnName + ": Precision too high, max " +
267  } else if (cd.columnType.is_geometry() && cd.columnType.get_output_srid() == 4326) {
268  // default to GEOINT 32-bits
270  cd.columnType.set_comp_param(32);
272  // Days encoding for DATE
275  } else {
278  }
279 }
void set_compression(EncodingType c)
Definition: sqltypes.h:481
static constexpr int32_t kMaxNumericPrecision
Definition: sqltypes.h:58
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
std::string to_string(char const *&&v)
bool g_use_date_in_days_default_encoding
Definition: DdlUtils.cpp:35
int get_precision() const
Definition: sqltypes.h:394
void set_comp_param(int p)
Definition: sqltypes.h:482
Definition: sqltypes.h:80
bool is_geometry() const
Definition: sqltypes.h:597
SQLTypeInfo columnType
bool is_string() const
Definition: sqltypes.h:561
bool is_string_array() const
Definition: sqltypes.h:564
bool is_decimal() const
Definition: sqltypes.h:570
std::string columnName
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:397

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::set_default_table_attributes ( const std::string &  table_name,
TableDescriptor &  td,
const int32_t  column_count 
)

Definition at line 714 of file DdlUtils.cpp.

References DEFAULT_FRAGMENT_ROWS, DEFAULT_MAX_CHUNK_SIZE, DEFAULT_MAX_ROWS, DEFAULT_PAGE_SIZE, TableDescriptor::fragmenter, TableDescriptor::fragPageSize, TableDescriptor::fragType, Fragmenter_Namespace::INSERT_ORDER, TableDescriptor::isView, TableDescriptor::maxChunkSize, TableDescriptor::maxFragRows, TableDescriptor::maxRows, TableDescriptor::nColumns, and TableDescriptor::tableName.

Referenced by Parser::CreateTableStmt::executeDryRun(), and CreateForeignTableCommand::setTableDetails().

716  {
717  td.tableName = table_name;
718  td.nColumns = column_count;
719  td.isView = false;
720  td.fragmenter = nullptr;
726 }
std::string tableName
#define DEFAULT_MAX_CHUNK_SIZE
#define DEFAULT_MAX_ROWS
std::shared_ptr< Fragmenter_Namespace::AbstractFragmenter > fragmenter
#define DEFAULT_PAGE_SIZE
#define DEFAULT_FRAGMENT_ROWS
Fragmenter_Namespace::FragmenterType fragType

+ Here is the caller graph for this function:

void ddl_utils::set_whitelisted_paths ( const std::string &  config_key,
const std::string &  config_value,
std::vector< std::string > &  whitelisted_paths 
)

Definition at line 855 of file DdlUtils.cpp.

References get_malformed_config_error_message(), logger::INFO, LOG, and shared::printContainer().

Referenced by ddl_utils::FilePathWhitelist::initialize().

857  {
858  rapidjson::Document whitelisted_root_paths;
859  whitelisted_root_paths.Parse(config_value);
860  if (!whitelisted_root_paths.IsArray()) {
861  throw std::runtime_error{get_malformed_config_error_message(config_key)};
862  }
863  for (const auto& root_path : whitelisted_root_paths.GetArray()) {
864  if (!root_path.IsString()) {
865  throw std::runtime_error{get_malformed_config_error_message(config_key)};
866  }
867  if (!boost::filesystem::exists(root_path.GetString())) {
868  throw std::runtime_error{"Whitelisted root path \"" +
869  std::string{root_path.GetString()} + "\" does not exist."};
870  }
871  whitelisted_paths.emplace_back(
872  boost::filesystem::canonical(root_path.GetString()).string());
873  }
874  LOG(INFO) << "Parsed " << config_key << ": "
875  << shared::printContainer(whitelisted_paths);
876 }
#define LOG(tag)
Definition: Logger.h:285
std::string get_malformed_config_error_message(const std::string &config_key)
Definition: DdlUtils.cpp:775
PrintContainer< CONTAINER > printContainer(CONTAINER &container)
Definition: misc.h:108

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::string ddl_utils::table_type_enum_to_string ( const TableType  table_type)

Definition at line 762 of file DdlUtils.cpp.

References FOREIGN_TABLE, TABLE, and VIEW.

762  {
763  if (table_type == ddl_utils::TableType::TABLE) {
764  return "Table";
765  }
766  if (table_type == ddl_utils::TableType::FOREIGN_TABLE) {
767  return "ForeignTable";
768  }
769  if (table_type == ddl_utils::TableType::VIEW) {
770  return "View";
771  }
772  throw std::runtime_error{"Unexpected table type"};
773 }
void ddl_utils::validate_allowed_file_path ( const std::string &  file_path,
const DataTransferType  data_transfer_type,
const bool  allow_wildcards = false 
)

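Validates that the given file path is allowed. Validation entails ensuring that the given path is not under a blacklisted root path and, if whitelisted paths have been configured, that it is under a whitelisted path. Also rejects punctuation characters other than: . / _ + - = : ~ (plus * when allow_wildcards is true). Note that the punctuation check in the listing below relies on std::ispunct, which does not classify a space as punctuation, so spaces are not rejected by that check.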

Parameters
file_path - file path to validate
data_transfer_type - enum indicating whether validation is for an import or export use case
allow_wildcards - bool indicating if wildcards are allowed

Definition at line 822 of file DdlUtils.cpp.

References get_expanded_file_paths(), ddl_utils::FilePathBlacklist::isBlacklistedPath(), and ddl_utils::FilePathWhitelist::validateWhitelistedFilePath().

Referenced by TableArchiver::dumpTable(), Parser::CopyTableStmt::execute(), Parser::ExportQueryStmt::execute(), TableArchiver::restoreTable(), anonymous_namespace{TableArchiver.cpp}::simple_file_cat(), anonymous_namespace{DBHandler.cpp}::validate_import_file_path_if_local(), and foreign_storage::AbstractFileStorageDataWrapper::validateFilePath().

824  {
825  // Reject any punctuation characters except for a few safe ones.
826  // Some punctuation characters present a security risk when passed
827  // to subprocesses. Don't change this without a security review.
828  static const std::string safe_punctuation{"./_+-=:~"};
829  for (const auto& ch : file_path) {
830  if (std::ispunct(ch) && safe_punctuation.find(ch) == std::string::npos &&
831  !(allow_wildcards && ch == '*')) {
832  throw std::runtime_error(std::string("Punctuation \"") + ch +
833  "\" is not allowed in file path: " + file_path);
834  }
835  }
836 
837  // Enforce our whitelist and blacklist for file paths.
838  const auto& expanded_file_paths =
839  get_expanded_file_paths(file_path, data_transfer_type);
840  for (const auto& path : expanded_file_paths) {
841  if (FilePathBlacklist::isBlacklistedPath(path)) {
842  const auto& canonical_file_path = boost::filesystem::canonical(file_path);
843  if (canonical_file_path == boost::filesystem::absolute(file_path)) {
844  throw std::runtime_error{"Access to file or directory path \"" + file_path +
845  "\" is not allowed."};
846  }
847  throw std::runtime_error{"Access to file or directory path \"" + file_path +
848  "\" (resolved to \"" + canonical_file_path.string() +
849  "\") is not allowed."};
850  }
851  }
852  FilePathWhitelist::validateWhitelistedFilePath(expanded_file_paths, data_transfer_type);
853 }
std::vector< std::string > get_expanded_file_paths(const std::string &file_path, const DataTransferType data_transfer_type)
Definition: DdlUtils.cpp:798

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_and_set_array_size ( ColumnDescriptor &  cd,
const SqlType *  column_type 
)

Definition at line 531 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, ddl_utils::SqlType::get_array_size(), SQLTypeInfo::get_compression(), SQLTypeInfo::get_elem_type(), ddl_utils::SqlType::get_is_array(), SQLTypeInfo::get_size(), SQLTypeInfo::is_string_array(), kENCODING_DICT, SQLTypeInfo::set_fixed_size(), and SQLTypeInfo::set_size().

Referenced by set_column_descriptor().

531  {
532  if (cd.columnType.is_string_array() &&
534  throw std::runtime_error(
535  cd.columnName +
536  ": Array of strings must be dictionary encoded. Specify ENCODING DICT");
537  }
538 
539  if (column_type->get_is_array()) {
540  int s = -1;
541  auto array_size = column_type->get_array_size();
542  if (array_size > 0) {
543  auto sti = cd.columnType.get_elem_type();
544  s = array_size * sti.get_size();
545  if (s <= 0) {
546  throw std::runtime_error(cd.columnName + ": Unexpected fixed length array size");
547  }
548  }
549  cd.columnType.set_size(s);
550 
551  } else {
553  }
554 }
void set_size(int s)
Definition: sqltypes.h:478
HOST DEVICE int get_size() const
Definition: sqltypes.h:403
void set_fixed_size()
Definition: sqltypes.h:479
HOST DEVICE EncodingType get_compression() const
Definition: sqltypes.h:399
SQLTypeInfo columnType
bool is_string_array() const
Definition: sqltypes.h:564
SQLTypeInfo get_elem_type() const
Definition: sqltypes.h:977
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_and_set_compressed_encoding ( ColumnDescriptor &  cd,
int  encoding_size 
)

Definition at line 433 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_output_srid(), SQLTypeInfo::is_geometry(), kENCODING_GEOINT, SQLTypeInfo::set_comp_param(), and SQLTypeInfo::set_compression().

Referenced by validate_and_set_encoding().

433  {
434  if (!cd.columnType.is_geometry() || cd.columnType.get_output_srid() != 4326) {
435  throw std::runtime_error(
436  cd.columnName + ": COMPRESSED encoding is only supported on WGS84 geo columns.");
437  }
438  int comp_param;
439  if (encoding_size == 0) {
440  comp_param = 32; // default to 32-bits
441  } else {
442  comp_param = encoding_size;
443  }
444  if (comp_param != 32) {
445  throw std::runtime_error(cd.columnName +
446  ": only 32-bit COMPRESSED geo encoding is supported");
447  }
448  // encoding longitude/latitude as integers
450  cd.columnType.set_comp_param(comp_param);
451 }
void set_compression(EncodingType c)
Definition: sqltypes.h:481
void set_comp_param(int p)
Definition: sqltypes.h:482
bool is_geometry() const
Definition: sqltypes.h:597
SQLTypeInfo columnType
std::string columnName
HOST DEVICE int get_output_srid() const
Definition: sqltypes.h:397

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_and_set_date_encoding ( ColumnDescriptor &  cd,
int  encoding_size 
)

Definition at line 453 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), kARRAY, kDATE, kENCODING_DATE_IN_DAYS, SQLTypeInfo::set_comp_param(), and SQLTypeInfo::set_compression().

Referenced by validate_and_set_encoding().

453  {
454  // days encoding for dates
455  if (cd.columnType.get_type() == kARRAY && cd.columnType.get_subtype() == kDATE) {
456  throw std::runtime_error(cd.columnName +
457  ": Cannot apply days encoding to date array.");
458  }
459  if (cd.columnType.get_type() != kDATE) {
460  throw std::runtime_error(cd.columnName +
461  ": Days encoding is only supported for DATE columns.");
462  }
463  if (encoding_size != 32 && encoding_size != 16) {
464  throw std::runtime_error(cd.columnName +
465  ": Compression parameter for Days encoding on "
466  "DATE must be 16 or 32.");
467  }
469  cd.columnType.set_comp_param((encoding_size == 16) ? 16 : 0);
470 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
void set_compression(EncodingType c)
Definition: sqltypes.h:481
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
void set_comp_param(int p)
Definition: sqltypes.h:482
Definition: sqltypes.h:80
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_and_set_default_value ( ColumnDescriptor &  cd,
const std::string *  default_value,
bool  not_null 
)

Definition at line 677 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, ColumnDescriptor::default_value, SQLTypeInfo::is_geometry(), to_upper(), and ddl_utils::anonymous_namespace{DdlUtils.cpp}::validate_literal().

Referenced by set_column_descriptor().

679  {
680  bool is_null_literal =
681  default_value && ((to_upper(*default_value) == "NULL") ||
682  (cd.columnType.is_geometry() && default_value->empty()));
683  if (not_null && (is_null_literal)) {
684  throw std::runtime_error(cd.columnName +
685  ": cannot set default value to NULL for "
686  "NOT NULL column");
687  }
688  if (!default_value || is_null_literal) {
689  cd.default_value = std::nullopt;
690  return;
691  }
692  const auto& column_type = cd.columnType;
693  const auto& val = *default_value;
694  validate_literal(val, column_type, cd.columnName);
695  cd.default_value = std::make_optional(*default_value);
696 }
void validate_literal(const std::string &val, SQLTypeInfo column_type, const std::string &column_name)
Definition: DdlUtils.cpp:558
std::string to_upper(const std::string &str)
std::optional< std::string > default_value
bool is_geometry() const
Definition: sqltypes.h:597
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_and_set_dictionary_encoding ( ColumnDescriptor &  cd,
int  encoding_size 
)

Definition at line 379 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::is_string(), SQLTypeInfo::is_string_array(), kENCODING_DICT, SQLTypeInfo::set_comp_param(), and SQLTypeInfo::set_compression().

Referenced by validate_and_set_encoding().

379  {
380  if (!cd.columnType.is_string() && !cd.columnType.is_string_array()) {
381  throw std::runtime_error(
382  cd.columnName +
383  ": Dictionary encoding is only supported on string or string array columns.");
384  }
385  int comp_param;
386  if (encoding_size == 0) {
387  comp_param = 32; // default to 32-bits
388  } else {
389  comp_param = encoding_size;
390  }
391  if (cd.columnType.is_string_array() && comp_param != 32) {
392  throw std::runtime_error(cd.columnName +
393  ": Compression parameter for string arrays must be 32");
394  }
395  if (comp_param != 8 && comp_param != 16 && comp_param != 32) {
396  throw std::runtime_error(
397  cd.columnName +
398  ": Compression parameter for Dictionary encoding must be 8 or 16 or 32.");
399  }
400  // dictionary encoding
402  cd.columnType.set_comp_param(comp_param);
403 }
void set_compression(EncodingType c)
Definition: sqltypes.h:481
void set_comp_param(int p)
Definition: sqltypes.h:482
SQLTypeInfo columnType
bool is_string() const
Definition: sqltypes.h:561
bool is_string_array() const
Definition: sqltypes.h:564
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_and_set_encoding ( ColumnDescriptor &  cd,
const Encoding *  encoding,
const SqlType *  column_type 
)

Definition at line 472 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, ddl_utils::Encoding::get_encoding_name(), ddl_utils::Encoding::get_encoding_param(), kENCODING_DIFF, kENCODING_RL, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), set_default_encoding(), validate_and_set_compressed_encoding(), validate_and_set_date_encoding(), validate_and_set_dictionary_encoding(), validate_and_set_fixed_encoding(), validate_and_set_none_encoding(), and validate_and_set_sparse_encoding().

Referenced by set_column_descriptor().

474  {
475  if (encoding == nullptr) {
477  } else {
478  const std::string& comp = *encoding->get_encoding_name();
479  if (boost::iequals(comp, "fixed")) {
480  validate_and_set_fixed_encoding(cd, encoding->get_encoding_param(), column_type);
481  } else if (boost::iequals(comp, "rl")) {
482  // run length encoding
485  // throw std::runtime_error("RL(Run Length) encoding not supported yet.");
486  } else if (boost::iequals(comp, "diff")) {
487  // differential encoding
490  // throw std::runtime_error("DIFF(differential) encoding not supported yet.");
491  } else if (boost::iequals(comp, "dict")) {
492  validate_and_set_dictionary_encoding(cd, encoding->get_encoding_param());
493  } else if (boost::iequals(comp, "NONE")) {
495  } else if (boost::iequals(comp, "sparse")) {
496  validate_and_set_sparse_encoding(cd, encoding->get_encoding_param());
497  } else if (boost::iequals(comp, "compressed")) {
498  validate_and_set_compressed_encoding(cd, encoding->get_encoding_param());
499  } else if (boost::iequals(comp, "days")) {
500  validate_and_set_date_encoding(cd, encoding->get_encoding_param());
501  } else {
502  throw std::runtime_error(cd.columnName + ": Invalid column compression scheme " +
503  comp);
504  }
505  }
506 }
void set_compression(EncodingType c)
Definition: sqltypes.h:481
void validate_and_set_sparse_encoding(ColumnDescriptor &cd, int encoding_size)
Definition: DdlUtils.cpp:416
void validate_and_set_dictionary_encoding(ColumnDescriptor &cd, int encoding_size)
Definition: DdlUtils.cpp:379
void validate_and_set_none_encoding(ColumnDescriptor &cd)
Definition: DdlUtils.cpp:405
void set_default_encoding(ColumnDescriptor &cd)
Definition: DdlUtils.cpp:251
void set_comp_param(int p)
Definition: sqltypes.h:482
void validate_and_set_compressed_encoding(ColumnDescriptor &cd, int encoding_size)
Definition: DdlUtils.cpp:433
void validate_and_set_fixed_encoding(ColumnDescriptor &cd, int encoding_size, const SqlType *column_type)
Definition: DdlUtils.cpp:281
void validate_and_set_date_encoding(ColumnDescriptor &cd, int encoding_size)
Definition: DdlUtils.cpp:453
SQLTypeInfo columnType
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_and_set_fixed_encoding ( ColumnDescriptor &  cd,
int  encoding_size,
const SqlType *  column_type 
)

Definition at line 281 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_precision(), SQLTypeInfo::get_subtype(), SQLTypeInfo::get_type(), is_datetime(), SQLTypeInfo::is_high_precision_timestamp(), IS_INTEGER, kARRAY, kBIGINT, kDATE, kDECIMAL, kENCODING_DATE_IN_DAYS, kENCODING_FIXED, kINT, kNUMERIC, kSMALLINT, kTIME, kTIMESTAMP, kTINYINT, SQLTypeInfo::set_comp_param(), SQLTypeInfo::set_compression(), ddl_utils::SqlType::to_string(), and run_benchmark_import::type.

Referenced by validate_and_set_encoding().

283  {
284  auto type = cd.columnType.get_type();
285  // fixed-bits encoding
286  if (type == kARRAY) {
287  type = cd.columnType.get_subtype();
288  switch (type) {
289  case kTINYINT:
290  case kSMALLINT:
291  case kINT:
292  case kBIGINT:
293  case kDATE:
294  throw std::runtime_error(cd.columnName + ": Cannot apply FIXED encoding to " +
295  column_type->to_string() + " type array.");
296  break;
297  default:
298  break;
299  }
300  }
301 
302  if (!IS_INTEGER(type) && !is_datetime(type) &&
303  !(type == kDECIMAL || type == kNUMERIC)) {
304  throw std::runtime_error(
305  cd.columnName +
306  ": Fixed encoding is only supported for integer or time columns.");
307  }
308 
309  switch (type) {
310  case kSMALLINT:
311  if (encoding_size != 8) {
312  throw std::runtime_error(
313  cd.columnName +
314  ": Compression parameter for Fixed encoding on SMALLINT must be 8.");
315  }
316  break;
317  case kINT:
318  if (encoding_size != 8 && encoding_size != 16) {
319  throw std::runtime_error(
320  cd.columnName +
321  ": Compression parameter for Fixed encoding on INTEGER must be 8 or 16.");
322  }
323  break;
324  case kBIGINT:
325  if (encoding_size != 8 && encoding_size != 16 && encoding_size != 32) {
326  throw std::runtime_error(cd.columnName +
327  ": Compression parameter for Fixed encoding on "
328  "BIGINT must be 8 or 16 or 32.");
329  }
330  break;
331  case kTIMESTAMP:
332  case kTIME:
333  if (encoding_size != 32) {
334  throw std::runtime_error(cd.columnName +
335  ": Compression parameter for Fixed encoding on "
336  "TIME or TIMESTAMP must be 32.");
337  } else if (cd.columnType.is_high_precision_timestamp()) {
338  throw std::runtime_error("Fixed encoding is not supported for TIMESTAMP(3|6|9).");
339  }
340  break;
341  case kDECIMAL:
342  case kNUMERIC:
343  if (encoding_size != 32 && encoding_size != 16) {
344  throw std::runtime_error(cd.columnName +
345  ": Compression parameter for Fixed encoding on "
346  "DECIMAL must be 16 or 32.");
347  }
348 
349  if (encoding_size == 32 && cd.columnType.get_precision() > 9) {
350  throw std::runtime_error(cd.columnName +
351  ": Precision too high for Fixed(32) encoding, max 9.");
352  }
353 
354  if (encoding_size == 16 && cd.columnType.get_precision() > 4) {
355  throw std::runtime_error(cd.columnName +
356  ": Precision too high for Fixed(16) encoding, max 4.");
357  }
358  break;
359  case kDATE:
360  if (encoding_size != 32 && encoding_size != 16) {
361  throw std::runtime_error(cd.columnName +
362  ": Compression parameter for Fixed encoding on "
363  "DATE must be 16 or 32.");
364  }
365  break;
366  default:
367  throw std::runtime_error(cd.columnName + ": Cannot apply FIXED encoding to " +
368  column_type->to_string());
369  }
370  if (type == kDATE) {
372  cd.columnType.set_comp_param(16);
373  } else {
375  cd.columnType.set_comp_param(encoding_size);
376  }
377 }
HOST DEVICE SQLTypes get_subtype() const
Definition: sqltypes.h:392
void set_compression(EncodingType c)
Definition: sqltypes.h:481
Definition: sqltypes.h:76
HOST DEVICE SQLTypes get_type() const
Definition: sqltypes.h:391
int get_precision() const
Definition: sqltypes.h:394
void set_comp_param(int p)
Definition: sqltypes.h:482
Definition: sqltypes.h:80
#define IS_INTEGER(T)
Definition: sqltypes.h:304
bool is_high_precision_timestamp() const
Definition: sqltypes.h:1036
Definition: sqltypes.h:72
SQLTypeInfo columnType
std::string columnName
constexpr auto is_datetime(SQLTypes type)
Definition: sqltypes.h:325

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_and_set_none_encoding ( ColumnDescriptor &  cd)

Definition at line 405 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::is_geometry(), SQLTypeInfo::is_string(), SQLTypeInfo::is_string_array(), kENCODING_NONE, SQLTypeInfo::set_comp_param(), and SQLTypeInfo::set_compression().

Referenced by validate_and_set_encoding().

405  {
406  if (!cd.columnType.is_string() && !cd.columnType.is_string_array() &&
407  !cd.columnType.is_geometry()) {
408  throw std::runtime_error(
409  cd.columnName +
410  ": None encoding is only supported on string, string array, or geo columns.");
411  }
414 }
void set_compression(EncodingType c)
Definition: sqltypes.h:481
void set_comp_param(int p)
Definition: sqltypes.h:482
bool is_geometry() const
Definition: sqltypes.h:597
SQLTypeInfo columnType
bool is_string() const
Definition: sqltypes.h:561
bool is_string_array() const
Definition: sqltypes.h:564
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_and_set_sparse_encoding ( ColumnDescriptor &  cd,
int  encoding_size 
)

Definition at line 416 of file DdlUtils.cpp.

References ColumnDescriptor::columnName, ColumnDescriptor::columnType, SQLTypeInfo::get_notnull(), kENCODING_SPARSE, SQLTypeInfo::set_comp_param(), and SQLTypeInfo::set_compression().

Referenced by validate_and_set_encoding().

416  {
417  // sparse column encoding with mostly NULL values
418  if (cd.columnType.get_notnull()) {
419  throw std::runtime_error(cd.columnName +
420  ": Cannot do sparse column encoding on a NOT NULL column.");
421  }
422  if (encoding_size == 0 || encoding_size % 8 != 0 || encoding_size > 48) {
423  throw std::runtime_error(
424  cd.columnName +
425  "Must specify number of bits as 8, 16, 24, 32 or 48 as the parameter to "
426  "sparse-column encoding.");
427  }
429  cd.columnType.set_comp_param(encoding_size);
430  // throw std::runtime_error("SPARSE encoding not supported yet.");
431 }
void set_compression(EncodingType c)
Definition: sqltypes.h:481
void set_comp_param(int p)
Definition: sqltypes.h:482
SQLTypeInfo columnType
HOST DEVICE bool get_notnull() const
Definition: sqltypes.h:398
std::string columnName

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_and_set_type ( ColumnDescriptor & cd,
SqlType *  column_type 
)

Definition at line 508 of file DdlUtils.cpp.

References ddl_utils::SqlType::check_type(), ColumnDescriptor::columnType, ddl_utils::SqlType::get_is_array(), ddl_utils::SqlType::get_param1(), ddl_utils::SqlType::get_param2(), ddl_utils::SqlType::get_type(), IS_GEO, kARRAY, kGEOMETRY, SQLTypeInfo::set_dimension(), SQLTypeInfo::set_input_srid(), SQLTypeInfo::set_output_srid(), SQLTypeInfo::set_scale(), SQLTypeInfo::set_subtype(), and SQLTypeInfo::set_type().

Referenced by set_column_descriptor().

508  {
509  column_type->check_type();
510 
511  if (column_type->get_type() == kGEOMETRY) {
512  throw std::runtime_error("Unsupported type \"GEOMETRY\" specified.");
513  }
514 
515  if (column_type->get_is_array()) {
516  cd.columnType.set_type(kARRAY);
517  cd.columnType.set_subtype(column_type->get_type());
518  } else {
519  cd.columnType.set_type(column_type->get_type());
520  }
521  if (IS_GEO(column_type->get_type())) {
522  cd.columnType.set_subtype(static_cast<SQLTypes>(column_type->get_param1()));
523  cd.columnType.set_input_srid(column_type->get_param2());
524  cd.columnType.set_output_srid(column_type->get_param2());
525  } else {
526  cd.columnType.set_dimension(column_type->get_param1());
527  cd.columnType.set_scale(column_type->get_param2());
528  }
529 }
HOST DEVICE void set_subtype(SQLTypes st)
Definition: sqltypes.h:471
void set_input_srid(int d)
Definition: sqltypes.h:474
void set_scale(int s)
Definition: sqltypes.h:475
void set_output_srid(int s)
Definition: sqltypes.h:476
void set_dimension(int d)
Definition: sqltypes.h:472
SQLTypeInfo columnType
#define IS_GEO(T)
Definition: sqltypes.h:310
HOST DEVICE void set_type(SQLTypes t)
Definition: sqltypes.h:470

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void ddl_utils::validate_expanded_file_path ( const std::string &  file_path,
const std::vector< std::string > &  whitelisted_root_paths 
)

Definition at line 781 of file DdlUtils.cpp.

Referenced by ddl_utils::FilePathWhitelist::validateWhitelistedFilePath().

782  {
783  const auto& canonical_file_path = boost::filesystem::canonical(file_path);
784  for (const auto& root_path : whitelisted_root_paths) {
785  if (boost::istarts_with(canonical_file_path.string(), root_path)) {
786  return;
787  }
788  }
789  if (canonical_file_path == boost::filesystem::absolute(file_path)) {
790  throw std::runtime_error{"File or directory path \"" + file_path +
791  "\" is not whitelisted."};
792  }
793  throw std::runtime_error{"File or directory path \"" + file_path +
794  "\" (resolved to \"" + canonical_file_path.string() +
795  "\") is not whitelisted."};
796 }

+ Here is the caller graph for this function:

void ddl_utils::validate_non_duplicate_column ( const std::string &  column_name,
std::unordered_set< std::string > &  upper_column_names 
)

Definition at line 728 of file DdlUtils.cpp.

Referenced by Parser::CreateTableStmt::executeDryRun(), and CreateForeignTableCommand::setColumnDetails().

729  {
730  const auto upper_column_name = boost::to_upper_copy<std::string>(column_name);
731  const auto insert_it = upper_column_names.insert(upper_column_name);
732  if (!insert_it.second) {
733  throw std::runtime_error("Column '" + column_name + "' defined more than once");
734  }
735 }

+ Here is the caller graph for this function:

void ddl_utils::validate_non_reserved_keyword ( const std::string &  column_name)

Definition at line 737 of file DdlUtils.cpp.

References reserved_keywords.

Referenced by CreateForeignTableCommand::setColumnDetails().

737  {
738  const auto upper_column_name = boost::to_upper_copy<std::string>(column_name);
739  if (reserved_keywords.find(upper_column_name) != reserved_keywords.end()) {
740  throw std::runtime_error("Cannot create column with reserved keyword '" +
741  column_name + "'");
742  }
743 }
static std::set< std::string > reserved_keywords

+ Here is the caller graph for this function:

void ddl_utils::validate_table_type ( const TableDescriptor * td,
const TableType  expected_table_type,
const std::string &  command 
)

Definition at line 745 of file DdlUtils.cpp.

References StorageType::FOREIGN_TABLE, FOREIGN_TABLE, TableDescriptor::isView, TableDescriptor::storageType, TABLE, TableDescriptor::tableName, and VIEW.

Referenced by AlterTableAlterColumnCommand::alterColumn(), Parser::AddColumnStmt::check_executable(), DropForeignTableCommand::execute(), AlterForeignTableCommand::execute(), Parser::DropTableStmt::execute(), Parser::RenameColumnStmt::execute(), Parser::DropColumnStmt::execute(), and Parser::DropViewStmt::execute().

747  {
748  if (td->isView) {
749  if (expected_table_type != TableType::VIEW) {
750  throw std::runtime_error(td->tableName + " is a view. Use " + command + " VIEW.");
751  }
752  } else if (td->storageType == StorageType::FOREIGN_TABLE) {
753  if (expected_table_type != TableType::FOREIGN_TABLE) {
754  throw std::runtime_error(td->tableName + " is a foreign table. Use " + command +
755  " FOREIGN TABLE.");
756  }
757  } else if (expected_table_type != TableType::TABLE) {
758  throw std::runtime_error(td->tableName + " is a table. Use " + command + " TABLE.");
759  }
760 }
std::string tableName
std::string storageType
static constexpr char const * FOREIGN_TABLE

+ Here is the caller graph for this function: