OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
anonymous_namespace{StringDictionary.cpp} Namespace Reference

Classes

struct  ThreadInfo
 
class  MapMaker
 

Functions

int checked_open (const char *path, const bool recover)
 
const uint64_t round_up_p2 (const uint64_t num)
 
string_dict_hash_t hash_string (const std::string_view &str)
 
template<class T >
void throw_encoding_error (std::string_view str, const shared::StringDictKey &dict_key)
 
void throw_string_too_long_error (std::string_view str, const shared::StringDictKey &dict_key)
 
bool is_regexp_like (const std::string &str, const std::string &pattern, const char escape)
 

Variables

const int SYSTEM_PAGE_SIZE = heavyai::get_page_size()
 

Function Documentation

int anonymous_namespace{StringDictionary.cpp}::checked_open ( const char *  path,
const bool  recover 
)

Definition at line 61 of file StringDictionary.cpp.

References logger::ERROR, LOG, and heavyai::open().

Referenced by StringDictionary::StringDictionary().

61  {
62  auto fd = heavyai::open(path, O_RDWR | O_CREAT | (recover ? O_APPEND : O_TRUNC), 0644);
63  if (fd > 0) {
64  return fd;
65  }
66  auto err = std::string("Dictionary path ") + std::string(path) +
67  std::string(" does not exist.");
68  LOG(ERROR) << err;
69  throw DictPayloadUnavailable(err);
70 }
#define LOG(tag)
Definition: Logger.h:285
int open(const char *path, int flags, int mode)
Definition: heavyai_fs.cpp:66

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

string_dict_hash_t anonymous_namespace{StringDictionary.cpp}::hash_string ( const std::string_view &  str)

Definition at line 90 of file StringDictionary.cpp.

Referenced by StringDictionary::buildDictionaryTranslationMap(), StringDictionary::getBulk(), StringDictionary::getOrAddBulk(), StringDictionary::getOrAddImpl(), StringDictionary::getUnlocked(), StringDictionary::hashStrings(), StringDictionary::increaseHashTableCapacity(), StringDictionary::increaseHashTableCapacityFromStorageAndMemory(), and StringDictionary::StringDictionary().

90  {
91  string_dict_hash_t str_hash = 1;
92  // rely on fact that unsigned overflow is defined and wraps
93  for (size_t i = 0; i < str.size(); ++i) {
94  str_hash = str_hash * 997 + str[i];
95  }
96  return str_hash;
97 }
uint32_t string_dict_hash_t

+ Here is the caller graph for this function:

bool anonymous_namespace{StringDictionary.cpp}::is_regexp_like ( const std::string &  str,
const std::string &  pattern,
const char  escape 
)

Definition at line 1135 of file StringDictionary.cpp.

References regexp_like().

Referenced by StringDictionary::getRegexpLike(), and StringDictionaryProxy::getRegexpLike().

1137  {
1138  return regexp_like(str.c_str(), str.size(), pattern.c_str(), pattern.size(), escape);
1139 }
RUNTIME_EXPORT DEVICE bool regexp_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: Regexp.cpp:39

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const uint64_t anonymous_namespace{StringDictionary.cpp}::round_up_p2 ( const uint64_t  num)

Definition at line 72 of file StringDictionary.cpp.

Referenced by StringDictionary::StringDictionary().

72  {
73  uint64_t in = num;
74  in--;
75  in |= in >> 1;
76  in |= in >> 2;
77  in |= in >> 4;
78  in |= in >> 8;
79  in |= in >> 16;
80  in++;
81  // TODO MAT deal with case where filesize has been increased but reality is
82  // we are constrained to 2^31.
83  // In that situation this calculation will wrap to zero
84  if (in == 0 || (in > (UINT32_MAX))) {
85  in = UINT32_MAX;
86  }
87  return in;
88 }

+ Here is the caller graph for this function:

template<class T >
void anonymous_namespace{StringDictionary.cpp}::throw_encoding_error ( std::string_view  str,
const shared::StringDictKey &  dict_key 
)

Definition at line 401 of file StringDictionary.cpp.

References logger::ERROR, LOG, StringDictionary::MAX_STRCOUNT, and heavydb.dtypes::T.

401  {
402  std::ostringstream oss;
403  oss << "The text encoded column using dictionary " << dict_key
404  << " has exceeded it's limit of " << sizeof(T) * 8 << " bits ("
405  << static_cast<size_t>(max_valid_int_value<T>() + 1) << " unique values) "
406  << "while attempting to add the new string '" << str << "'. ";
407 
408  if (sizeof(T) < 4) {
409  // Todo: Implement automatic type widening for dictionary-encoded text
410  // columns/all fixed length columm types (at least if not defined
411  // with fixed encoding size), or short of that, ALTER TABLE
412  // COLUMN TYPE to at least allow the user to do this manually
413  // without re-creating the table
414 
415  oss << "To load more data, please re-create the table with "
416  << "this column as type TEXT ENCODING DICT(" << sizeof(T) * 2 * 8 << ") ";
417  if (sizeof(T) == 1) {
418  oss << "or TEXT ENCODING DICT(32) ";
419  }
420  oss << "and reload your data.";
421  } else {
422  // Todo: Implement TEXT ENCODING DICT(64) type which should essentially
423  // preclude overflows.
424  oss << "Currently dictionary-encoded text columns support a maximum of "
425  << StringDictionary::MAX_STRCOUNT
426  << " strings. Consider recreating the table with "
427  << "this column as type TEXT ENCODING NONE and reloading your data.";
428  }
429  LOG(ERROR) << oss.str();
430  throw std::runtime_error(oss.str());
431 }
#define LOG(tag)
Definition: Logger.h:285
static constexpr size_t MAX_STRCOUNT
void anonymous_namespace{StringDictionary.cpp}::throw_string_too_long_error ( std::string_view  str,
const shared::StringDictKey &  dict_key 
)

Definition at line 433 of file StringDictionary.cpp.

References logger::ERROR, LOG, and StringDictionary::MAX_STRLEN.

Referenced by StringDictionary::getBulk().

434  {
435  std::ostringstream oss;
436  oss << "The string '" << str << " could not be inserted into the dictionary "
437  << dict_key << " because it exceeded the maximum allowable "
438  << "length of " << StringDictionary::MAX_STRLEN << " characters (string was "
439  << str.size() << " characters).";
440  LOG(ERROR) << oss.str();
441  throw std::runtime_error(oss.str());
442 }
#define LOG(tag)
Definition: Logger.h:285
static constexpr size_t MAX_STRLEN

+ Here is the caller graph for this function:

Variable Documentation

const int anonymous_namespace{StringDictionary.cpp}::SYSTEM_PAGE_SIZE = heavyai::get_page_size()