OmniSciDB
a5dc49c757
|
#include <StringDictionary.h>
Classes | |
struct | compare_cache_value_t |
struct | PayloadString |
class | StringCallback |
struct | StringIdxEntry |
Public Member Functions | |
StringDictionary (const shared::StringDictKey &dict_key, const std::string &folder, const bool isTemp, const bool recover, const bool materializeHashes=false, size_t initial_capacity=256) | |
StringDictionary (const LeafHostInfo &host, const shared::StringDictKey &dict_key) | |
~StringDictionary () noexcept | |
const shared::StringDictKey & | getDictKey () const noexcept |
void | eachStringSerially (int64_t const generation, StringCallback &) const |
std::function< int32_t(std::string const &)> | makeLambdaStringToId () const |
int32_t | getOrAdd (const std::string &str) noexcept |
template<class T , class String > | |
size_t | getBulk (const std::vector< String > &string_vec, T *encoded_vec) const |
template<class T , class String > | |
size_t | getBulk (const std::vector< String > &string_vec, T *encoded_vec, const int64_t generation) const |
template<class T , class String > | |
void | getOrAddBulk (const std::vector< String > &string_vec, T *encoded_vec) |
template<class T , class String > | |
void | getOrAddBulkParallel (const std::vector< String > &string_vec, T *encoded_vec) |
template<class String > | |
void | getOrAddBulkArray (const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec) |
template<class String > | |
int32_t | getIdOfString (const String &) const |
std::string | getString (int32_t string_id) const |
std::string_view | getStringView (int32_t string_id) const |
std::pair< char *, size_t > | getStringBytes (int32_t string_id) const noexcept |
size_t | storageEntryCount () const |
template<typename T > | |
std::vector< T > | getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape, const size_t generation) const |
template<typename T > | |
std::vector< T > | getLikeImpl (const std::string &pattern, const bool icase, const bool is_simple, const char escape, const size_t generation) const |
std::vector< int32_t > | getCompare (const std::string &pattern, const std::string &comp_operator, const size_t generation) |
std::vector< int32_t > | getRegexpLike (const std::string &pattern, const char escape, const size_t generation) const |
std::vector< std::string > | copyStrings () const |
std::vector< std::string_view > | getStringViews () const |
std::vector< std::string_view > | getStringViews (const size_t generation) const |
std::vector< int32_t > | buildDictionaryTranslationMap (const std::shared_ptr< StringDictionary > dest_dict, StringLookupCallback const &dest_transient_lookup_callback) const |
size_t | buildDictionaryTranslationMap (const StringDictionary *dest_dict, int32_t *translated_ids, const int64_t source_generation, const int64_t dest_generation, const bool dest_has_transients, StringLookupCallback const &dest_transient_lookup_callback, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
void | buildDictionaryNumericTranslationMap (Datum *translated_ids, const int64_t source_generation, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
bool | checkpoint () noexcept |
bool | isClient () const noexcept |
void | update_leaf (const LeafHostInfo &host_info) |
size_t | computeCacheSize () const |
template<> | |
std::vector< int32_t > | getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape, const size_t generation) const |
template<> | |
std::vector< int64_t > | getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape, const size_t generation) const |
Static Public Member Functions | |
static void | populate_string_ids (std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict, const std::vector< std::string const * > &transient_string_vec={}) |
Populates provided dest_ids vector with string ids corresponding to given source strings. More... | |
static void | populate_string_array_ids (std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict) |
Static Public Attributes | |
static constexpr int32_t | INVALID_STR_ID = -1 |
static constexpr size_t | MAX_STRLEN = (1 << 15) - 1 |
static constexpr size_t | MAX_STRCOUNT = (1U << 31) - 1 |
Private Member Functions | |
void | processDictionaryFutures (std::vector< std::future< std::vector< std::pair< string_dict_hash_t, unsigned int >>>> &dictionary_futures) |
size_t | getNumStringsFromStorage (const size_t storage_slots) const noexcept |
bool | fillRateIsHigh (const size_t num_strings) const noexcept |
void | increaseHashTableCapacity () noexcept |
template<class String > | |
void | increaseHashTableCapacityFromStorageAndMemory (const size_t str_count, const size_t storage_high_water_mark, const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids, const std::vector< string_dict_hash_t > &input_strings_hashes) noexcept |
int32_t | getOrAddImpl (const std::string_view &str) noexcept |
template<class String > | |
void | hashStrings (const std::vector< String > &string_vec, std::vector< string_dict_hash_t > &hashes) const noexcept |
int32_t | getUnlocked (const std::string_view sv) const noexcept |
std::string | getStringUnlocked (int32_t string_id) const noexcept |
std::string_view | getStringViewUnlocked (int32_t string_id) const noexcept |
std::string | getStringChecked (const int string_id) const noexcept |
std::string_view | getStringViewChecked (const int string_id) const noexcept |
std::pair< char *, size_t > | getStringBytesChecked (const int string_id) const noexcept |
template<class String > | |
uint32_t | computeBucket (const string_dict_hash_t hash, const String &input_string, const std::vector< int32_t > &string_id_string_dict_hash_table) const noexcept |
template<class String > | |
uint32_t | computeBucketFromStorageAndMemory (const string_dict_hash_t input_string_hash, const String &input_string, const std::vector< int32_t > &string_id_string_dict_hash_table, const size_t storage_high_water_mark, const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids) const noexcept |
uint32_t | computeUniqueBucketWithHash (const string_dict_hash_t hash, const std::vector< int32_t > &string_id_string_dict_hash_table) noexcept |
void | checkAndConditionallyIncreasePayloadCapacity (const size_t write_length) |
void | checkAndConditionallyIncreaseOffsetCapacity (const size_t write_length) |
template<class String > | |
void | appendToStorage (const String str) noexcept |
template<class String > | |
void | appendToStorageBulk (const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids, const size_t sum_new_strings_lengths) noexcept |
PayloadString | getStringFromStorage (const int string_id) const noexcept |
std::string_view | getStringFromStorageFast (const int string_id) const noexcept |
void | addPayloadCapacity (const size_t min_capacity_requested=0) noexcept |
void | addOffsetCapacity (const size_t min_capacity_requested=0) noexcept |
size_t | addStorageCapacity (int fd, const size_t min_capacity_requested=0) noexcept |
void * | addMemoryCapacity (void *addr, size_t &mem_size, const size_t min_capacity_requested=0) noexcept |
void | invalidateInvertedIndex () noexcept |
std::vector< int32_t > | getEquals (std::string pattern, std::string comp_operator, size_t generation) |
void | buildSortedCache () |
void | insertInSortedCache (std::string str, int32_t str_id) |
void | sortCache (std::vector< int32_t > &cache) |
void | mergeSortedCache (std::vector< int32_t > &temp_sorted_cache) |
compare_cache_value_t * | binary_search_cache (const std::string &pattern) const |
Private Attributes | |
const shared::StringDictKey | dict_key_ |
const std::string | folder_ |
size_t | str_count_ |
size_t | collisions_ |
std::vector< int32_t > | string_id_string_dict_hash_table_ |
std::vector< string_dict_hash_t > | hash_cache_ |
std::vector< int32_t > | sorted_cache |
bool | isTemp_ |
bool | materialize_hashes_ |
std::string | offsets_path_ |
int | payload_fd_ |
int | offset_fd_ |
StringIdxEntry * | offset_map_ |
char * | payload_map_ |
size_t | offset_file_size_ |
size_t | payload_file_size_ |
size_t | payload_file_off_ |
std::shared_mutex | rw_mutex_ |
std::map< std::tuple < std::string, bool, bool, char >, std::vector< int32_t > > | like_i32_cache_ |
std::map< std::tuple < std::string, bool, bool, char >, std::vector< int64_t > > | like_i64_cache_ |
size_t | like_cache_size_ |
std::map< std::pair < std::string, char > , std::vector< int32_t > > | regex_cache_ |
size_t | regex_cache_size_ |
std::map< std::string, int32_t > | equal_cache_ |
size_t | equal_cache_size_ |
DictionaryCache< std::string, compare_cache_value_t > | compare_cache_ |
size_t | compare_cache_size_ |
std::shared_ptr< std::vector < std::string > > | strings_cache_ |
size_t | strings_cache_size_ |
std::unique_ptr < StringDictionaryClient > | client_ |
std::unique_ptr < StringDictionaryClient > | client_no_timeout_ |
char * | CANARY_BUFFER {nullptr} |
size_t | canary_buffer_size = 0 |
Friends | |
class | StringLocalCallback |
Definition at line 54 of file StringDictionary.h.
StringDictionary::StringDictionary | ( | const shared::StringDictKey & | dict_key, |
const std::string & | folder, | ||
const bool | isTemp, | ||
const bool | recover, | ||
const bool | materializeHashes = false , |
||
size_t | initial_capacity = 256 |
||
) |
Definition at line 121 of file StringDictionary.cpp.
References addOffsetCapacity(), addPayloadCapacity(), threading_serial::async(), CHECK_EQ, heavyai::checked_mmap(), anonymous_namespace{StringDictionary.cpp}::checked_open(), collisions_, heavyai::file_size(), getNumStringsFromStorage(), getStringFromStorage(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, isTemp_, LOG, materialize_hashes_, offset_fd_, offset_file_size_, offset_map_, offsets_path_, payload_fd_, payload_file_size_, payload_map_, processDictionaryFutures(), anonymous_namespace{StringDictionary.cpp}::round_up_p2(), rw_mutex_, str_count_, string_id_string_dict_hash_table_, VLOG, and logger::WARNING.
StringDictionary::StringDictionary | ( | const LeafHostInfo & | host, |
const shared::StringDictKey & | dict_key | ||
) |
Definition at line 354 of file StringDictionary.cpp.
|
noexcept |
Definition at line 364 of file StringDictionary.cpp.
References CANARY_BUFFER, CHECK, CHECK_GE, heavyai::checked_munmap(), heavyai::close(), isClient(), isTemp_, offset_fd_, offset_file_size_, offset_map_, payload_fd_, payload_file_size_, and payload_map_.
|
private noexcept |
Definition at line 1605 of file StringDictionary.cpp.
References CHECK, and anonymous_namespace{StringDictionary.cpp}::SYSTEM_PAGE_SIZE.
|
private noexcept |
Definition at line 1575 of file StringDictionary.cpp.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), and StringDictionary().
|
private noexcept |
Definition at line 1566 of file StringDictionary.cpp.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), and StringDictionary().
|
private noexcept |
Definition at line 1584 of file StringDictionary.cpp.
References CHECK, CHECK_NE, anonymous_namespace{StringDictionary.cpp}::SYSTEM_PAGE_SIZE, and File_Namespace::write().
|
private noexcept |
Definition at line 1511 of file StringDictionary.cpp.
References StringDictionary::StringIdxEntry::size.
Referenced by getOrAddBulk().
|
private noexcept |
Definition at line 1525 of file StringDictionary.cpp.
Referenced by getOrAddBulkParallel().
|
private |
void StringDictionary::buildDictionaryNumericTranslationMap | ( | Datum * | translated_ids, |
const int64_t | source_generation, | ||
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_infos | ||
) | const |
Definition at line 2028 of file StringDictionary.cpp.
References CHECK, CHECK_GE, CHECK_GT, CHECK_LE, DEBUG_TIMER, getStringFromStorageFast(), ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, threading_serial::parallel_for(), rw_mutex_, and str_count_.
std::vector< int32_t > StringDictionary::buildDictionaryTranslationMap | ( | const std::shared_ptr< StringDictionary > | dest_dict, |
StringLookupCallback const & | dest_transient_lookup_callback | ||
) | const |
Definition at line 1844 of file StringDictionary.cpp.
References DEBUG_TIMER, and storageEntryCount().
size_t StringDictionary::buildDictionaryTranslationMap | ( | const StringDictionary * | dest_dict, |
int32_t * | translated_ids, | ||
const int64_t | source_generation, | ||
const int64_t | dest_generation, | ||
const bool | dest_has_transients, | ||
StringLookupCallback const & | dest_transient_lookup_callback, | ||
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_infos | ||
) | const |
Definition at line 1881 of file StringDictionary.cpp.
References CHECK_GE, CHECK_LE, client_no_timeout_, computeBucket(), DEBUG_TIMER, getDictKey(), getStringFromStorageFast(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, materialize_hashes_, ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, order_translation_locks(), threading_serial::parallel_for(), rw_mutex_, str_count_, and string_id_string_dict_hash_table_.
|
private |
Definition at line 1674 of file StringDictionary.cpp.
References mergeSortedCache(), sortCache(), sorted_cache, and str_count_.
Referenced by getCompare().
|
private |
Definition at line 1490 of file StringDictionary.cpp.
References addOffsetCapacity(), CHECK, CHECK_GE, heavyai::checked_mmap(), heavyai::checked_munmap(), isTemp_, offset_fd_, offset_file_size_, offset_map_, and str_count_.
|
private |
Definition at line 1471 of file StringDictionary.cpp.
References addPayloadCapacity(), CHECK, CHECK_GE, heavyai::checked_mmap(), heavyai::checked_munmap(), isTemp_, payload_fd_, payload_file_off_, payload_file_size_, and payload_map_.
|
noexcept |
Definition at line 1651 of file StringDictionary.cpp.
References CHECK, client_, heavyai::fsync(), isClient(), isTemp_, heavyai::msync(), offset_fd_, offset_file_size_, offset_map_, payload_fd_, payload_file_size_, and payload_map_.
Referenced by import_export::TypedImportBuffer::stringDictCheckpoint().
|
private noexcept |
Definition at line 1373 of file StringDictionary.cpp.
Referenced by buildDictionaryTranslationMap(), getBulk(), and getOrAddBulk().
|
private noexcept |
memcmp(input_string.data(), candidate_storage_string.c_str_ptr, input_string.size())) {
Definition at line 1403 of file StringDictionary.cpp.
Referenced by getOrAddBulkParallel().
size_t StringDictionary::computeCacheSize | ( | ) | const |
Definition at line 2111 of file StringDictionary.cpp.
References compare_cache_size_, equal_cache_size_, hash_cache_, like_cache_size_, regex_cache_size_, rw_mutex_, sorted_cache, string_id_string_dict_hash_table_, and strings_cache_size_.
|
private noexcept |
Definition at line 1452 of file StringDictionary.cpp.
Referenced by increaseHashTableCapacity(), and processDictionaryFutures().
std::vector< std::string > StringDictionary::copyStrings | ( | ) | const |
Definition at line 1191 of file StringDictionary.cpp.
References gpu_enabled::accumulate(), threading_serial::async(), CHECK_EQ, CHECK_GT, CHECK_LE, gpu_enabled::copy(), cpu_threads(), getStringUnlocked(), isClient(), rw_mutex_, str_count_, strings_cache_, and strings_cache_size_.
void StringDictionary::eachStringSerially | ( | int64_t const | generation, |
StringCallback & | serial_callback | ||
) | const |
Definition at line 276 of file StringDictionary.cpp.
References CHECK_LE, client_, getStringFromStorageFast(), isClient(), anonymous_namespace{Utm.h}::n, rw_mutex_, storageEntryCount(), and str_count_.
Referenced by makeLambdaStringToId().
|
private noexcept |
Definition at line 1253 of file StringDictionary.cpp.
Referenced by getOrAddBulk(), and getOrAddBulkParallel().
size_t StringDictionary::getBulk | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec | ||
) | const |
Definition at line 495 of file StringDictionary.cpp.
size_t StringDictionary::getBulk | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec, | ||
const int64_t | generation | ||
) | const |
Definition at line 508 of file StringDictionary.cpp.
References CHECK_GE, computeBucket(), dict_key_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, MAX_STRLEN, ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, threading_serial::parallel_for(), rw_mutex_, storageEntryCount(), string_id_string_dict_hash_table_, and anonymous_namespace{StringDictionary.cpp}::throw_string_too_long_error().
std::vector< int32_t > StringDictionary::getCompare | ( | const std::string & | pattern, |
const std::string & | comp_operator, | ||
const size_t | generation | ||
) |
Definition at line 981 of file StringDictionary.cpp.
References anonymous_namespace{Utm.h}::a, buildSortedCache(), client_, compare_cache_, compare_cache_size_, getEquals(), getStringFromStorage(), isClient(), gpu_enabled::lower_bound(), rw_mutex_, sorted_cache, str_count_, string_eq(), and string_lt().
|
noexcept |
Definition at line 320 of file StringDictionary.cpp.
References dict_key_.
Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap(), RowSetMemoryOwner::addStringProxyNumericTranslationMap(), RowSetMemoryOwner::addStringProxyUnionTranslationMap(), and buildDictionaryTranslationMap().
|
private |
Definition at line 920 of file StringDictionary.cpp.
References CHECK, CHECK_GT, CHECK_LE, cpu_threads(), equal_cache_, equal_cache_size_, getStringUnlocked(), MAX_STRLEN, run_benchmark_import::result, and str_count_.
Referenced by getCompare().
template int32_t StringDictionary::getIdOfString | ( | const String & | ) | const |
Definition at line 751 of file StringDictionary.cpp.
References client_, getUnlocked(), isClient(), and rw_mutex_.
std::vector<T> StringDictionary::getLike | ( | const std::string & | pattern, |
const bool | icase, | ||
const bool | is_simple, | ||
const char | escape, | ||
const size_t | generation | ||
) | const |
std::vector<int32_t> StringDictionary::getLike | ( | const std::string & | pattern, |
const bool | icase, | ||
const bool | is_simple, | ||
const char | escape, | ||
const size_t | generation | ||
) | const |
Definition at line 869 of file StringDictionary.cpp.
References CHECK, and run_benchmark_import::result.
std::vector<int64_t> StringDictionary::getLike | ( | const std::string & | pattern, |
const bool | icase, | ||
const bool | is_simple, | ||
const char | escape, | ||
const size_t | generation | ||
) | const |
Definition at line 895 of file StringDictionary.cpp.
References CHECK, and run_benchmark_import::result.
std::vector< T > StringDictionary::getLikeImpl | ( | const std::string & | pattern, |
const bool | icase, | ||
const bool | is_simple, | ||
const char | escape, | ||
const size_t | generation | ||
) | const |
Definition at line 818 of file StringDictionary.cpp.
References gpu_enabled::copy(), cpu_threads(), getStringUnlocked(), threading_serial::parallel_for(), gpu_enabled::partial_sum(), run_benchmark_import::result, string_ilike(), string_ilike_simple(), string_like(), and string_like_simple().
|
private noexcept |
Method to retrieve the number of strings in storage via a binary search for the first canary.
storage_slots | number of storage entries we should search to find the minimum canary |
Definition at line 331 of file StringDictionary.cpp.
References CHECK_GE.
Referenced by StringDictionary().
|
noexcept |
Definition at line 388 of file StringDictionary.cpp.
References CHECK_EQ.
template void StringDictionary::getOrAddBulk | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec | ||
) |
Definition at line 597 of file StringDictionary.cpp.
References appendToStorage(), CHECK, CHECK_LT, computeBucket(), dict_key_, fillRateIsHigh(), g_enable_stringdict_parallel, getOrAddBulkParallel(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), increaseHashTableCapacity(), INVALID_STR_ID, invalidateInvertedIndex(), materialize_hashes_, MAX_STRCOUNT, MAX_STRLEN, offsets_path_, rw_mutex_, str_count_, and string_id_string_dict_hash_table_.
Referenced by import_export::TypedImportBuffer::addDictEncodedString(), ArrowForeignStorageBase::convertArrowDictionary(), ArrowForeignStorageBase::createDictionaryEncodedColumn(), data_conversion::StringViewToStringDictEncoder< IdType >::encodeAndAppendData(), foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous(), getOrAddBulkArray(), and populate_string_ids().
template void StringDictionary::getOrAddBulkArray | ( | const std::vector< std::vector< String >> & | string_array_vec, |
std::vector< std::vector< int32_t >> & | ids_array_vec | ||
) |
Definition at line 447 of file StringDictionary.cpp.
References client_no_timeout_, and getOrAddBulk().
Referenced by import_export::TypedImportBuffer::addDictEncodedStringArray().
void StringDictionary::getOrAddBulkParallel | ( | const std::vector< String > & | string_vec, |
T * | encoded_vec | ||
) |
Definition at line 655 of file StringDictionary.cpp.
References appendToStorageBulk(), CHECK, CHECK_LT, computeBucketFromStorageAndMemory(), dict_key_, fillRateIsHigh(), hash_cache_, hashStrings(), increaseHashTableCapacityFromStorageAndMemory(), INVALID_STR_ID, invalidateInvertedIndex(), materialize_hashes_, MAX_STRCOUNT, MAX_STRLEN, offsets_path_, rw_mutex_, str_count_, and string_id_string_dict_hash_table_.
Referenced by getOrAddBulk().
|
private noexcept |
Definition at line 1313 of file StringDictionary.cpp.
References CHECK, CHECK_LT, and anonymous_namespace{StringDictionary.cpp}::hash_string().
std::vector< int32_t > StringDictionary::getRegexpLike | ( | const std::string & | pattern, |
const char | escape, | ||
const size_t | generation | ||
) | const |
Definition at line 1143 of file StringDictionary.cpp.
References CHECK, CHECK_GT, CHECK_LE, client_, cpu_threads(), getStringUnlocked(), anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), isClient(), regex_cache_, regex_cache_size_, run_benchmark_import::result, rw_mutex_, and str_count_.
std::string StringDictionary::getString | ( | int32_t | string_id | ) | const |
Definition at line 773 of file StringDictionary.cpp.
References client_, getStringUnlocked(), isClient(), and rw_mutex_.
Referenced by StringValueConverter::convertToColumnarFormatFromDict(), and populate_string_ids().
|
noexcept |
Definition at line 800 of file StringDictionary.cpp.
References CHECK, CHECK_LE, and CHECK_LT.
|
private noexcept |
Definition at line 1365 of file StringDictionary.cpp.
References CHECK.
|
private noexcept |
Definition at line 1352 of file StringDictionary.cpp.
References CHECK.
Referenced by increaseHashTableCapacity().
|
private noexcept |
Definition at line 1551 of file StringDictionary.cpp.
References CHECK_GE, StringDictionary::StringIdxEntry::off, and StringDictionary::StringIdxEntry::size.
Referenced by getCompare(), mergeSortedCache(), sortCache(), and StringDictionary().
|
private noexcept |
Definition at line 1545 of file StringDictionary.cpp.
References StringDictionary::StringIdxEntry::off, and StringDictionary::StringIdxEntry::size.
Referenced by buildDictionaryNumericTranslationMap(), buildDictionaryTranslationMap(), eachStringSerially(), and getStringViews().
|
private noexcept |
Definition at line 789 of file StringDictionary.cpp.
References CHECK_LT.
Referenced by copyStrings(), getEquals(), getLikeImpl(), getRegexpLike(), and getString().
std::string_view StringDictionary::getStringView | ( | int32_t | string_id | ) | const |
Definition at line 783 of file StringDictionary.cpp.
References CHECK, getStringViewUnlocked(), isClient(), and rw_mutex_.
Referenced by data_conversion::anonymous_namespace{StringViewSource.h}::get_materialized_string_views().
|
private noexcept |
Definition at line 1358 of file StringDictionary.cpp.
References CHECK.
std::vector< std::string_view > StringDictionary::getStringViews | ( | ) | const |
Definition at line 1840 of file StringDictionary.cpp.
References storageEntryCount().
std::vector< std::string_view > StringDictionary::getStringViews | ( | const size_t | generation | ) | const |
Definition at line 1787 of file StringDictionary.cpp.
References CHECK_GE, CHECK_LE, DEBUG_TIMER, getStringFromStorageFast(), MAX_STRCOUNT, ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, threading_serial::parallel_for(), rw_mutex_, and storageEntryCount().
|
private noexcept |
Definition at line 794 of file StringDictionary.cpp.
References CHECK_LT.
Referenced by getStringView().
|
private noexcept |
Definition at line 766 of file StringDictionary.cpp.
References anonymous_namespace{StringDictionary.cpp}::hash_string().
Referenced by getIdOfString().
|
private noexcept |
Method to hash a vector of strings in parallel.
string_vec | input vector of strings to be hashed |
hashes | space for the output - should be pre-sized to match string_vec size |
Definition at line 478 of file StringDictionary.cpp.
References CHECK_EQ, anonymous_namespace{StringDictionary.cpp}::hash_string(), and threading_serial::parallel_for().
Referenced by getOrAddBulkParallel().
|
private noexcept |
Definition at line 1257 of file StringDictionary.cpp.
References computeUniqueBucketWithHash(), getStringChecked(), hash_cache_, anonymous_namespace{StringDictionary.cpp}::hash_string(), INVALID_STR_ID, materialize_hashes_, str_count_, and string_id_string_dict_hash_table_.
Referenced by getOrAddBulk().
|
private noexcept |
Definition at line 1280 of file StringDictionary.cpp.
References anonymous_namespace{StringDictionary.cpp}::hash_string().
Referenced by getOrAddBulkParallel().
|
private |
|
private noexcept |
Definition at line 1626 of file StringDictionary.cpp.
References compare_cache_, compare_cache_size_, equal_cache_, equal_cache_size_, like_cache_size_, like_i32_cache_, like_i64_cache_, regex_cache_, regex_cache_size_, and gpu_enabled::swap().
Referenced by getOrAddBulk(), and getOrAddBulkParallel().
|
noexcept |
Definition at line 1670 of file StringDictionary.cpp.
References client_.
Referenced by checkpoint(), copyStrings(), eachStringSerially(), getCompare(), getIdOfString(), getRegexpLike(), getString(), getStringView(), makeLambdaStringToId(), storageEntryCount(), and ~StringDictionary().
std::function< int32_t(std::string const &)> StringDictionary::makeLambdaStringToId | ( | ) | const |
Definition at line 263 of file StringDictionary.cpp.
References CHECK, eachStringSerially(), INVALID_STR_ID, and isClient().
|
private |
Definition at line 1698 of file StringDictionary.cpp.
References getStringFromStorage(), sorted_cache, and string_lt().
Referenced by buildSortedCache().
|
static |
Definition at line 1746 of file StringDictionary.cpp.
References threading_serial::async(), populate_string_ids(), and logger::thread_id().
Referenced by DictionaryValueConverter< TARGET_TYPE >::processArrayBuffer().
|
static |
Populates provided dest_ids vector with string ids corresponding to given source strings.
Given a vector of source string ids and a corresponding source dictionary, this method populates a vector of destination string ids by either returning the string id of matching strings in the destination dictionary or creating new entries in the dictionary. Source string ids can also be transient if they were created by a function (e.g., LOWER/UPPER functions). A map of transient string ids to string values is provided in order to handle this use case.
dest_ids | - vector of destination string ids to be populated |
dest_dict | - destination dictionary |
source_ids | - vector of source string ids for which destination ids are needed |
source_dict | - source dictionary |
transient_string_vec | - ordered vector of string value pointers |
Definition at line 1722 of file StringDictionary.cpp.
References CHECK_LT, getOrAddBulk(), getString(), and StringDictionaryProxy::transientIdToIndex().
Referenced by populate_string_array_ids(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().
|
private |
Definition at line 300 of file StringDictionary.cpp.
References computeUniqueBucketWithHash(), hash_cache_, materialize_hashes_, payload_file_off_, str_count_, and string_id_string_dict_hash_table_.
Referenced by StringDictionary().
|
private |
Definition at line 1685 of file StringDictionary.cpp.
References anonymous_namespace{Utm.h}::a, getStringFromStorage(), gpu_enabled::sort(), and string_lt().
Referenced by buildSortedCache().
size_t StringDictionary::storageEntryCount | ( | ) | const |
Definition at line 809 of file StringDictionary.cpp.
References client_, isClient(), rw_mutex_, and str_count_.
Referenced by buildDictionaryTranslationMap(), eachStringSerially(), getBulk(), and getStringViews().
void StringDictionary::update_leaf | ( | const LeafHostInfo & | host_info | ) |
Definition at line 386 of file StringDictionary.cpp.
|
friend |
Definition at line 79 of file StringDictionary.h.
|
private |
Definition at line 310 of file StringDictionary.h.
Referenced by ~StringDictionary().
|
private |
Definition at line 311 of file StringDictionary.h.
|
mutable private |
Definition at line 307 of file StringDictionary.h.
Referenced by checkpoint(), eachStringSerially(), getCompare(), getIdOfString(), getRegexpLike(), getString(), isClient(), and storageEntryCount().
|
mutable private |
Definition at line 308 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), and getOrAddBulkArray().
|
private |
Definition at line 279 of file StringDictionary.h.
Referenced by StringDictionary().
|
mutable private |
Definition at line 303 of file StringDictionary.h.
Referenced by getCompare(), and invalidateInvertedIndex().
|
mutable private |
Definition at line 304 of file StringDictionary.h.
Referenced by computeCacheSize(), getCompare(), and invalidateInvertedIndex().
|
private |
Definition at line 276 of file StringDictionary.h.
Referenced by getBulk(), getDictKey(), getOrAddBulk(), and getOrAddBulkParallel().
|
mutable private |
Definition at line 301 of file StringDictionary.h.
Referenced by getEquals(), and invalidateInvertedIndex().
|
mutable private |
Definition at line 302 of file StringDictionary.h.
Referenced by computeCacheSize(), getEquals(), and invalidateInvertedIndex().
|
private |
Definition at line 277 of file StringDictionary.h.
|
private |
Definition at line 281 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), computeCacheSize(), getOrAddBulk(), getOrAddBulkParallel(), increaseHashTableCapacity(), processDictionaryFutures(), and StringDictionary().
|
static |
Definition at line 181 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxyUnlocked(), StringDictionaryProxy::buildUnionTranslationMapToOtherProxy(), count_matches_impl(), count_matches_sharded(), CodeGenerator::createInValuesBitmap(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), fill_hash_join_buff_impl(), fill_hash_join_buff_sharded_impl(), fill_row_ids_for_window_framing_impl(), fill_row_ids_impl(), fill_row_ids_sharded_impl(), TableFunctions_Namespace::OneHotEncoder_Namespace::get_min_max_keys(), getBulk(), StringDictionaryProxy::getIdOfString(), StringDictionaryProxy::getIdOfStringNoGeneration(), getOrAddBulk(), getOrAddBulkParallel(), StringDictionaryProxy::getOrAddTransientBulk(), StringDictionaryProxy::getOrAddTransientImpl(), increaseHashTableCapacity(), StringDictionaryProxy::initIdMap(), StringDictionaryProxy::lookupTransientStringUnlocked(), makeLambdaStringToId(), anonymous_namespace{HashJoinRuntime.cpp}::map_str_id_to_outer_dict(), GenericKeyHandler::operator()(), StringLocalCallback::operator()(), StringNetworkCallback::operator()(), StringDictionary(), StringDictionaryProxy::transientLookupBulkParallelUnlocked(), StringDictionaryProxy::transientLookupBulkUnlocked(), and truncate_to_generation().
|
private |
Definition at line 283 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
mutable private |
Definition at line 298 of file StringDictionary.h.
Referenced by computeCacheSize(), and invalidateInvertedIndex().
|
mutable private |
Definition at line 295 of file StringDictionary.h.
Referenced by invalidateInvertedIndex().
|
mutable private |
Definition at line 297 of file StringDictionary.h.
Referenced by invalidateInvertedIndex().
|
private |
Definition at line 284 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), getOrAddBulk(), getOrAddBulkParallel(), increaseHashTableCapacity(), processDictionaryFutures(), and StringDictionary().
|
static |
Definition at line 183 of file StringDictionary.h.
Referenced by getOrAddBulk(), getOrAddBulkParallel(), getStringViews(), and anonymous_namespace{StringDictionary.cpp}::throw_encoding_error().
|
static |
Definition at line 182 of file StringDictionary.h.
Referenced by import_export::TypedImportBuffer::addDictEncodedString(), import_export::TypedImportBuffer::addDictEncodedStringArray(), import_export::TypedImportBuffer::addDictStringWithTruncation(), foreign_storage::ParquetDetectStringEncoder::appendData(), foreign_storage::ParquetStringEncoder< V >::appendDataTrackErrors(), data_conversion::StringViewToStringNoneEncoder::encodeAndAppendData(), data_conversion::StringViewToStringDictEncoder< IdType >::encodeAndAppendData(), foreign_storage::ParquetStringEncoder< V >::encodeAndCopyContiguous(), getBulk(), getEquals(), getOrAddBulk(), getOrAddBulkParallel(), import_export::delimited_parser::parse_string_array(), foreign_storage::RegexFileBufferParser::parseBuffer(), anonymous_namespace{StringDictionary.cpp}::throw_string_too_long_error(), foreign_storage::ParquetDetectStringEncoder::validate(), and foreign_storage::ParquetStringEncoder< V >::validateAndAppendData().
|
private |
Definition at line 287 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 290 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 288 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreaseOffsetCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 285 of file StringDictionary.h.
Referenced by getOrAddBulk(), getOrAddBulkParallel(), and StringDictionary().
|
private |
Definition at line 286 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 292 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), and processDictionaryFutures().
|
private |
Definition at line 291 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
private |
Definition at line 289 of file StringDictionary.h.
Referenced by checkAndConditionallyIncreasePayloadCapacity(), checkpoint(), StringDictionary(), and ~StringDictionary().
|
mutable private |
Definition at line 299 of file StringDictionary.h.
Referenced by getRegexpLike(), and invalidateInvertedIndex().
|
mutable private |
Definition at line 300 of file StringDictionary.h.
Referenced by computeCacheSize(), getRegexpLike(), and invalidateInvertedIndex().
|
mutable private |
Definition at line 293 of file StringDictionary.h.
Referenced by buildDictionaryNumericTranslationMap(), buildDictionaryTranslationMap(), computeCacheSize(), copyStrings(), eachStringSerially(), getBulk(), getCompare(), getIdOfString(), getOrAddBulk(), getOrAddBulkParallel(), getRegexpLike(), getString(), getStringView(), getStringViews(), storageEntryCount(), and StringDictionary().
|
private |
Definition at line 282 of file StringDictionary.h.
Referenced by buildSortedCache(), computeCacheSize(), getCompare(), and mergeSortedCache().
|
private |
Definition at line 278 of file StringDictionary.h.
Referenced by buildDictionaryNumericTranslationMap(), buildDictionaryTranslationMap(), buildSortedCache(), checkAndConditionallyIncreaseOffsetCapacity(), copyStrings(), eachStringSerially(), getCompare(), getEquals(), getOrAddBulk(), getOrAddBulkParallel(), getRegexpLike(), increaseHashTableCapacity(), processDictionaryFutures(), storageEntryCount(), and StringDictionary().
|
private |
Definition at line 280 of file StringDictionary.h.
Referenced by buildDictionaryTranslationMap(), computeCacheSize(), getBulk(), getOrAddBulk(), getOrAddBulkParallel(), increaseHashTableCapacity(), processDictionaryFutures(), and StringDictionary().
|
mutable private |
Definition at line 305 of file StringDictionary.h.
Referenced by copyStrings().
|
mutable private |
Definition at line 306 of file StringDictionary.h.
Referenced by computeCacheSize(), and copyStrings().