OmniSciDB
a5dc49c757
|
#include <StringDictionaryProxy.h>
Classes | |
struct | HeterogeneousStringEqual |
struct | HeterogeneousStringHash |
class | TranslationMap |
Public Types | |
using | IdMap = TranslationMap< int32_t > |
using | TransientMap = robin_hood::unordered_node_map< std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual > |
Public Member Functions | |
StringDictionaryProxy (StringDictionaryProxy const &)=delete | |
StringDictionaryProxy const & | operator= (StringDictionaryProxy const &)=delete |
StringDictionaryProxy (std::shared_ptr< StringDictionary > sd, const shared::StringDictKey &string_dict_key, const int64_t generation) | |
const shared::StringDictKey & | getDictKey () const noexcept |
bool | operator== (StringDictionaryProxy const &) const |
bool | operator!= (StringDictionaryProxy const &) const |
int32_t | getOrAdd (const std::string &str) noexcept |
StringDictionary * | getDictionary () const noexcept |
int64_t | getGeneration () const noexcept |
std::vector< int32_t > | getTransientBulk (const std::vector< std::string > &strings) const |
Executes read-only lookup of a vector of strings and returns a vector of their integer ids. More... | |
int32_t | getOrAddTransient (const std::string &) |
int32_t | getOrAddTransient (const std::string_view) |
std::vector< int32_t > | getOrAddTransientBulk (const std::vector< std::string > &strings) |
int32_t | getIdOfString (const std::string &str) const |
int32_t | getIdOfStringNoGeneration (const std::string &str) const |
std::string | getString (int32_t string_id) const |
std::vector< std::string > | getStrings (const std::vector< int32_t > &string_ids) const |
std::pair< const char *, size_t > | getStringBytes (int32_t string_id) const noexcept |
IdMap | initIdMap () const |
TranslationMap< Datum > | buildNumericTranslationMap (const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
Builds a vectorized translation map from this proxy's string ids to numeric (Datum) values, using the given string_op_infos. More... | |
IdMap | buildIntersectionTranslationMapToOtherProxy (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
IdMap | buildUnionTranslationMapToOtherProxy (StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_types) const |
size_t | storageEntryCount () const |
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary. More... | |
size_t | transientEntryCount () const |
Returns the number of transient string entries for this proxy. More... | |
size_t | entryCount () const |
Returns the total number of string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount(). More... | |
void | updateGeneration (const int64_t generation) noexcept |
template<typename T > | |
std::vector< T > | getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape) const |
std::vector< int32_t > | getCompare (const std::string &pattern, const std::string &comp_operator) const |
std::vector< int32_t > | getRegexpLike (const std::string &pattern, const char escape) const |
const std::vector< std::string const * > & | getTransientVector () const |
void | eachStringSerially (StringDictionary::StringCallback &) const |
IdMap | transientUnion (StringDictionaryProxy const &) |
Static Public Member Functions | |
static unsigned | transientIdToIndex (int32_t const id) |
static int32_t | transientIndexToId (unsigned const index) |
Private Member Functions | |
std::string | getStringUnlocked (const int32_t string_id) const |
size_t | transientEntryCountUnlocked () const |
size_t | entryCountUnlocked () const |
size_t | persistedC () const |
template<typename String > | |
int32_t | getOrAddTransientImpl (String) |
template<typename String > | |
int32_t | lookupTransientStringUnlocked (const String &lookup_string) const |
size_t | getTransientBulkImpl (const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const |
template<typename String > | |
size_t | transientLookupBulk (const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const |
template<typename String > | |
size_t | transientLookupBulkUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const |
template<typename String > | |
size_t | transientLookupBulkParallelUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const |
IdMap | buildIntersectionTranslationMapToOtherProxyUnlocked (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const |
template<typename String > | |
int32_t | getIdOfStringFromClient (String const &) const |
template<typename String > | |
int32_t | getOrAddTransientUnlocked (String const &) |
Private Attributes | |
std::shared_ptr< StringDictionary > | string_dict_ |
const shared::StringDictKey | string_dict_key_ |
TransientMap | transient_str_to_int_ |
std::vector< std::string const * > | transient_string_vec_ |
int64_t | generation_ |
std::shared_mutex | rw_mutex_ |
Friends | |
class | StringLocalCallback |
class | StringNetworkCallback |
Definition at line 39 of file StringDictionaryProxy.h.
using StringDictionaryProxy::IdMap = TranslationMap<int32_t> |
Definition at line 141 of file StringDictionaryProxy.h.
using StringDictionaryProxy::TransientMap = robin_hood::unordered_node_map<std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual> |
Definition at line 243 of file StringDictionaryProxy.h.
|
delete |
StringDictionaryProxy::StringDictionaryProxy | ( | std::shared_ptr< StringDictionary > | sd, |
const shared::StringDictKey & | string_dict_key, | ||
const int64_t | generation | ||
) |
Definition at line 39 of file StringDictionaryProxy.cpp.
StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxy | ( | const StringDictionaryProxy * | dest_proxy, |
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_infos | ||
) | const |
Definition at line 382 of file StringDictionaryProxy.cpp.
References buildIntersectionTranslationMapToOtherProxyUnlocked(), getDictKey(), order_translation_locks(), and rw_mutex_.
Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap().
|
private |
Definition at line 265 of file StringDictionaryProxy.cpp.
References CHECK_GT, CHECK_LE, StringDictionaryProxy::TranslationMap< T >::data(), DEBUG_TIMER, StringDictionaryProxy::TranslationMap< T >::empty(), entryCountUnlocked(), generation_, getTransientBulkImpl(), StringDictionaryProxy::TranslationMap< T >::getVectorMap(), initIdMap(), StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), StringDictionaryProxy::TranslationMap< T >::numTransients(), StringDictionaryProxy::TranslationMap< T >::setNumUntranslatedStrings(), StringDictionaryProxy::TranslationMap< T >::setRangeEnd(), StringDictionaryProxy::TranslationMap< T >::setRangeStart(), StringDictionaryProxy::TranslationMap< T >::storageData(), storageEntryCount(), string_dict_, shared::transform(), transient_string_vec_, transientEntryCountUnlocked(), and VLOG.
Referenced by buildIntersectionTranslationMapToOtherProxy(), and buildUnionTranslationMapToOtherProxy().
StringDictionaryProxy::TranslationMap< Datum > StringDictionaryProxy::buildNumericTranslationMap | ( | const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_infos | ) | const |
Builds a vectorized translation map from this proxy's string ids to numeric (Datum) values, using the given string_op_infos.
string_op_infos | Vector of StringOpInfo used to build the numeric translation (this function takes no dest_proxy parameter) |
Definition at line 217 of file StringDictionaryProxy.cpp.
References CHECK, DEBUG_TIMER, generation_, getStringUnlocked(), threading_serial::parallel_for(), string_dict_, and transient_string_vec_.
Referenced by RowSetMemoryOwner::addStringProxyNumericTranslationMap().
StringDictionaryProxy::IdMap StringDictionaryProxy::buildUnionTranslationMapToOtherProxy | ( | StringDictionaryProxy * | dest_proxy, |
const std::vector< StringOps_Namespace::StringOpInfo > & | string_op_types | ||
) | const |
Definition at line 396 of file StringDictionaryProxy.cpp.
References buildIntersectionTranslationMapToOtherProxyUnlocked(), DEBUG_TIMER, getDictKey(), getOrAddTransientUnlocked(), getStringUnlocked(), StringDictionary::INVALID_STR_ID, order_translation_locks(), rw_mutex_, string_dict_, and transientEntryCountUnlocked().
Referenced by RowSetMemoryOwner::addStringProxyUnionTranslationMap().
void StringDictionaryProxy::eachStringSerially | ( | StringDictionary::StringCallback & | serial_callback | ) | const |
Definition at line 605 of file StringDictionaryProxy.cpp.
References generation_, string_dict_, and transient_string_vec_.
Referenced by transientUnion().
size_t StringDictionaryProxy::entryCount | ( | ) | const |
Returns the total number of string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount().
Definition at line 599 of file StringDictionaryProxy.cpp.
References entryCountUnlocked(), and rw_mutex_.
|
private |
Definition at line 595 of file StringDictionaryProxy.cpp.
References storageEntryCount(), and transientEntryCountUnlocked().
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and entryCount().
std::vector< int32_t > StringDictionaryProxy::getCompare | ( | const std::string & | pattern, |
const std::string & | comp_operator | ||
) | const |
Definition at line 523 of file StringDictionaryProxy.cpp.
References CHECK_GE, anonymous_namespace{StringDictionaryProxy.cpp}::do_compare(), generation_, run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().
Referenced by anonymous_namespace{StringOpsIR.cpp}::get_compared_ids().
|
noexcept |
Definition at line 798 of file StringDictionaryProxy.cpp.
References string_dict_.
Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap(), RowSetMemoryOwner::addStringProxyNumericTranslationMap(), and RowSetMemoryOwner::addStringProxyUnionTranslationMap().
|
inline noexcept |
Definition at line 47 of file StringDictionaryProxy.h.
References string_dict_key_.
Referenced by buildIntersectionTranslationMapToOtherProxy(), buildUnionTranslationMapToOtherProxy(), TransientStringLiteralsVisitor::visitStringOper(), and TransientStringLiteralsVisitor::visitUOper().
|
noexcept |
int32_t StringDictionaryProxy::getIdOfString | ( | const std::string & | str | ) | const |
Definition at line 119 of file StringDictionaryProxy.cpp.
References getIdOfStringFromClient(), StringDictionary::INVALID_STR_ID, rw_mutex_, and transient_str_to_int_.
Referenced by anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode(), and Executor::serializeLiterals().
|
private |
Definition at line 131 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, string_dict_, and truncate_to_generation().
Referenced by getIdOfString(), and getOrAddTransientImpl().
int32_t StringDictionaryProxy::getIdOfStringNoGeneration | ( | const std::string & | str | ) | const |
Definition at line 136 of file StringDictionaryProxy.cpp.
References StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, and transient_str_to_int_.
template std::vector< int64_t > StringDictionaryProxy::getLike< int64_t > | ( | const std::string & | pattern, |
const bool | icase, | ||
const bool | is_simple, | ||
const char | escape | ||
) | const |
Definition at line 469 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, run_benchmark_import::result, string_dict_, string_ilike(), string_ilike_simple(), string_like(), string_like_simple(), heavydb.dtypes::T, transient_string_vec_, and transientIndexToId().
|
noexcept |
Definition at line 558 of file StringDictionaryProxy.cpp.
Referenced by DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId().
int32_t StringDictionaryProxy::getOrAddTransient | ( | const std::string & | str | ) |
Definition at line 111 of file StringDictionaryProxy.cpp.
Referenced by apply_multi_input_string_ops_and_encode(), apply_string_ops_and_encode(), populate_output_stats_cols(), Executor::serializeLiterals(), string_compress(), TransientStringLiteralsVisitor::visitConstant(), TransientStringLiteralsVisitor::visitStringOper(), and write_string_to_proxy().
int32_t StringDictionaryProxy::getOrAddTransient | ( | const std::string_view | sv | ) |
Definition at line 115 of file StringDictionaryProxy.cpp.
std::vector< int32_t > StringDictionaryProxy::getOrAddTransientBulk | ( | const std::vector< std::string > & | strings | ) |
Definition at line 60 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, rw_mutex_, and string_dict_.
Referenced by supported_ml_frameworks__cpu_(), and tf_torch_raster_obj_detect__cpu_template().
|
private |
Definition at line 102 of file StringDictionaryProxy.cpp.
References getIdOfStringFromClient(), getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, and rw_mutex_.
|
private |
Definition at line 89 of file StringDictionaryProxy.cpp.
References transient_str_to_int_, transient_string_vec_, and transientIndexToId().
Referenced by buildUnionTranslationMapToOtherProxy(), getOrAddTransientBulk(), getOrAddTransientImpl(), StringLocalCallback::operator()(), and StringNetworkCallback::operator()().
std::vector< int32_t > StringDictionaryProxy::getRegexpLike | ( | const std::string & | pattern, |
const char | escape | ||
) | const |
Definition at line 546 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().
std::string StringDictionaryProxy::getString | ( | int32_t | string_id | ) | const |
Definition at line 172 of file StringDictionaryProxy.cpp.
References getStringUnlocked(), and rw_mutex_.
Referenced by anonymous_namespace{ResultSetIteration.cpp}::build_string_array_target_value(), StringValueConverter::convertToColumnarFormatFromDict(), DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), ResultSet::getString(), intersect_translate_string_id_to_other_dict(), TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode(), and union_translate_string_id_to_other_dict().
|
noexcept |
Definition at line 562 of file StringDictionaryProxy.cpp.
References CHECK_LT.
Referenced by anonymous_namespace{ExternalExecutor.cpp}::decode_string(), string_decompress(), StringDictionaryProxy_getStringBytes(), and StringDictionaryProxy_getStringLength().
std::vector< std::string > StringDictionaryProxy::getStrings | ( | const std::vector< int32_t > & | string_ids | ) | const |
Definition at line 189 of file StringDictionaryProxy.cpp.
References string_dict_, transient_string_vec_, and transientIdToIndex().
|
private |
Definition at line 180 of file StringDictionaryProxy.cpp.
References CHECK_LT, storageEntryCount(), string_dict_, transient_string_vec_, and transientIdToIndex().
Referenced by buildNumericTranslationMap(), buildUnionTranslationMapToOtherProxy(), and getString().
std::vector< int32_t > StringDictionaryProxy::getTransientBulk | ( | const std::vector< std::string > & | strings | ) | const |
Executes read-only lookup of a vector of strings and returns a vector of their integer ids.
This function, unlike getOrAddTransientBulk, will not add strings to the dictionary. Use this function if strings that don't currently exist in the StringDictionaryProxy should not be added to the proxy as transient entries. This method also has performance advantages over getOrAddTransientBulk for read-only use cases, in that it can: 1) take a read lock instead of a write lock for the transient lookups, and 2) use a tbb::parallel_for implementation of the transient string lookups, as we are guaranteed that the underlying map of strings to int ids cannot change.
strings | Vector of strings to perform string id lookups on |
Definition at line 52 of file StringDictionaryProxy.cpp.
References CHECK_GE, generation_, and getTransientBulkImpl().
|
private |
Definition at line 695 of file StringDictionaryProxy.cpp.
References generation_, string_dict_, and transientLookupBulk().
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and getTransientBulk().
|
inline |
Definition at line 245 of file StringDictionaryProxy.h.
References transient_string_vec_.
Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().
|
inline |
Definition at line 143 of file StringDictionaryProxy.h.
References generation_, StringDictionary::INVALID_STR_ID, and transient_string_vec_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and transientUnion().
|
private |
Definition at line 209 of file StringDictionaryProxy.cpp.
References StringDictionary::INVALID_STR_ID, and transient_str_to_int_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().
bool StringDictionaryProxy::operator!= | ( | StringDictionaryProxy const & | rhs | ) | const |
Definition at line 811 of file StringDictionaryProxy.cpp.
References operator==().
|
delete |
bool StringDictionaryProxy::operator== | ( | StringDictionaryProxy const & | rhs | ) | const |
Definition at line 806 of file StringDictionaryProxy.cpp.
References string_dict_key_, and transient_str_to_int_.
Referenced by operator!=().
|
private |
size_t StringDictionaryProxy::storageEntryCount | ( | ) | const |
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary.
Definition at line 574 of file StringDictionaryProxy.cpp.
References CHECK_LE, generation_, and string_dict_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), entryCountUnlocked(), and getStringUnlocked().
size_t StringDictionaryProxy::transientEntryCount | ( | ) | const |
Returns the number of transient string entries for this proxy.
Definition at line 590 of file StringDictionaryProxy.cpp.
References rw_mutex_, and transientEntryCountUnlocked().
|
private |
Definition at line 581 of file StringDictionaryProxy.cpp.
References CHECK_LE, and transient_str_to_int_.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildUnionTranslationMapToOtherProxy(), entryCountUnlocked(), and transientEntryCount().
|
inline static |
Definition at line 251 of file StringDictionaryProxy.h.
Referenced by getStrings(), getStringUnlocked(), and StringDictionary::populate_string_ids().
|
inline static |
Definition at line 256 of file StringDictionaryProxy.h.
Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), getCompare(), getLike(), getOrAddTransientUnlocked(), getRegexpLike(), and ArrowResultSetConverter::initializeColumnBuilder().
|
private |
Definition at line 715 of file StringDictionaryProxy.cpp.
References rw_mutex_, transient_str_to_int_, transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().
Referenced by getTransientBulkImpl().
|
private |
Definition at line 755 of file StringDictionaryProxy.cpp.
References CHECK_GE, StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, and threading_serial::parallel_for().
Referenced by transientLookupBulk().
|
private |
Definition at line 735 of file StringDictionaryProxy.cpp.
References StringDictionary::INVALID_STR_ID, and lookupTransientStringUnlocked().
Referenced by transientLookupBulk().
StringDictionaryProxy::IdMap StringDictionaryProxy::transientUnion | ( | StringDictionaryProxy const & | sdp_rhs | ) |
Definition at line 669 of file StringDictionaryProxy.cpp.
References eachStringSerially(), initIdMap(), and string_dict_.
|
noexcept |
|
friend |
Definition at line 309 of file StringDictionaryProxy.h.
|
friend |
Definition at line 310 of file StringDictionaryProxy.h.
|
private |
Definition at line 300 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildNumericTranslationMap(), eachStringSerially(), getCompare(), getGeneration(), getIdOfStringFromClient(), getLike(), getOrAddTransientBulk(), getRegexpLike(), getTransientBulk(), getTransientBulkImpl(), initIdMap(), and storageEntryCount().
|
mutable private |
Definition at line 301 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxy(), buildUnionTranslationMapToOtherProxy(), entryCount(), getIdOfString(), getIdOfStringNoGeneration(), getOrAddTransientBulk(), getOrAddTransientImpl(), getString(), transientEntryCount(), and transientLookupBulk().
|
private |
Definition at line 295 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildNumericTranslationMap(), buildUnionTranslationMapToOtherProxy(), eachStringSerially(), getCompare(), getDictionary(), getIdOfStringFromClient(), getIdOfStringNoGeneration(), getLike(), getOrAddTransientBulk(), getRegexpLike(), getStrings(), getStringUnlocked(), getTransientBulkImpl(), StringLocalCallback::operator()(), storageEntryCount(), and transientUnion().
|
private |
Definition at line 296 of file StringDictionaryProxy.h.
Referenced by getDictKey(), and operator==().
|
private |
Definition at line 297 of file StringDictionaryProxy.h.
Referenced by getIdOfString(), getIdOfStringNoGeneration(), getOrAddTransientUnlocked(), lookupTransientStringUnlocked(), operator==(), transientEntryCountUnlocked(), and transientLookupBulk().
|
private |
Definition at line 299 of file StringDictionaryProxy.h.
Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildNumericTranslationMap(), eachStringSerially(), getCompare(), getLike(), getOrAddTransientUnlocked(), getRegexpLike(), getStrings(), getStringUnlocked(), getTransientVector(), and initIdMap().