OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringDictionaryProxy Class Reference

#include <StringDictionaryProxy.h>

+ Collaboration diagram for StringDictionaryProxy:

Classes

struct  HeterogeneousStringEqual
 
struct  HeterogeneousStringHash
 
class  TranslationMap
 

Public Types

using IdMap = TranslationMap< int32_t >
 
using TransientMap = robin_hood::unordered_node_map< std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual >
 

Public Member Functions

 StringDictionaryProxy (StringDictionaryProxy const &)=delete
 
StringDictionaryProxy const & operator= (StringDictionaryProxy const &)=delete
 
 StringDictionaryProxy (std::shared_ptr< StringDictionary > sd, const shared::StringDictKey &string_dict_key, const int64_t generation)
 
const shared::StringDictKey & getDictKey () const noexcept
 
bool operator== (StringDictionaryProxy const &) const
 
bool operator!= (StringDictionaryProxy const &) const
 
int32_t getOrAdd (const std::string &str) noexcept
 
StringDictionary * getDictionary () const noexcept
 
int64_t getGeneration () const noexcept
 
std::vector< int32_t > getTransientBulk (const std::vector< std::string > &strings) const
 Executes read-only lookup of a vector of strings and returns a vector of their integer ids. More...
 
int32_t getOrAddTransient (const std::string &)
 
int32_t getOrAddTransient (const std::string_view)
 
std::vector< int32_t > getOrAddTransientBulk (const std::vector< std::string > &strings)
 
int32_t getIdOfString (const std::string &str) const
 
int32_t getIdOfStringNoGeneration (const std::string &str) const
 
std::string getString (int32_t string_id) const
 
std::vector< std::string > getStrings (const std::vector< int32_t > &string_ids) const
 
std::pair< const char *, size_t > getStringBytes (int32_t string_id) const noexcept
 
IdMap initIdMap () const
 
TranslationMap< Datum > buildNumericTranslationMap (const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 Builds a vectorized string_id translation map from this proxy to dest_proxy. More...
 
IdMap buildIntersectionTranslationMapToOtherProxy (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 
IdMap buildUnionTranslationMapToOtherProxy (StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_types) const
 
size_t storageEntryCount () const
 Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary. More...
 
size_t transientEntryCount () const
 Returns the number of transient string entries for this proxy. More...
 
size_t entryCount () const
 Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount() More...
 
void updateGeneration (const int64_t generation) noexcept
 
template<typename T >
std::vector< T > getLike (const std::string &pattern, const bool icase, const bool is_simple, const char escape) const
 
std::vector< int32_t > getCompare (const std::string &pattern, const std::string &comp_operator) const
 
std::vector< int32_t > getRegexpLike (const std::string &pattern, const char escape) const
 
const std::vector< std::string const * > & getTransientVector () const
 
void eachStringSerially (StringDictionary::StringCallback &) const
 
IdMap transientUnion (StringDictionaryProxy const &)
 

Static Public Member Functions

static unsigned transientIdToIndex (int32_t const id)
 
static int32_t transientIndexToId (unsigned const index)
 

Private Member Functions

std::string getStringUnlocked (const int32_t string_id) const
 
size_t transientEntryCountUnlocked () const
 
size_t entryCountUnlocked () const
 
size_t persistedC () const
 
template<typename String >
int32_t getOrAddTransientImpl (String)
 
template<typename String >
int32_t lookupTransientStringUnlocked (const String &lookup_string) const
 
size_t getTransientBulkImpl (const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const
 
template<typename String >
size_t transientLookupBulk (const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const
 
template<typename String >
size_t transientLookupBulkUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 
template<typename String >
size_t transientLookupBulkParallelUnlocked (const std::vector< String > &lookup_strings, int32_t *string_ids) const
 
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked (const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
 
template<typename String >
int32_t getIdOfStringFromClient (String const &) const
 
template<typename String >
int32_t getOrAddTransientUnlocked (String const &)
 

Private Attributes

std::shared_ptr< StringDictionary > string_dict_
 
const shared::StringDictKey string_dict_key_
 
TransientMap transient_str_to_int_
 
std::vector< std::string const * > transient_string_vec_
 
int64_t generation_
 
std::shared_mutex rw_mutex_
 

Friends

class StringLocalCallback
 
class StringNetworkCallback
 

Detailed Description

Definition at line 39 of file StringDictionaryProxy.h.

Member Typedef Documentation

using StringDictionaryProxy::IdMap = TranslationMap<int32_t>

Definition at line 141 of file StringDictionaryProxy.h.

using StringDictionaryProxy::TransientMap = robin_hood::unordered_node_map<std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual>

Definition at line 243 of file StringDictionaryProxy.h.

Constructor & Destructor Documentation

StringDictionaryProxy::StringDictionaryProxy ( StringDictionaryProxy const &  )
delete
StringDictionaryProxy::StringDictionaryProxy ( std::shared_ptr< StringDictionary >  sd,
const shared::StringDictKey &  string_dict_key,
const int64_t  generation 
)

Definition at line 39 of file StringDictionaryProxy.cpp.

42  : string_dict_(sd), string_dict_key_(string_dict_key), generation_(generation) {}
const shared::StringDictKey string_dict_key_
std::shared_ptr< StringDictionary > string_dict_

Member Function Documentation

StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxy ( const StringDictionaryProxy *  dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos 
) const

Definition at line 382 of file StringDictionaryProxy.cpp.

References buildIntersectionTranslationMapToOtherProxyUnlocked(), getDictKey(), order_translation_locks(), and rw_mutex_.

Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap().

384  {
385  const auto& source_dict_id = getDictKey();
386  const auto& dest_dict_id = dest_proxy->getDictKey();
387 
388  std::shared_lock<std::shared_mutex> source_proxy_read_lock(rw_mutex_, std::defer_lock);
389  std::unique_lock<std::shared_mutex> dest_proxy_write_lock(dest_proxy->rw_mutex_,
390  std::defer_lock);
391  order_translation_locks(
392  source_dict_id, dest_dict_id, source_proxy_read_lock, dest_proxy_write_lock);
393  return buildIntersectionTranslationMapToOtherProxyUnlocked(dest_proxy, string_op_infos);
394 }
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
void order_translation_locks(const shared::StringDictKey &source_dict_key, const shared::StringDictKey &dest_dict_key, std::shared_lock< std::shared_mutex > &source_read_lock, std::shared_lock< std::shared_mutex > &dest_read_lock)
const shared::StringDictKey & getDictKey() const noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::buildIntersectionTranslationMapToOtherProxyUnlocked ( const StringDictionaryProxy *  dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos 
) const
private

Definition at line 265 of file StringDictionaryProxy.cpp.

References CHECK_GT, CHECK_LE, StringDictionaryProxy::TranslationMap< T >::data(), DEBUG_TIMER, StringDictionaryProxy::TranslationMap< T >::empty(), entryCountUnlocked(), generation_, getTransientBulkImpl(), StringDictionaryProxy::TranslationMap< T >::getVectorMap(), initIdMap(), StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), StringDictionaryProxy::TranslationMap< T >::numTransients(), StringDictionaryProxy::TranslationMap< T >::setNumUntranslatedStrings(), StringDictionaryProxy::TranslationMap< T >::setRangeEnd(), StringDictionaryProxy::TranslationMap< T >::setRangeStart(), StringDictionaryProxy::TranslationMap< T >::storageData(), storageEntryCount(), string_dict_, shared::transform(), transient_string_vec_, transientEntryCountUnlocked(), and VLOG.

Referenced by buildIntersectionTranslationMapToOtherProxy(), and buildUnionTranslationMapToOtherProxy().

267  {
268  auto timer = DEBUG_TIMER(__func__);
269  IdMap id_map = initIdMap();
270 
271  if (id_map.empty()) {
272  return id_map;
273  }
274 
275  const StringOps_Namespace::StringOps string_ops(string_op_infos);
276 
277  // First map transient strings, store at front of vector map
278  const size_t num_transient_entries = id_map.numTransients();
279  size_t num_transient_strings_not_translated = 0UL;
280  if (num_transient_entries) {
281  std::vector<std::string> transient_lookup_strings(num_transient_entries);
282  if (string_ops.size()) {
283  std::transform(transient_string_vec_.cbegin(),
284  transient_string_vec_.cend(),
285  transient_lookup_strings.rbegin(),
286  [&](std::string const* ptr) { return string_ops(*ptr); });
287  } else {
288  std::transform(transient_string_vec_.cbegin(),
289  transient_string_vec_.cend(),
290  transient_lookup_strings.rbegin(),
291  [](std::string const* ptr) { return *ptr; });
292  }
293 
294  // This lookup may have a different snapshot of
295  // dest_proxy transients and dictionary than what happens under
296  // the below dest_proxy_read_lock. We may need an unlocked version of
297  // getTransientBulk to ensure consistency (I don't believe
298  // current behavior would cause crashes/races, verify this though)
299 
300  // Todo(mattp): Consider variant of getTransientBulkImp that can take
301  // a vector of pointer-to-strings so we don't have to materialize
302  // transient_string_vec_ into transient_lookup_strings.
303 
304  num_transient_strings_not_translated =
305  dest_proxy->getTransientBulkImpl(transient_lookup_strings, id_map.data(), false);
306  }
307 
308  // Now map strings in dictionary
309  // We place non-transient strings after the transient strings
310  // if they exist, otherwise at index 0
311  int32_t* translation_map_stored_entries_ptr = id_map.storageData();
312 
313  auto dest_transient_lookup_callback = [dest_proxy, translation_map_stored_entries_ptr](
314  const std::string_view& source_string,
315  const int32_t source_string_id) {
316  translation_map_stored_entries_ptr[source_string_id] =
317  dest_proxy->lookupTransientStringUnlocked(source_string);
318  return translation_map_stored_entries_ptr[source_string_id] ==
319  StringDictionary::INVALID_STR_ID;
320  };
321 
322  const size_t num_dest_transients = dest_proxy->transientEntryCountUnlocked();
323  const size_t num_persisted_strings_not_translated =
324  generation_ > 0 ? string_dict_->buildDictionaryTranslationMap(
325  dest_proxy->string_dict_.get(),
326  translation_map_stored_entries_ptr,
327  generation_,
328  dest_proxy->generation_,
329  num_dest_transients > 0UL,
330  dest_transient_lookup_callback,
331  string_op_infos)
332  : 0UL;
333 
334  const size_t num_dest_entries = dest_proxy->entryCountUnlocked();
335  const size_t num_total_entries =
336  id_map.getVectorMap().size() - 1UL /* account for skipped entry -1 */;
337  CHECK_GT(num_total_entries, 0UL);
338  const size_t num_strings_not_translated =
339  num_transient_strings_not_translated + num_persisted_strings_not_translated;
340  CHECK_LE(num_strings_not_translated, num_total_entries);
341  id_map.setNumUntranslatedStrings(num_strings_not_translated);
342 
343  // Below is a conservative setting of range based on the size of the destination proxy,
344  // but probably not worth a scan over the data (or inline computation as we translate)
345  // to compute the actual ranges
346 
347  id_map.setRangeStart(
348  num_dest_transients > 0 ? -1 - static_cast<int32_t>(num_dest_transients) : 0);
349  id_map.setRangeEnd(dest_proxy->storageEntryCount());
350 
351  const size_t num_entries_translated = num_total_entries - num_strings_not_translated;
352  const float match_pct =
353  100.0 * static_cast<float>(num_entries_translated) / num_total_entries;
354  VLOG(1) << std::fixed << std::setprecision(2) << match_pct << "% ("
355  << num_entries_translated << " entries) from dictionary ("
356  << string_dict_->getDictKey() << ") with " << num_total_entries
357  << " total entries ( " << num_transient_entries << " literals)"
358  << " translated to dictionary (" << dest_proxy->string_dict_->getDictKey()
359  << ") with " << num_dest_entries << " total entries ("
360  << dest_proxy->transientEntryCountUnlocked() << " literals).";
361 
362  return id_map;
363 }
size_t transientEntryCountUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
#define CHECK_GT(x, y)
Definition: Logger.h:305
TranslationMap< int32_t > IdMap
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
size_t getTransientBulkImpl(const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const
OUTPUT transform(INPUT const &input, FUNC const &func)
Definition: misc.h:329
#define CHECK_LE(x, y)
Definition: Logger.h:304
#define DEBUG_TIMER(name)
Definition: Logger.h:412
#define VLOG(n)
Definition: Logger.h:388

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::TranslationMap< Datum > StringDictionaryProxy::buildNumericTranslationMap ( const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_infos) const

Builds a vectorized string_id translation map from this proxy to dest_proxy.

Parameters
dest_proxy: StringDictionaryProxy that we are to map this proxy's string ids to
Returns
An IdMap which encapsulates a std::vector<int32_t> of string ids for both transient and non-transient strings, mapping to their translated string_ids. offset_ is defined to be the number of transient entries + 1. The ordering of values in the vector_map_ is:
  • the transient ids (there are offset_-1 of these)
  • INVALID_STR_ID (=-1)
  • the non-transient string ids

For example, if there are 3 transient entries in this proxy and 20 in the underlying string dictionary, then vector_map_ will be of size() == 24 and offset_=3+1. The formula to translate ids is new_id = vector_map_[offset_ + old_id]. It is always the case that vector_map_[offset_-1]==-1 so that INVALID_STR_ID maps to INVALID_STR_ID.

Definition at line 217 of file StringDictionaryProxy.cpp.

References CHECK, DEBUG_TIMER, generation_, getStringUnlocked(), threading_serial::parallel_for(), string_dict_, and transient_string_vec_.

Referenced by RowSetMemoryOwner::addStringProxyNumericTranslationMap().

218  {
219  auto timer = DEBUG_TIMER(__func__);
220  CHECK(string_op_infos.size());
221  TranslationMap<Datum> translation_map(transient_string_vec_.size(), generation_);
222  if (translation_map.empty()) {
223  return translation_map;
224  }
225 
226  const StringOps_Namespace::StringOps string_ops(string_op_infos);
227 
228  const size_t num_transient_entries = translation_map.numTransients();
229  if (num_transient_entries) {
230  const int32_t map_domain_start = translation_map.domainStart();
231  if (num_transient_entries > 10000UL) {
232  tbb::parallel_for(
233  tbb::blocked_range<int32_t>(map_domain_start, -1),
234  [&](const tbb::blocked_range<int32_t>& r) {
235  const int32_t start_idx = r.begin();
236  const int32_t end_idx = r.end();
237  for (int32_t source_string_id = start_idx; source_string_id < end_idx;
238  ++source_string_id) {
239  const auto source_string = getStringUnlocked(source_string_id);
240  translation_map[source_string_id] = string_ops.numericEval(source_string);
241  }
242  });
243  } else {
244  for (int32_t source_string_id = map_domain_start; source_string_id < -1;
245  ++source_string_id) {
246  const auto source_string = getStringUnlocked(source_string_id);
247  translation_map[source_string_id] = string_ops.numericEval(source_string);
248  }
249  }
250  }
251 
252  Datum* translation_map_stored_entries_ptr = translation_map.storageData();
253  if (generation_ > 0) {
254  string_dict_->buildDictionaryNumericTranslationMap(
255  translation_map_stored_entries_ptr, generation_, string_op_infos);
256  }
257  translation_map.setNumUntranslatedStrings(0UL);
258 
259  // Todo(todd): Set range start/end with scan
260 
261  return translation_map;
262 }
std::string getStringUnlocked(const int32_t string_id) const
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
#define CHECK(condition)
Definition: Logger.h:291
#define DEBUG_TIMER(name)
Definition: Logger.h:412
Definition: Datum.h:71

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::buildUnionTranslationMapToOtherProxy ( StringDictionaryProxy *  dest_proxy,
const std::vector< StringOps_Namespace::StringOpInfo > &  string_op_types 
) const

Definition at line 396 of file StringDictionaryProxy.cpp.

References buildIntersectionTranslationMapToOtherProxyUnlocked(), DEBUG_TIMER, getDictKey(), getOrAddTransientUnlocked(), getStringUnlocked(), StringDictionary::INVALID_STR_ID, order_translation_locks(), rw_mutex_, string_dict_, and transientEntryCountUnlocked().

Referenced by RowSetMemoryOwner::addStringProxyUnionTranslationMap().

398  {
399  auto timer = DEBUG_TIMER(__func__);
400 
401  const auto& source_dict_id = getDictKey();
402  const auto& dest_dict_id = dest_proxy->getDictKey();
403  std::shared_lock<std::shared_mutex> source_proxy_read_lock(rw_mutex_, std::defer_lock);
404  std::unique_lock<std::shared_mutex> dest_proxy_write_lock(dest_proxy->rw_mutex_,
405  std::defer_lock);
406  order_translation_locks(
407  source_dict_id, dest_dict_id, source_proxy_read_lock, dest_proxy_write_lock);
408 
409  auto id_map =
410  buildIntersectionTranslationMapToOtherProxyUnlocked(dest_proxy, string_op_infos);
411  if (id_map.empty()) {
412  return id_map;
413  }
414  const auto num_untranslated_strings = id_map.numUntranslatedStrings();
415  if (num_untranslated_strings > 0) {
416  const size_t total_post_translation_dest_transients =
417  num_untranslated_strings + dest_proxy->transientEntryCountUnlocked();
418  constexpr size_t max_allowed_transients =
419  static_cast<size_t>(std::numeric_limits<int32_t>::max() -
420  2); /* -2 accounts for INVALID_STR_ID and NULL value */
421  if (total_post_translation_dest_transients > max_allowed_transients) {
422  std::stringstream ss;
423  ss << "Union translation to dictionary " << getDictKey() << " would result in "
424  << total_post_translation_dest_transients
425  << " transient entries, which is more than limit of " << max_allowed_transients
426  << " transients.";
427  throw std::runtime_error(ss.str());
428  }
429  const int32_t map_domain_start = id_map.domainStart();
430  const int32_t map_domain_end = id_map.domainEnd();
431 
432  const StringOps_Namespace::StringOps string_ops(string_op_infos);
433  const bool has_string_ops = string_ops.size();
434 
435  // First iterate over transient strings and add to dest map
436  // Todo (todd): Add call to fetch string_views (local) or strings (distributed)
437  // for all non-translated ids to avoid string-by-string fetch
438 
439  for (int32_t source_string_id = map_domain_start; source_string_id < -1;
440  ++source_string_id) {
441  if (id_map[source_string_id] == StringDictionary::INVALID_STR_ID) {
442  const auto source_string = getStringUnlocked(source_string_id);
443  const auto dest_string_id = dest_proxy->getOrAddTransientUnlocked(
444  has_string_ops ? string_ops(source_string) : source_string);
445  id_map[source_string_id] = dest_string_id;
446  }
447  }
448  // Now iterate over stored strings
449  for (int32_t source_string_id = 0; source_string_id < map_domain_end;
450  ++source_string_id) {
451  if (id_map[source_string_id] == StringDictionary::INVALID_STR_ID) {
452  const auto source_string = string_dict_->getString(source_string_id);
453  const auto dest_string_id = dest_proxy->getOrAddTransientUnlocked(
454  has_string_ops ? string_ops(source_string) : source_string);
455  id_map[source_string_id] = dest_string_id;
456  }
457  }
458  }
459  // We may have added transients to the destination proxy, use this to update
460  // our id map range (used downstream for ExpressionRange)
461 
462  const size_t num_dest_transients = dest_proxy->transientEntryCountUnlocked();
463  id_map.setRangeStart(
464  num_dest_transients > 0 ? -1 - static_cast<int32_t>(num_dest_transients) : 0);
465  return id_map;
466 }
size_t transientEntryCountUnlocked() const
std::string getStringUnlocked(const int32_t string_id) const
IdMap buildIntersectionTranslationMapToOtherProxyUnlocked(const StringDictionaryProxy *dest_proxy, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
void order_translation_locks(const shared::StringDictKey &source_dict_key, const shared::StringDictKey &dest_dict_key, std::shared_lock< std::shared_mutex > &source_read_lock, std::shared_lock< std::shared_mutex > &dest_read_lock)
int32_t getOrAddTransientUnlocked(String const &)
#define DEBUG_TIMER(name)
Definition: Logger.h:412
const shared::StringDictKey & getDictKey() const noexcept

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

void StringDictionaryProxy::eachStringSerially ( StringDictionary::StringCallback &  serial_callback) const

Definition at line 605 of file StringDictionaryProxy.cpp.

References generation_, string_dict_, and transient_string_vec_.

Referenced by transientUnion().

606  {
607  constexpr int32_t max_transient_id = -2;
608  // Iterate over transient strings.
609  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
610  std::string const& str = *transient_string_vec_[index];
611  int32_t const string_id = max_transient_id - index;
612  serial_callback(str, string_id);
613  }
614  // Iterate over non-transient strings.
615  string_dict_->eachStringSerially(generation_, serial_callback);
616 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::entryCount ( ) const

Returns the number of total string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount()

Returns
size_t Number of total string entries for this proxy

Definition at line 599 of file StringDictionaryProxy.cpp.

References entryCountUnlocked(), and rw_mutex_.

599  {
600  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
601  return entryCountUnlocked();
602 }

+ Here is the call graph for this function:

size_t StringDictionaryProxy::entryCountUnlocked ( ) const
private

Definition at line 595 of file StringDictionaryProxy.cpp.

References storageEntryCount(), and transientEntryCountUnlocked().

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and entryCount().

595  {
596  return storageEntryCount() + transientEntryCountUnlocked();
597 }
size_t transientEntryCountUnlocked() const
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getCompare ( const std::string &  pattern,
const std::string &  comp_operator 
) const

Definition at line 523 of file StringDictionaryProxy.cpp.

References CHECK_GE, anonymous_namespace{StringDictionaryProxy.cpp}::do_compare(), generation_, run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

Referenced by anonymous_namespace{StringOpsIR.cpp}::get_compared_ids().

525  {
526  CHECK_GE(generation_, 0);
527  auto result = string_dict_->getCompare(pattern, comp_operator, generation_);
528  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
529  if (do_compare(*transient_string_vec_[index], pattern, comp_operator)) {
530  result.push_back(transientIndexToId(index));
531  }
532  }
533  return result;
534 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)
bool do_compare(const std::string &str, const std::string &pattern, const std::string &comp_operator)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionary * StringDictionaryProxy::getDictionary ( ) const
noexcept

Definition at line 798 of file StringDictionaryProxy.cpp.

References string_dict_.

Referenced by RowSetMemoryOwner::addStringProxyIntersectionTranslationMap(), RowSetMemoryOwner::addStringProxyNumericTranslationMap(), and RowSetMemoryOwner::addStringProxyUnionTranslationMap().

798  {
799  return string_dict_.get();
800 }
std::shared_ptr< StringDictionary > string_dict_

+ Here is the caller graph for this function:

const shared::StringDictKey& StringDictionaryProxy::getDictKey ( ) const
inline noexcept

Definition at line 47 of file StringDictionaryProxy.h.

References string_dict_key_.

Referenced by buildIntersectionTranslationMapToOtherProxy(), buildUnionTranslationMapToOtherProxy(), TransientStringLiteralsVisitor::visitStringOper(), and TransientStringLiteralsVisitor::visitUOper().

47 { return string_dict_key_; };
const shared::StringDictKey string_dict_key_

+ Here is the caller graph for this function:

int64_t StringDictionaryProxy::getGeneration ( ) const
noexcept

Definition at line 802 of file StringDictionaryProxy.cpp.

References generation_.

802  {
803  return generation_;
804 }
int32_t StringDictionaryProxy::getIdOfString ( const std::string &  str) const

Definition at line 119 of file StringDictionaryProxy.cpp.

References getIdOfStringFromClient(), StringDictionary::INVALID_STR_ID, rw_mutex_, and transient_str_to_int_.

Referenced by anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode(), and Executor::serializeLiterals().

119  {
120  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
121  auto const str_id = getIdOfStringFromClient(str);
122  if (str_id != StringDictionary::INVALID_STR_ID || transient_str_to_int_.empty()) {
123  return str_id;
124  }
125  auto it = transient_str_to_int_.find(str);
126  return it != transient_str_to_int_.end() ? it->second
127  : StringDictionary::INVALID_STR_ID;
128 }
int32_t getIdOfStringFromClient(String const &) const
static constexpr int32_t INVALID_STR_ID

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::getIdOfStringFromClient ( String const &  str) const
private

Definition at line 131 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, string_dict_, and truncate_to_generation().

Referenced by getIdOfString(), and getOrAddTransientImpl().

131  {
132  CHECK_GE(generation_, 0);
133  return truncate_to_generation(string_dict_->getIdOfString(str), generation_);
134 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
std::shared_ptr< StringDictionary > string_dict_
int32_t truncate_to_generation(const int32_t id, const size_t generation)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getIdOfStringNoGeneration ( const std::string &  str) const

Definition at line 136 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, rw_mutex_, string_dict_, and transient_str_to_int_.

136  {
137  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
138  auto str_id = string_dict_->getIdOfString(str);
139  if (str_id != StringDictionary::INVALID_STR_ID || transient_str_to_int_.empty()) {
140  return str_id;
141  }
142  auto it = transient_str_to_int_.find(str);
143  return it != transient_str_to_int_.end() ? it->second
144  : StringDictionary::INVALID_STR_ID;
145 }
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
template<typename T >
template std::vector< int64_t > StringDictionaryProxy::getLike< int64_t > ( const std::string &  pattern,
const bool  icase,
const bool  is_simple,
const char  escape 
) const

Definition at line 469 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, run_benchmark_import::result, string_dict_, string_ilike(), string_ilike_simple(), string_like(), string_like_simple(), heavydb.dtypes::T, transient_string_vec_, and transientIndexToId().

472  {
473  CHECK_GE(generation_, 0);
474  auto result = string_dict_->getLike<T>(pattern, icase, is_simple, escape, generation_);
475  auto is_like_impl = icase ? is_simple ? string_ilike_simple : string_ilike
476  : is_simple ? string_like_simple
477  : string_like;
478  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
479  auto const str = *transient_string_vec_[index];
480  if (is_like_impl(str.c_str(), str.size(), pattern.c_str(), pattern.size(), escape)) {
481  result.push_back(transientIndexToId(index));
482  }
483  }
484  return result;
485 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
RUNTIME_EXPORT DEVICE bool string_ilike_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, char escape_char)
Definition: StringLike.cpp:61
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
RUNTIME_EXPORT DEVICE bool string_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:250
static int32_t transientIndexToId(unsigned const index)
RUNTIME_EXPORT DEVICE bool string_like_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, char escape_char)
Definition: StringLike.cpp:43
RUNTIME_EXPORT DEVICE bool string_ilike(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:261

+ Here is the call graph for this function:

int32_t StringDictionaryProxy::getOrAdd ( const std::string &  str)
noexcept

Definition at line 558 of file StringDictionaryProxy.cpp.

Referenced by DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId().

558  {
559  return string_dict_->getOrAdd(str);
560 }
std::shared_ptr< StringDictionary > string_dict_

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getOrAddTransient ( const std::string &  str)

Definition at line 111 of file StringDictionaryProxy.cpp.

Referenced by apply_multi_input_string_ops_and_encode(), apply_string_ops_and_encode(), populate_output_stats_cols(), Executor::serializeLiterals(), string_compress(), TransientStringLiteralsVisitor::visitConstant(), TransientStringLiteralsVisitor::visitStringOper(), and write_string_to_proxy().

111  {
112  return getOrAddTransientImpl<std::string const&>(str);
113 }

+ Here is the caller graph for this function:

int32_t StringDictionaryProxy::getOrAddTransient ( const std::string_view  sv)

Definition at line 115 of file StringDictionaryProxy.cpp.

115  {
116  return getOrAddTransientImpl<std::string_view const>(sv);
117 }
std::vector< int32_t > StringDictionaryProxy::getOrAddTransientBulk ( const std::vector< std::string > &  strings)

Definition at line 60 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, rw_mutex_, and string_dict_.

Referenced by supported_ml_frameworks__cpu_(), and tf_torch_raster_obj_detect__cpu_template().

61  {
63  const size_t num_strings = strings.size();
64  std::vector<int32_t> string_ids(num_strings);
65  if (num_strings == 0) {
66  return string_ids;
67  }
68  // Since new strings added to a StringDictionaryProxy are not materialized in the
69  // proxy's underlying StringDictionary, we can use the fast parallel
70  // StringDictionary::getBulk method to fetch ids from the underlying dictionary (which
71  // will return StringDictionary::INVALID_STR_ID for strings that don't exist)
72 
73  // Don't need to be under lock here as the string ids for strings in the underlying
74  // materialized dictionary are immutable
75  const size_t num_strings_not_found =
76  string_dict_->getBulk(strings, string_ids.data(), generation_);
77  if (num_strings_not_found > 0) {
78  std::lock_guard<std::shared_mutex> write_lock(rw_mutex_);
79  for (size_t string_idx = 0; string_idx < num_strings; ++string_idx) {
80  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
81  string_ids[string_idx] = getOrAddTransientUnlocked(strings[string_idx]);
82  }
83  }
84  }
85  return string_ids;
86 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< StringDictionary > string_dict_
int32_t getOrAddTransientUnlocked(String const &)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::getOrAddTransientImpl ( String  str)
private

Definition at line 102 of file StringDictionaryProxy.cpp.

References getIdOfStringFromClient(), getOrAddTransientUnlocked(), StringDictionary::INVALID_STR_ID, and rw_mutex_.

102  {
103  auto const string_id = getIdOfStringFromClient(str);
104  if (string_id != StringDictionary::INVALID_STR_ID) {
105  return string_id;
106  }
107  std::lock_guard<std::shared_mutex> write_lock(rw_mutex_);
108  return getOrAddTransientUnlocked(str);
109 }
int32_t getIdOfStringFromClient(String const &) const
static constexpr int32_t INVALID_STR_ID
int32_t getOrAddTransientUnlocked(String const &)

+ Here is the call graph for this function:

template<typename String >
int32_t StringDictionaryProxy::getOrAddTransientUnlocked ( String const &  str)
private

Definition at line 89 of file StringDictionaryProxy.cpp.

References transient_str_to_int_, transient_string_vec_, and transientIndexToId().

Referenced by buildUnionTranslationMapToOtherProxy(), getOrAddTransientBulk(), getOrAddTransientImpl(), StringLocalCallback::operator()(), and StringNetworkCallback::operator()().

89  {
90  unsigned const new_index = transient_str_to_int_.size();
91  auto transient_id = transientIndexToId(new_index);
92  auto const emplaced = transient_str_to_int_.emplace(str, transient_id);
93  if (emplaced.second) { // (str, transient_id) was added to transient_str_to_int_.
94  transient_string_vec_.push_back(&emplaced.first->first);
95  } else { // str already exists in transient_str_to_int_. Return existing transient_id.
96  transient_id = emplaced.first->second;
97  }
98  return transient_id;
99 }
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getRegexpLike ( const std::string &  pattern,
const char  escape 
) const

Definition at line 546 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, anonymous_namespace{StringDictionary.cpp}::is_regexp_like(), run_benchmark_import::result, string_dict_, transient_string_vec_, and transientIndexToId().

547  {
548  CHECK_GE(generation_, 0);
549  auto result = string_dict_->getRegexpLike(pattern, escape, generation_);
550  for (unsigned index = 0; index < transient_string_vec_.size(); ++index) {
551  if (is_regexp_like(*transient_string_vec_[index], pattern, escape)) {
552  result.push_back(transientIndexToId(index));
553  }
554  }
555  return result;
556 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
bool is_regexp_like(const std::string &str, const std::string &pattern, const char escape)
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static int32_t transientIndexToId(unsigned const index)

+ Here is the call graph for this function:

std::string StringDictionaryProxy::getString ( int32_t  string_id) const

Definition at line 172 of file StringDictionaryProxy.cpp.

References getStringUnlocked(), and rw_mutex_.

Referenced by anonymous_namespace{ResultSetIteration.cpp}::build_string_array_target_value(), StringValueConverter::convertToColumnarFormatFromDict(), DictionaryValueConverter< TARGET_TYPE >::convertTransientStringIdToPermanentId(), anonymous_namespace{RelAlgTranslator.cpp}::fill_dictionary_encoded_in_vals(), ResultSet::getString(), intersect_translate_string_id_to_other_dict(), TableFunctions_Namespace::OneHotEncoder_Namespace::one_hot_encode(), and union_translate_string_id_to_other_dict().

172  {
173  if (inline_int_null_value<int32_t>() == string_id) {
174  return "";
175  }
176  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
177  return getStringUnlocked(string_id);
178 }
std::string getStringUnlocked(const int32_t string_id) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< const char *, size_t > StringDictionaryProxy::getStringBytes ( int32_t  string_id) const
noexcept

Definition at line 562 of file StringDictionaryProxy.cpp.

References CHECK_LT.

Referenced by anonymous_namespace{ExternalExecutor.cpp}::decode_string(), string_decompress(), StringDictionaryProxy_getStringBytes(), and StringDictionaryProxy_getStringLength().

563  {
564  if (string_id >= 0) {
565  return string_dict_.get()->getStringBytes(string_id);
566  }
567  unsigned const string_index = transientIdToIndex(string_id);
568  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
569  CHECK_LT(string_index, transient_string_vec_.size());
570  std::string const* const str_ptr = transient_string_vec_[string_index];
571  return {str_ptr->c_str(), str_ptr->size()};
572 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
#define CHECK_LT(x, y)
Definition: Logger.h:303
static unsigned transientIdToIndex(int32_t const id)

+ Here is the caller graph for this function:

std::vector< std::string > StringDictionaryProxy::getStrings ( const std::vector< int32_t > &  string_ids) const

Definition at line 189 of file StringDictionaryProxy.cpp.

References string_dict_, transient_string_vec_, and transientIdToIndex().

190  {
191  std::vector<std::string> strings;
192  if (!string_ids.empty()) {
193  strings.reserve(string_ids.size());
194  for (const auto string_id : string_ids) {
195  if (string_id >= 0) {
196  strings.emplace_back(string_dict_->getString(string_id));
197  } else if (inline_int_null_value<int32_t>() == string_id) {
198  strings.emplace_back("");
199  } else {
200  unsigned const string_index = transientIdToIndex(string_id);
201  strings.emplace_back(*transient_string_vec_[string_index]);
202  }
203  }
204  }
205  return strings;
206 }
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
static unsigned transientIdToIndex(int32_t const id)

+ Here is the call graph for this function:

std::string StringDictionaryProxy::getStringUnlocked ( const int32_t  string_id) const
private

Definition at line 180 of file StringDictionaryProxy.cpp.

References CHECK_LT, storageEntryCount(), string_dict_, transient_string_vec_, and transientIdToIndex().

Referenced by buildNumericTranslationMap(), buildUnionTranslationMapToOtherProxy(), and getString().

180  {
181  if (string_id >= 0 && storageEntryCount() > 0) {
182  return string_dict_->getString(string_id);
183  }
184  unsigned const string_index = transientIdToIndex(string_id);
185  CHECK_LT(string_index, transient_string_vec_.size());
186  return *transient_string_vec_[string_index];
187 }
size_t storageEntryCount() const
Returns the number of string entries in the underlying string dictionary, at this proxy's generation_...
std::shared_ptr< StringDictionary > string_dict_
std::vector< std::string const * > transient_string_vec_
#define CHECK_LT(x, y)
Definition: Logger.h:303
static unsigned transientIdToIndex(int32_t const id)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< int32_t > StringDictionaryProxy::getTransientBulk ( const std::vector< std::string > &  strings) const

Executes read-only lookup of a vector of strings and returns a vector of their integer ids.

This function, unlike getOrAddTransientBulk, will not add strings to the dictionary. Use this function if strings that don't currently exist in the StringDictionaryProxy should not be added to the proxy as transient entries. This method also has performance advantages over getOrAddTransientBulk for read-only use cases, in that it can: 1) take a read lock instead of a write lock for the transient lookups, and 2) use a tbb::parallel_for implementation of the transient string lookups, as we are guaranteed that the underlying map of strings to int ids cannot change.

Parameters
strings - Vector of strings to perform string id lookups on
Returns
A vector of string_ids of the same length as strings, containing the id of each string that was found in the underlying StringDictionary instance or in the proxy's transient map, and StringDictionary::INVALID_STR_ID for each string that was not found.

Definition at line 52 of file StringDictionaryProxy.cpp.

References CHECK_GE, generation_, and getTransientBulkImpl().

53  {
55  std::vector<int32_t> string_ids(strings.size());
56  getTransientBulkImpl(strings, string_ids.data(), true);
57  return string_ids;
58 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
size_t getTransientBulkImpl(const std::vector< std::string > &strings, int32_t *string_ids, const bool take_read_lock) const

+ Here is the call graph for this function:

size_t StringDictionaryProxy::getTransientBulkImpl ( const std::vector< std::string > &  strings,
int32_t *  string_ids,
const bool  take_read_lock 
) const
private

Definition at line 695 of file StringDictionaryProxy.cpp.

References generation_, string_dict_, and transientLookupBulk().

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and getTransientBulk().

698  {
699  const size_t num_strings = strings.size();
700  if (num_strings == 0) {
701  return 0UL;
702  }
703  // StringDictionary::getBulk returns the number of strings not found
704  if (string_dict_->getBulk(strings, string_ids, generation_) == 0UL) {
705  return 0UL;
706  }
707 
708  // If here, dictionary could not find at least 1 target string,
709  // now look these up in the transient dictionary
710  // transientLookupBulk returns the number of strings not found
711  return transientLookupBulk(strings, string_ids, take_read_lock);
712 }
size_t transientLookupBulk(const std::vector< String > &lookup_strings, int32_t *string_ids, const bool take_read_lock) const
std::shared_ptr< StringDictionary > string_dict_

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

const std::vector<std::string const*>& StringDictionaryProxy::getTransientVector ( ) const
inline

Definition at line 245 of file StringDictionaryProxy.h.

References transient_string_vec_.

Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), and DictionaryValueConverter< TARGET_TYPE >::processBuffer().

245  {
246  return transient_string_vec_;
247  }
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

IdMap StringDictionaryProxy::initIdMap ( ) const
inline

Definition at line 143 of file StringDictionaryProxy.h.

References generation_, StringDictionary::INVALID_STR_ID, and transient_string_vec_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), and transientUnion().

143  {
144  return IdMap(
146  }
TranslationMap< int32_t > IdMap
static constexpr int32_t INVALID_STR_ID
std::vector< std::string const * > transient_string_vec_

+ Here is the caller graph for this function:

template<typename String >
int32_t StringDictionaryProxy::lookupTransientStringUnlocked ( const String &  lookup_string) const
private

Definition at line 209 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and transient_str_to_int_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().

210  {
211  const auto it = transient_str_to_int_.find(lookup_string);
213  : it->second;
214 }
static constexpr int32_t INVALID_STR_ID

+ Here is the caller graph for this function:

bool StringDictionaryProxy::operator!= ( StringDictionaryProxy const &  rhs) const

Definition at line 811 of file StringDictionaryProxy.cpp.

References operator==().

811  {
812  return !operator==(rhs);
813 }
bool operator==(StringDictionaryProxy const &) const

+ Here is the call graph for this function:

StringDictionaryProxy const& StringDictionaryProxy::operator= ( StringDictionaryProxy const &  )
delete
bool StringDictionaryProxy::operator== ( StringDictionaryProxy const &  rhs) const

Definition at line 806 of file StringDictionaryProxy.cpp.

References string_dict_key_, and transient_str_to_int_.

Referenced by operator!=().

806  {
807  return string_dict_key_ == rhs.string_dict_key_ &&
808  transient_str_to_int_ == rhs.transient_str_to_int_;
809 }
const shared::StringDictKey string_dict_key_

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::persistedC ( ) const
private
size_t StringDictionaryProxy::storageEntryCount ( ) const

Returns the number of string entries in the underlying string dictionary, at this proxy's generation_ if it is set/valid, otherwise just the current size of the dictionary.

Returns
size_t Number of entries in the string dictionary (at this proxy's generation if set)

Definition at line 574 of file StringDictionaryProxy.cpp.

References CHECK_LE, generation_, and string_dict_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), entryCountUnlocked(), and getStringUnlocked().

574  {
575  const size_t num_storage_entries{generation_ == -1 ? string_dict_->storageEntryCount()
576  : generation_};
577  CHECK_LE(num_storage_entries, static_cast<size_t>(std::numeric_limits<int32_t>::max()));
578  return num_storage_entries;
579 }
std::shared_ptr< StringDictionary > string_dict_
#define CHECK_LE(x, y)
Definition: Logger.h:304

+ Here is the caller graph for this function:

size_t StringDictionaryProxy::transientEntryCount ( ) const

Returns the number of transient string entries for this proxy.

Returns
size_t Number of transient string entries for this proxy

Definition at line 590 of file StringDictionaryProxy.cpp.

References rw_mutex_, and transientEntryCountUnlocked().

590  {
591  std::shared_lock<std::shared_mutex> read_lock(rw_mutex_);
593 }
size_t transientEntryCountUnlocked() const

+ Here is the call graph for this function:

size_t StringDictionaryProxy::transientEntryCountUnlocked ( ) const
private

Definition at line 581 of file StringDictionaryProxy.cpp.

References CHECK_LE, and transient_str_to_int_.

Referenced by buildIntersectionTranslationMapToOtherProxyUnlocked(), buildUnionTranslationMapToOtherProxy(), entryCountUnlocked(), and transientEntryCount().

581  {
582  // CHECK_LE(num_storage_entries,
583  // static_cast<size_t>(std::numeric_limits<int32_t>::max()));
584  const size_t num_transient_entries{transient_str_to_int_.size()};
585  CHECK_LE(num_transient_entries,
586  static_cast<size_t>(std::numeric_limits<int32_t>::max()) - 1);
587  return num_transient_entries;
588 }
#define CHECK_LE(x, y)
Definition: Logger.h:304

+ Here is the caller graph for this function:

static unsigned StringDictionaryProxy::transientIdToIndex ( int32_t const  id)
inlinestatic

Definition at line 251 of file StringDictionaryProxy.h.

Referenced by getStrings(), getStringUnlocked(), and StringDictionary::populate_string_ids().

251  {
252  constexpr int max_transient_string_id = -2;
253  return static_cast<unsigned>(max_transient_string_id - id);
254  }

+ Here is the caller graph for this function:

static int32_t StringDictionaryProxy::transientIndexToId ( unsigned const  index)
inlinestatic

Definition at line 256 of file StringDictionaryProxy.h.

Referenced by DictionaryValueConverter< TARGET_TYPE >::DictionaryValueConverter(), getCompare(), getLike(), getOrAddTransientUnlocked(), getRegexpLike(), and ArrowResultSetConverter::initializeColumnBuilder().

256  {
257  constexpr int max_transient_string_id = -2;
258  return static_cast<int32_t>(max_transient_string_id - index);
259  }

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulk ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids,
const bool  take_read_lock 
) const
private

Definition at line 715 of file StringDictionaryProxy.cpp.

References rw_mutex_, transient_str_to_int_, transientLookupBulkParallelUnlocked(), and transientLookupBulkUnlocked().

Referenced by getTransientBulkImpl().

718  {
719  const size_t num_strings = lookup_strings.size();
720  auto read_lock = take_read_lock ? std::shared_lock<std::shared_mutex>(rw_mutex_)
721  : std::shared_lock<std::shared_mutex>();
722 
723  if (num_strings == static_cast<size_t>(0) || transient_str_to_int_.empty()) {
724  return 0UL;
725  }
726  constexpr size_t tbb_parallel_threshold{20000};
727  if (num_strings < tbb_parallel_threshold) {
728  return transientLookupBulkUnlocked(lookup_strings, string_ids);
729  } else {
730  return transientLookupBulkParallelUnlocked(lookup_strings, string_ids);
731  }
732 }
size_t transientLookupBulkUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const
size_t transientLookupBulkParallelUnlocked(const std::vector< String > &lookup_strings, int32_t *string_ids) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulkParallelUnlocked ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 755 of file StringDictionaryProxy.cpp.

References CHECK_GE, StringDictionary::INVALID_STR_ID, lookupTransientStringUnlocked(), ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, and threading_serial::parallel_for().

Referenced by transientLookupBulk().

757  {
758  const size_t num_lookup_strings = lookup_strings.size();
759  const size_t target_inputs_per_thread = 20000L;
760  ThreadInfo thread_info(
761  std::thread::hardware_concurrency(), num_lookup_strings, target_inputs_per_thread);
762  CHECK_GE(thread_info.num_threads, 1L);
763  CHECK_GE(thread_info.num_elems_per_thread, 1L);
764 
765  std::vector<size_t> num_strings_not_found_per_thread(thread_info.num_threads, 0UL);
766 
767  tbb::task_arena limited_arena(thread_info.num_threads);
768  limited_arena.execute([&] {
770  tbb::blocked_range<size_t>(
771  0, num_lookup_strings, thread_info.num_elems_per_thread /* tbb grain_size */),
772  [&](const tbb::blocked_range<size_t>& r) {
773  const size_t start_idx = r.begin();
774  const size_t end_idx = r.end();
775  size_t num_local_strings_not_found = 0;
776  for (size_t string_idx = start_idx; string_idx < end_idx; ++string_idx) {
777  if (string_ids[string_idx] != StringDictionary::INVALID_STR_ID) {
778  continue;
779  }
780  string_ids[string_idx] =
781  lookupTransientStringUnlocked(lookup_strings[string_idx]);
782  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
783  num_local_strings_not_found++;
784  }
785  }
786  const size_t tbb_thread_idx = tbb::this_task_arena::current_thread_index();
787  num_strings_not_found_per_thread[tbb_thread_idx] = num_local_strings_not_found;
788  },
789  tbb::simple_partitioner());
790  });
791  size_t num_strings_not_found = 0;
792  for (int64_t thread_idx = 0; thread_idx < thread_info.num_threads; ++thread_idx) {
793  num_strings_not_found += num_strings_not_found_per_thread[thread_idx];
794  }
795  return num_strings_not_found;
796 }
#define CHECK_GE(x, y)
Definition: Logger.h:306
static constexpr int32_t INVALID_STR_ID
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename String >
size_t StringDictionaryProxy::transientLookupBulkUnlocked ( const std::vector< String > &  lookup_strings,
int32_t *  string_ids 
) const
private

Definition at line 735 of file StringDictionaryProxy.cpp.

References StringDictionary::INVALID_STR_ID, and lookupTransientStringUnlocked().

Referenced by transientLookupBulk().

737  {
738  const size_t num_strings = lookup_strings.size();
739  size_t num_strings_not_found = 0;
740  for (size_t string_idx = 0; string_idx < num_strings; ++string_idx) {
741  if (string_ids[string_idx] != StringDictionary::INVALID_STR_ID) {
742  continue;
743  }
744  // If we're here it means we need to look up this string as we don't
745  // have a valid id for it
746  string_ids[string_idx] = lookupTransientStringUnlocked(lookup_strings[string_idx]);
747  if (string_ids[string_idx] == StringDictionary::INVALID_STR_ID) {
748  num_strings_not_found++;
749  }
750  }
751  return num_strings_not_found;
752 }
static constexpr int32_t INVALID_STR_ID
int32_t lookupTransientStringUnlocked(const String &lookup_string) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

StringDictionaryProxy::IdMap StringDictionaryProxy::transientUnion ( StringDictionaryProxy const &  sdp_rhs)

Definition at line 669 of file StringDictionaryProxy.cpp.

References eachStringSerially(), initIdMap(), and string_dict_.

670  {
671  IdMap id_map = sdp_rhs.initIdMap();
672  // serial_callback cannot be parallelized due to calling getOrAddTransientUnlocked().
673  std::unique_ptr<StringDictionary::StringCallback> serial_callback;
674  if (string_dict_->isClient()) {
675  serial_callback = std::make_unique<StringNetworkCallback>(this, id_map);
676  } else {
677  serial_callback = std::make_unique<StringLocalCallback>(this, id_map);
678  }
679  // Import all non-duplicate strings (transient and non-transient) and add to id_map.
680  sdp_rhs.eachStringSerially(*serial_callback);
681  return id_map;
682 }
TranslationMap< int32_t > IdMap
std::shared_ptr< StringDictionary > string_dict_

+ Here is the call graph for this function:

void StringDictionaryProxy::updateGeneration ( const int64_t  generation)
noexcept

Definition at line 684 of file StringDictionaryProxy.cpp.

References CHECK_EQ.

684  {
685  if (generation == -1) {
686  return;
687  }
688  if (generation_ != -1) {
689  CHECK_EQ(generation_, generation);
690  return;
691  }
692  generation_ = generation;
693 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301

Friends And Related Function Documentation

friend class StringLocalCallback
friend

Definition at line 309 of file StringDictionaryProxy.h.

friend class StringNetworkCallback
friend

Definition at line 310 of file StringDictionaryProxy.h.

Member Data Documentation

const shared::StringDictKey StringDictionaryProxy::string_dict_key_
private

Definition at line 296 of file StringDictionaryProxy.h.

Referenced by getDictKey(), and operator==().


The documentation for this class was generated from the following files: