22 #include <shared_mutex>
24 #include <string_view>
37 namespace StringOps_Namespace {
57 const std::string& folder,
60 const bool materializeHashes =
false,
61 size_t initial_capacity = 256);
70 virtual void operator()(std::string
const&, int32_t
const string_id) = 0;
71 virtual void operator()(std::string_view
const, int32_t
const string_id) = 0;
81 int32_t
getOrAdd(
const std::string& str) noexcept;
82 template <
class T,
class String>
83 size_t getBulk(
const std::vector<String>& string_vec,
T* encoded_vec)
const;
84 template <
class T,
class String>
85 size_t getBulk(
const std::vector<String>& string_vec,
87 const int64_t generation)
const;
88 template <
class T,
class String>
89 void getOrAddBulk(
const std::vector<String>& string_vec,
T* encoded_vec);
90 template <
class T,
class String>
92 template <
class String>
94 std::vector<std::vector<int32_t>>& ids_array_vec);
95 template <
class String>
97 std::string
getString(int32_t string_id)
const;
99 std::pair<char*, size_t>
getStringBytes(int32_t string_id)
const noexcept;
102 template <
typename T>
103 std::vector<T>
getLike(
const std::string& pattern,
105 const bool is_simple,
107 const size_t generation)
const;
109 template <
typename T>
110 std::vector<T>
getLikeImpl(
const std::string& pattern,
112 const bool is_simple,
114 const size_t generation)
const;
116 std::vector<int32_t>
getCompare(
const std::string& pattern,
117 const std::string& comp_operator,
118 const size_t generation);
120 std::vector<int32_t>
getRegexpLike(
const std::string& pattern,
122 const size_t generation)
const;
127 std::vector<std::string_view>
getStringViews(
const size_t generation)
const;
130 const std::shared_ptr<StringDictionary> dest_dict,
135 int32_t* translated_ids,
136 const int64_t source_generation,
137 const int64_t dest_generation,
138 const bool dest_has_transients,
140 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos)
const;
143 Datum* translated_ids,
144 const int64_t source_generation,
145 const std::vector<StringOps_Namespace::StringOpInfo>& string_op_infos)
const;
169 std::vector<int32_t>& dest_ids,
171 const std::vector<int32_t>& source_ids,
173 const std::vector<std::
string const*>& transient_string_vec = {});
176 std::vector<std::vector<int32_t>>& dest_array_ids,
178 const std::vector<std::vector<int32_t>>& source_array_ids,
210 std::vector<std::future<std::vector<std::pair<string_dict_hash_t, unsigned int>>>>&
215 template <class String>
217 const
size_t str_count,
218 const
size_t storage_high_water_mark,
219 const std::vector<String>& input_strings,
220 const std::vector<
size_t>& string_memory_ids,
222 int32_t
getOrAddImpl(const std::string_view& str) noexcept;
223 template <class String>
224 void hashStrings(const std::vector<String>& string_vec,
227 int32_t
getUnlocked(const std::string_view sv) const noexcept;
233 template <class String>
236 const String& input_string,
237 const std::vector<int32_t>& string_id_string_dict_hash_table) const noexcept;
238 template <class String>
241 const String& input_string,
242 const std::vector<int32_t>& string_id_string_dict_hash_table,
243 const
size_t storage_high_water_mark,
244 const std::vector<String>& input_strings,
245 const std::vector<
size_t>& string_memory_ids) const noexcept;
248 const std::vector<int32_t>& string_id_string_dict_hash_table) noexcept;
252 template <class String>
254 template <class String>
256 const std::vector<
size_t>& string_memory_ids,
257 const
size_t sum_new_strings_lengths) noexcept;
265 const
size_t min_capacity_requested = 0) noexcept;
267 std::vector<int32_t>
getEquals(std::
string pattern,
268 std::
string comp_operator,
272 void sortCache(std::vector<int32_t>& cache);
294 mutable std::map<std::tuple<std::
string,
bool,
bool,
char>, std::vector<int32_t>>
296 mutable std::map<std::tuple<std::
string,
bool,
bool,
char>, std::vector<int64_t>>
299 mutable std::map<std::pair<std::
string,
char>, std::vector<int32_t>>
regex_cache_;
319 const std::vector<int32_t>& source_ids,
321 const int32_t dest_generation);
StringIdxEntry * offset_map_
size_t payload_file_size_
bool isClient() const noexcept
void increaseHashTableCapacity() noexcept
void checkAndConditionallyIncreasePayloadCapacity(const size_t write_length)
size_t addStorageCapacity(int fd, const size_t min_capacity_requested=0) noexcept
std::vector< int32_t > getRegexpLike(const std::string &pattern, const char escape, const size_t generation) const
size_t getBulk(const std::vector< String > &string_vec, T *encoded_vec) const
std::vector< int32_t > buildDictionaryTranslationMap(const std::shared_ptr< StringDictionary > dest_dict, StringLookupCallback const &dest_transient_lookup_callback) const
std::vector< std::string > copyStrings() const
void buildDictionaryNumericTranslationMap(Datum *translated_ids, const int64_t source_generation, const std::vector< StringOps_Namespace::StringOpInfo > &string_op_infos) const
const shared::StringDictKey & getDictKey() const noexcept
std::map< std::tuple< std::string, bool, bool, char >, std::vector< int64_t > > like_i64_cache_
std::pair< char *, size_t > getStringBytesChecked(const int string_id) const noexcept
size_t storageEntryCount() const
StringDictionary(const shared::StringDictKey &dict_key, const std::string &folder, const bool isTemp, const bool recover, const bool materializeHashes=false, size_t initial_capacity=256)
void addOffsetCapacity(const size_t min_capacity_requested=0) noexcept
uint32_t computeBucketFromStorageAndMemory(const string_dict_hash_t input_string_hash, const String &input_string, const std::vector< int32_t > &string_id_string_dict_hash_table, const size_t storage_high_water_mark, const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids) const noexcept
std::string getStringChecked(const int string_id) const noexcept
size_t canary_buffer_size
std::vector< string_dict_hash_t > hash_cache_
DictionaryCache< std::string, compare_cache_value_t > compare_cache_
bool fillRateIsHigh(const size_t num_strings) const noexcept
void * addMemoryCapacity(void *addr, size_t &mem_size, const size_t min_capacity_requested=0) noexcept
std::map< std::tuple< std::string, bool, bool, char >, std::vector< int32_t > > like_i32_cache_
std::string offsets_path_
static void populate_string_ids(std::vector< int32_t > &dest_ids, StringDictionary *dest_dict, const std::vector< int32_t > &source_ids, const StringDictionary *source_dict, const std::vector< std::string const * > &transient_string_vec={})
Populates provided dest_ids vector with string ids corresponding to given source strings.
std::vector< T > getLikeImpl(const std::string &pattern, const bool icase, const bool is_simple, const char escape, const size_t generation) const
std::shared_mutex rw_mutex_
std::string_view getStringFromStorageFast(const int string_id) const noexcept
int32_t getOrAdd(const std::string &str) noexcept
std::map< std::pair< std::string, char >, std::vector< int32_t > > regex_cache_
size_t computeCacheSize() const
std::unique_ptr< StringDictionaryClient > client_
std::string getStringUnlocked(int32_t string_id) const noexcept
int32_t getIdOfString(const String &) const
std::string_view getStringView(int32_t string_id) const
static constexpr size_t MAX_STRCOUNT
std::vector< int32_t > getEquals(std::string pattern, std::string comp_operator, size_t generation)
uint32_t computeBucket(const string_dict_hash_t hash, const String &input_string, const std::vector< int32_t > &string_id_string_dict_hash_table) const noexcept
size_t compare_cache_size_
static constexpr int32_t INVALID_STR_ID
std::shared_ptr< std::vector< std::string > > strings_cache_
virtual void operator()(std::string const &, int32_t const string_id)=0
std::vector< int32_t > getCompare(const std::string &pattern, const std::string &comp_operator, const size_t generation)
const std::string folder_
void appendToStorageBulk(const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids, const size_t sum_new_strings_lengths) noexcept
void addPayloadCapacity(const size_t min_capacity_requested=0) noexcept
std::map< std::string, int32_t > equal_cache_
uint32_t computeUniqueBucketWithHash(const string_dict_hash_t hash, const std::vector< int32_t > &string_id_string_dict_hash_table) noexcept
void getOrAddBulkArray(const std::vector< std::vector< String >> &string_array_vec, std::vector< std::vector< int32_t >> &ids_array_vec)
int32_t getUnlocked(const std::string_view sv) const noexcept
void appendToStorage(const String str) noexcept
void getOrAddBulk(const std::vector< String > &string_vec, T *encoded_vec)
const shared::StringDictKey dict_key_
std::pair< char *, size_t > getStringBytes(int32_t string_id) const noexcept
DictPayloadUnavailable(const std::string &err)
void processDictionaryFutures(std::vector< std::future< std::vector< std::pair< string_dict_hash_t, unsigned int >>>> &dictionary_futures)
void translate_string_ids(std::vector< int32_t > &dest_ids, const LeafHostInfo &dict_server_host, const shared::StringDictKey &dest_dict_key, const std::vector< int32_t > &source_ids, const shared::StringDictKey &source_dict_key, const int32_t dest_generation)
bool checkpoint() noexcept
void mergeSortedCache(std::vector< int32_t > &temp_sorted_cache)
void eachStringSerially(int64_t const generation, StringCallback &) const
size_t getNumStringsFromStorage(const size_t storage_slots) const noexcept
void update_leaf(const LeafHostInfo &host_info)
size_t strings_cache_size_
std::string getString(int32_t string_id) const
void hashStrings(const std::vector< String > &string_vec, std::vector< string_dict_hash_t > &hashes) const noexcept
std::vector< std::string_view > getStringViews() const
std::unique_ptr< StringDictionaryClient > client_no_timeout_
virtual ~StringCallback()=default
void increaseHashTableCapacityFromStorageAndMemory(const size_t str_count, const size_t storage_high_water_mark, const std::vector< String > &input_strings, const std::vector< size_t > &string_memory_ids, const std::vector< string_dict_hash_t > &input_strings_hashes) noexcept
std::function< int32_t(std::string const &)> makeLambdaStringToId() const
void checkAndConditionallyIncreaseOffsetCapacity(const size_t write_length)
static void populate_string_array_ids(std::vector< std::vector< int32_t >> &dest_array_ids, StringDictionary *dest_dict, const std::vector< std::vector< int32_t >> &source_array_ids, const StringDictionary *source_dict)
void invalidateInvertedIndex() noexcept
uint32_t string_dict_hash_t
std::vector< T > getLike(const std::string &pattern, const bool icase, const bool is_simple, const char escape, const size_t generation) const
std::string_view getStringViewUnlocked(int32_t string_id) const noexcept
std::vector< int32_t > string_id_string_dict_hash_table_
void sortCache(std::vector< int32_t > &cache)
std::string_view getStringViewChecked(const int string_id) const noexcept
static constexpr size_t MAX_STRLEN
void getOrAddBulkParallel(const std::vector< String > &string_vec, T *encoded_vec)
std::function< bool(std::string_view, int32_t string_id)> StringLookupCallback
bool g_enable_stringdict_parallel
PayloadString getStringFromStorage(const int string_id) const noexcept
std::shared_timed_mutex shared_mutex
compare_cache_value_t * binary_search_cache(const std::string &pattern) const
void insertInSortedCache(std::string str, int32_t str_id)
std::vector< int32_t > sorted_cache
int32_t getOrAddImpl(const std::string_view &str) noexcept
int32_t truncate_to_generation(const int32_t id, const size_t generation)
~StringDictionary() noexcept