OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
UtilityTableFunctions.cpp File Reference
#include <string>
#include "Shared/ThreadInfo.h"
#include "UtilityTableFunctions.h"
#include <chrono>
#include <random>
#include <thread>
+ Include dependency graph for UtilityTableFunctions.cpp:

Go to the source code of this file.

Functions

int64_t numStepsBetween (int64_t start, int64_t stop, int64_t step)
 
template<typename T >
int64_t numStepsBetween (Timestamp start, Timestamp stop, T step)
 
template<typename T , typename K >
int32_t generate_series_parallel (const T start, const T stop, const K step, Column< T > &series_output)
 
template<typename T , typename K >
NEVER_INLINE HOST int32_t generate_series__cpu_template (TableFunctionManager &mgr, const T start, const T stop, const K step, Column< T > &series_output)
 
template<typename T >
NEVER_INLINE HOST int32_t generate_series__cpu_template (TableFunctionManager &mgr, const T start, const T stop, Column< T > &series_output)
 
HOST std::string gen_random_str (std::mt19937 &generator, const int64_t str_len)
 
EXTENSION_NOINLINE_HOST int32_t generate_random_strings__cpu_ (TableFunctionManager &mgr, const int64_t num_strings, const int64_t string_length, Column< int64_t > &output_id, Column< TextEncodingDict > &output_strings)
 
template int64_t numStepsBetween (Timestamp, Timestamp, DayTimeInterval)
 
template int64_t numStepsBetween (Timestamp, Timestamp, YearMonthTimeInterval)
 
template int32_t generate_series_parallel (int64_t, int64_t, int64_t, Column< int64_t > &)
 
template int32_t generate_series_parallel (Timestamp, Timestamp, DayTimeInterval, Column< Timestamp > &)
 
template int32_t generate_series_parallel (Timestamp, Timestamp, YearMonthTimeInterval, Column< Timestamp > &)
 
template int32_t generate_series__cpu_template (TableFunctionManager &, int64_t, int64_t, int64_t, Column< int64_t > &)
 
template int32_t generate_series__cpu_template (TableFunctionManager &, Timestamp, Timestamp, DayTimeInterval, Column< Timestamp > &)
 
template int32_t generate_series__cpu_template (TableFunctionManager &, Timestamp, Timestamp, YearMonthTimeInterval, Column< Timestamp > &)
 
template int32_t generate_series__cpu_template (TableFunctionManager &, int64_t, int64_t, Column< int64_t > &)
 

Function Documentation

HOST std::string gen_random_str ( std::mt19937 &  generator,
const int64_t  str_len 
)

Definition at line 99 of file UtilityTableFunctions.cpp.

Referenced by generate_random_strings__cpu_().

/**
 * Builds a random string made only of the characters 0-9, A-Z and a-z.
 *
 * @param generator Mersenne Twister engine to draw from; its state is advanced
 *                  by one draw per generated character.
 * @param str_len   Number of characters to generate. Values <= 0 yield an
 *                  empty string.
 * @return A string of exactly str_len alphanumeric characters.
 */
HOST std::string gen_random_str(std::mt19937& generator, const int64_t str_len) {
  constexpr char alphanum_lookup_table[] =
      "0123456789"
      "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
      "abcdefghijklmnopqrstuvwxyz";
  // sizeof() counts the literal's trailing '\0', which must never be picked.
  constexpr int32_t num_chars = static_cast<int32_t>(sizeof(alphanum_lookup_table)) - 1;
  // uniform_int_distribution samples the CLOSED interval [a, b], so the upper
  // bound has to be the last valid index rather than the character count.
  std::uniform_int_distribution<int32_t> rand_distribution(0, num_chars - 1);

  std::string tmp_s;
  if (str_len <= 0) {
    // A negative length would otherwise wrap to a huge size_t in reserve().
    return tmp_s;
  }
  tmp_s.reserve(static_cast<size_t>(str_len));
  for (int64_t i = 0; i < str_len; ++i) {
    tmp_s += alphanum_lookup_table[rand_distribution(generator)];
  }
  return tmp_s;
}

+ Here is the caller graph for this function:

EXTENSION_NOINLINE_HOST int32_t generate_random_strings__cpu_ ( TableFunctionManager &  mgr,
const int64_t  num_strings,
const int64_t  string_length,
Column< int64_t > &  output_id,
Column< TextEncodingDict > &  output_strings 
)

Definition at line 121 of file UtilityTableFunctions.cpp.

References CHECK_LE, DEBUG_TIMER, gen_random_str(), ThreadInfo::num_elems_per_thread, ThreadInfo::num_threads, threading_serial::parallel_for(), and TableFunctionManager::set_output_row_size().

125  {
126  auto timer = DEBUG_TIMER(__func__);
127  // Check for out-of-range errors for the input parameters
128  // in the function instead of with require due to issue encountered
129  // with require over multiple variables
130  constexpr int64_t max_strings{10000000L};
131  constexpr int64_t max_str_len{10000L};
132  if (num_strings > max_strings) {
133  return mgr.ERROR_MESSAGE(
134  "generate_random_strings: num_strings must be between 0 and 10,000,000.");
135  }
136  if (string_length > max_str_len) {
137  return mgr.ERROR_MESSAGE(
138  "generate_random_strings: string_length must be between 1 and 10,000.");
139  }
140  if (num_strings == 0L) {
141  // Bail early as there is no work to be done
142  return 0;
143  }
144 
145  mgr.set_output_row_size(num_strings);
146  constexpr int64_t target_strings_per_thread{5000};
147  const ThreadInfo thread_info(
148  std::thread::hardware_concurrency(), num_strings, target_strings_per_thread);
149  std::vector<std::mt19937> per_thread_rand_generators;
150  per_thread_rand_generators.reserve(thread_info.num_threads);
151  for (int64_t thread_idx = 0; thread_idx < thread_info.num_threads; ++thread_idx) {
152  const uint64_t seed = std::chrono::duration_cast<std::chrono::nanoseconds>(
153  std::chrono::system_clock::now().time_since_epoch())
154  .count() +
155  thread_idx * 971;
156  per_thread_rand_generators.emplace_back(seed);
157  }
158  std::vector<std::string> rand_strings(num_strings);
159  tbb::task_arena limited_arena(thread_info.num_threads);
160  limited_arena.execute([&] {
161  CHECK_LE(tbb::this_task_arena::max_concurrency(), thread_info.num_threads);
163  tbb::blocked_range<int64_t>(0, num_strings, thread_info.num_elems_per_thread),
164  [&](const tbb::blocked_range<int64_t>& r) {
165  const int64_t tbb_thread_idx = tbb::this_task_arena::current_thread_index();
166  const int64_t start_out_idx = r.begin();
167  const int64_t end_out_idx = r.end();
168  for (int64_t out_idx = start_out_idx; out_idx != end_out_idx; ++out_idx) {
169  rand_strings[out_idx] =
170  gen_random_str(per_thread_rand_generators[tbb_thread_idx], string_length);
171  }
172  },
173  tbb::simple_partitioner());
174  });
175  const std::vector<int32_t> rand_string_ids =
176  output_strings.string_dict_proxy_->getOrAddTransientBulk(rand_strings);
177  for (int64_t row_idx = 0; row_idx < num_strings; row_idx++) {
178  output_id[row_idx] = row_idx;
179  output_strings[row_idx] = rand_string_ids[row_idx];
180  }
181  return num_strings;
182 }
void set_output_row_size(int64_t num_rows)
Definition: heavydbTypes.h:373
StringDictionaryProxy * string_dict_proxy_
#define CHECK_LE(x, y)
Definition: Logger.h:304
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
std::vector< int32_t > getOrAddTransientBulk(const std::vector< std::string > &strings)
#define DEBUG_TIMER(name)
Definition: Logger.h:412
HOST std::string gen_random_str(std::mt19937 &generator, const int64_t str_len)

+ Here is the call graph for this function:

template<typename T , typename K >
NEVER_INLINE HOST int32_t generate_series__cpu_template ( TableFunctionManager &  mgr,
const T  start,
const T  stop,
const K  step,
Column< T > &  series_output 
)

Definition at line 58 of file UtilityTableFunctions.cpp.

References generate_series_parallel(), numStepsBetween(), TableFunctionManager::set_output_row_size(), and SUCCESS.

Referenced by generate_series__cpu_template().

62  {
63  const int64_t PARALLEL_THRESHOLD{10000L};
64  const int64_t num_rows = numStepsBetween(start, stop, step) + 1;
65  if (num_rows <= 0) {
66  mgr.set_output_row_size(0);
67  return 0;
68  }
69  // set_output_row_size ensures that the output buffer size will be
70  // in a reasonable range (up to 16 TiB), and if it is not, an
71  // OutOfHostMemory exception will be thrown.
72  mgr.set_output_row_size(num_rows);
73 
74 #ifdef HAVE_TBB
75  if (num_rows > PARALLEL_THRESHOLD) {
76  return generate_series_parallel(start, stop, step, series_output);
77  }
78 #endif
79 
80  for (int64_t out_idx = 0; out_idx != num_rows; ++out_idx) {
81  series_output[out_idx] = start + (step * out_idx);
82  }
83  return SUCCESS;
84 }
void set_output_row_size(int64_t num_rows)
Definition: heavydbTypes.h:373
#define SUCCESS
Definition: heavydbTypes.h:76
int64_t numStepsBetween(int64_t start, int64_t stop, int64_t step)
int32_t generate_series_parallel(const T start, const T stop, const K step, Column< T > &series_output)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template<typename T >
NEVER_INLINE HOST int32_t generate_series__cpu_template ( TableFunctionManager &  mgr,
const T  start,
const T  stop,
Column< T > &  series_output 
)

Definition at line 87 of file UtilityTableFunctions.cpp.

References generate_series__cpu_template().

90  {
92  mgr, start, stop, static_cast<int64_t>(1), series_output);
93 }
NEVER_INLINE HOST int32_t generate_series__cpu_template(TableFunctionManager &mgr, const T start, const T stop, const K step, Column< T > &series_output)

+ Here is the call graph for this function:

template int32_t generate_series__cpu_template ( TableFunctionManager ,
int64_t  ,
int64_t  ,
int64_t  ,
Column< int64_t > &   
)
template int32_t generate_series__cpu_template ( TableFunctionManager ,
Timestamp  ,
Timestamp  ,
DayTimeInterval  ,
Column< Timestamp > &   
)
template int32_t generate_series__cpu_template ( TableFunctionManager ,
Timestamp  ,
Timestamp  ,
YearMonthTimeInterval  ,
Column< Timestamp > &   
)
template int32_t generate_series__cpu_template ( TableFunctionManager ,
int64_t  ,
int64_t  ,
Column< int64_t > &   
)
template<typename T , typename K >
int32_t generate_series_parallel ( const T  start,
const T  stop,
const K  step,
Column< T > &  series_output 
)

Definition at line 39 of file UtilityTableFunctions.cpp.

References numStepsBetween(), threading_serial::parallel_for(), and SUCCESS.

Referenced by generate_series__cpu_template().

42  {
43  const int64_t num_rows = numStepsBetween(start, stop, step) + 1;
44 
45  tbb::parallel_for(tbb::blocked_range<int64_t>(0, num_rows),
46  [&](const tbb::blocked_range<int64_t>& r) {
47  const int64_t start_out_idx = r.begin();
48  const int64_t end_out_idx = r.end();
49  for (int64_t out_idx = start_out_idx; out_idx != end_out_idx;
50  ++out_idx) {
51  series_output[out_idx] = start + (step * out_idx);
52  }
53  });
54  return SUCCESS;
55 }
#define SUCCESS
Definition: heavydbTypes.h:76
void parallel_for(const blocked_range< Int > &range, const Body &body, const Partitioner &p=Partitioner())
int64_t numStepsBetween(int64_t start, int64_t stop, int64_t step)

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

template int32_t generate_series_parallel ( int64_t  ,
int64_t  ,
int64_t  ,
Column< int64_t > &   
)
template int32_t generate_series_parallel ( Timestamp  ,
Timestamp  ,
DayTimeInterval  ,
Column< Timestamp > &   
)
template int32_t generate_series_parallel ( Timestamp  ,
Timestamp  ,
YearMonthTimeInterval  ,
Column< Timestamp > &   
)
int64_t numStepsBetween ( int64_t  start,
int64_t  stop,
int64_t  step 
)

Definition at line 29 of file UtilityTableFunctions.cpp.

Referenced by generate_series__cpu_template(), and generate_series_parallel().

/**
 * Number of whole steps of size step needed to go from start towards stop.
 *
 * The quotient is rounded towards negative infinity, so the result is negative
 * whenever stop lies in the opposite direction of step. Callers that add 1 to
 * obtain a row count therefore get a non-positive count for an unreachable
 * stop. Plain truncating division returned 0 for e.g. (5, 4, 3), which made
 * such callers emit one spurious row.
 *
 * @param start First value of the series.
 * @param stop  Bound of the series.
 * @param step  Increment; must be non-zero (division by zero is undefined).
 *              NOTE(review): presumably validated by the caller - confirm.
 * @return floor((stop - start) / step).
 */
int64_t numStepsBetween(int64_t start, int64_t stop, int64_t step) {
  const int64_t span = stop - start;
  int64_t steps = span / step;
  const int64_t remainder = span % step;
  // C++ division truncates towards zero; step down by one when the exact
  // quotient is negative and not an integer.
  if (remainder != 0 && ((remainder < 0) != (step < 0))) {
    --steps;
  }
  return steps;
}

+ Here is the caller graph for this function:

template<typename T >
int64_t numStepsBetween ( Timestamp  start,
Timestamp  stop,
T  step 
)

Definition at line 34 of file UtilityTableFunctions.cpp.

34  {
35  return step.numStepsBetween(start, stop);
36 }
template int64_t numStepsBetween ( Timestamp  ,
Timestamp  ,
DayTimeInterval   
)
template int64_t numStepsBetween ( Timestamp  ,
Timestamp  ,
YearMonthTimeInterval   
)