OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
run_synthetic_benchmark.py
Go to the documentation of this file.
1 from run_benchmark import benchmark
2 import sys
3 import os
4 from argparse import ArgumentParser
5 from synthetic_benchmark.create_table import SyntheticTable
6 from analyze_benchmark import PrettyPrint, BenchmarkLoader
7 
if __name__ == "__main__":
    # Driver script for the synthetic benchmark: parses CLI options, makes
    # sure a table with synthetic data exists, runs the benchmark query
    # groups, and stores (and optionally prints) the per-query results.

    # Parse the command line arguments:
    parser = ArgumentParser()
    required = parser.add_argument_group("required arguments")
    # Source database connection (where the benchmark queries run):
    required.add_argument("--user", dest="user", default="admin")
    required.add_argument(
        "--password", dest="password", default="HyperInteractive"
    )
    required.add_argument("--name", dest="name", default="heavyai")
    required.add_argument("--server", dest="server", default="localhost")
    required.add_argument("--port", dest="port", default="6274")
    # Destination database connection (where benchmark results are sent):
    required.add_argument("--dest_user", dest="dest_user", default="admin")
    required.add_argument(
        "--dest_password", dest="dest_password", default="HyperInteractive"
    )
    required.add_argument("--dest_name", dest="dest_name", default="heavyai")
    required.add_argument(
        "--dest_server", dest="dest_server", default="localhost"
    )
    required.add_argument("--dest_port", dest="dest_port", default="6274")
    required.add_argument(
        "--table_name",
        dest="table_name",
        default="omnisci_syn_bench",
        help="Table name to contain all the generated random synthetic data.",
    )
    required.add_argument(
        "--fragment_size",
        dest="fragment_size",
        default="32000000",
        help="Fragment size to be used for the synthetic data on the database",
    )
    required.add_argument(
        "--num_fragments",
        dest="num_fragments",
        default="4",
        help="Number of fragments used to generate synthetic data: "
        + "Total rows in the table: num_fragments * fragment_size.",
    )
    required.add_argument(
        "--data_dir",
        dest="data_dir",
        default=os.getcwd() + "/../build/synthetic_data",
        help="This directory is used (or gets created) to store generated "
        + "random synthetic data (csv files), as well as final results.",
    )
    required.add_argument(
        "--result_dir",
        dest="result_dir",
        default=os.getcwd() + "/../build/synthetic_results",
        help="This directory is used to store results."
        + " Final results are restructured within this directory based on "
        + " the benchmark label and the GPU label.",
    )
    required.add_argument(
        "--query",
        dest="query",
        default="all",
        # typo fixed: "the the" -> "the"
        help="Specifies the query group to execute particular benchmark queries. "
        + "For example, BaselineHash, MultiStep, NonGroupedAgg, etc."
        + "All query groups can be found at Benchmarks/synthetic_benchmark/queries/ "
        + "The default value is to run all queries (all).",
    )
    required.add_argument(
        "--label",
        dest="label",
        help="This label is used to differentiate different benchmark runs.",
    )
    required.add_argument(
        "--iterations",
        dest="iterations",
        default="2",
        help="Number of iterations used for the benchmark. The first "
        + "iteration will not be included in the final measurements, "
        + "unless specifically asked to report that attribute.",
    )
    required.add_argument(
        "--gpu_count",
        dest="gpu_count",
        default="1",
        help="Number of GPUs used for the benchmark.",
    )
    required.add_argument(
        "--gpu_label",
        dest="gpu_label",
        default="GPU",
        help="This label is used to categorize the stored results (.json) of the benchmark queries. "
        + " Results are stored at {data_dir}/results/{label}/{gpu_label}/Benchmarks/{query}.json",
    )
    required.add_argument(
        "--attribute",
        dest="attribute",
        default="query_exec_avg",
        # help text fixed: it previously claimed the default was
        # query_total_avg, but the actual default is query_exec_avg
        help="This attribute is used to print out results for each query group. "
        + "Default value is query_exec_avg",
    )
    required.add_argument(
        "--skip_data_gen_and_import",
        dest="skip_data_gen_and_import",
        action="store_true",
        help="Skips the data generation, table creation, and import. "
        + "Note that in this case there will be no "
        + "guarantee whether the table exists and data is stored as expected."
        + "It is user's responsibility to make sure everything is in place.",
    )
    required.add_argument(
        "--print_results",
        dest="print_results",
        action="store_true",
        help="If enabled, the results for each particular query group is printed in stdout.",
    )

    args = parser.parse_args()

    # A label is mandatory: results are stored under
    # {result_dir}/{label}/{gpu_label}/Benchmarks/. Explicit check instead of
    # `assert`, which is silently stripped under `python -O`.
    if not args.label:
        parser.error("Label is required to store query results.")

    def ensure_dir(dir_path):
        """Create dir_path (and parents) if missing; warn (don't abort) on failure."""
        try:
            os.makedirs(dir_path, exist_ok=True)
        except OSError:
            print("Failed to create directory %s" % (dir_path))

    def show_results(result_dir, json_name, attribute):
        """Load one stored result file and pretty-print the chosen attribute."""
        ref_bench = BenchmarkLoader(result_dir, os.listdir(result_dir))
        ref_bench.load(json_name)
        PrettyPrint(ref_bench, None, attribute, False).printAttribute()

    # create (or verify existence) of a directory for the synthetic data:
    ensure_dir(args.data_dir)

    # create a synthetic table in the database so that benchmark queries can run on it
    if args.skip_data_gen_and_import is False:
        print(" === Preparing the required synthetic data...")
        # The generated data must be treated as remote when the source
        # database is not the same server/port as the destination.
        is_remote_server = (
            args.port != args.dest_port or args.server != args.dest_server
        )

        try:
            synthetic_table = SyntheticTable(
                table_name=args.table_name,
                fragment_size=int(args.fragment_size),
                num_fragments=int(args.num_fragments),
                db_name=args.name,
                db_user=args.user,
                db_password=args.password,
                db_server=args.server,
                db_port=int(args.port),
                data_dir_path=args.data_dir,
                is_remote_server=is_remote_server,
            )
            # verify the table existence in the database, if not, creates data,
            # then creates the table, and then import the data to the table
            synthetic_table.createDataAndImportTable()
        except Exception as exc:
            # narrow except (the original bare `except:` also swallowed
            # KeyboardInterrupt/SystemExit) and chain the root cause
            raise Exception(
                "Aborting the benchmark, no valid data found."
            ) from exc
    else:
        print(" === Data generation is skipped...")

    # final results' destination (generated .json files)
    ensure_dir(args.result_dir)

    result_dir_name = "%s/%s/%s/Benchmarks/" % (
        args.result_dir,
        args.label,
        args.gpu_label,
    )
    ensure_dir(result_dir_name)

    query_dir = "synthetic_benchmark/queries/"
    if not os.path.isdir(query_dir):
        raise SystemExit("Query directory %s does not exist." % (query_dir))
    all_query_list = os.listdir(query_dir)

    # arguments shared by every benchmark invocation; the per-query output
    # file and query directory are appended for each run below
    common_args = ["--user", args.user]
    common_args += ["--passwd", args.password]
    common_args += ["--server", args.server]
    common_args += ["--port", args.port]
    common_args += ["--name", args.name]
    common_args += ["--table", args.table_name]
    common_args += ["--iterations", args.iterations]
    common_args += ["--gpu-count", args.gpu_count]
    common_args += ["--destination", "file_json"]
    common_args += ["--dest-user", args.dest_user]
    common_args += ["--dest-passwd", args.dest_password]
    common_args += ["--dest-server", args.dest_server]
    common_args += ["--dest-port", args.dest_port]
    common_args += ["--dest-name", args.dest_name]
    common_args += [
        "--dest-table-schema-file",
        "results_table_schemas/query-results.sql",
    ]
    common_args += ["--label", args.label]

    # "all" runs every query group found in the queries directory; otherwise
    # run exactly the one group the user asked for (after validating it)
    if args.query == "all":
        query_groups = sorted(all_query_list)
    else:
        if args.query not in all_query_list:
            raise SystemExit("Invalid query directory entered,")
        query_groups = [args.query]

    for query_group in query_groups:
        print(" === Running benchmark queries for %s" % (query_group))
        benchmark(
            common_args
            + [
                "--output-file-json",
                result_dir_name + query_group + ".json",
                "--queries-dir",
                query_dir + query_group,
            ]
        )

        if args.print_results:
            show_results(
                result_dir_name, query_group + ".json", args.attribute
            )