OmniSciDB  a5dc49c757
run_benchmark Namespace Reference

Functions

def verify_destinations
 
def get_connection
 
def get_run_vars
 
def get_gpu_info
 
def get_machine_info
 
def read_query_files
 
def read_setup_teardown_query_files
 
def validate_setup_teardown_query_file
 
def validate_query_file
 
def execute_query
 
def calculate_query_times
 
def clear_memory
 
def clear_system_caches
 
def get_mem_usage
 
def run_query
 
def run_setup_teardown_query
 
def json_format_handler
 
def create_results_dataset
 
def send_results_db
 
def send_results_file_json
 
def send_results_jenkins_bench
 
def send_results_output
 
def process_arguments
 
def benchmark
 

Function Documentation

def run_benchmark.benchmark (   input_arguments)

Definition at line 1629 of file run_benchmark.py.

References create_results_dataset(), get_connection(), get_gpu_info(), get_machine_info(), get_run_vars(), process_arguments(), read_query_files(), read_setup_teardown_query_files(), run_query(), run_setup_teardown_query(), send_results_db(), send_results_file_json(), send_results_jenkins_bench(), send_results_output(), and verify_destinations().

1630 def benchmark(input_arguments):
1631  # Set input args to vars
1632  args = process_arguments(input_arguments)
1633  verbose = args.verbose
1634  quiet = args.quiet
1635  source_db_user = args.user
1636  source_db_passwd = args.passwd
1637  source_db_server = args.server
1638  source_db_port = args.port
1639  source_db_name = args.name
1640  source_table = args.table
1641  label = args.label
1642  queries_dir = args.queries_dir
1643  iterations = args.iterations
1644  gpu_count = args.gpu_count
1645  gpu_name = args.gpu_name
1646  no_gather_conn_gpu_info = args.no_gather_conn_gpu_info
1647  no_gather_nvml_gpu_info = args.no_gather_nvml_gpu_info
1648  gather_nvml_gpu_info = args.gather_nvml_gpu_info
1649  machine_name = args.machine_name
1650  machine_uname = args.machine_uname
1651  destinations = args.destination
1652  dest_db_user = args.dest_user
1653  dest_db_passwd = args.dest_passwd
1654  dest_db_server = args.dest_server
1655  dest_db_port = args.dest_port
1656  dest_db_name = args.dest_name
1657  dest_table = args.dest_table
1658  dest_table_schema_file = args.dest_table_schema_file
1659  output_file_json = args.output_file_json
1660  output_file_jenkins = args.output_file_jenkins
1661  output_tag_jenkins = args.output_tag_jenkins
1662  setup_teardown_queries_dir = args.setup_teardown_queries_dir
1663  run_setup_teardown_per_query = args.run_setup_teardown_per_query
1664  foreign_table_filename = args.foreign_table_filename
1665  jenkins_thresholds_name = args.jenkins_thresholds_name
1666  jenkins_thresholds_field = args.jenkins_thresholds_field
1667  clear_all_memory_pre_query = args.clear_all_memory_pre_query
1668  cuda_block_grid_perf_test = args.cuda_block_grid_perf_test
1669  show_simplified_result = args.show_simplified_result
1670  if args.cuda_grid_sizes is None:
1671  grid_size_range = [2]
1672  else:
1673  grid_size_range = args.cuda_grid_sizes
1674  if args.cuda_block_sizes is None:
1675  block_size_range = [1024]
1676  else:
1677  block_size_range = args.cuda_block_sizes
1678 
1679  # Hard-coded vars
1680  trim = 0.15
1681 
1682  # Set logging output level
1683  if verbose:
1684  logging.basicConfig(level=logging.DEBUG)
1685  elif quiet:
1686  logging.basicConfig(level=logging.WARNING)
1687  else:
1688  logging.basicConfig(level=logging.INFO)
1689 
1690  # Input validation
1691  if iterations <= 1:
1692  # Need > 1 iteration as first iteration is dropped from calculations
1693  logging.error("Iterations must be greater than 1")
1694  exit(1)
1695  if verify_destinations(
1696  destinations=destinations,
1697  dest_db_server=dest_db_server,
1698  output_file_json=output_file_json,
1699  output_file_jenkins=output_file_jenkins,
1700  ):
1701  logging.debug("Destination(s) have been verified.")
1702  else:
1703  logging.error("No valid destination(s) have been set. Exiting.")
1704  exit(1)
1705 
1706  # Establish connection to mapd db
1707  con = get_connection(
1708  db_user=source_db_user,
1709  db_passwd=source_db_passwd,
1710  db_server=source_db_server,
1711  db_port=source_db_port,
1712  db_name=source_db_name,
1713  )
1714  if not con:
1715  exit(1) # Exit if cannot connect to db
1716  # Set run-specific variables (time, uid, etc.)
1717  run_vars = get_run_vars(con=con)
1718  # Set GPU info depending on availability
1719  gpu_info = get_gpu_info(
1720  gpu_name=gpu_name,
1721  no_gather_conn_gpu_info=no_gather_conn_gpu_info,
1722  con=con,
1723  conn_machine_name=run_vars["conn_machine_name"],
1724  no_gather_nvml_gpu_info=no_gather_nvml_gpu_info,
1725  gather_nvml_gpu_info=gather_nvml_gpu_info,
1726  gpu_count=gpu_count,
1727  )
1728  # Set run machine info
1729  machine_info = get_machine_info(
1730  conn_machine_name=run_vars["conn_machine_name"],
1731  machine_name=machine_name,
1732  machine_uname=machine_uname,
1733  )
1734  # Read queries from files, set to queries dir in PWD if not passed in
1735  if not queries_dir:
1736  queries_dir = os.path.join(os.path.dirname(__file__), "queries")
1737  query_list = read_query_files(
1738  queries_dir=queries_dir, source_table=source_table
1739  )
1740  if not query_list:
1741  exit(1)
1742  # Read setup/teardown queries if they exist
1743  setup_query_list, teardown_query_list = \
1744  read_setup_teardown_query_files(queries_dir=setup_teardown_queries_dir,
1745  source_table=source_table,
1746  foreign_table_filename=foreign_table_filename)
1747  # Check at what granularity we want to run setup or teardown queries at
1748  run_global_setup_queries = setup_query_list is not None and not run_setup_teardown_per_query
1749  run_per_query_setup_queries = setup_query_list is not None and run_setup_teardown_per_query
1750  run_global_teardown_queries = teardown_query_list is not None and not run_setup_teardown_per_query
1751  run_per_query_teardown_queries = teardown_query_list is not None and run_setup_teardown_per_query
1752  # Run global setup queries if they exist
1753  queries_results = []
1754  st_qr = run_setup_teardown_query(queries=setup_query_list,
1755  do_run=run_global_setup_queries, trim=trim, con=con)
1756  queries_results.extend(st_qr)
1757  new_query_list = {"query_group": "", "queries": []}
1758  new_query_list.update(query_group=query_list["query_group"])
1759  if cuda_block_grid_perf_test:
1760  for query_info in query_list["queries"]:
1761  query = query_info["mapdql"]
1762  for block_size in block_size_range:
1763  for grid_size in grid_size_range:
1764  query_hint = "SELECT /*+ g_cuda_block_size(" + str(block_size) + "), "
1765  query_hint += "g_cuda_grid_size_multiplier(" + str(grid_size) + ") */ "
1766  query_name = query_info["name"] + "_block_size_" + str(block_size) + "_grid_size_" + str(grid_size)
1767  new_query = re.sub("select", "SELECT", query, flags=re.IGNORECASE)
1768  new_query = new_query.replace("SELECT", query_hint, 1)
1769  new_query_list["queries"].append(
1770  {"name": query_name, "mapdql": new_query})
1771  cuda_opt_query_hint = "SELECT /*+ cuda_opt_param */ "
1772  cuda_opt_query_name = query_info["name"] + "_block_size_-1_grid_size_-1"
1773  cuda_opt_new_query = re.sub("select", "SELECT", query, flags=re.IGNORECASE)
1774  cuda_opt_new_query = cuda_opt_new_query.replace("SELECT", cuda_opt_query_hint, 1)
1775  new_query_list["queries"].append(
1776  {"name": cuda_opt_query_name, "mapdql": cuda_opt_new_query})
1777  query_list = new_query_list
1778 
1779  # Run queries
1780  for query in query_list["queries"]:
1781  # Run setup queries
1782  st_qr = run_setup_teardown_query(
1783  queries=setup_query_list, do_run=run_per_query_setup_queries, trim=trim, con=con)
1784  queries_results.extend(st_qr)
1785  # Run benchmark query
1786  query_result = run_query(
1787  query=query, iterations=iterations, trim=trim, con=con,
1788  clear_all_memory_pre_query=clear_all_memory_pre_query
1789  )
1790  queries_results.append(query_result)
1791  # Run tear-down queries
1792  st_qr = run_setup_teardown_query(
1793  queries=teardown_query_list, do_run=run_per_query_teardown_queries, trim=trim, con=con)
1794  queries_results.extend(st_qr)
1795  logging.info("Completed all queries.")
1796  # Run global tear-down queries if they exist
1797  st_qr = run_setup_teardown_query(queries=teardown_query_list,
1798  do_run=run_global_teardown_queries, trim=trim, con=con)
1799  queries_results.extend(st_qr)
1800  logging.debug("Closing source db connection.")
1801  con.close()
1802  # Generate results dataset
1803  results_dataset = create_results_dataset(
1804  run_guid=run_vars["run_guid"],
1805  run_timestamp=run_vars["run_timestamp"],
1806  run_connection=run_vars["run_connection"],
1807  run_machine_name=machine_info["run_machine_name"],
1808  run_machine_uname=machine_info["run_machine_uname"],
1809  run_driver=run_vars["run_driver"],
1810  run_version=run_vars["run_version"],
1811  run_version_short=run_vars["run_version_short"],
1812  label=label,
1813  source_db_gpu_count=gpu_info["source_db_gpu_count"],
1814  source_db_gpu_driver_ver=gpu_info["source_db_gpu_driver_ver"],
1815  source_db_gpu_name=gpu_info["source_db_gpu_name"],
1816  source_db_gpu_mem=gpu_info["source_db_gpu_mem"],
1817  source_table=source_table,
1818  trim=trim,
1819  iterations=iterations,
1820  query_group=query_list["query_group"],
1821  queries_results=queries_results,
1822  )
1823  results_dataset_json = json.dumps(
1824  results_dataset, default=json_format_handler, indent=2
1825  )
1826  successful_results_dataset = [
1827  x for x in results_dataset if x["succeeded"] is not False
1828  ]
1829  successful_results_dataset_results = []
1830  for results_dataset_entry in successful_results_dataset:
1831  successful_results_dataset_results.append(
1832  results_dataset_entry["results"]
1833  )
1834 
1835  # Send results to destination(s)
1836  sent_destination = True
1837  if "mapd_db" in destinations:
1838  if not send_results_db(
1839  results_dataset=successful_results_dataset_results,
1840  table=dest_table,
1841  db_user=dest_db_user,
1842  db_passwd=dest_db_passwd,
1843  db_server=dest_db_server,
1844  db_port=dest_db_port,
1845  db_name=dest_db_name,
1846  table_schema_file=dest_table_schema_file,
1847  ):
1848  sent_destination = False
1849  if "file_json" in destinations:
1850  if not send_results_file_json(
1851  results_dataset_json=results_dataset_json,
1852  output_file_json=output_file_json,
1853  ):
1854  sent_destination = False
1855  if "jenkins_bench" in destinations:
1857  results_dataset=successful_results_dataset_results,
1858  thresholds_name=jenkins_thresholds_name,
1859  thresholds_field=jenkins_thresholds_field,
1860  output_tag_jenkins=output_tag_jenkins,
1861  output_file_jenkins=output_file_jenkins,
1862  ):
1863  sent_destination = False
1864  if "output" in destinations:
1865  if not send_results_output(results_dataset_json=results_dataset_json):
1866  sent_destination = False
1867  if not sent_destination:
1868  logging.error("Sending results to one or more destinations failed")
1869  exit(1)
1870  else:
1871  logging.info(
1872  "Succesfully loaded query results info into destination(s)"
1873  )
1874  if show_simplified_result:
1875  res_header = ['Query', 'Block', 'Grid', 'First', 'Min', 'Max', 'Avg']
1876  for i in range(1, iterations):
1877  res_header.append("Run-" + str(i))
1878  res_data = []
1879  for perf_info in results_dataset:
1880  tok = perf_info["name"].split("_")
1881  cur_query_perf = [tok[0], tok[3], tok[6], str(perf_info["results"]["query_exec_first"]),
1882  str(perf_info["results"]["query_exec_min"]), str(perf_info["results"]["query_exec_max"]),
1883  str(perf_info["results"]["query_exec_avg"])]
1884  for query_time in perf_info["debug"]["query_exec_times"]:
1885  cur_query_perf.append(str(query_time))
1886  res_data.append(cur_query_perf)
1887  res_df = pd.DataFrame(res_data, columns=res_header)
1888  res_df['Query'] = res_df['Query'].astype(str)
1889  res_df['Block'] = res_df['Block'].astype(int)
1890  res_df['Grid'] = res_df['Grid'].astype(float)
1891  res_df['Min'] = res_df['Min'].astype(int)
1892  res_str = res_df.to_string(header=True, index=False)
1893  col_desc = "(Block: cuda block size, Grid: cuda grid size multiplier (cuda grid size = # SMs * multiplier))"
1894  print("[Benchmark result in ms.]\n" + col_desc + "\n" + res_str)
1895 
1896  df1 = res_df.groupby(["Query", "Block"]).apply(
1897  lambda v: v.sort_values(by=['Min'], ascending=[True])).reset_index(drop=True)
1898  df2 = df1.groupby(["Query", "Block"]).head(1)
1899  per_block_str = df2[['Query', 'Block', 'Grid', 'Min']].to_string(index=False)
1900  per_block_str = per_block_str.replace("-1.00", "opt")
1901  per_block_str = per_block_str.replace("-1", "opt")
1902  print("[Best performance per block size in ms.]\n" + per_block_str)
1903 
1904  df3 = res_df.groupby(["Query", "Grid"]).apply(
1905  lambda v: v.sort_values(by=['Min'], ascending=[True])).reset_index(drop=True)
1906  df4 = df3.groupby(["Query", "Grid"]).head(1)
1907  per_grid_str = df4[['Query', 'Grid', 'Block', 'Min']].to_string(index=False)
1908  per_grid_str = per_grid_str.replace("-1.00", "opt")
1909  per_grid_str = per_grid_str.replace("-1", "opt")
1910  print("[Best performance per grid size]\n" + per_grid_str)
1911 
1912  overall_best = res_df.sort_values('Min', ascending=True)[['Query', 'Block', 'Grid', 'Min']].head(1)
1913  print("[Best performance in all conditions in ms.]\n" + overall_best.to_string(index=False))
1914 
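A minimal invocation sketch (argument values are hypothetical; only --table, --label, and --iterations are required by process_arguments()):

    from run_benchmark import benchmark

    # Hypothetical run: print the results JSON to stdout instead of a DB.
    benchmark([
        "--table", "flights_2008_10k",  # example source table
        "--label", "nightly",           # run label stored with results
        "--iterations", "5",            # must be > 1; first run is dropped
        "--destination", "output",      # one of: mapd_db, file_json, output, jenkins_bench
    ])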


def run_benchmark.calculate_query_times (   kwargs)
  Calculates aggregate query times from all iteration times

  Kwargs:
    total_times(list): List of total time calculations
    execution_times(list): List of execution_time calculations
    results_iter_times(list): List of results_iter_time calculations
    connect_times(list): List of connect_time calculations
    trim(float): Amount to trim from iterations set to gather trimmed
                 values. Enter as a decimal corresponding to the percent
                 to trim - e.g., 0.15 to trim 15%.

  Returns:
    query_execution(dict): Query times
    False(bool): The query failed. Exception should be logged.

Definition at line 525 of file run_benchmark.py.

Referenced by create_results_dataset().

526 def calculate_query_times(**kwargs):
527  """
528  Calculates aggregate query times from all iteration times
529 
530  Kwargs:
531  total_times(list): List of total time calculations
532  execution_times(list): List of execution_time calculations
533  results_iter_times(list): List of results_iter_time calculations
534  connect_times(list): List of connect_time calculations
535  trim(float): Amount to trim from iterations set to gather trimmed
536  values. Enter as a decimal corresponding to the percent to
537  trim - e.g., 0.15 to trim 15%.
538 
539  Returns:
540  query_execution(dict): Query times
541  False(bool): The query failed. Exception should be logged.
542  """
543  trim_size = int(kwargs["trim"] * len(kwargs["total_times"]))
544  return {
545  "total_time_avg": round(numpy.mean(kwargs["total_times"]), 1),
546  "total_time_min": round(numpy.min(kwargs["total_times"]), 1),
547  "total_time_max": round(numpy.max(kwargs["total_times"]), 1),
548  "total_time_85": round(numpy.percentile(kwargs["total_times"], 85), 1),
549  "total_time_trimmed_avg": round(
550  numpy.mean(
551  numpy.sort(kwargs["total_times"])[trim_size:-trim_size]
552  ),
553  1,
554  )
555  if trim_size
556  else round(numpy.mean(kwargs["total_times"]), 1),
557  "total_times": kwargs["total_times"],
558  "execution_time_avg": round(numpy.mean(kwargs["execution_times"]), 1),
559  "execution_time_min": round(numpy.min(kwargs["execution_times"]), 1),
560  "execution_time_max": round(numpy.max(kwargs["execution_times"]), 1),
561  "execution_time_85": round(
562  numpy.percentile(kwargs["execution_times"], 85), 1
563  ),
564  "execution_time_25": round(
565  numpy.percentile(kwargs["execution_times"], 25), 1
566  ),
567  "execution_time_std": round(numpy.std(kwargs["execution_times"]), 1),
568  "execution_time_trimmed_avg": round(
569  numpy.mean(
570  numpy.sort(kwargs["execution_times"])[trim_size:-trim_size]
571  )
572  )
573  if trim_size > 0
574  else round(numpy.mean(kwargs["execution_times"]), 1),
575  "execution_time_trimmed_max": round(
576  numpy.max(
577  numpy.sort(kwargs["execution_times"])[trim_size:-trim_size]
578  )
579  )
580  if trim_size > 0
581  else round(numpy.max(kwargs["execution_times"]), 1),
582  "execution_times": kwargs["execution_times"],
583  "connect_time_avg": round(numpy.mean(kwargs["connect_times"]), 1),
584  "connect_time_min": round(numpy.min(kwargs["connect_times"]), 1),
585  "connect_time_max": round(numpy.max(kwargs["connect_times"]), 1),
586  "connect_time_85": round(
587  numpy.percentile(kwargs["connect_times"], 85), 1
588  ),
589  "results_iter_time_avg": round(
590  numpy.mean(kwargs["results_iter_times"]), 1
591  ),
592  "results_iter_time_min": round(
593  numpy.min(kwargs["results_iter_times"]), 1
594  ),
595  "results_iter_time_max": round(
596  numpy.max(kwargs["results_iter_times"]), 1
597  ),
598  "results_iter_time_85": round(
599  numpy.percentile(kwargs["results_iter_times"], 85), 1
600  ),
601  }
602 
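A sketch of the trimming arithmetic with illustrative numbers: for trim=0.15 and ten samples, trim_size = int(0.15 * 10) = 1, so one sample is dropped from each end of the sorted list before averaging.

    import numpy

    times = [120.0, 95.0, 90.0, 92.0, 91.0, 93.0, 94.0, 96.0, 89.0, 250.0]
    trim_size = int(0.15 * len(times))                 # 1
    trimmed = numpy.sort(times)[trim_size:-trim_size]  # drops 89.0 and 250.0
    print(round(numpy.mean(trimmed), 1))               # 96.4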


def run_benchmark.clear_memory (   kwargs)
  Clears CPU or GPU memory

  Kwargs:
    con(class 'pymapd.connection.Connection'): Mapd connection
    mem_type(str): [gpu, cpu] Type of memory to clear

  Returns:
      None

Definition at line 603 of file run_benchmark.py.

Referenced by run_query().

604 def clear_memory(**kwargs):
605  """
606  Clears CPU or GPU memory
607 
608  Kwargs:
609  con(class 'pymapd.connection.Connection'): Mapd connection
610  mem_type(str): [gpu, cpu] Type of memory to clear
611 
612  Returns:
613  None
614  """
615  try:
616  session = kwargs["con"]._session
617  mem_type = kwargs["mem_type"]
618  if mem_type == 'cpu':
619  kwargs["con"]._client.clear_cpu_memory(session)
620  elif mem_type == 'gpu':
621  kwargs["con"]._client.clear_gpu_memory(session)
622  else:
623  raise TypeError("Unkown mem_type '" + str(mem_type) + "' supplied to 'clear_memory'")
624  except Exception as e:
625  errormessage = "Clear memory failed with error: " + str(e)
626  logging.error(errormessage)
627 
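A hedged usage sketch, assuming an established pymapd connection con:

    # Drop cached GPU buffers, then CPU buffers, before a cold-start measurement.
    clear_memory(con=con, mem_type="gpu")
    clear_memory(con=con, mem_type="cpu")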


def run_benchmark.clear_system_caches ( )
  Clears system caches

Definition at line 628 of file run_benchmark.py.

Referenced by run_query().

629 def clear_system_caches():
630  """
631  Clears system caches
632  """
633  try:
634  os.system('sudo sh -c "/bin/echo 3 > /proc/sys/vm/drop_caches"')
635  except Exception as e:
636  errormessage = "Clear system caches failed with error: " + str(e)
637  logging.error(errormessage)
638 


def run_benchmark.create_results_dataset (   kwargs)
  Create results dataset

  Kwargs:
    run_guid(str): Run GUID
    run_timestamp(datetime): Run timestamp
    run_connection(str): Connection string
    run_machine_name(str): Run machine name
    run_machine_uname(str): Run machine uname
    run_driver(str): Run driver
    run_version(str): Version of DB
    run_version_short(str): Shortened version of DB
    label(str): Run label
    source_db_gpu_count(int): Number of GPUs on run machine
    source_db_gpu_driver_ver(str): GPU driver version
    source_db_gpu_name(str): GPU name
    source_db_gpu_mem(str): Amount of GPU mem on run machine
    source_table(str): Table to run query against
    trim(float): Trim decimal to remove from top and bottom of results
    iterations(int): Number of iterations of each query to run
    query_group(str): Query group, usually matches table name
    queries_results(list): List of per-query result dicts:::
        query_name(str): Name of query
        query_mapdql(str): Query to run
        query_id(str): Query ID
        query_succeeded(bool): Query succeeded
        query_error_info(str): Query error info
        result_count(int): Number of results returned
        initial_iteration_results(dict):::
            first_execution_time(float): Execution time for first query
                iteration
            first_connect_time(float):  Connect time for first query
                iteration
            first_results_iter_time(float): Results iteration time for
                first query iteration
            first_total_time(float): Total time for first iteration
            first_cpu_mem_usage(float): CPU memory usage for first query
                iteration
            first_gpu_mem_usage(float): GPU memory usage for first query
                iteration
        noninitial_iteration_results(list):::
            execution_time(float): Time (in ms) that pymapd reports
                backend spent on query.
            connect_time(float): Time (in ms) for overhead of query,
                calculated by subtracting backend execution time from
                time spent on the execution function.
            results_iter_time(float): Time (in ms) it took for
                pymapd.fetchone() to iterate through all of the results.
            total_time(float): Time (in ms) from adding all above times.
        query_total_elapsed_time(int): Total elapsed time for query

  Returns:
    results_dataset(list):::
        result_dataset(dict): Query results dataset

Definition at line 933 of file run_benchmark.py.

References calculate_query_times().

Referenced by benchmark().

934 def create_results_dataset(**kwargs):
935  """
936  Create results dataset
937 
938  Kwargs:
939  run_guid(str): Run GUID
940  run_timestamp(datetime): Run timestamp
941  run_connection(str): Connection string
942  run_machine_name(str): Run machine name
943  run_machine_uname(str): Run machine uname
944  run_driver(str): Run driver
945  run_version(str): Version of DB
946  run_version_short(str): Shortened version of DB
947  label(str): Run label
948  source_db_gpu_count(int): Number of GPUs on run machine
949  source_db_gpu_driver_ver(str): GPU driver version
950  source_db_gpu_name(str): GPU name
951  source_db_gpu_mem(str): Amount of GPU mem on run machine
952  source_table(str): Table to run query against
953  trim(float): Trim decimal to remove from top and bottom of results
954  iterations(int): Number of iterations of each query to run
955  query_group(str): Query group, usually matches table name
956  queries_results(list): List of per-query result dicts:::
957  query_name(str): Name of query
958  query_mapdql(str): Query to run
959  query_id(str): Query ID
960  query_succeeded(bool): Query succeeded
961  query_error_info(str): Query error info
962  result_count(int): Number of results returned
963  initial_iteration_results(dict):::
964  first_execution_time(float): Execution time for first query
965  iteration
966  first_connect_time(float): Connect time for first query
967  iteration
968  first_results_iter_time(float): Results iteration time for
969  first query iteration
970  first_total_time(float): Total time for first iteration
971  first_cpu_mem_usage(float): CPU memory usage for first query
972  iteration
973  first_gpu_mem_usage(float): GPU memory usage for first query
974  iteration
975  noninitial_iteration_results(list):::
976  execution_time(float): Time (in ms) that pymapd reports
977  backend spent on query.
978  connect_time(float): Time (in ms) for overhead of query,
979  calculated by subtracting backend execution time from
980  time spent on the execution function.
981  results_iter_time(float): Time (in ms) it took for
982  pymapd.fetchone() to iterate through all of the results.
983  total_time(float): Time (in ms) from adding all above times.
984  query_total_elapsed_time(int): Total elapsed time for query
985 
986  Returns:
987  results_dataset(list):::
988  result_dataset(dict): Query results dataset
989  """
990  results_dataset = []
991  for query_results in kwargs["queries_results"]:
992  if query_results["query_succeeded"]:
993  # Aggregate iteration values
994  execution_times, connect_times, results_iter_times, total_times = (
995  [],
996  [],
997  [],
998  [],
999  )
1000  detailed_timing_last_iteration = {}
1001  if len(query_results["noninitial_iteration_results"]) == 0:
1002  # A single query run (most likely a setup or teardown query)
1003  initial_result = query_results["initial_iteration_results"]
1004  execution_times.append(initial_result["first_execution_time"])
1005  connect_times.append(initial_result["first_connect_time"])
1006  results_iter_times.append(
1007  initial_result["first_results_iter_time"]
1008  )
1009  total_times.append(initial_result["first_total_time"])
1010  # Special case
1011  result_count = 1
1012  else:
1013  # More than one query run
1014  for noninitial_result in query_results[
1015  "noninitial_iteration_results"
1016  ]:
1017  execution_times.append(noninitial_result["execution_time"])
1018  connect_times.append(noninitial_result["connect_time"])
1019  results_iter_times.append(
1020  noninitial_result["results_iter_time"]
1021  )
1022  total_times.append(noninitial_result["total_time"])
1023  # Overwrite result count, same for each iteration
1024  result_count = noninitial_result["result_count"]
1025 
1026  # If available, getting the last iteration's component-wise timing information as a json structure
1027  if (
1028  query_results["noninitial_iteration_results"][-1]["debug_info"]
1029  ):
1030  detailed_timing_last_iteration = json.loads(
1031  query_results["noninitial_iteration_results"][-1][
1032  "debug_info"
1033  ]
1034  )["timer"]
1035  # Calculate query times
1036  logging.debug(
1037  "Calculating times from query " + query_results["query_id"]
1038  )
1039  query_times = calculate_query_times(
1040  total_times=total_times,
1041  execution_times=execution_times,
1042  connect_times=connect_times,
1043  results_iter_times=results_iter_times,
1044  trim=kwargs[
1045  "trim"
1046  ], # Trim top and bottom n% for trimmed calculations
1047  )
1048  result_dataset = {
1049  "name": query_results["query_name"],
1050  "mapdql": query_results["query_mapdql"],
1051  "succeeded": True,
1052  "results": {
1053  "run_guid": kwargs["run_guid"],
1054  "run_timestamp": kwargs["run_timestamp"],
1055  "run_connection": kwargs["run_connection"],
1056  "run_machine_name": kwargs["run_machine_name"],
1057  "run_machine_uname": kwargs["run_machine_uname"],
1058  "run_driver": kwargs["run_driver"],
1059  "run_version": kwargs["run_version"],
1060  "run_version_short": kwargs["run_version_short"],
1061  "run_label": kwargs["label"],
1062  "run_gpu_count": kwargs["source_db_gpu_count"],
1063  "run_gpu_driver_ver": kwargs["source_db_gpu_driver_ver"],
1064  "run_gpu_name": kwargs["source_db_gpu_name"],
1065  "run_gpu_mem_mb": kwargs["source_db_gpu_mem"],
1066  "run_table": kwargs["source_table"],
1067  "query_group": kwargs["query_group"],
1068  "query_id": query_results["query_id"],
1069  "query_result_set_count": result_count,
1070  "query_error_info": query_results["query_error_info"],
1071  "query_conn_first": query_results[
1072  "initial_iteration_results"
1073  ]["first_connect_time"],
1074  "query_conn_avg": query_times["connect_time_avg"],
1075  "query_conn_min": query_times["connect_time_min"],
1076  "query_conn_max": query_times["connect_time_max"],
1077  "query_conn_85": query_times["connect_time_85"],
1078  "query_exec_first": query_results[
1079  "initial_iteration_results"
1080  ]["first_execution_time"],
1081  "query_exec_avg": query_times["execution_time_avg"],
1082  "query_exec_min": query_times["execution_time_min"],
1083  "query_exec_max": query_times["execution_time_max"],
1084  "query_exec_85": query_times["execution_time_85"],
1085  "query_exec_25": query_times["execution_time_25"],
1086  "query_exec_stdd": query_times["execution_time_std"],
1087  "query_exec_trimmed_avg": query_times[
1088  "execution_time_trimmed_avg"
1089  ],
1090  "query_exec_trimmed_max": query_times[
1091  "execution_time_trimmed_max"
1092  ],
1093  # Render queries not supported yet
1094  "query_render_first": None,
1095  "query_render_avg": None,
1096  "query_render_min": None,
1097  "query_render_max": None,
1098  "query_render_85": None,
1099  "query_render_25": None,
1100  "query_render_stdd": None,
1101  "query_total_first": query_results[
1102  "initial_iteration_results"
1103  ]["first_total_time"],
1104  "query_total_avg": query_times["total_time_avg"],
1105  "query_total_min": query_times["total_time_min"],
1106  "query_total_max": query_times["total_time_max"],
1107  "query_total_85": query_times["total_time_85"],
1108  "query_total_all": query_results[
1109  "query_total_elapsed_time"
1110  ],
1111  "query_total_trimmed_avg": query_times[
1112  "total_time_trimmed_avg"
1113  ],
1114  "results_iter_count": kwargs["iterations"],
1115  "results_iter_first": query_results[
1116  "initial_iteration_results"
1117  ]["first_results_iter_time"],
1118  "results_iter_avg": query_times["results_iter_time_avg"],
1119  "results_iter_min": query_times["results_iter_time_min"],
1120  "results_iter_max": query_times["results_iter_time_max"],
1121  "results_iter_85": query_times["results_iter_time_85"],
1122  "cpu_mem_usage_mb": query_results[
1123  "initial_iteration_results"
1124  ]["first_cpu_mem_usage"],
1125  "gpu_mem_usage_mb": query_results[
1126  "initial_iteration_results"
1127  ]["first_gpu_mem_usage"],
1128  },
1129  "debug": {
1130  "query_exec_times": query_times["execution_times"],
1131  "query_total_times": query_times["total_times"],
1132  "detailed_timing_last_iteration": detailed_timing_last_iteration,
1133  },
1134  }
1135  elif not query_results["query_succeeded"]:
1136  result_dataset = {
1137  "name": query_results["query_name"],
1138  "mapdql": query_results["query_mapdql"],
1139  "succeeded": False,
1140  }
1141  results_dataset.append(result_dataset)
1142  logging.debug("All values set for query " + query_results["query_id"])
1143  return results_dataset
1144 
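A shape sketch (values hypothetical) of one entry in the queries_results list consumed above:

    query_results = {
        "query_name": "query_1.sql",
        "query_mapdql": "SELECT COUNT(*) FROM t",
        "query_id": "query_1",
        "query_succeeded": True,
        "query_error_info": "",
        "query_total_elapsed_time": 412,
        "initial_iteration_results": {
            "first_execution_time": 35.1,
            "first_connect_time": 1.2,
            "first_results_iter_time": 0.3,
            "first_total_time": 36.6,
            "first_cpu_mem_usage": 120.0,
            "first_gpu_mem_usage": 512.0,
        },
        "noninitial_iteration_results": [
            {"execution_time": 30.0, "connect_time": 1.0,
             "results_iter_time": 0.2, "total_time": 31.2,
             "result_count": 1, "debug_info": None},
        ],
    }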



def run_benchmark.execute_query (   kwargs)
  Executes a query against the connected db using pymapd
  https://pymapd.readthedocs.io/en/latest/usage.html#querying

  Kwargs:
    query_name(str): Name of query
    query_mapdql(str): Query to run
    iteration(int): Iteration number
    con(class): Connection class

  Returns:
    query_execution(dict):::
      result_count(int): Number of results returned
      execution_time(float): Time (in ms) that pymapd reports
                             backend spent on query.
      connect_time(float): Time (in ms) for overhead of query, calculated
                           by subtracting backend execution time
                           from time spent on the execution function.
      results_iter_time(float): Time (in ms) it took for
                                pymapd.fetchone() to iterate through all
                                of the results.
      total_time(float): Time (in ms) from adding all above times.
    False(bool): The query failed. Exception should be logged.

Definition at line 443 of file run_benchmark.py.

Referenced by run_query().

444 def execute_query(**kwargs):
445  """
446  Executes a query against the connected db using pymapd
447  https://pymapd.readthedocs.io/en/latest/usage.html#querying
448 
449  Kwargs:
450  query_name(str): Name of query
451  query_mapdql(str): Query to run
452  iteration(int): Iteration number
453  con(class): Connection class
454 
455  Returns:
456  query_execution(dict):::
457  result_count(int): Number of results returned
458  execution_time(float): Time (in ms) that pymapd reports
459  backend spent on query.
460  connect_time(float): Time (in ms) for overhead of query, calculated
461  by subtracting backend execution time
462  from time spent on the execution function.
463  results_iter_time(float): Time (in ms) it took for
464  pymapd.fetchone() to iterate through all
465  of the results.
466  total_time(float): Time (in ms) from adding all above times.
467  False(bool): The query failed. Exception should be logged.
468  """
469  start_time = timeit.default_timer()
470  try:
471  # Run the query
472  query_result = kwargs["con"].execute(kwargs["query_mapdql"])
473  logging.debug(
474  "Completed iteration "
475  + str(kwargs["iteration"])
476  + " of query "
477  + kwargs["query_name"]
478  )
479  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
480  logging.exception(
481  "Error running query "
482  + kwargs["query_name"]
483  + " during iteration "
484  + str(kwargs["iteration"])
485  )
486  return False
487 
488  # Calculate times
489  query_elapsed_time = (timeit.default_timer() - start_time) * 1000
490  execution_time = query_result._result.execution_time_ms
491  debug_info = query_result._result.debug
492  connect_time = round((query_elapsed_time - execution_time), 1)
493  # Iterate through each result from the query
494  logging.debug(
495  "Counting results from query"
496  + kwargs["query_name"]
497  + " iteration "
498  + str(kwargs["iteration"])
499  )
500  result_count = 0
501  start_time = timeit.default_timer()
502  while query_result.fetchone():
503  result_count += 1
504  results_iter_time = round(
505  ((timeit.default_timer() - start_time) * 1000), 1
506  )
507  query_execution = {
508  "result_count": result_count,
509  "execution_time": execution_time,
510  "connect_time": connect_time,
511  "results_iter_time": results_iter_time,
512  "total_time": execution_time + connect_time + results_iter_time,
513  "debug_info": debug_info,
514  }
515  logging.debug(
516  "Execution results for query"
517  + kwargs["query_name"]
518  + " iteration "
519  + str(kwargs["iteration"])
520  + ": "
521  + str(query_execution)
522  )
523  return query_execution
524 
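A sketch of how the reported times compose (numbers illustrative): backend execution time is subtracted from the wall-clock time around execute() to estimate connection overhead, and total_time sums the three components.

    query_elapsed_time = 36.6  # wall clock around con.execute(), in ms
    execution_time = 35.1      # backend-reported execution_time_ms
    connect_time = round(query_elapsed_time - execution_time, 1)    # 1.5
    results_iter_time = 0.3    # time spent in the fetchone() loop
    total_time = execution_time + connect_time + results_iter_time  # 36.9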


def run_benchmark.get_connection (   kwargs)
  Connects to the db using pymapd
  https://pymapd.readthedocs.io/en/latest/usage.html#connecting

  Kwargs:
    db_user(str): DB username
    db_passwd(str): DB password
    db_server(str): DB host
    db_port(int): DB port
    db_name(str): DB name

  Returns:
    con(class): Connection class
    False(bool): The connection failed. Exception should be logged.

Definition at line 63 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), benchmark(), and send_results_db().

63 
64 def get_connection(**kwargs):
65  """
66  Connects to the db using pymapd
67  https://pymapd.readthedocs.io/en/latest/usage.html#connecting
68 
69  Kwargs:
70  db_user(str): DB username
71  db_passwd(str): DB password
72  db_server(str): DB host
73  db_port(int): DB port
74  db_name(str): DB name
75 
76  Returns:
77  con(class): Connection class
78  False(bool): The connection failed. Exception should be logged.
79  """
80  try:
81  logging.debug("Connecting to mapd db...")
82  con = pymapd.connect(
83  user=kwargs["db_user"],
84  password=kwargs["db_passwd"],
85  host=kwargs["db_server"],
86  port=kwargs["db_port"],
87  dbname=kwargs["db_name"],
88  )
89  logging.info("Succesfully connected to mapd db")
90  return con
91  except (pymapd.exceptions.OperationalError, pymapd.exceptions.Error):
92  logging.exception("Error connecting to database.")
93  return False
94 
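A usage sketch with the same placeholder credentials that process_arguments() uses as defaults:

    con = get_connection(
        db_user="mapd",
        db_passwd="HyperInteractive",
        db_server="localhost",
        db_port=6274,
        db_name="mapd",
    )
    if not con:
        exit(1)  # failures return False rather than raising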


def run_benchmark.get_gpu_info (   kwargs)
  Gets run machine GPU info

  Kwargs:
    gpu_name(str): GPU name from input param
    no_gather_conn_gpu_info(bool): Do not gather GPU info fields from the pymapd connection
    con(class 'pymapd.connection.Connection'): Mapd connection
    conn_machine_name(str): Name of run machine
    no_gather_nvml_gpu_info(bool): Do not gather GPU info using nvml
    gather_nvml_gpu_info(bool): Gather GPU info using nvml
    gpu_count(int): Number of GPUs on run machine

  Returns:
    gpu_info(dict):::
        conn_gpu_count(int): Number of GPUs gathered from pymapd con
        source_db_gpu_count(int): Number of GPUs on run machine
        source_db_gpu_mem(str): Amount of GPU mem on run machine
        source_db_gpu_driver_ver(str): GPU driver version
        source_db_gpu_name(str): GPU name

Definition at line 136 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

137 def get_gpu_info(**kwargs):
138  """
139  Gets run machine GPU info
140 
141  Kwargs:
142  gpu_name(str): GPU name from input param
143  no_gather_conn_gpu_info(bool): Do not gather GPU info fields from the pymapd connection
144  con(class 'pymapd.connection.Connection'): Mapd connection
145  conn_machine_name(str): Name of run machine
146  no_gather_nvml_gpu_info(bool): Do not gather GPU info using nvml
147  gather_nvml_gpu_info(bool): Gather GPU info using nvml
148  gpu_count(int): Number of GPUs on run machine
149 
150  Returns:
151  gpu_info(dict):::
152  conn_gpu_count(int): Number of GPUs gathered from pymapd con
153  source_db_gpu_count(int): Number of GPUs on run machine
154  source_db_gpu_mem(str): Amount of GPU mem on run machine
155  source_db_gpu_driver_ver(str): GPU driver version
156  source_db_gpu_name(str): GPU name
157  """
158  # Set GPU info fields
159  conn_gpu_count = None
160  source_db_gpu_count = None
161  source_db_gpu_mem = None
162  source_db_gpu_driver_ver = ""
163  source_db_gpu_name = ""
164  if kwargs["no_gather_conn_gpu_info"]:
165  logging.debug(
166  "--no-gather-conn-gpu-info passed, "
167  + "using blank values for source database GPU info fields "
168  + "[run_gpu_count, run_gpu_mem_mb] "
169  )
170  else:
171  logging.debug(
172  "Gathering source database GPU info fields "
173  + "[run_gpu_count, run_gpu_mem_mb] "
174  + "using pymapd connection info. "
175  )
176  conn_hardware_info = kwargs["con"]._client.get_hardware_info(
177  kwargs["con"]._session
178  )
179  conn_gpu_count = conn_hardware_info.hardware_info[0].num_gpu_allocated
180  if conn_gpu_count == 0 or conn_gpu_count is None:
181  no_gather_nvml_gpu_info = True
182  if conn_gpu_count == 0:
183  logging.warning(
184  "0 GPUs detected from connection info, "
185  + "using blank values for source database GPU info fields "
186  + "If running against cpu-only server, make sure to set "
187  + "--no-gather-nvml-gpu-info and --no-gather-conn-gpu-info."
188  )
189  else:
190  no_gather_nvml_gpu_info = kwargs["no_gather_nvml_gpu_info"]
191  source_db_gpu_count = conn_gpu_count
192  try:
193  source_db_gpu_mem = int(
194  conn_hardware_info.hardware_info[0].gpu_info[0].memory
195  / 1000000
196  )
197  except IndexError:
198  logging.error("GPU memory info not available from connection.")
199  if no_gather_nvml_gpu_info:
200  logging.debug(
201  "--no-gather-nvml-gpu-info passed, "
202  + "using blank values for source database GPU info fields "
203  + "[gpu_driver_ver, run_gpu_name] "
204  )
205  elif (
206  kwargs["conn_machine_name"] == "localhost"
207  or kwargs["gather_nvml_gpu_info"]
208  ):
209  logging.debug(
210  "Gathering source database GPU info fields "
211  + "[gpu_driver_ver, run_gpu_name] "
212  + "from local GPU using pynvml. "
213  )
214  import pynvml
215 
216  pynvml.nvmlInit()
217  source_db_gpu_driver_ver = pynvml.nvmlSystemGetDriverVersion().decode()
218  for i in range(source_db_gpu_count):
219  handle = pynvml.nvmlDeviceGetHandleByIndex(i)
220  # Assume all cards are the same, overwrite name value
221  source_db_gpu_name = pynvml.nvmlDeviceGetName(handle).decode()
222  pynvml.nvmlShutdown()
223  # If gpu_count argument passed in, override gathered value
224  if kwargs["gpu_count"]:
225  source_db_gpu_count = kwargs["gpu_count"]
226  if kwargs["gpu_name"]:
227  source_db_gpu_name = kwargs["gpu_name"]
228  gpu_info = {
229  "conn_gpu_count": conn_gpu_count,
230  "source_db_gpu_count": source_db_gpu_count,
231  "source_db_gpu_mem": source_db_gpu_mem,
232  "source_db_gpu_driver_ver": source_db_gpu_driver_ver,
233  "source_db_gpu_name": source_db_gpu_name,
234  }
235  return gpu_info
236 
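A hedged call sketch for a CPU-only server, with both gathering paths disabled:

    gpu_info = get_gpu_info(
        gpu_name="",
        no_gather_conn_gpu_info=True,   # skip the pymapd hardware query
        con=con,
        conn_machine_name="localhost",
        no_gather_nvml_gpu_info=True,   # skip pynvml
        gather_nvml_gpu_info=False,
        gpu_count=None,
    )
    # conn_gpu_count and source_db_gpu_count stay None; name/driver stay blank.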


def run_benchmark.get_machine_info (   kwargs)
  Gets run machine info

  Kwargs:
    conn_machine_name(str): Name of machine from pymapd con
    machine_name(str): Name of machine if passed in
    machine_uname(str): Uname of machine if passed in

  Returns:
    machine_info(dict):::
        run_machine_name(str): Run machine name
        run_machine_uname(str): Run machine uname

Definition at line 237 of file run_benchmark.py.


Referenced by run_benchmark_arrow.benchmark(), and benchmark().

238 def get_machine_info(**kwargs):
239  """
240  Gets run machine info
241 
242  Kwargs:
243  conn_machine_name(str): Name of machine from pymapd con
244  machine_name(str): Name of machine if passed in
245  machine_uname(str): Uname of machine if passed in
246 
247  Returns:
248  machine_info(dict):::
249  run_machine_name(str): Run machine name
250  run_machine_uname(str): Run machine uname
251  """
252  # Set machine names, using local info if connected to localhost
253  if kwargs["conn_machine_name"] == "localhost":
254  local_uname = os.uname()
255  # If --machine-name passed in, override pymapd con value
256  if kwargs["machine_name"]:
257  run_machine_name = kwargs["machine_name"]
258  else:
259  if kwargs["conn_machine_name"] == "localhost":
260  run_machine_name = local_uname.nodename.split(".")[0]
261  else:
262  run_machine_name = kwargs["conn_machine_name"]
263  # If --machine-uname passed in, override pymapd con value
264  if kwargs["machine_uname"]:
265  run_machine_uname = kwargs["machine_uname"]
266  else:
267  if kwargs["conn_machine_name"] == "localhost":
268  run_machine_uname = " ".join(local_uname)
269  else:
270  run_machine_uname = ""
271  machine_info = {
272  "run_machine_name": run_machine_name,
273  "run_machine_uname": run_machine_uname,
274  }
275  return machine_info
276 
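A sketch for a localhost run with no overrides, assuming a hypothetical host named bench01.example.com:

    machine_info = get_machine_info(
        conn_machine_name="localhost",
        machine_name=None,   # no --machine-name override
        machine_uname=None,  # no --machine-uname override
    )
    # run_machine_name == "bench01" (nodename up to the first dot);
    # run_machine_uname is the space-joined os.uname() tuple.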



def run_benchmark.get_mem_usage (   kwargs)
  Calculates memory statistics from mapd_server _client.get_memory call

  Kwargs:
    con(class 'pymapd.connection.Connection'): Mapd connection
    mem_type(str): [gpu, cpu] Type of memory to gather metrics for

  Returns:
    ramusage(dict):::
      usedram(float): Amount of memory (in MB) used
      freeram(float): Amount of memory (in MB) free
      totalallocated(float): Total amount of memory (in MB) allocated
      errormessage(str): Error if returned by get_memory call
      rawdata(list): Raw data returned from get_memory call

Definition at line 639 of file run_benchmark.py.

Referenced by run_benchmark_arrow.run_query(), and run_query().

640 def get_mem_usage(**kwargs):
641  """
642  Calculates memory statistics from mapd_server _client.get_memory call
643 
644  Kwargs:
645  con(class 'pymapd.connection.Connection'): Mapd connection
646  mem_type(str): [gpu, cpu] Type of memory to gather metrics for
647 
648  Returns:
649  ramusage(dict):::
650  usedram(float): Amount of memory (in MB) used
651  freeram(float): Amount of memory (in MB) free
652  totalallocated(float): Total amount of memory (in MB) allocated
653  errormessage(str): Error if returned by get_memory call
654  rawdata(list): Raw data returned from get_memory call
655  """
656  try:
657  con_mem_data_list = kwargs["con"]._client.get_memory(
658  session=kwargs["con"]._session, memory_level=kwargs["mem_type"]
659  )
660  usedram = 0
661  freeram = 0
662  for con_mem_data in con_mem_data_list:
663  page_size = con_mem_data.page_size
664  node_memory_data_list = con_mem_data.node_memory_data
665  for node_memory_data in node_memory_data_list:
666  ram = node_memory_data.num_pages * page_size
667  is_free = node_memory_data.is_free
668  if is_free:
669  freeram += ram
670  else:
671  usedram += ram
672  totalallocated = usedram + freeram
673  if totalallocated > 0:
674  totalallocated = round(totalallocated / 1024 / 1024, 1)
675  usedram = round(usedram / 1024 / 1024, 1)
676  freeram = round(freeram / 1024 / 1024, 1)
677  ramusage = {}
678  ramusage["usedram"] = usedram
679  ramusage["freeram"] = freeram
680  ramusage["totalallocated"] = totalallocated
681  ramusage["errormessage"] = ""
682  except Exception as e:
683  errormessage = "Get memory failed with error: " + str(e)
684  logging.error(errormessage)
685  ramusage["errormessage"] = errormessage
686  return ramusage
687 
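A sketch of the page arithmetic with illustrative numbers: usage is accumulated in bytes as num_pages * page_size per node, then converted to MB.

    page_size = 512 * 1024 * 1024     # e.g. 512 MB pages
    used_pages, free_pages = 3, 5
    usedram = used_pages * page_size  # bytes
    freeram = free_pages * page_size
    totalallocated = round((usedram + freeram) / 1024 / 1024, 1)  # 4096.0 MB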


def run_benchmark.get_run_vars (   kwargs)
  Gets/sets run-specific vars such as time, uid, etc.

  Kwargs:
    con(class 'pymapd.connection.Connection'): Mapd connection

  Returns:
    run_vars(dict):::
        run_guid(str): Run GUID
        run_timestamp(datetime): Run timestamp
        run_connection(str): Connection string
        run_driver(str): Run driver
        run_version(str): Version of DB
        run_version_short(str): Shortened version of DB
        conn_machine_name(str): Name of run machine

Definition at line 95 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

95 
96 def get_run_vars(**kwargs):
97  """
98  Gets/sets run-specific vars such as time, uid, etc.
99 
100  Kwargs:
101  con(class 'pymapd.connection.Connection'): Mapd connection
102 
103  Returns:
104  run_vars(dict):::
105  run_guid(str): Run GUID
106  run_timestamp(datetime): Run timestamp
107  run_connection(str): Connection string
108  run_driver(str): Run driver
109  run_version(str): Version of DB
110  run_version_short(str): Shortened version of DB
111  conn_machine_name(str): Name of run machine
112  """
113  run_guid = str(uuid.uuid4())
114  logging.debug("Run guid: " + run_guid)
115  run_timestamp = datetime.datetime.now()
116  run_connection = str(kwargs["con"])
117  logging.debug("Connection string: " + run_connection)
118  run_driver = "" # TODO
119  run_version = kwargs["con"]._client.get_version()
120  if "-" in run_version:
121  run_version_short = run_version.split("-")[0]
122  else:
123  run_version_short = run_version
124  conn_machine_name = re.search(r"@(.*?):", run_connection).group(1)
125  run_vars = {
126  "run_guid": run_guid,
127  "run_timestamp": run_timestamp,
128  "run_connection": run_connection,
129  "run_driver": run_driver,
130  "run_version": run_version,
131  "run_version_short": run_version_short,
132  "conn_machine_name": conn_machine_name,
133  }
134  return run_vars
135 
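The machine name is parsed out of the connection string with a regex; a sketch of that step, assuming pymapd's usual user@host:port connection repr:

    import re

    run_connection = "Connection(mapd://mapd:***@localhost:6274/mapd)"  # assumed repr
    conn_machine_name = re.search(r"@(.*?):", run_connection).group(1)
    print(conn_machine_name)  # "localhost"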


def run_benchmark.json_format_handler (   x)

Definition at line 924 of file run_benchmark.py.

925 def json_format_handler(x):
926  # Function to allow json to deal with datetime and numpy int
927  if isinstance(x, datetime.datetime):
928  return x.isoformat()
929  if isinstance(x, numpy.int64):
930  return int(x)
931  raise TypeError("Unknown type")
932 
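A usage sketch mirroring the json.dumps call in benchmark(): the handler is passed as the default so datetimes and numpy ints serialize cleanly.

    import datetime, json, numpy

    payload = {"run_timestamp": datetime.datetime.now(),
               "result_count": numpy.int64(42)}
    print(json.dumps(payload, default=json_format_handler, indent=2))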
def run_benchmark.process_arguments (   input_arguments)

Definition at line 1327 of file run_benchmark.py.

Referenced by benchmark().

1328 def process_arguments(input_arguments):
1329  # Parse input parameters
1330  parser = ArgumentParser()
1331  optional = parser._action_groups.pop()
1332  required = parser.add_argument_group("required arguments")
1333  parser._action_groups.append(optional)
1334  optional.add_argument(
1335  "-v", "--verbose", action="store_true", help="Turn on debug logging"
1336  )
1337  optional.add_argument(
1338  "-q",
1339  "--quiet",
1340  action="store_true",
1341  help="Suppress script outuput " + "(except warnings and errors)",
1342  )
1343  required.add_argument(
1344  "-u",
1345  "--user",
1346  dest="user",
1347  default="mapd",
1348  help="Source database user",
1349  )
1350  required.add_argument(
1351  "-p",
1352  "--passwd",
1353  dest="passwd",
1354  default="HyperInteractive",
1355  help="Source database password",
1356  )
1357  required.add_argument(
1358  "-s",
1359  "--server",
1360  dest="server",
1361  default="localhost",
1362  help="Source database server hostname",
1363  )
1364  optional.add_argument(
1365  "-o",
1366  "--port",
1367  dest="port",
1368  type=int,
1369  default=6274,
1370  help="Source database server port",
1371  )
1372  required.add_argument(
1373  "-n",
1374  "--name",
1375  dest="name",
1376  default="mapd",
1377  help="Source database name",
1378  )
1379  required.add_argument(
1380  "-t",
1381  "--table",
1382  dest="table",
1383  required=True,
1384  help="Source db table name",
1385  )
1386  required.add_argument(
1387  "-l",
1388  "--label",
1389  dest="label",
1390  required=True,
1391  help="Benchmark run label",
1392  )
1393  required.add_argument(
1394  "-d",
1395  "--queries-dir",
1396  dest="queries_dir",
1397  help='Absolute path to dir with query files. \
1398  [Default: "queries" dir in same location as script]',
1399  )
1400  required.add_argument(
1401  "-i",
1402  "--iterations",
1403  dest="iterations",
1404  type=int,
1405  required=True,
1406  help="Number of iterations per query. Must be > 1",
1407  )
1408  optional.add_argument(
1409  "-g",
1410  "--gpu-count",
1411  dest="gpu_count",
1412  type=int,
1413  default=None,
1414  help="Number of GPUs. Not required when gathering local gpu info",
1415  )
1416  optional.add_argument(
1417  "-G",
1418  "--gpu-name",
1419  dest="gpu_name",
1420  type=str,
1421  default="",
1422  help="Name of GPU(s). Not required when gathering local gpu info",
1423  )
1424  optional.add_argument(
1425  "--no-gather-conn-gpu-info",
1426  dest="no_gather_conn_gpu_info",
1427  action="store_true",
1428  help="Do not gather source database GPU info fields "
1429  + "[run_gpu_count, run_gpu_mem_mb] "
1430  + "using pymapd connection info. "
1431  + "Use when testing a CPU-only server.",
1432  )
1433  optional.add_argument(
1434  "--no-gather-nvml-gpu-info",
1435  dest="no_gather_nvml_gpu_info",
1436  action="store_true",
1437  help="Do not gather source database GPU info fields "
1438  + "[gpu_driver_ver, run_gpu_name] "
1439  + "from local GPU using pynvml. "
1440  + 'Defaults to True when source server is not "localhost". '
1441  + "Use when testing a CPU-only server.",
1442  )
1443  optional.add_argument(
1444  "--gather-nvml-gpu-info",
1445  dest="gather_nvml_gpu_info",
1446  action="store_true",
1447  help="Gather source database GPU info fields "
1448  + "[gpu_driver_ver, run_gpu_name] "
1449  + "from local GPU using pynvml. "
1450  + 'Defaults to True when source server is "localhost". '
1451  + "Only use when benchmarking against same machine that this script "
1452  + "is run from.",
1453  )
1454  optional.add_argument(
1455  "-m",
1456  "--machine-name",
1457  dest="machine_name",
1458  help="Name of source machine",
1459  )
1460  optional.add_argument(
1461  "-a",
1462  "--machine-uname",
1463  dest="machine_uname",
1464  help="Uname info from " + "source machine",
1465  )
1466  optional.add_argument(
1467  "-e",
1468  "--destination",
1469  dest="destination",
1470  default="mapd_db",
1471  help="Destination type: [mapd_db, file_json, output, jenkins_bench] "
1472  + "Multiple values can be input seperated by commas, "
1473  + 'ex: "mapd_db,file_json"',
1474  )
1475  optional.add_argument(
1476  "-U",
1477  "--dest-user",
1478  dest="dest_user",
1479  default="mapd",
1480  help="Destination mapd_db database user",
1481  )
1482  optional.add_argument(
1483  "-P",
1484  "--dest-passwd",
1485  dest="dest_passwd",
1486  default="HyperInteractive",
1487  help="Destination mapd_db database password",
1488  )
1489  optional.add_argument(
1490  "-S",
1491  "--dest-server",
1492  dest="dest_server",
1493  help="Destination mapd_db database server hostname"
1494  + ' (required if destination = "mapd_db")',
1495  )
1496  optional.add_argument(
1497  "-O",
1498  "--dest-port",
1499  dest="dest_port",
1500  type=int,
1501  default=6274,
1502  help="Destination mapd_db database server port",
1503  )
1504  optional.add_argument(
1505  "-N",
1506  "--dest-name",
1507  dest="dest_name",
1508  default="mapd",
1509  help="Destination mapd_db database name",
1510  )
1511  optional.add_argument(
1512  "-T",
1513  "--dest-table",
1514  dest="dest_table",
1515  default="results",
1516  help="Destination mapd_db table name",
1517  )
1518  optional.add_argument(
1519  "-C",
1520  "--dest-table-schema-file",
1521  dest="dest_table_schema_file",
1522  default="results_table_schemas/query-results.sql",
1523  help="Destination table schema file. This must be an executable "
1524  + "CREATE TABLE statement that matches the output of this script. It "
1525  + "is required when creating the results table. Default location is "
1526  + 'in "./results_table_schemas/query-results.sql"',
1527  )
1528  optional.add_argument(
1529  "-j",
1530  "--output-file-json",
1531  dest="output_file_json",
1532  help="Absolute path of .json output file "
1533  + '(required if destination = "file_json")',
1534  )
1535  optional.add_argument(
1536  "-J",
1537  "--output-file-jenkins",
1538  dest="output_file_jenkins",
1539  help="Absolute path of jenkins benchmark .json output file "
1540  + '(required if destination = "jenkins_bench")',
1541  )
1542  optional.add_argument(
1543  "-E",
1544  "--output-tag-jenkins",
1545  dest="output_tag_jenkins",
1546  default="",
1547  help="Jenkins benchmark result tag. "
1548  + 'Optional, appended to table name in "group" field',
1549  )
1550  optional.add_argument(
1551  "--setup-teardown-queries-dir",
1552  dest="setup_teardown_queries_dir",
1553  type=str,
1554  default=None,
1555  help='Absolute path to dir with setup & teardown query files. '
1556  'Query files with "setup" in the filename will be executed in '
1557  'the setup stage, likewise query files with "teardown" in '
'the filename will be executed in the tear-down stage. Queries '
1559  'execute in lexical order. [Default: None, meaning this option is '
1560  'not used.]',
1561  )
1562  optional.add_argument(
1563  "--clear-all-memory-pre-query",
1564  dest="clear_all_memory_pre_query",
1565  action="store_true",
1566  help='Clear gpu & cpu memory before every query.'
1567  ' [Default: False]'
1568  )
1569  optional.add_argument(
1570  "--run-setup-teardown-per-query",
1571  dest="run_setup_teardown_per_query",
1572  action="store_true",
1573  help='Run setup & teardown steps per query. '
1574  'If set, setup-teardown-queries-dir must be specified. '
1575  'If not set, but setup-teardown-queries-dir is specified '
1576  'setup & tear-down queries will run globally, that is, '
1577  'once per script invocation.'
1578  ' [Default: False]'
1579  )
1580  optional.add_argument(
1581  "-F",
1582  "--foreign-table-filename",
1583  dest="foreign_table_filename",
1584  default=None,
1585  help="Path to file containing template for import query. "
1586  "Path must be relative to the FOREIGN SERVER. "
1587  "Occurances of \"##FILE##\" within setup/teardown queries will be"
1588  " replaced with this. "
1589  )
1590  optional.add_argument(
1591  "--jenkins-thresholds-name",
1592  dest="jenkins_thresholds_name",
1593  default="average",
1594  help="Name of Jenkins output field.",
1595  )
1596  optional.add_argument(
1597  "--jenkins-thresholds-field",
1598  dest="jenkins_thresholds_field",
1599  default="query_exec_trimmed_avg",
1600  help="Field to report as jenkins output value.",
1601  )
1602  optional.add_argument(
1603  "--cuda-block-grid-perf-test",
1604  dest="cuda_block_grid_perf_test",
1605  action="store_true",
1606  help="Performance test while varying cuda block and grid sizes.",
1607  )
1608  optional.add_argument(
1609  "--show-simplified-result",
1610  dest="show_simplified_result",
1611  action="store_true",
1612  help="Show simplified benchmark result per query via stdout.",
1613  )
1614  optional.add_argument(
1615  "--cuda-grid-sizes",
1616  dest="cuda_grid_sizes",
1617  nargs="+", type=float,
1618  help="List of grid size multipliers used to benchmark (valid iff --cuda-block-grid-perf-test is enabled).",
1619  )
1620  optional.add_argument(
1621  "--cuda-block-sizes",
1622  dest="cuda_block_sizes",
1623  nargs="+", type=int,
1624  help="List of block used to benchmark (valid iff --cuda-block-grid-perf-test is enabled).",
1625  )
1626  args = parser.parse_args(args=input_arguments)
1627  return args
1628 
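
Example: a minimal sketch of driving process_arguments directly with an explicit
argument list, the way benchmark() does. All values are placeholders, and the
long-option spellings are inferred from the corresponding dest names, so treat
them as assumptions rather than definitive flags.

    # Illustrative only: parse a synthetic argument list instead of sys.argv.
    sample_args = [
        "--user", "admin",
        "--passwd", "HyperInteractive",
        "--server", "localhost",
        "--name", "omnisci",
        "--table", "flights_2008_10k",         # assumed source table
        "--label", "nightly",                  # free-form run label
        "--queries-dir", "./queries/flights",  # assumed queries directory
        "--iterations", "5",
        "--destination", "output",
    ]
    args = process_arguments(sample_args)
    print(args.table, args.iterations, args.destination)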


def run_benchmark.read_query_files (   kwargs)
  Reads query files from a directory and returns a query list

  Kwargs:
    queries_dir(str): Directory with query files
    source_table(str): Table to run query against

  Returns:
    query_list(dict):::
        query_group(str): Query group, usually matches table name
        queries(list)
            query(dict):::
                name(str): Name of query
                mapdql(str): Query syntax to run
    False(bool): Unable to find queries dir

Definition at line 277 of file run_benchmark.py.

References File_Namespace.append(), heavyai.open(), split(), and validate_query_file().

Referenced by run_benchmark_arrow.benchmark(), benchmark(), and read_setup_teardown_query_files().

278 def read_query_files(**kwargs):
279  """
280  Reads query files from a directory and returns a query list
281 
282  Kwargs:
283  queries_dir(str): Directory with query files
284  source_table(str): Table to run query against
285 
286  Returns:
287  query_list(dict):::
288  query_group(str): Query group, usually matches table name
289  queries(list)
290  query(dict):::
291  name(str): Name of query
292  mapdql(str): Query syntax to run
293  False(bool): Unable to find queries dir
294  """
295  # Read query files contents and write to query_list
296  query_list = {"query_group": "", "queries": []}
297  query_group = kwargs["queries_dir"].split("/")[-1]
298  query_list.update(query_group=query_group)
299  logging.debug("Queries dir: " + kwargs["queries_dir"])
300  try:
301  for query_filename in sorted(os.listdir(kwargs["queries_dir"])):
302  logging.debug("Validating query filename: " + query_filename)
303  if validate_query_file(query_filename=query_filename):
304  with open(
305  kwargs["queries_dir"] + "/" + query_filename, "r"
306  ) as query_filepath:
307  logging.debug(
308  "Reading query with filename: " + query_filename
309  )
310  query_mapdql = query_filepath.read().replace("\n", " ")
311  query_mapdql = query_mapdql.replace(
312  "##TAB##", kwargs["source_table"]
313  )
314  query_list["queries"].append(
315  {"name": query_filename, "mapdql": query_mapdql}
316  )
317  logging.info("Read all query files")
318  return query_list
319  except FileNotFoundError:
320  logging.exception("Could not find queries directory.")
321  return False
322 
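
Example: a minimal usage sketch; the directory path and table name are
hypothetical. Each valid .sql file becomes one query entry, with ##TAB##
replaced by the source table.

    query_list = read_query_files(
        queries_dir="./queries/flights",   # hypothetical directory
        source_table="flights_2008_10k",   # hypothetical table
    )
    if query_list:  # False when the directory was not found
        for query in query_list["queries"]:
            print(query["name"], "->", query["mapdql"][:60])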

def run_benchmark.read_setup_teardown_query_files (   kwargs)
  Get queries to run for setup and teardown from directory

  Kwargs:
    queries_dir(str): Directory with query files
    source_table(str): Table to run query against
    foreign_table_filename(str): File to create foreign table from

  Returns:
    setup_queries(query_list): List of setup queries
    teardown_queries(query_list): List of teardown queries
    False(bool): Unable to find queries dir

query_list is described by:
query_list(dict):::
    query_group(str): Query group, usually matches table name
    queries(list)
        query(dict):::
            name(str): Name of query
            mapdql(str): Query syntax to run

Definition at line 323 of file run_benchmark.py.

References read_query_files(), and validate_setup_teardown_query_file().

Referenced by benchmark().

324 def read_setup_teardown_query_files(**kwargs):
325  """
326  Get queries to run for setup and teardown from directory
327 
328  Kwargs:
329  queries_dir(str): Directory with query files
330  source_table(str): Table to run query against
331  foreign_table_filename(str): File to create foreign table from
332 
333  Returns:
334  setup_queries(query_list): List of setup queries
335  teardown_queries(query_list): List of teardown queries
336  False(bool): Unable to find queries dir
337 
338  query_list is described by:
339  query_list(dict):::
340  query_group(str): Query group, usually matches table name
341  queries(list)
342  query(dict):::
343  name(str): Name of query
344  mapdql(str): Query syntax to run
345  """
346  setup_teardown_queries_dir = kwargs['queries_dir']
347  source_table = kwargs['source_table']
348  # Read setup/tear-down queries if they exist
349  setup_teardown_query_list = None
350  if setup_teardown_queries_dir is not None:
351  setup_teardown_query_list = read_query_files(
352  queries_dir=setup_teardown_queries_dir,
353  source_table=source_table
354  )
355  if kwargs["foreign_table_filename"] is not None:
356  for query in setup_teardown_query_list['queries']:
357  query['mapdql'] = query['mapdql'].replace(
358  "##FILE##", kwargs["foreign_table_filename"])
359  # Filter setup queries
360  setup_query_list = None
361  if setup_teardown_query_list is not None:
362  setup_query_list = filter(
363  lambda x: validate_setup_teardown_query_file(
364  query_filename=x['name'], check_which='setup', quiet=True),
365  setup_teardown_query_list['queries'])
366  setup_query_list = list(setup_query_list)
367  # Filter teardown queries
368  teardown_query_list = None
369  if setup_teardown_query_list is not None:
370  teardown_query_list = filter(
371  lambda x: validate_setup_teardown_query_file(
372  query_filename=x['name'], check_which='teardown', quiet=True),
373  setup_teardown_query_list['queries'])
374  teardown_query_list = list(teardown_query_list)
375  return setup_query_list, teardown_query_list
376 
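
Example: a usage sketch with hypothetical paths. When foreign_table_filename is
given, ##FILE## placeholders are substituted before the queries are split into
the two stages.

    setup_queries, teardown_queries = read_setup_teardown_query_files(
        queries_dir="./queries/flights_setup_teardown",  # hypothetical
        source_table="flights_2008_10k",                 # hypothetical
        foreign_table_filename="/data/flights.csv",      # hypothetical
    )
    # Each list holds query dicts selected by "setup"/"teardown" in the filename.
    print(len(setup_queries), "setup,", len(teardown_queries), "teardown")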

def run_benchmark.run_query (   kwargs)
  Takes query name, syntax, and iteration count and calls the
    execute_query function for each iteration. Reports total, iteration,
    and exec timings, memory usage, and failure status.

  Kwargs:
    query(dict):::
        name(str): Name of query
        mapdql(str): Query syntax to run
    iterations(int): Number of iterations of each query to run
    trim(float): Trim decimal to remove from top and bottom of results
    con(class 'pymapd.connection.Connection'): Mapd connection
    clear_all_memory_pre_query(bool,optional): Flag to determine if memory is cleared
    between query runs

  Returns:
    query_results(dict):::
        query_name(str): Name of query
        query_mapdql(str): Query to run
        query_id(str): Query ID
        query_succeeded(bool): Query succeeded
        query_error_info(str): Query error info
        result_count(int): Number of results returned
        initial_iteration_results(dict):::
            first_execution_time(float): Execution time for first query
                iteration
            first_connect_time(float):  Connect time for first query
                iteration
            first_results_iter_time(float): Results iteration time for
                first query iteration
            first_total_time(float): Total time for first iteration
            first_cpu_mem_usage(float): CPU memory usage for first query
                iteration
            first_gpu_mem_usage(float): GPU memory usage for first query
                iteration
        noninitial_iteration_results(list):::
            execution_time(float): Time (in ms) that pymapd reports
                the backend spent on the query.
            connect_time(float): Time (in ms) of query overhead,
                calculated by subtracting backend execution time from
                time spent on the execution function.
            results_iter_time(float): Time (in ms) it took for
                pymapd.fetchone() to iterate through all of the results.
            total_time(float): Total time (in ms), the sum of the above times.
        query_total_elapsed_time(int): Total elapsed time for query
    False(bool): The query failed. Exception should be logged.

Definition at line 688 of file run_benchmark.py.

References File_Namespace.append(), clear_memory(), clear_system_caches(), execute_query(), and get_mem_usage().

Referenced by benchmark(), RelAlgExecutor.executeRelAlgQuery(), and run_setup_teardown_query().

689 def run_query(**kwargs):
690  """
691  Takes query name, syntax, and iteration count and calls the
692  execute_query function for each iteration. Reports total, iteration,
693  and exec timings, memory usage, and failure status.
694 
695  Kwargs:
696  query(dict):::
697  name(str): Name of query
698  mapdql(str): Query syntax to run
699  iterations(int): Number of iterations of each query to run
700  trim(float): Trim decimal to remove from top and bottom of results
701  con(class 'pymapd.connection.Connection'): Mapd connection
702  clear_all_memory_pre_query(bool,optional): Flag to determine if memory is cleared
703  between query runs
704 
705  Returns:
706  query_results(dict):::
707  query_name(str): Name of query
708  query_mapdql(str): Query to run
709  query_id(str): Query ID
710  query_succeeded(bool): Query succeeded
711  query_error_info(str): Query error info
712  result_count(int): Number of results returned
713  initial_iteration_results(dict):::
714  first_execution_time(float): Execution time for first query
715  iteration
716  first_connect_time(float): Connect time for first query
717  iteration
718  first_results_iter_time(float): Results iteration time for
719  first query iteration
720  first_total_time(float): Total time for first iteration
721  first_cpu_mem_usage(float): CPU memory usage for first query
722  iteration
723  first_gpu_mem_usage(float): GPU memory usage for first query
724  iteration
725  noninitial_iteration_results(list):::
726  execution_time(float): Time (in ms) that pymapd reports
727  the backend spent on the query.
728  connect_time(float): Time (in ms) of query overhead,
729  calculated by subtracting backend execution time from
730  time spent on the execution function.
731  results_iter_time(float): Time (in ms) it took for
732  pymapd.fetchone() to iterate through all of the results.
733  total_time(float): Total time (in ms), the sum of the above times.
734  query_total_elapsed_time(int): Total elapsed time for query
735  False(bool): The query failed. Exception should be logged.
736  """
737  logging.info(
738  "Running query: "
739  + kwargs["query"]["name"]
740  + " iterations: "
741  + str(kwargs["iterations"])
742  )
743  query_id = kwargs["query"]["name"].rsplit(".")[
744  0
745  ] # Query ID = filename without extension
746  query_results = {
747  "query_name": kwargs["query"]["name"],
748  "query_mapdql": kwargs["query"]["mapdql"],
749  "query_id": query_id,
750  "query_succeeded": True,
751  "query_error_info": "",
752  "initial_iteration_results": {},
753  "noninitial_iteration_results": [],
754  "query_total_elapsed_time": 0,
755  }
756  query_total_start_time = timeit.default_timer()
757  # Run iterations of query
758  for iteration in range(kwargs["iterations"]):
759  # Gather memory before running query iteration
760  logging.debug("Getting pre-query memory usage on CPU")
761  pre_query_cpu_mem_usage = get_mem_usage(
762  con=kwargs["con"], mem_type="cpu"
763  )
764  logging.debug("Getting pre-query memory usage on GPU")
765  pre_query_gpu_mem_usage = get_mem_usage(
766  con=kwargs["con"], mem_type="gpu"
767  )
768  if "clear_all_memory_pre_query" in kwargs and kwargs["clear_all_memory_pre_query"]:
769  # Clear GPU & CPU memory
770  clear_memory(
771  con=kwargs["con"], mem_type="cpu"
772  )
773  clear_memory(
774  con=kwargs["con"], mem_type="gpu"
775  )
776  clear_system_caches()
777  # Run query iteration
778  logging.debug(
779  "Running iteration "
780  + str(iteration)
781  + " of query "
782  + kwargs["query"]["name"]
783  )
784  query_result = execute_query(
785  query_name=kwargs["query"]["name"],
786  query_mapdql=kwargs["query"]["mapdql"],
787  iteration=iteration,
788  con=kwargs["con"],
789  )
790  # Gather memory after running query iteration
791  logging.debug("Getting post-query memory usage on CPU")
792  post_query_cpu_mem_usage = get_mem_usage(
793  con=kwargs["con"], mem_type="cpu"
794  )
795  logging.debug("Getting post-query memory usage on GPU")
796  post_query_gpu_mem_usage = get_mem_usage(
797  con=kwargs["con"], mem_type="gpu"
798  )
799  # Calculate total (post minus pre) memory usage after query iteration
800  query_cpu_mem_usage = round(
801  post_query_cpu_mem_usage["usedram"]
802  - pre_query_cpu_mem_usage["usedram"],
803  1,
804  )
805  query_gpu_mem_usage = round(
806  post_query_gpu_mem_usage["usedram"]
807  - pre_query_gpu_mem_usage["usedram"],
808  1,
809  )
810  if query_result:
811  query_results.update(
812  query_error_info="" # TODO - interpret query error info
813  )
814  # Assign first query iteration times
815  if iteration == 0:
816  first_execution_time = round(query_result["execution_time"], 1)
817  first_connect_time = round(query_result["connect_time"], 1)
818  first_results_iter_time = round(
819  query_result["results_iter_time"], 1
820  )
821  first_total_time = (
822  first_execution_time
823  + first_connect_time
824  + first_results_iter_time
825  )
826  query_results.update(
827  initial_iteration_results={
828  "first_execution_time": first_execution_time,
829  "first_connect_time": first_connect_time,
830  "first_results_iter_time": first_results_iter_time,
831  "first_total_time": first_total_time,
832  "first_cpu_mem_usage": query_cpu_mem_usage,
833  "first_gpu_mem_usage": query_gpu_mem_usage,
834  }
835  )
836  else:
837  # Put noninitial iterations into query_result list
838  query_results["noninitial_iteration_results"].append(
839  query_result
840  )
841  # Verify no change in memory for noninitial iterations
842  if query_cpu_mem_usage != 0.0:
843  logging.error(
844  (
845  "Noninitial iteration ({0}) of query ({1}) "
846  + "shows non-zero CPU memory usage: {2}"
847  ).format(
848  iteration,
849  kwargs["query"]["name"],
850  query_cpu_mem_usage,
851  )
852  )
853  if query_gpu_mem_usage != 0.0:
854  logging.error(
855  (
856  "Noninitial iteration ({0}) of query ({1}) "
857  + "shows non-zero GPU memory usage: {2}"
858  ).format(
859  iteration,
860  kwargs["query"]["name"],
861  query_gpu_mem_usage,
862  )
863  )
864  else:
865  logging.warning(
866  "Error detected during execution of query: "
867  + kwargs["query"]["name"]
868  + ". This query will be skipped and "
869  + "times will not reported"
870  )
871  query_results.update(query_succeeded=False)
872  break
873  # Calculate time for all iterations to run
874  query_total_elapsed_time = round(
875  ((timeit.default_timer() - query_total_start_time) * 1000), 1
876  )
877  query_results.update(query_total_elapsed_time=query_total_elapsed_time)
878  logging.info(
879  "Completed all iterations of query " + kwargs["query"]["name"]
880  )
881  return query_results
882 
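
Example: a driver sketch, assuming con is an open connection obtained from
get_connection and the query text is a placeholder.

    query = {
        "name": "q1.sql",
        "mapdql": "SELECT COUNT(*) FROM flights_2008_10k;",  # placeholder
    }
    # Five iterations; the first is recorded under initial_iteration_results.
    results = run_query(query=query, iterations=5, trim=0.15, con=con)
    if results and results["query_succeeded"]:
        print(results["query_id"], results["query_total_elapsed_time"], "ms")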

def run_benchmark.run_setup_teardown_query (   kwargs)
    Convenience wrapper around `run_query` to run a setup or
    teardown query

  Kwargs:
    queries(query_list): List of queries to run
    do_run(bool): If True, run the queries; otherwise do nothing
    trim(float): Trim decimal to remove from top and bottom of results
    con(class 'pymapd.connection.Connection'): Mapd connection

  Returns:
    See return value for `run_query`

    query_list is described by:
    queries(list)
        query(dict):::
            name(str): Name of query
            mapdql(str): Query syntax to run
            [setup : queries(list)]
            [teardown : queries(list)]

Definition at line 883 of file run_benchmark.py.

References run_query().

Referenced by benchmark().

884 def run_setup_teardown_query(**kwargs):
885  """
886  Convenience wrapper around `run_query` to run a setup or
887  teardown query
888 
889  Kwargs:
890  queries(query_list): List of queries to run
891  do_run(bool): If True, run the queries; otherwise do nothing
892  trim(float): Trim decimal to remove from top and bottom of results
893  con(class 'pymapd.connection.Connection'): Mapd connection
894 
895  Returns:
896  See return value for `run_query`
897 
898  query_list is described by:
899  queries(list)
900  query(dict):::
901  name(str): Name of query
902  mapdql(str): Query syntax to run
903  [setup : queries(list)]
904  [teardown : queries(list)]
905  """
906  query_results = list()
907  if kwargs['do_run']:
908  for query in kwargs['queries']:
909  result = run_query(
910  query=query, iterations=1,
911  trim=kwargs['trim'],
912  con=kwargs['con']
913  )
914  if not result['query_succeeded']:
915  logging.warning(
916  "Error setup or teardown query: "
917  + query["name"]
918  + ". did not complete."
919  )
920  else:
921  query_results.append(result)
922  return query_results
923 
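
Example: a sketch of driving the setup stage the way benchmark() might;
setup_queries and con are assumed from the earlier examples. Because do_run
gates execution, a configuration flag can be passed straight through.

    setup_results = run_setup_teardown_query(
        queries=setup_queries,
        do_run=setup_queries is not None,  # skip cleanly when no dir was given
        trim=0.15,
        con=con,
    )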

def run_benchmark.send_results_db (   kwargs)
  Send results dataset to a database using pymapd

  Kwargs:
    results_dataset(list):::
        result_dataset(dict): Query results dataset
    table(str): Results destination table name
    db_user(str): Results destination user name
    db_passwd(str): Results destination password
    db_server(str): Results destination server address
    db_port(int): Results destination server port
    db_name(str): Results destination database name
    table_schema_file(str): Path to destination database schema file

  Returns:
    True(bool): Sending results to destination database succeeded
    False(bool): Sending results to destination database failed. Exception
        should be logged.

Definition at line 1145 of file run_benchmark.py.

References get_connection(), and heavyai.open().

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1146 def send_results_db(**kwargs):
1147  """
1148  Send results dataset to a database using pymapd
1149 
1150  Kwargs:
1151  results_dataset(list):::
1152  result_dataset(dict): Query results dataset
1153  table(str): Results destination table name
1154  db_user(str): Results destination user name
1155  db_passwd(str): Results destination password
1156  db_server(str): Results destination server address
1157  db_port(int): Results destination server port
1158  db_name(str): Results destination database name
1159  table_schema_file(str): Path to destination database schema file
1160 
1161  Returns:
1162  True(bool): Sending results to destination database succeeded
1163  False(bool): Sending results to destination database failed. Exception
1164  should be logged.
1165  """
1166  # Create dataframe from list of query results
1167  logging.debug("Converting results list to pandas dataframe")
1168  results_df = DataFrame(kwargs["results_dataset"])
1169  # Establish connection to destination db
1170  logging.debug("Connecting to destination db")
1171  dest_con = get_connection(
1172  db_user=kwargs["db_user"],
1173  db_passwd=kwargs["db_passwd"],
1174  db_server=kwargs["db_server"],
1175  db_port=kwargs["db_port"],
1176  db_name=kwargs["db_name"],
1177  )
1178  if not dest_con:
1179  logging.exception("Could not connect to destination db.")
1180  return False
1181  # Load results into db, creating table if it does not exist
1182  tables = dest_con.get_tables()
1183  if kwargs["table"] not in tables:
1184  logging.info("Destination table does not exist. Creating.")
1185  try:
1186  with open(kwargs["table_schema_file"], "r") as table_schema:
1187  logging.debug(
1188  "Reading table_schema_file: " + kwargs["table_schema_file"]
1189  )
1190  create_table_sql = table_schema.read().replace("\n", " ")
1191  create_table_sql = create_table_sql.replace(
1192  "##TAB##", kwargs["table"]
1193  )
1194  except FileNotFoundError:
1195  logging.exception("Could not find destination table_schema_file.")
1196  return False
1197  try:
1198  logging.debug("Executing create destination table query")
1199  dest_con.execute(create_table_sql)
1200  logging.debug("Destination table created.")
1201  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
1202  logging.exception("Error running destination table creation")
1203  return False
1204  logging.info("Loading results into destination db")
1205  try:
1206  dest_con.load_table_columnar(
1207  kwargs["table"],
1208  results_df,
1209  preserve_index=False,
1210  chunk_size_bytes=0,
1211  col_names_from_schema=True,
1212  )
1213  except (pymapd.exceptions.ProgrammingError, pymapd.exceptions.Error):
1214  logging.exception("Error loading results into destination db")
1215  dest_con.close()
1216  return False
1217  dest_con.close()
1218  return True
1219 
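
Example: a call sketch. The connection parameters and table name are
placeholders, results_dataset is assumed to be a previously assembled list of
query result dicts, and the schema path matches the default noted for the
results table schema.

    sent = send_results_db(
        results_dataset=results_dataset,    # assumed list of result dicts
        table="omnisci_benchmark_results",  # placeholder table name
        db_user="admin",
        db_passwd="HyperInteractive",
        db_server="localhost",
        db_port=6274,
        db_name="omnisci",
        table_schema_file="./results_table_schemas/query-results.sql",
    )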

def run_benchmark.send_results_file_json (   kwargs)
  Send results dataset to a local json file

  Kwargs:
    results_dataset_json(str): Json-formatted query results dataset
    output_file_json (str): Location of .json file output

  Returns:
    True(bool): Sending results to json file succeeded
    False(bool): Sending results to json file failed. Exception
        should be logged.

Definition at line 1220 of file run_benchmark.py.

References heavyai.open().

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1221 def send_results_file_json(**kwargs):
1222  """
1223  Send results dataset to a local json file
1224 
1225  Kwargs:
1226  results_dataset_json(str): Json-formatted query results dataset
1227  output_file_json (str): Location of .json file output
1228 
1229  Returns:
1230  True(bool): Sending results to json file succeeded
1231  False(bool): Sending results to json file failed. Exception
1232  should be logged.
1233  """
1234  try:
1235  logging.debug("Opening json output file for writing")
1236  with open(kwargs["output_file_json"], "w") as file_json_open:
1237  logging.info(
1238  "Writing to output json file: " + kwargs["output_file_json"]
1239  )
1240  file_json_open.write(kwargs["results_dataset_json"])
1241  return True
1242  except IOError:
1243  logging.exception("Error writing results to json output file")
1244  return False
1245 

def run_benchmark.send_results_jenkins_bench (   kwargs)
  Send results dataset to a local json file formatted for use with jenkins
    benchmark plugin: https://github.com/jenkinsci/benchmark-plugin

  Kwargs:
    results_dataset(list):::
        result_dataset(dict): Query results dataset
    thresholds_name(str): Name to use for Jenkins result field
    thresholds_field(str): Field to use for query threshold in jenkins
    output_tag_jenkins(str): Jenkins benchmark result tag, for different
        sets from same table
    output_file_jenkins (str): Location of .json jenkins file output

  Returns:
    True(bool): Sending results to json file succeeded
    False(bool): Sending results to json file failed. Exception
        should be logged.

Definition at line 1246 of file run_benchmark.py.

References heavyai.open().

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1247 def send_results_jenkins_bench(**kwargs):
1248  """
1249  Send results dataset to a local json file formatted for use with jenkins
1250  benchmark plugin: https://github.com/jenkinsci/benchmark-plugin
1251 
1252  Kwargs:
1253  results_dataset(list):::
1254  result_dataset(dict): Query results dataset
1255  thresholds_name(str): Name to use for Jenkins result field
1256  thresholds_field(str): Field to use for query threshold in jenkins
1257  output_tag_jenkins(str): Jenkins benchmark result tag, for different
1258  sets from same table
1259  output_file_jenkins (str): Location of .json jenkins file output
1260 
1261  Returns:
1262  True(bool): Sending results to json file succeeded
1263  False(bool): Sending results to json file failed. Exception
1264  should be logged.
1265  """
1266  jenkins_bench_results = []
1267  for result_dataset in kwargs["results_dataset"]:
1268  logging.debug("Constructing output for jenkins benchmark plugin")
1269  jenkins_bench_results.append(
1270  {
1271  "name": result_dataset["query_id"],
1272  "description": "",
1273  "parameters": [],
1274  "results": [
1275  {
1276  "name": result_dataset["query_id"]
1277  + "_"
1278  + kwargs["thresholds_name"],
1279  "description": "",
1280  "unit": "ms",
1281  "dblValue": result_dataset[kwargs["thresholds_field"]],
1282  }
1283  ],
1284  }
1285  )
1286  jenkins_bench_json = json.dumps(
1287  {
1288  "groups": [
1289  {
1290  "name": result_dataset["run_table"]
1291  + kwargs["output_tag_jenkins"],
1292  "description": "Source table: "
1293  + result_dataset["run_table"],
1294  "tests": jenkins_bench_results,
1295  }
1296  ]
1297  }
1298  )
1299  try:
1300  logging.debug("Opening jenkins_bench json output file for writing")
1301  with open(kwargs["output_file_jenkins"], "w") as file_jenkins_open:
1302  logging.info(
1303  "Writing to jenkins_bench json file: "
1304  + kwargs["output_file_jenkins"]
1305  )
1306  file_jenkins_open.write(jenkins_bench_json)
1307  return True
1308  except IOError:
1309  logging.exception("Error writing results to jenkins json output file")
1310  return False
1311 
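
Example: a call sketch using the documented defaults for the thresholds
options; the output path is a placeholder and results_dataset is assumed as
above. Each result contributes one test whose dblValue comes from the chosen
thresholds field.

    ok = send_results_jenkins_bench(
        results_dataset=results_dataset,            # assumed list of result dicts
        thresholds_name="average",                  # default
        thresholds_field="query_exec_trimmed_avg",  # default
        output_tag_jenkins="",
        output_file_jenkins="/tmp/benchmark_jenkins.json",  # placeholder path
    )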

def run_benchmark.send_results_output (   kwargs)
  Send results dataset to script output (stdout)

  Kwargs:
    results_dataset_json(str): Json-formatted query results dataset

  Returns:
    True(bool): Sending results to output succeeded

Definition at line 1312 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

1313 def send_results_output(**kwargs):
1314  """
1315  Send results dataset to script output (stdout)
1316 
1317  Kwargs:
1318  results_dataset_json(str): Json-formatted query results dataset
1319 
1320  Returns:
1321  True(bool): Sending results to output succeeded
1322  """
1323  logging.info("Printing query results to output")
1324  print(kwargs["results_dataset_json"])
1325  return True
1326 


def run_benchmark.validate_query_file (   kwargs)
  Validates query file. Currently only checks the query file name

  Kwargs:
    query_filename(str): Name of query file

  Returns:
    True(bool): Query successfully validated
    False(bool): Query failed validation

Definition at line 421 of file run_benchmark.py.

Referenced by read_query_files().

422 def validate_query_file(**kwargs):
423  """
424  Validates query file. Currently only checks the query file name
425 
426  Kwargs:
427  query_filename(str): Name of query file
428 
429  Returns:
430  True(bool): Query successfully validated
431  False(bool): Query failed validation
432  """
433  if not kwargs["query_filename"].endswith(".sql"):
434  logging.warning(
435  "Query filename "
436  + kwargs["query_filename"]
437  + ' is invalid - does not end in ".sql". Skipping'
438  )
439  return False
440  else:
441  return True
442 
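
Example: the check is purely name-based, as this sketch shows.

    assert validate_query_file(query_filename="q1.sql")         # accepted
    assert not validate_query_file(query_filename="notes.txt")  # rejected, logs a warning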


def run_benchmark.validate_setup_teardown_query_file (   kwargs)
  Validates query file. Currently only checks the query file name, and
  checks for setup or teardown in basename

  Kwargs:
    query_filename(str): Name of query file
    check_which(str): either 'setup' or 'teardown'; decides which to
                      check
    quiet(bool): optional, if True, no warning is logged

  Returns:
    True(bool): Query successfully validated
    False(bool): Query failed validation

Definition at line 377 of file run_benchmark.py.

Referenced by read_setup_teardown_query_files().

379  """
380  Validates query file. Currently only checks the query file name, and
381  checks for setup or teardown in basename
382 
383  Kwargs:
384  query_filename(str): Name of query file
385  check_which(str): either 'setup' or 'teardown'; decides which to
386  check
387  quiet(bool): optional, if True, no warning is logged
388 
389  Returns:
390  True(bool): Query successfully validated
391  False(bool): Query failed validation
392  """
393  qfilename = kwargs["query_filename"]
394  basename = os.path.basename(qfilename)
395  check_str = False
396  if kwargs["check_which"] == 'setup':
397  check_str = basename.lower().find('setup') > -1
398  elif kwargs["check_which"] == 'teardown':
399  check_str = basename.lower().find('teardown') > -1
400  else:
401  raise TypeError('Unsupported `check_which` parameter.')
402  return_val = True
403  if not qfilename.endswith(".sql"):
404  logging.warning(
405  "Query filename "
406  + qfilename
407  + ' is invalid - does not end in ".sql". Skipping'
408  )
409  return_val = False
410  elif not check_str:
411  quiet = True if 'quiet' in kwargs and kwargs['quiet'] else False
412  if not quiet:
413  logging.warning(
414  "Query filename "
415  + qfilename
416  + ' does not match "setup" or "teardown". Skipping'
417  )
418  return_val = False
419  return return_val
420 
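
Example: how read_setup_teardown_query_files applies this validator as a
filter predicate (query_list is assumed to be the structure returned by
read_query_files).

    # Keep only filenames containing "setup"; quiet=True suppresses the
    # warning for files that belong to the teardown stage instead.
    setup_only = list(filter(
        lambda q: validate_setup_teardown_query_file(
            query_filename=q["name"], check_which="setup", quiet=True),
        query_list["queries"]))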

def run_benchmark.verify_destinations (   kwargs)
  Verify script output destination(s)

  Kwargs:
    destinations (list): List of destinations
    dest_db_server (str): DB output destination server
    output_file_json (str): Location of .json file output
    output_file_jenkins (str): Location of .json jenkins file output

  Returns:
    True(bool): Destination(s) is/are valid
    False(bool): Destination(s) is/are not valid

Definition at line 19 of file run_benchmark.py.

Referenced by run_benchmark_arrow.benchmark(), and benchmark().

19 
20 def verify_destinations(**kwargs):
21  """
22  Verify script output destination(s)
23 
24  Kwargs:
25  destinations (list): List of destinations
26  dest_db_server (str): DB output destination server
27  output_file_json (str): Location of .json file output
28  output_file_jenkins (str): Location of .json jenkins file output
29 
30  Returns:
31  True(bool): Destination(s) is/are valid
32  False(bool): Destination(s) is/are not valid
33  """
34  if "mapd_db" in kwargs["destinations"]:
35  valid_destination_set = True
36  if kwargs["dest_db_server"] is None:
37  # If dest_server is not set for mapd_db, then exit
38  logging.error(
39  '"dest_server" is required when destination = "mapd_db"'
40  )
41  if "file_json" in kwargs["destinations"]:
42  valid_destination_set = True
43  if kwargs["output_file_json"] is None:
44  # If output_file_json is not set for file_json, then exit
45  logging.error(
46  '"output_file_json" is required when destination = "file_json"'
47  )
48  if "output" in kwargs["destinations"]:
49  valid_destination_set = True
50  if "jenkins_bench" in kwargs["destinations"]:
51  valid_destination_set = True
52  if kwargs["output_file_jenkins"] is None:
53  # If output_file_jenkins is not set for jenkins_bench, then exit
54  logging.error(
55  '"output_file_jenkins" is required '
56  + 'when destination = "jenkins_bench"'
57  )
58  if not valid_destination_set:
59  return False
60  else:
61  return True
62 
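
Example: a call sketch mirroring how benchmark() validates its destinations
before running; all values are placeholders.

    if not verify_destinations(
        destinations=["mapd_db", "file_json"],
        dest_db_server="localhost",
        output_file_json="/tmp/benchmark_results.json",  # placeholder path
        output_file_jenkins=None,
    ):
        raise SystemExit("Invalid output destination(s)")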