OmniSciDB
a5dc49c757
|
#include "QueryTemplateGenerator.h"
#include "IRCodegenUtils.h"
#include "Logger/Logger.h"
#include <llvm/IR/Constants.h>
#include <llvm/IR/IRBuilder.h>
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Verifier.h>
Go to the source code of this file.
Classes | |
class | anonymous_namespace{QueryTemplateGenerator.cpp}::Params< NTYPES > |
Namespaces | |
anonymous_namespace{QueryTemplateGenerator.cpp} | |
Functions | |
template<typename... ATTRS> | |
llvm::AttributeList | anonymous_namespace{QueryTemplateGenerator.cpp}::make_attribute_list (llvm::Module const *const mod, unsigned const index, ATTRS const ...attrs) |
template<bool IS_GROUP_BY, size_t NTYPES = 13u> | |
Params< NTYPES > | anonymous_namespace{QueryTemplateGenerator.cpp}::make_params (llvm::Module const *const mod, bool const hoist_literals) |
llvm::Type * | anonymous_namespace{QueryTemplateGenerator.cpp}::get_pointer_element_type (llvm::Value *value) |
llvm::Function * | anonymous_namespace{QueryTemplateGenerator.cpp}::default_func_builder (llvm::Module *mod, const std::string &name) |
llvm::Function * | anonymous_namespace{QueryTemplateGenerator.cpp}::pos_start (llvm::Module *mod) |
llvm::Function * | anonymous_namespace{QueryTemplateGenerator.cpp}::group_buff_idx (llvm::Module *mod) |
llvm::Function * | anonymous_namespace{QueryTemplateGenerator.cpp}::pos_step (llvm::Module *mod) |
llvm::Function * | anonymous_namespace{QueryTemplateGenerator.cpp}::row_process (llvm::Module *mod, const size_t aggr_col_count, const bool hoist_literals) |
std::tuple< llvm::Function *, llvm::CallInst * > | query_template (llvm::Module *mod, const size_t aggr_col_count, const bool hoist_literals, const bool is_estimate_query, const GpuSharedMemoryContext &gpu_smem_context) |
std::tuple< llvm::Function *, llvm::CallInst * > | query_group_by_template (llvm::Module *mod, const bool hoist_literals, const QueryMemoryDescriptor &query_mem_desc, const ExecutorDeviceType device_type, const bool check_scan_limit, const GpuSharedMemoryContext &gpu_smem_context) |
std::tuple<llvm::Function*, llvm::CallInst*> query_group_by_template(
    llvm::Module* mod,
    const bool hoist_literals,
    const QueryMemoryDescriptor& query_mem_desc,
    const ExecutorDeviceType device_type,
    const bool check_scan_limit,
    const GpuSharedMemoryContext& gpu_smem_context)
Definition at line 553 of file QueryTemplateGenerator.cpp.
References CHECK, logger::FATAL, get_arg_by_name(), anonymous_namespace{QueryTemplateGenerator.cpp}::get_pointer_element_type(), GpuSharedMemoryContext::getSharedMemorySize(), GPU, anonymous_namespace{QueryTemplateGenerator.cpp}::group_buff_idx(), QueryMemoryDescriptor::hasVarlenOutput(), GpuSharedMemoryContext::isSharedMemoryUsed(), QueryMemoryDescriptor::isWarpSyncRequired(), LLVM_ALIGN, LOG, anonymous_namespace{QueryTemplateGenerator.cpp}::pos_start(), anonymous_namespace{QueryTemplateGenerator.cpp}::pos_step(), and anonymous_namespace{QueryTemplateGenerator.cpp}::row_process().
std::tuple<llvm::Function*, llvm::CallInst*> query_template(
    llvm::Module* mod,
    const size_t aggr_col_count,
    const bool hoist_literals,
    const bool is_estimate_query,
    const GpuSharedMemoryContext& gpu_smem_context)
If GPU shared memory optimization is disabled, for each aggregate target, threads copy their aggregate results (stored in registers) back into memory. This process is performed per processed fragment. On the host, the final results are reduced (per target, for all threads and all fragments).
If GPU shared memory optimization is enabled, we properly (atomically) aggregate all threads' results into memory, which makes the final reduction on the host much cheaper. Here, we call a no-op dummy write-back function which will be properly replaced at runtime depending on the target expressions.
Definition at line 266 of file QueryTemplateGenerator.cpp.
References anonymous_namespace{RuntimeFunctions.cpp}::agg_func(), CHECK, logger::FATAL, get_arg_by_name(), anonymous_namespace{QueryTemplateGenerator.cpp}::get_pointer_element_type(), anonymous_namespace{QueryTemplateGenerator.cpp}::group_buff_idx(), GpuSharedMemoryContext::isSharedMemoryUsed(), LLVM_ALIGN, LOG, anonymous_namespace{QueryTemplateGenerator.cpp}::pos_start(), anonymous_namespace{QueryTemplateGenerator.cpp}::pos_step(), run_benchmark_import::result, anonymous_namespace{QueryTemplateGenerator.cpp}::row_process(), and to_string().