OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
ShowModelFeatureDetailsCommand Class Reference

#include <DdlCommandExecutor.h>

+ Inheritance diagram for ShowModelFeatureDetailsCommand:
+ Collaboration diagram for ShowModelFeatureDetailsCommand:

Public Member Functions

 ShowModelFeatureDetailsCommand (const DdlCommandData &ddl_data, std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
 
ExecutionResult execute (bool read_only_mode) override
 
- Public Member Functions inherited from DdlCommand
 DdlCommand (const DdlCommandData &ddl_data, std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)
 

Private Member Functions

std::vector< TargetMetaInfo > prepareLabelInfos () const
 
std::pair< std::vector< double >
, std::vector< std::vector
< double > > > 
extractExtraMetadata (std::shared_ptr< AbstractMLModel > model, std::vector< TargetMetaInfo > &label_infos) const
 
std::vector
< RelLogicalValues::RowValues > 
prepareLogicalValues (const MLModelMetadata &model_metadata, const std::vector< std::vector< std::string >> &cat_sub_features, std::vector< double > &extra_metadata, const std::vector< std::vector< double >> &eigenvectors, const std::vector< int64_t > &inverse_permutations) const
 

Additional Inherited Members

- Protected Attributes inherited from DdlCommand
const DdlCommandData & ddl_data_
 
std::shared_ptr
< Catalog_Namespace::SessionInfo
const > 
session_ptr_
 

Detailed Description

Definition at line 295 of file DdlCommandExecutor.h.

Constructor & Destructor Documentation

ShowModelFeatureDetailsCommand::ShowModelFeatureDetailsCommand ( const DdlCommandData & ddl_data,
std::shared_ptr< Catalog_Namespace::SessionInfo const >  session_ptr 
)

Definition at line 2197 of file DdlCommandExecutor.cpp.

References g_enable_ml_functions, and g_restrict_ml_model_metadata_to_superusers.

2200  : DdlCommand(ddl_data, session_ptr) {
2201  if (!g_enable_ml_functions) {
2202  throw std::runtime_error(
2203  "Cannot show model feature details. ML functions are disabled.");
2204  }
2205  if (g_restrict_ml_model_metadata_to_superusers) {
2206  // Check if user is super user
2207  const auto& current_user = session_ptr->get_currentUser();
2208  if (!current_user.isSuper) {
2209  throw std::runtime_error(
2210  "Cannot show model feature details. Showing model information to "
2211  "non-superusers is "
2212  "disabled.");
2213  }
2214  }
2215 }
bool g_restrict_ml_model_metadata_to_superusers
Definition: Execute.cpp:123
bool g_enable_ml_functions
Definition: Execute.cpp:122
DdlCommand(const DdlCommandData &ddl_data, std::shared_ptr< Catalog_Namespace::SessionInfo const > session_ptr)

Member Function Documentation

ExecutionResult ShowModelFeatureDetailsCommand::execute ( bool  read_only_mode)
override virtual

Executes the DDL command corresponding to provided JSON payload.

Parameters
_return  result of DDL command execution (if applicable)

Implements DdlCommand.

Definition at line 2347 of file DdlCommandExecutor.cpp.

References CHECK, ResultSetLogicalValuesBuilder::create(), DdlCommand::ddl_data_, extractExtraMetadata(), anonymous_namespace{DdlCommandExecutor.cpp}::extractPayload(), g_ml_models, legacylockmgr::getExecuteReadLock(), MLModelMap::getModel(), prepareLabelInfos(), and prepareLogicalValues().

Referenced by heavydb.cursor.Cursor::executemany().

2347  {
2348  auto execute_read_lock = legacylockmgr::getExecuteReadLock();
2349  auto& ddl_payload = extractPayload(ddl_data_);
2350  CHECK(ddl_payload.HasMember("modelName")) << "Model name missing.";
2351  const auto model_name = ddl_payload["modelName"].GetString();
2352  const auto model = g_ml_models.getModel(model_name);
2353  const auto model_metadata = model->getModelMetadata();
2354  // const auto& features = model_metadata.getFeatures();
2355  // const auto model_type = model_metadata.getModelType();
2356  const auto& feature_permutations = model_metadata.getFeaturePermutations();
2357 
2358  std::vector<int64_t> inverse_permutations(feature_permutations.size());
2359  for (int64_t perm_idx = 0; perm_idx < static_cast<int64_t>(feature_permutations.size());
2360  ++perm_idx) {
2361  inverse_permutations[feature_permutations[perm_idx]] = perm_idx;
2362  }
2363 
2364  auto label_infos = prepareLabelInfos();
2365  auto [extra_metadata, eigenvectors] = extractExtraMetadata(model, label_infos);
2366 
2367  // Todo(todd): Make cat_sub_features accessible from MLModelMetadata so we don't have to
2368  // access and pass it separately
2369  const auto& cat_sub_features = model->getCatFeatureKeys();
2370  auto logical_values = prepareLogicalValues(model_metadata,
2371  cat_sub_features,
2372  extra_metadata,
2373  eigenvectors,
2374  inverse_permutations);
2375 
2376  // Create ResultSet
2377  std::shared_ptr<ResultSet> rSet = std::shared_ptr<ResultSet>(
2378  ResultSetLogicalValuesBuilder::create(label_infos, logical_values));
2379 
2380  return ExecutionResult(rSet, label_infos);
2381 }
std::pair< std::vector< double >, std::vector< std::vector< double > > > extractExtraMetadata(std::shared_ptr< AbstractMLModel > model, std::vector< TargetMetaInfo > &label_infos) const
auto getExecuteReadLock()
const DdlCommandData & ddl_data_
std::vector< TargetMetaInfo > prepareLabelInfos() const
const rapidjson::Value & extractPayload(const DdlCommandData &ddl_data)
std::shared_ptr< AbstractMLModel > getModel(const std::string &model_name) const
Definition: MLModel.h:51
static ResultSet * create(std::vector< TargetMetaInfo > &label_infos, std::vector< RelLogicalValues::RowValues > &logical_values)
MLModelMap g_ml_models
Definition: MLModel.h:125
#define CHECK(condition)
Definition: Logger.h:291
std::vector< RelLogicalValues::RowValues > prepareLogicalValues(const MLModelMetadata &model_metadata, const std::vector< std::vector< std::string >> &cat_sub_features, std::vector< double > &extra_metadata, const std::vector< std::vector< double >> &eigenvectors, const std::vector< int64_t > &inverse_permutations) const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::pair< std::vector< double >, std::vector< std::vector< double > > > ShowModelFeatureDetailsCommand::extractExtraMetadata ( std::shared_ptr< AbstractMLModel > model,
std::vector< TargetMetaInfo > &  label_infos 
) const
private

Definition at line 2227 of file DdlCommandExecutor.cpp.

References CHECK_EQ, LinearRegressionModel::getCoefs(), PcaModel::getEigenvalues(), kDOUBLE, kTEXT, LINEAR_REG, PCA, and RANDOM_FOREST_REG.

Referenced by execute().

2229  {
2230  std::vector<double> extra_metadata;
2231  std::vector<std::vector<double>> eigenvectors;
2232 
2233  switch (model->getModelType()) {
2234  case MLModelType::LINEAR_REG: {
2235  label_infos.emplace_back("coefficient", SQLTypeInfo(kDOUBLE, true));
2236  const auto linear_reg_model =
2237  std::dynamic_pointer_cast<LinearRegressionModel>(model);
2238  extra_metadata = linear_reg_model->getCoefs();
2239  break;
2240  }
2241 #ifdef HAVE_ONEDAL
2242  case MLModelType::RANDOM_FOREST_REG: {
2243  const auto random_forest_reg_model =
2244  std::dynamic_pointer_cast<AbstractRandomForestModel>(model);
2245  extra_metadata = random_forest_reg_model->getVariableImportanceScores();
2246  if (!extra_metadata.empty()) {
2247  label_infos.emplace_back("feature_importance", SQLTypeInfo(kDOUBLE, true));
2248  }
2249  break;
2250  }
2251  case MLModelType::PCA: {
2252  label_infos.emplace_back("eigenvalue", SQLTypeInfo(kDOUBLE, true));
2253  label_infos.emplace_back("eigenvector", SQLTypeInfo(kTEXT, true));
2254  const auto pca_model = std::dynamic_pointer_cast<PcaModel>(model);
2255  extra_metadata = pca_model->getEigenvalues();
2256  eigenvectors = pca_model->getEigenvectors();
2257  CHECK_EQ(eigenvectors.size(), extra_metadata.size());
2258  break;
2259  }
2260 #endif // HAVE_ONEDAL
2261  default: {
2262  break;
2263  }
2264  }
2265 
2266  return std::make_pair(std::move(extra_metadata), std::move(eigenvectors));
2267 }
#define CHECK_EQ(x, y)
Definition: Logger.h:301
Definition: sqltypes.h:79
const std::vector< double > & getCoefs() const
Definition: MLModel.h:146
const std::vector< double > & getEigenvalues() const
Definition: MLModel.h:467

+ Here is the call graph for this function:

+ Here is the caller graph for this function:

std::vector< TargetMetaInfo > ShowModelFeatureDetailsCommand::prepareLabelInfos ( ) const
private

Definition at line 2217 of file DdlCommandExecutor.cpp.

References kBIGINT, and kTEXT.

Referenced by execute().

2217  {
2218  std::vector<TargetMetaInfo> label_infos;
2219  label_infos.emplace_back("feature_id", SQLTypeInfo(kBIGINT, true));
2220  label_infos.emplace_back("feature", SQLTypeInfo(kTEXT, true));
2221  label_infos.emplace_back("sub_feature_id", SQLTypeInfo(kBIGINT, true));
2222  label_infos.emplace_back("sub_feature", SQLTypeInfo(kTEXT, true));
2223  return label_infos;
2224 }
Definition: sqltypes.h:79

+ Here is the caller graph for this function:

std::vector< RelLogicalValues::RowValues > ShowModelFeatureDetailsCommand::prepareLogicalValues ( const MLModelMetadata & model_metadata,
const std::vector< std::vector< std::string >> &  cat_sub_features,
std::vector< double > &  extra_metadata,
const std::vector< std::vector< double >> &  eigenvectors,
const std::vector< int64_t > &  inverse_permutations 
) const
private

Definition at line 2270 of file DdlCommandExecutor.cpp.

References anonymous_namespace{DdlCommandExecutor.cpp}::genLiteralBigInt(), anonymous_namespace{DdlCommandExecutor.cpp}::genLiteralDouble(), genLiteralStr(), MLModelMetadata::getFeatures(), MLModelMetadata::getModelType(), and LINEAR_REG.

Referenced by execute().

2275  {
2276  std::vector<RelLogicalValues::RowValues> logical_values;
2277  const auto model_type = model_metadata.getModelType();
2278  if (model_type == MLModelType::LINEAR_REG) {
2279  logical_values.emplace_back(RelLogicalValues::RowValues{});
2280  logical_values.back().emplace_back(genLiteralBigInt(0));
2281  logical_values.back().emplace_back(genLiteralStr("intercept"));
2282  logical_values.back().emplace_back(genLiteralBigInt(1));
2283  logical_values.back().emplace_back(genLiteralStr(""));
2284  logical_values.back().emplace_back(genLiteralDouble(extra_metadata[0]));
2285  extra_metadata.erase(extra_metadata.begin());
2286  }
2287  const auto& features = model_metadata.getFeatures();
2288  const int64_t num_features = static_cast<int64_t>(features.size());
2289  std::vector<int64_t> physical_feature_idx_prefix_sums = {0};
2290  for (int64_t feature_idx = 1; feature_idx < num_features; ++feature_idx) {
2291  if (feature_idx - 1 < static_cast<int64_t>(cat_sub_features.size())) {
2292  physical_feature_idx_prefix_sums.emplace_back(
2293  physical_feature_idx_prefix_sums.back() +
2294  static_cast<int64_t>(cat_sub_features[feature_idx - 1].size()));
2295  } else {
2296  physical_feature_idx_prefix_sums.emplace_back(
2297  physical_feature_idx_prefix_sums.back() + 1);
2298  }
2299  }
2300  for (int64_t original_feature_idx = 0; original_feature_idx < num_features;
2301  ++original_feature_idx) {
2302  const auto feature_idx = inverse_permutations.empty()
2303  ? original_feature_idx
2304  : inverse_permutations[original_feature_idx];
2305  int64_t num_sub_features =
2306  feature_idx >= static_cast<int64_t>(cat_sub_features.size())
2307  ? 0
2308  : static_cast<int64_t>(cat_sub_features[feature_idx].size());
2309  const bool has_sub_features = num_sub_features > 0;
2310  num_sub_features = num_sub_features == 0 ? 1 : num_sub_features;
2311  int64_t physical_feature_idx = physical_feature_idx_prefix_sums[feature_idx];
2312  for (int64_t sub_feature_idx = 0; sub_feature_idx < num_sub_features;
2313  ++sub_feature_idx) {
2314  logical_values.emplace_back(RelLogicalValues::RowValues{});
2315  // Make feature id one-based
2316  logical_values.back().emplace_back(genLiteralBigInt(original_feature_idx + 1));
2317  logical_values.back().emplace_back(genLiteralStr(features[original_feature_idx]));
2318  logical_values.back().emplace_back(genLiteralBigInt(sub_feature_idx + 1));
2319  if (has_sub_features) {
2320  logical_values.back().emplace_back(
2321  genLiteralStr(cat_sub_features[feature_idx][sub_feature_idx]));
2322  } else {
2323  logical_values.back().emplace_back(genLiteralStr(""));
2324  }
2325  if (!extra_metadata.empty()) {
2326  logical_values.back().emplace_back(
2327  genLiteralDouble(extra_metadata[physical_feature_idx]));
2328  }
2329  if (!eigenvectors.empty()) {
2330  std::ostringstream eigenvector_oss;
2331  eigenvector_oss << "[";
2332  for (size_t i = 0; i < eigenvectors[physical_feature_idx].size(); ++i) {
2333  if (i > 0) {
2334  eigenvector_oss << ", ";
2335  }
2336  eigenvector_oss << eigenvectors[physical_feature_idx][i];
2337  }
2338  eigenvector_oss << "]";
2339  logical_values.back().emplace_back(genLiteralStr(eigenvector_oss.str()));
2340  }
2341  physical_feature_idx++;
2342  }
2343  }
2344  return logical_values;
2345 }
std::unique_ptr< RexLiteral > genLiteralDouble(double val)
const std::vector< std::string > & getFeatures() const
std::unique_ptr< RexLiteral > genLiteralBigInt(int64_t val)
static std::unique_ptr< RexLiteral > genLiteralStr(std::string val)
Definition: DBHandler.cpp:7770
std::vector< std::unique_ptr< const RexScalar >> RowValues
Definition: RelAlgDag.h:2656
const MLModelType getModelType() const

+ Here is the call graph for this function:

+ Here is the caller graph for this function:


The documentation for this class was generated from the following files: