OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
TokenCompletionHints.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "TokenCompletionHints.h"
18 #include "Shared/StringTransform.h"
19 
20 #include <boost/algorithm/string.hpp>
21 #include <boost/algorithm/string/predicate.hpp>
22 #include <boost/tokenizer.hpp>
23 
namespace {

// Returns true when `ch` may appear inside a (possibly table-qualified)
// identifier: an alphanumeric character, an underscore, or the '.' separator.
bool is_qualified_identifier_part(const char ch) {
  // The <cctype> classifiers require a value representable as unsigned char;
  // passing a negative char (any non-ASCII byte) directly is undefined behavior.
  return isalnum(static_cast<unsigned char>(ch)) || ch == '_' || ch == '.';
}

}  // namespace
31 
32 // Straightforward port from SqlAdvisor.getCompletionHints.
33 std::string find_last_word_from_cursor(const std::string& sql, const int64_t cursor) {
34  if (cursor > static_cast<int64_t>(sql.size())) {
35  return "";
36  }
37  auto word_start = cursor;
38  bool quoted = false;
39  while (word_start > 0 && is_qualified_identifier_part(sql[word_start - 1])) {
40  --word_start;
41  }
42  if ((word_start > 0) && (sql[word_start - 1] == '"')) {
43  quoted = true;
44  --word_start;
45  }
46 
47  if (word_start < 0) {
48  return "";
49  }
50 
51  // Search forwards to the end of the word we should remove. Eat up
52  // trailing double-quote, if any
53  auto word_end = cursor;
54  while (word_end < static_cast<int64_t>(sql.size()) &&
55  is_qualified_identifier_part(sql[word_end - 1])) {
56  ++word_end;
57  }
58  if (quoted && (word_end < static_cast<int64_t>(sql.size())) && (sql[word_end] == '"')) {
59  ++word_end;
60  }
61  std::string last_word(sql.begin() + word_start + (quoted ? 1 : 0),
62  sql.begin() + cursor);
63  return last_word;
64 }
65 
66 std::vector<TCompletionHint> just_whitelisted_keyword_hints(
67  const std::vector<TCompletionHint>& hints) {
68  static const std::unordered_set<std::string> whitelisted_keywords{
69  "WHERE", "GROUP", "BY", "COUNT", "AVG", "MAX", "MIN",
70  "SUM", "STDDEV_POP", "STDDEV_SAMP", "AS", "HAVING", "INNER", "JOIN",
71  "LEFT", "LIMIT", "OFFSET", "ON", "ORDER", "OUTER", "ASC",
72  "DESC", "DISTINCT", "IN", "IS", "NULL", "NOT", "AND",
73  "OR", "LIKE", "*", "(", ")"};
74  std::vector<TCompletionHint> filtered;
75  for (const auto& original_hint : hints) {
76  if (original_hint.type != TCompletionHintType::KEYWORD) {
77  filtered.push_back(original_hint);
78  continue;
79  }
80  auto filtered_hint = original_hint;
81  filtered_hint.hints.clear();
82  for (const auto& hint_token : original_hint.hints) {
83  if (whitelisted_keywords.find(to_upper(hint_token)) != whitelisted_keywords.end()) {
84  filtered_hint.hints.push_back(hint_token);
85  }
86  }
87  if (!filtered_hint.hints.empty()) {
88  filtered.push_back(filtered_hint);
89  }
90  }
91  return filtered;
92 }
93 
95  std::vector<TCompletionHint>& hints,
96  const std::string& last_word,
97  const std::unordered_map<std::string, std::unordered_set<std::string>>&
98  column_names_by_table) {
99  std::vector<std::string> last_word_tokens;
100  boost::split(last_word_tokens, last_word, boost::is_any_of("."));
101  if (last_word_tokens.size() < 2) {
102  return false;
103  }
104  const auto table_name = last_word_tokens[last_word_tokens.size() - 2];
105  const auto col_names_it = column_names_by_table.find(table_name);
106  if (col_names_it == column_names_by_table.end()) {
107  return false;
108  }
109  TCompletionHint column_hint;
110  column_hint.type = TCompletionHintType::COLUMN;
111  column_hint.replaced = last_word;
112  for (const auto& col_name : col_names_it->second) {
113  if (boost::istarts_with(col_name, last_word_tokens.back())) {
114  auto qualified_name = last_word_tokens;
115  qualified_name.back() = col_name;
116  column_hint.hints.push_back(boost::algorithm::join(qualified_name, "."));
117  }
118  }
119  if (!column_hint.hints.empty()) {
120  hints.push_back(column_hint);
121  }
122  return true;
123 }
124 
126  std::vector<TCompletionHint>& hints,
127  const std::string& last_word,
128  const std::unordered_map<std::string, std::unordered_set<std::string>>&
129  column_names_by_table) {
130  TCompletionHint column_hint;
131  column_hint.type = TCompletionHintType::COLUMN;
132  column_hint.replaced = last_word;
133  std::unordered_set<std::string> column_hints_deduped;
134  for (const auto& kv : column_names_by_table) {
135  for (const auto& col_name : kv.second) {
136  if (boost::istarts_with(col_name, last_word)) {
137  column_hints_deduped.insert(col_name);
138  }
139  }
140  }
141  column_hint.hints.insert(
142  column_hint.hints.end(), column_hints_deduped.begin(), column_hints_deduped.end());
143  if (!column_hint.hints.empty()) {
144  hints.push_back(column_hint);
145  }
146 }
147 
148 bool should_suggest_column_hints(const std::string& partial_query) {
149  boost::char_separator<char> sep(" \t\n", ",");
150  boost::tokenizer<boost::char_separator<char>> tokens(partial_query, sep);
151  const auto token_count = std::distance(tokens.begin(), tokens.end());
152  if (token_count == 1) {
153  return true;
154  }
155  std::string last_token;
156  std::string prev_to_last_token;
157  for (const auto& token : tokens) {
158  prev_to_last_token = last_token;
159  last_token = token;
160  }
161  return last_token == "," || (!partial_query.empty() && !isspace(partial_query.back()) &&
162  (prev_to_last_token.empty() || prev_to_last_token == "," ||
163  to_upper(prev_to_last_token) == "SELECT"));
164 }
std::vector< TCompletionHint > just_whitelisted_keyword_hints(const std::vector< TCompletionHint > &hints)
std::string join(T const &container, std::string const &delim)
void get_column_hints(std::vector< TCompletionHint > &hints, const std::string &last_word, const std::unordered_map< std::string, std::unordered_set< std::string >> &column_names_by_table)
bool get_qualified_column_hints(std::vector< TCompletionHint > &hints, const std::string &last_word, const std::unordered_map< std::string, std::unordered_set< std::string >> &column_names_by_table)
std::string find_last_word_from_cursor(const std::string &sql, const int64_t cursor)
std::vector< std::string > split(std::string_view str, std::string_view delim, std::optional< size_t > maxsplit)
split apart a string into a vector of substrings
bool should_suggest_column_hints(const std::string &partial_query)
std::string to_upper(const std::string &str)