OmniSciDB  a5dc49c757
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Groups Pages
StringLike.cpp
Go to the documentation of this file.
1 /*
2  * Copyright 2022 HEAVY.AI, Inc.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
24 #include "StringLike.h"
25 
// Outcome of a LIKE/ILIKE match attempt, as reported by string_like_match.
enum LikeStatus {
  kLIKE_TRUE,   // the string matches the pattern
  kLIKE_FALSE,  // the string does not match the pattern
  kLIKE_ABORT,  // means we ran out of string characters to match against the pattern,
                // so the search can be aborted early
  kLIKE_ERROR   // error condition (e.g. malformed pattern)
};
33 
// Maps an ASCII uppercase letter ('A'..'Z') to its lowercase counterpart; every
// other character is returned unchanged.
DEVICE static inline int lowercase(char c) {
  return (c >= 'A' && c <= 'Z') ? (c - 'A') + 'a' : c;
}
40 
// Case-sensitive substring search: returns true when the pat_len bytes of `pattern`
// occur contiguously anywhere inside the str_len bytes of `str`.
// escape_char is intentionally unused; it is a placeholder so that string_like and
// string_like_simple take the same number of arguments.
extern "C" RUNTIME_EXPORT DEVICE bool string_like_simple(const char* str,
                                                         const int32_t str_len,
                                                         const char* pattern,
                                                         const int32_t pat_len,
                                                         char escape_char) {
  // Number of start offsets at which the whole pattern still fits inside str.
  const int num_starts = str_len - pat_len + 1;
  int start = 0;
  while (start < num_starts) {
    int matched = 0;
    while (matched < pat_len && str[start + matched] == pattern[matched]) {
      ++matched;
    }
    if (matched >= pat_len) {
      return true;
    }
    ++start;
  }
  return false;
}
59 
60 // escape_char does nothing and it is intentional as describe above
61 extern "C" RUNTIME_EXPORT DEVICE bool string_ilike_simple(const char* str,
62  const int32_t str_len,
63  const char* pattern,
64  const int32_t pat_len,
65  char escape_char) {
66  int i, j;
67  int search_len = str_len - pat_len + 1;
68  for (i = 0; i < search_len; ++i) {
69  for (j = 0; j < pat_len && pattern[j] == lowercase(str[j + i]); ++j) {
70  }
71  if (j >= pat_len) {
72  return true;
73  }
74  }
75  return false;
76 }
77 
78 #define STR_LIKE_SIMPLE_NULLABLE(base_func) \
79  extern "C" RUNTIME_EXPORT DEVICE int8_t base_func##_nullable(const char* lhs, \
80  const int32_t lhs_len, \
81  const char* rhs, \
82  const int32_t rhs_len, \
83  char escape_char, \
84  const int8_t bool_null) { \
85  if (!lhs || !rhs) { \
86  return bool_null; \
87  } \
88  return base_func(lhs, lhs_len, rhs, rhs_len, escape_char) ? 1 : 0; \
89  }
90 
93 
94 #undef STR_LIKE_SIMPLE_NULLABLE
95 
96 // internal recursive function for performing LIKE matching.
97 // when is_ilike is true, pattern is assumed to be already converted to all lowercase
98 DEVICE static LikeStatus string_like_match(const char* str,
99  const int32_t str_len,
100  const char* pattern,
101  const int32_t pat_len,
102  const char escape_char,
103  const bool is_ilike) {
104  const char* s = str;
105  int slen = str_len;
106  const char* p = pattern;
107  int plen = pat_len;
108 
109  while (slen > 0 && plen > 0) {
110  if (*p == escape_char) {
111  // next pattern char must match literally, whatever it is
112  p++;
113  plen--;
114  if (plen <= 0) {
115  return kLIKE_ERROR;
116  }
117  if ((!is_ilike && *s != *p) || (is_ilike && lowercase(*s) != *p)) {
118  return kLIKE_FALSE;
119  }
120  } else if (*p == '%') {
121  char firstpat;
122  p++;
123  plen--;
124  while (plen > 0) {
125  if (*p == '%') {
126  p++;
127  plen--;
128  } else if (*p == '_') {
129  if (slen <= 0) {
130  return kLIKE_ABORT;
131  }
132  s++;
133  slen--;
134  p++;
135  plen--;
136  } else {
137  break;
138  }
139  }
140  if (plen <= 0) {
141  return kLIKE_TRUE;
142  }
143  if (*p == escape_char) {
144  if (plen < 2) {
145  return kLIKE_ERROR;
146  }
147  firstpat = p[1];
148  } else {
149  firstpat = *p;
150  }
151 
152  while (slen > 0) {
153  bool match = false;
154  if (firstpat == '[' && *p != escape_char) {
155  const char* pp = p + 1;
156  int pplen = plen - 1;
157  while (pplen > 0 && *pp != ']') {
158  if ((!is_ilike && *s == *pp) || (is_ilike && lowercase(*s) == *pp)) {
159  match = true;
160  break;
161  }
162  pp++;
163  pplen--;
164  }
165  if (pplen <= 0) {
166  return kLIKE_ERROR; // malformed
167  }
168  } else if ((!is_ilike && *s == firstpat) ||
169  (is_ilike && lowercase(*s) == firstpat)) {
170  match = true;
171  }
172  if (match) {
173  LikeStatus status = string_like_match(s, slen, p, plen, escape_char, is_ilike);
174  if (status != kLIKE_FALSE) {
175  return status;
176  }
177  }
178  s++;
179  slen--;
180  }
181  return kLIKE_ABORT;
182  } else if (*p == '_') {
183  s++;
184  slen--;
185  p++;
186  plen--;
187  continue;
188  } else if (*p == '[') {
189  const char* pp = p + 1;
190  int pplen = plen - 1;
191  bool match = false;
192  while (pplen > 0 && *pp != ']') {
193  if ((!is_ilike && *s == *pp) || (is_ilike && lowercase(*s) == *pp)) {
194  match = true;
195  break;
196  }
197  pp++;
198  pplen--;
199  }
200  if (match) {
201  s++;
202  slen--;
203  pplen--;
204  const char* x;
205  for (x = pp + 1; *x != ']' && pplen > 0; x++, pplen--) {
206  ;
207  }
208  if (pplen <= 0) {
209  return kLIKE_ERROR; // malformed
210  }
211  plen -= (x - p + 1);
212  p = x + 1;
213  continue;
214  } else {
215  return kLIKE_FALSE;
216  }
217  } else if ((!is_ilike && *s != *p) || (is_ilike && lowercase(*s) != *p)) {
218  return kLIKE_FALSE;
219  }
220  s++;
221  slen--;
222  p++;
223  plen--;
224  }
225  if (slen > 0) {
226  return kLIKE_FALSE;
227  }
228  while (plen > 0 && *p == '%') {
229  p++;
230  plen--;
231  }
232  if (plen <= 0) {
233  return kLIKE_TRUE;
234  }
235  return kLIKE_ABORT;
236 }
237 
238 /*
239  * @brief string_like performs the SQL LIKE and ILIKE operation
240  * @param str string argument to be matched against pattern. single-byte
241  * character set only for now. null-termination not required.
242  * @param str_len length of str
243  * @param pattern pattern string for SQL LIKE
244  * @param pat_len length of pattern
245  * @param escape_char the escape character. '\\' is expected by default.
246  * @param is_ilike true if it is ILIKE, i.e., case-insensitive matching
247  * @return true if str matchs pattern, false otherwise. error condition
248  * not handled for now.
249  */
250 extern "C" RUNTIME_EXPORT DEVICE bool string_like(const char* str,
251  const int32_t str_len,
252  const char* pattern,
253  const int32_t pat_len,
254  const char escape_char) {
255  // @TODO(wei/alex) add runtime error handling
256  LikeStatus status =
257  string_like_match(str, str_len, pattern, pat_len, escape_char, false);
258  return status == kLIKE_TRUE;
259 }
260 
261 extern "C" RUNTIME_EXPORT DEVICE bool string_ilike(const char* str,
262  const int32_t str_len,
263  const char* pattern,
264  const int32_t pat_len,
265  const char escape_char) {
266  // @TODO(wei/alex) add runtime error handling
267  LikeStatus status =
268  string_like_match(str, str_len, pattern, pat_len, escape_char, true);
269  return status == kLIKE_TRUE;
270 }
271 
// Byte-wise comparison of two length-delimited strings (null-termination is neither
// required nor interpreted).
//
// Returns a negative value when s1 orders before s2, zero when both strings have the
// same length and the same bytes, and a positive value when s1 orders after s2. Bytes
// are compared as unsigned char. When one string is a proper prefix of the other, the
// shorter string orders first.
//
// Unlike a comparison that treats "end of string" as a 0 byte, this also separates
// strings that differ only by trailing 0 bytes (e.g. "a" vs "a\0"), which would
// otherwise be reported as equal.
extern "C" RUNTIME_EXPORT DEVICE int32_t StringCompare(const char* s1,
                                                       const int32_t s1_len,
                                                       const char* s2,
                                                       const int32_t s2_len) {
  // A non-positive length is treated as an empty string.
  const int32_t len1 = s1_len > 0 ? s1_len : 0;
  const int32_t len2 = s2_len > 0 ? s2_len : 0;
  const int32_t common_len = len1 < len2 ? len1 : len2;

  int32_t i = 0;
  while (i < common_len && s1[i] == s2[i]) {
    ++i;
  }

  if (i < common_len) {
    // First differing position lies inside both strings.
    const unsigned char c1 = (unsigned char)s1[i];
    const unsigned char c2 = (unsigned char)s2[i];
    return (int32_t)c1 - (int32_t)c2;
  }
  if (len1 > common_len) {
    // s2 is a proper prefix of s1. Keep the historical magnitude (the next byte of
    // s1) when it is non-zero, and still report "greater" when that byte is 0.
    const unsigned char rest = (unsigned char)s1[common_len];
    return rest != 0 ? (int32_t)rest : 1;
  }
  if (len2 > common_len) {
    // s1 is a proper prefix of s2.
    const unsigned char rest = (unsigned char)s2[common_len];
    return rest != 0 ? -(int32_t)rest : -1;
  }
  return 0;
}
289 
290 #define STR_LIKE_NULLABLE(base_func) \
291  extern "C" RUNTIME_EXPORT DEVICE int8_t base_func##_nullable(const char* lhs, \
292  const int32_t lhs_len, \
293  const char* rhs, \
294  const int32_t rhs_len, \
295  const char escape_char, \
296  const int8_t bool_null) { \
297  if (!lhs || !rhs) { \
298  return bool_null; \
299  } \
300  return base_func(lhs, lhs_len, rhs, rhs_len, escape_char) ? 1 : 0; \
301  }
302 
305 
306 #undef STR_LIKE_NULLABLE
307 
308 extern "C" RUNTIME_EXPORT DEVICE bool string_lt(const char* lhs,
309  const int32_t lhs_len,
310  const char* rhs,
311  const int32_t rhs_len) {
312  return StringCompare(lhs, lhs_len, rhs, rhs_len) < 0;
313 }
314 
315 extern "C" RUNTIME_EXPORT DEVICE bool string_le(const char* lhs,
316  const int32_t lhs_len,
317  const char* rhs,
318  const int32_t rhs_len) {
319  return StringCompare(lhs, lhs_len, rhs, rhs_len) <= 0;
320 }
321 
322 extern "C" RUNTIME_EXPORT DEVICE bool string_gt(const char* lhs,
323  const int32_t lhs_len,
324  const char* rhs,
325  const int32_t rhs_len) {
326  return StringCompare(lhs, lhs_len, rhs, rhs_len) > 0;
327 }
328 
329 extern "C" RUNTIME_EXPORT DEVICE bool string_ge(const char* lhs,
330  const int32_t lhs_len,
331  const char* rhs,
332  const int32_t rhs_len) {
333  return StringCompare(lhs, lhs_len, rhs, rhs_len) >= 0;
334 }
335 
336 extern "C" RUNTIME_EXPORT DEVICE bool string_eq(const char* lhs,
337  const int32_t lhs_len,
338  const char* rhs,
339  const int32_t rhs_len) {
340  return StringCompare(lhs, lhs_len, rhs, rhs_len) == 0;
341 }
342 
343 extern "C" RUNTIME_EXPORT DEVICE bool string_ne(const char* lhs,
344  const int32_t lhs_len,
345  const char* rhs,
346  const int32_t rhs_len) {
347  return StringCompare(lhs, lhs_len, rhs, rhs_len) != 0;
348 }
349 
350 #define STR_CMP_NULLABLE(base_func) \
351  extern "C" RUNTIME_EXPORT DEVICE int8_t base_func##_nullable(const char* lhs, \
352  const int32_t lhs_len, \
353  const char* rhs, \
354  const int32_t rhs_len, \
355  const int8_t bool_null) { \
356  if (!lhs || !rhs) { \
357  return bool_null; \
358  } \
359  return base_func(lhs, lhs_len, rhs, rhs_len) ? 1 : 0; \
360  }
361 
368 
369 #undef STR_CMP_NULLABLE
RUNTIME_EXPORT DEVICE int32_t StringCompare(const char *s1, const int32_t s1_len, const char *s2, const int32_t s2_len)
Definition: StringLike.cpp:272
RUNTIME_EXPORT DEVICE bool string_eq(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:336
RUNTIME_EXPORT DEVICE bool string_gt(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:322
RUNTIME_EXPORT DEVICE bool string_ilike_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, char escape_char)
Definition: StringLike.cpp:61
RUNTIME_EXPORT DEVICE bool string_le(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:315
RUNTIME_EXPORT DEVICE bool string_ge(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:329
RUNTIME_EXPORT DEVICE bool string_lt(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:308
#define DEVICE
#define STR_LIKE_NULLABLE(base_func)
Definition: StringLike.cpp:290
#define STR_LIKE_SIMPLE_NULLABLE(base_func)
Definition: StringLike.cpp:78
static DEVICE LikeStatus string_like_match(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char, const bool is_ilike)
Definition: StringLike.cpp:98
RUNTIME_EXPORT DEVICE bool string_like(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:250
static DEVICE int lowercase(char c)
Definition: StringLike.cpp:34
Functions to support the LIKE and ILIKE operator in SQL. Only single-byte character set is supported ...
#define RUNTIME_EXPORT
RUNTIME_EXPORT DEVICE bool string_like_simple(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, char escape_char)
Definition: StringLike.cpp:43
LikeStatus
Definition: StringLike.cpp:26
#define STR_CMP_NULLABLE(base_func)
Definition: StringLike.cpp:350
RUNTIME_EXPORT DEVICE bool string_ne(const char *lhs, const int32_t lhs_len, const char *rhs, const int32_t rhs_len)
Definition: StringLike.cpp:343
RUNTIME_EXPORT DEVICE bool string_ilike(const char *str, const int32_t str_len, const char *pattern, const int32_t pat_len, const char escape_char)
Definition: StringLike.cpp:261