import sys

import TableFunctionsFactory_node as tf_node
from collections import deque

if sys.version_info > (3, 0):
    from collections.abc import Iterable
else:
    from collections import Iterable
type : int
    One of the tokens in the list above
lexeme : str
    Corresponding string in the text
names = {
    Token.GREATER: "GREATER",
    Token.RARROW: "RARROW",
    Token.STRING: "STRING",
    Token.NUMBER: "NUMBER",
    Token.IDENTIFIER: "IDENTIFIER",
    Token.BOOLEAN: "BOOLEAN",
}
return names.get(token)
return 'Token(%s, "%s")' % (Token.tok_name(self.type), self.lexeme)
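# For illustration: a token built as Token(Token.NUMBER, "3.14") renders via
# __str__ above as Token(NUMBER, "3.14"), since tok_name maps the type
# constant back to its printable name.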
self._tokens.append(Token(type, lexeme))
return char in ("-",)
if char == '"' and curr != '\\':
    ...
NUMBER: [-]([0-9]*[.])?[0-9]+
elif char == "." and not found_dot:
    found_dot = True
IDENTIFIER: [A-Za-z_][A-Za-z0-9_]*
if (char and char.isalnum()) or char == "_":
    ...
return self.peek().isalpha() or self.peek() == "_"
return self.peek() == '"'
# a leading '-' only starts a NUMBER when the next character is a digit
return self.peek().isdigit() or (self.peek() == '-'
                                 and self.lookahead().isdigit())
return self.peek().isalpha()
return self.peek().isspace()
'Could not match char "%s" at pos %d on line\n %s' % (char, curr, self.line)
return identifier.lower() == 'cursor'
msg = "Expected token %s but got %s at pos %d.\n Tokens: %s" % (
    Token.tok_name(expected_type),
    curr_token,
    self._curr,
    self._tokens,
)
assert curr_token.type == expected_type, msg
286 """consumes the current token iff its type matches the
287 expected_type. Otherwise, an error is raised
290 if curr_token.type == expected_type:
294 expected_token = Token.tok_name(expected_type)
296 'Token mismatch at function consume. '
297 'Expected type "%s" but got token "%s"\n\n'
298 'Tokens: %s\n' % (expected_token, curr_token, self.
_tokens)
msg = "\n\nError while trying to parse token %s at pos %d.\n" \
      "Tokens: %s" % (curr_token, self._curr, self._tokens)
return curr_token.type == expected_type
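# Typical recursive-descent step using the helpers above: match() peeks at the
# current token's type without consuming it, while consume() either advances
# past the expected token or raises the mismatch error.  Sketch:
#     if self.match(Token.LESS):
#         self.consume(Token.LESS)
#         ...
#         self.consume(Token.GREATER)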
udtf: IDENTIFIER "(" (args)? ")" ("|" annotation)* "->" args ("," templates)? ("|" "output_row_size" "=" primitive)?
if not self.match(Token.RPAR):
    ...
assert idtn == "output_row_size", idtn
key = "kPreFlightParameter"
sizer = tf_node.AnnotationNode(key, value=node.type)
for arg in input_args:
    ...
    i += arg.type.cursor_length() if arg.type.is_cursor() else 1
for i, arg in enumerate(output_args):
    ...
return tf_node.UdtfNode(name, input_args, output_args, annotations, templates, sizer, self.line)
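# A made-up signature of the shape the udtf rule above accepts (illustrative,
# not taken from the codebase):
#
#   my_udtf(Cursor<Column<double> x> data, int32 m) -> Column<double> out | output_row_size=m
#
# "my_udtf" is the leading IDENTIFIER, the parenthesised list holds the input
# args, the args after "->" are the outputs, and the trailing
# "| output_row_size=m" is what becomes the kPreFlightParameter sizer
# annotation built above.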
args: arg IDENTIFIER ("," arg)*
self._curr = curr + 1
arg: type IDENTIFIER? ("|" annotation)*
annotations.append(tf_node.AnnotationNode('name', name))
if ahead.type == Token.IDENTIFIER and ahead.lexeme == 'output_row_size':
    ...
return tf_node.ArgNode(typ, annotations)
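# Per the arg rule above, an argument is a type, an optional name, and any
# number of "|"-separated annotations, e.g. (illustrative):
#   Column<int32> ids
#   int32 n | require="n > 0"
# When a name follows the type it is recorded as the 'name' annotation via the
# AnnotationNode('name', name) call above.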
if not self.match(Token.LESS):
    ...
composed: "Cursor" "<" arg ("," arg)* ">"
        | IDENTIFIER "<" type ("," type)* ">"
while self.match(Token.COMMA):
    ...
while self.match(Token.COMMA):
    ...
return tf_node.ComposedNode(idtn, inner)
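# Illustrative composed types:
#   Cursor<Column<double> x, int32 n>   (the "Cursor" "<" arg ... ">" branch)
#   Column<int64>                       (the IDENTIFIER "<" type ... ">" branch)
# The cursor check shown earlier (identifier.lower() == 'cursor') makes the
# "Cursor" keyword comparison case-insensitive.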
primitive: IDENTIFIER
         | NUMBER
         | STRING
         | BOOLEAN
if self.match(Token.IDENTIFIER):
    ...
elif self.match(Token.NUMBER):
    ...
elif self.match(Token.STRING):
    ...
elif self.match(Token.BOOLEAN):
    ...
return tf_node.PrimitiveNode(lexeme)
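# A primitive is a single token of any of the four kinds matched above; e.g.
# int32, -1.5, "text" or True (illustrative values) would each be wrapped in a
# PrimitiveNode.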
templates: template ("," template)*

template: IDENTIFIER "=" "[" IDENTIFIER ("," IDENTIFIER)* "]"
while self.match(Token.COMMA):
    ...
return tf_node.TemplateNode(key, tuple(types))
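# A template binds one IDENTIFIER to a bracketed list of IDENTIFIERs, e.g.
# (illustrative) T=[int32, int64, double]; the return above packs the key and
# the listed types into a TemplateNode.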
annotation: IDENTIFIER "=" IDENTIFIER ("<" NUMBER ("," NUMBER) ">")?
          | IDENTIFIER "=" "[" PRIMITIVE? ("," PRIMITIVE)* "]"
          | "require" "=" STRING
          | "default" "=" STRING | NUMBER | BOOLEAN
elif key == "default":
    if self.match(Token.NUMBER):
        ...
    elif self.match(Token.STRING):
        ...
    elif self.match(Token.BOOLEAN):
        ...
    else:
        # error raised when the default value is not one of the allowed types:
        ('Unable to parse value in "default" annotation.\n'
         'Expected type NUMBER, STRING or BOOLEAN.\n'
         'Found token: "%s" of type "%s" \n')
if not self.match(Token.RSQB):
    ...
    while self.match(Token.COMMA):
        ...
if self.match(Token.GREATER):
    value += "<%s>" % (-1)
else:
    ...
    if self.match(Token.COMMA):
        ...
        value += "<%s,%s>" % (num1, num2)
    else:
        value += "<%s>" % (num1)
return tf_node.AnnotationNode(key, value)
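# Illustrative annotations covering the alternatives above:
#   input_id=args<0>      IDENTIFIER "=" IDENTIFIER with a "<" NUMBER ">" suffix
#   fields=[1, 2]         the bracketed list form
#   require="n > 0"       require "=" STRING
#   default=10            default "=" NUMBER (STRING and BOOLEAN also allowed)
# Per the branches above, an empty "<>" suffix is stored as "<-1>" and a
# two-number suffix as "<num1,num2>".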
IDENTIFIER: [A-Za-z_][A-Za-z0-9_]*
token = self.consume(Token.IDENTIFIER)
token = self.consume(Token.STRING)
NUMBER: [-]([0-9]*[.])?[0-9]+
token = self.consume(Token.NUMBER)
BOOLEAN: \bTrue\b|\bFalse\b
token = self.consume(Token.BOOLEAN)
new_token = token.lexeme.lower().capitalize()
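# lower().capitalize() normalizes the BOOLEAN lexeme's spelling: "true",
# "TRUE" and "True" all become "True" (and likewise for "False").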
udtf: IDENTIFIER "(" (args)? ")" ("|" annotation)* "->" args ("," templates)? ("|" "output_row_size" "=" primitive)?

arg: type IDENTIFIER? ("|" annotation)*

composed: "Cursor" "<" arg ("," arg)* ">"
        | IDENTIFIER "<" type ("," type)* ">"

primitive: IDENTIFIER
         | NUMBER
         | STRING
         | BOOLEAN

annotation: IDENTIFIER "=" IDENTIFIER ("<" NUMBER ("," NUMBER) ">")?
          | IDENTIFIER "=" "[" PRIMITIVE? ("," PRIMITIVE)* "]"
          | "require" "=" STRING
          | "default" "=" STRING | NUMBER | BOOLEAN

templates: template ("," template)*
template: IDENTIFIER "=" "[" IDENTIFIER ("," IDENTIFIER)* "]"

IDENTIFIER: [A-Za-z_][A-Za-z0-9_]*
BOOLEAN: \bTrue\b|\bFalse\b
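# Minimal usage sketch. The class and entry-point names here (Parser, parse)
# are assumed for illustration; only the methods and grammar shown above are
# confirmed by this excerpt.
#
#     signature = 'my_udtf(Cursor<Column<double> x> data, int32 m) -> Column<double> out'
#     ast = Parser(signature).parse()
#     # on success the result is an AST built with tf_node.UdtfNode as above;
#     # a malformed signature raises a parse error with the messages shown earlier.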
if isinstance(node, Iterable):
    ...