sqlglot.dialects.snowflake
1from __future__ import annotations 2 3import typing as t 4 5from sqlglot import exp, generator, jsonpath, parser, tokens, transforms 6from sqlglot.dialects.dialect import ( 7 Dialect, 8 NormalizationStrategy, 9 build_timetostr_or_tochar, 10 binary_from_function, 11 build_default_decimal_type, 12 build_replace_with_optional_replacement, 13 build_timestamp_from_parts, 14 date_delta_sql, 15 date_trunc_to_time, 16 datestrtodate_sql, 17 build_formatted_time, 18 if_sql, 19 inline_array_sql, 20 max_or_greatest, 21 min_or_least, 22 rename_func, 23 timestamptrunc_sql, 24 timestrtotime_sql, 25 var_map_sql, 26 map_date_part, 27 no_timestamp_sql, 28 strposition_sql, 29 timestampdiff_sql, 30 no_make_interval_sql, 31 groupconcat_sql, 32) 33from sqlglot.generator import unsupported_args 34from sqlglot.helper import find_new_name, flatten, is_float, is_int, seq_get 35from sqlglot.optimizer.scope import build_scope, find_all_in_scope 36from sqlglot.tokens import TokenType 37 38if t.TYPE_CHECKING: 39 from sqlglot._typing import E, B 40 41 42# from https://docs.snowflake.com/en/sql-reference/functions/to_timestamp.html 43def _build_datetime( 44 name: str, kind: exp.DataType.Type, safe: bool = False 45) -> t.Callable[[t.List], exp.Func]: 46 def _builder(args: t.List) -> exp.Func: 47 value = seq_get(args, 0) 48 scale_or_fmt = seq_get(args, 1) 49 50 int_value = value is not None and is_int(value.name) 51 int_scale_or_fmt = scale_or_fmt is not None and scale_or_fmt.is_int 52 53 if isinstance(value, exp.Literal) or (value and scale_or_fmt): 54 # Converts calls like `TO_TIME('01:02:03')` into casts 55 if len(args) == 1 and value.is_string and not int_value: 56 return ( 57 exp.TryCast(this=value, to=exp.DataType.build(kind), requires_string=True) 58 if safe 59 else exp.cast(value, kind) 60 ) 61 62 # Handles `TO_TIMESTAMP(str, fmt)` and `TO_TIMESTAMP(num, scale)` as special 63 # cases so we can transpile them, since they're relatively common 64 if kind == exp.DataType.Type.TIMESTAMP: 65 if not safe and (int_value or int_scale_or_fmt): 66 # TRY_TO_TIMESTAMP('integer') is not parsed into exp.UnixToTime as 67 # it's not easily transpilable 68 return exp.UnixToTime(this=value, scale=scale_or_fmt) 69 if not int_scale_or_fmt and not is_float(value.name): 70 expr = build_formatted_time(exp.StrToTime, "snowflake")(args) 71 expr.set("safe", safe) 72 return expr 73 74 if kind in (exp.DataType.Type.DATE, exp.DataType.Type.TIME) and not int_value: 75 klass = exp.TsOrDsToDate if kind == exp.DataType.Type.DATE else exp.TsOrDsToTime 76 formatted_exp = build_formatted_time(klass, "snowflake")(args) 77 formatted_exp.set("safe", safe) 78 return formatted_exp 79 80 return exp.Anonymous(this=name, expressions=args) 81 82 return _builder 83 84 85def _build_object_construct(args: t.List) -> t.Union[exp.StarMap, exp.Struct]: 86 expression = parser.build_var_map(args) 87 88 if isinstance(expression, exp.StarMap): 89 return expression 90 91 return exp.Struct( 92 expressions=[ 93 exp.PropertyEQ(this=k, expression=v) for k, v in zip(expression.keys, expression.values) 94 ] 95 ) 96 97 98def _build_datediff(args: t.List) -> exp.DateDiff: 99 return exp.DateDiff( 100 this=seq_get(args, 2), expression=seq_get(args, 1), unit=map_date_part(seq_get(args, 0)) 101 ) 102 103 104def _build_date_time_add(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 105 def _builder(args: t.List) -> E: 106 return expr_type( 107 this=seq_get(args, 2), 108 expression=seq_get(args, 1), 109 unit=map_date_part(seq_get(args, 0)), 110 ) 111 112 return _builder 113 114 115def _build_bitwise(expr_type: t.Type[B], name: str) -> t.Callable[[t.List], B | exp.Anonymous]: 116 def _builder(args: t.List) -> B | exp.Anonymous: 117 if len(args) == 3: 118 return exp.Anonymous(this=name, expressions=args) 119 120 return binary_from_function(expr_type)(args) 121 122 return _builder 123 124 125# https://docs.snowflake.com/en/sql-reference/functions/div0 126def _build_if_from_div0(args: t.List) -> exp.If: 127 lhs = exp._wrap(seq_get(args, 0), exp.Binary) 128 rhs = exp._wrap(seq_get(args, 1), exp.Binary) 129 130 cond = exp.EQ(this=rhs, expression=exp.Literal.number(0)).and_( 131 exp.Is(this=lhs, expression=exp.null()).not_() 132 ) 133 true = exp.Literal.number(0) 134 false = exp.Div(this=lhs, expression=rhs) 135 return exp.If(this=cond, true=true, false=false) 136 137 138# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 139def _build_if_from_zeroifnull(args: t.List) -> exp.If: 140 cond = exp.Is(this=seq_get(args, 0), expression=exp.Null()) 141 return exp.If(this=cond, true=exp.Literal.number(0), false=seq_get(args, 0)) 142 143 144# https://docs.snowflake.com/en/sql-reference/functions/zeroifnull 145def _build_if_from_nullifzero(args: t.List) -> exp.If: 146 cond = exp.EQ(this=seq_get(args, 0), expression=exp.Literal.number(0)) 147 return exp.If(this=cond, true=exp.Null(), false=seq_get(args, 0)) 148 149 150def _regexpilike_sql(self: Snowflake.Generator, expression: exp.RegexpILike) -> str: 151 flag = expression.text("flag") 152 153 if "i" not in flag: 154 flag += "i" 155 156 return self.func( 157 "REGEXP_LIKE", expression.this, expression.expression, exp.Literal.string(flag) 158 ) 159 160 161def _build_regexp_replace(args: t.List) -> exp.RegexpReplace: 162 regexp_replace = exp.RegexpReplace.from_arg_list(args) 163 164 if not regexp_replace.args.get("replacement"): 165 regexp_replace.set("replacement", exp.Literal.string("")) 166 167 return regexp_replace 168 169 170def _show_parser(*args: t.Any, **kwargs: t.Any) -> t.Callable[[Snowflake.Parser], exp.Show]: 171 def _parse(self: Snowflake.Parser) -> exp.Show: 172 return self._parse_show_snowflake(*args, **kwargs) 173 174 return _parse 175 176 177def _date_trunc_to_time(args: t.List) -> exp.DateTrunc | exp.TimestampTrunc: 178 trunc = date_trunc_to_time(args) 179 trunc.set("unit", map_date_part(trunc.args["unit"])) 180 return trunc 181 182 183def _unqualify_pivot_columns(expression: exp.Expression) -> exp.Expression: 184 """ 185 Snowflake doesn't allow columns referenced in UNPIVOT to be qualified, 186 so we need to unqualify them. Same goes for ANY ORDER BY <column>. 187 188 Example: 189 >>> from sqlglot import parse_one 190 >>> expr = parse_one("SELECT * FROM m_sales UNPIVOT(sales FOR month IN (m_sales.jan, feb, mar, april))") 191 >>> print(_unqualify_pivot_columns(expr).sql(dialect="snowflake")) 192 SELECT * FROM m_sales UNPIVOT(sales FOR month IN (jan, feb, mar, april)) 193 """ 194 if isinstance(expression, exp.Pivot): 195 if expression.unpivot: 196 expression = transforms.unqualify_columns(expression) 197 else: 198 for field in expression.fields: 199 field_expr = seq_get(field.expressions if field else [], 0) 200 201 if isinstance(field_expr, exp.PivotAny): 202 unqualified_field_expr = transforms.unqualify_columns(field_expr) 203 t.cast(exp.Expression, field).set("expressions", unqualified_field_expr, 0) 204 205 return expression 206 207 208def _flatten_structured_types_unless_iceberg(expression: exp.Expression) -> exp.Expression: 209 assert isinstance(expression, exp.Create) 210 211 def _flatten_structured_type(expression: exp.DataType) -> exp.DataType: 212 if expression.this in exp.DataType.NESTED_TYPES: 213 expression.set("expressions", None) 214 return expression 215 216 props = expression.args.get("properties") 217 if isinstance(expression.this, exp.Schema) and not (props and props.find(exp.IcebergProperty)): 218 for schema_expression in expression.this.expressions: 219 if isinstance(schema_expression, exp.ColumnDef): 220 column_type = schema_expression.kind 221 if isinstance(column_type, exp.DataType): 222 column_type.transform(_flatten_structured_type, copy=False) 223 224 return expression 225 226 227def _unnest_generate_date_array(unnest: exp.Unnest) -> None: 228 generate_date_array = unnest.expressions[0] 229 start = generate_date_array.args.get("start") 230 end = generate_date_array.args.get("end") 231 step = generate_date_array.args.get("step") 232 233 if not start or not end or not isinstance(step, exp.Interval) or step.name != "1": 234 return 235 236 unit = step.args.get("unit") 237 238 unnest_alias = unnest.args.get("alias") 239 if unnest_alias: 240 unnest_alias = unnest_alias.copy() 241 sequence_value_name = seq_get(unnest_alias.columns, 0) or "value" 242 else: 243 sequence_value_name = "value" 244 245 # We'll add the next sequence value to the starting date and project the result 246 date_add = _build_date_time_add(exp.DateAdd)( 247 [unit, exp.cast(sequence_value_name, "int"), exp.cast(start, "date")] 248 ) 249 250 # We use DATEDIFF to compute the number of sequence values needed 251 number_sequence = Snowflake.Parser.FUNCTIONS["ARRAY_GENERATE_RANGE"]( 252 [exp.Literal.number(0), _build_datediff([unit, start, end]) + 1] 253 ) 254 255 unnest.set("expressions", [number_sequence]) 256 257 unnest_parent = unnest.parent 258 if isinstance(unnest_parent, exp.Join): 259 select = unnest_parent.parent 260 if isinstance(select, exp.Select): 261 replace_column_name = ( 262 sequence_value_name 263 if isinstance(sequence_value_name, str) 264 else sequence_value_name.name 265 ) 266 267 scope = build_scope(select) 268 if scope: 269 for column in scope.columns: 270 if column.name.lower() == replace_column_name.lower(): 271 column.replace( 272 date_add.as_(replace_column_name) 273 if isinstance(column.parent, exp.Select) 274 else date_add 275 ) 276 277 lateral = exp.Lateral(this=unnest_parent.this.pop()) 278 unnest_parent.replace(exp.Join(this=lateral)) 279 else: 280 unnest.replace( 281 exp.select(date_add.as_(sequence_value_name)) 282 .from_(unnest.copy()) 283 .subquery(unnest_alias) 284 ) 285 286 287def _transform_generate_date_array(expression: exp.Expression) -> exp.Expression: 288 if isinstance(expression, exp.Select): 289 for generate_date_array in expression.find_all(exp.GenerateDateArray): 290 parent = generate_date_array.parent 291 292 # If GENERATE_DATE_ARRAY is used directly as an array (e.g passed into ARRAY_LENGTH), the transformed Snowflake 293 # query is the following (it'll be unnested properly on the next iteration due to copy): 294 # SELECT ref(GENERATE_DATE_ARRAY(...)) -> SELECT ref((SELECT ARRAY_AGG(*) FROM UNNEST(GENERATE_DATE_ARRAY(...)))) 295 if not isinstance(parent, exp.Unnest): 296 unnest = exp.Unnest(expressions=[generate_date_array.copy()]) 297 generate_date_array.replace( 298 exp.select(exp.ArrayAgg(this=exp.Star())).from_(unnest).subquery() 299 ) 300 301 if ( 302 isinstance(parent, exp.Unnest) 303 and isinstance(parent.parent, (exp.From, exp.Join)) 304 and len(parent.expressions) == 1 305 ): 306 _unnest_generate_date_array(parent) 307 308 return expression 309 310 311def _build_regexp_extract(expr_type: t.Type[E]) -> t.Callable[[t.List], E]: 312 def _builder(args: t.List) -> E: 313 return expr_type( 314 this=seq_get(args, 0), 315 expression=seq_get(args, 1), 316 position=seq_get(args, 2), 317 occurrence=seq_get(args, 3), 318 parameters=seq_get(args, 4), 319 group=seq_get(args, 5) or exp.Literal.number(0), 320 ) 321 322 return _builder 323 324 325def _regexpextract_sql(self, expression: exp.RegexpExtract | exp.RegexpExtractAll) -> str: 326 # Other dialects don't support all of the following parameters, so we need to 327 # generate default values as necessary to ensure the transpilation is correct 328 group = expression.args.get("group") 329 330 # To avoid generating all these default values, we set group to None if 331 # it's 0 (also default value) which doesn't trigger the following chain 332 if group and group.name == "0": 333 group = None 334 335 parameters = expression.args.get("parameters") or (group and exp.Literal.string("c")) 336 occurrence = expression.args.get("occurrence") or (parameters and exp.Literal.number(1)) 337 position = expression.args.get("position") or (occurrence and exp.Literal.number(1)) 338 339 return self.func( 340 "REGEXP_SUBSTR" if isinstance(expression, exp.RegexpExtract) else "REGEXP_EXTRACT_ALL", 341 expression.this, 342 expression.expression, 343 position, 344 occurrence, 345 parameters, 346 group, 347 ) 348 349 350def _json_extract_value_array_sql( 351 self: Snowflake.Generator, expression: exp.JSONValueArray | exp.JSONExtractArray 352) -> str: 353 json_extract = exp.JSONExtract(this=expression.this, expression=expression.expression) 354 ident = exp.to_identifier("x") 355 356 if isinstance(expression, exp.JSONValueArray): 357 this: exp.Expression = exp.cast(ident, to=exp.DataType.Type.VARCHAR) 358 else: 359 this = exp.ParseJSON(this=f"TO_JSON({ident})") 360 361 transform_lambda = exp.Lambda(expressions=[ident], this=this) 362 363 return self.func("TRANSFORM", json_extract, transform_lambda) 364 365 366def _qualify_unnested_columns(expression: exp.Expression) -> exp.Expression: 367 if isinstance(expression, exp.Select): 368 scope = build_scope(expression) 369 if not scope: 370 return expression 371 372 unnests = list(scope.find_all(exp.Unnest)) 373 374 if not unnests: 375 return expression 376 377 taken_source_names = set(scope.sources) 378 column_source: t.Dict[str, exp.Identifier] = {} 379 380 unnest_identifier: t.Optional[exp.Identifier] = None 381 orig_expression = expression.copy() 382 383 for unnest in unnests: 384 if not isinstance(unnest.parent, (exp.From, exp.Join)): 385 continue 386 387 # Try to infer column names produced by an unnest operator. This is only possible 388 # when we can peek into the (statically known) contents of the unnested value. 389 unnest_columns: t.Set[str] = set() 390 for unnest_expr in unnest.expressions: 391 if not isinstance(unnest_expr, exp.Array): 392 continue 393 394 for array_expr in unnest_expr.expressions: 395 if not ( 396 isinstance(array_expr, exp.Struct) 397 and array_expr.expressions 398 and all( 399 isinstance(struct_expr, exp.PropertyEQ) 400 for struct_expr in array_expr.expressions 401 ) 402 ): 403 continue 404 405 unnest_columns.update( 406 struct_expr.this.name.lower() for struct_expr in array_expr.expressions 407 ) 408 break 409 410 if unnest_columns: 411 break 412 413 unnest_alias = unnest.args.get("alias") 414 if not unnest_alias: 415 alias_name = find_new_name(taken_source_names, "value") 416 taken_source_names.add(alias_name) 417 418 # Produce a `TableAlias` AST similar to what is produced for BigQuery. This 419 # will be corrected later, when we generate SQL for the `Unnest` AST node. 420 aliased_unnest = exp.alias_(unnest, None, table=[alias_name]) 421 scope.replace(unnest, aliased_unnest) 422 423 unnest_identifier = aliased_unnest.args["alias"].columns[0] 424 else: 425 alias_columns = getattr(unnest_alias, "columns", []) 426 unnest_identifier = unnest_alias.this or seq_get(alias_columns, 0) 427 428 if not isinstance(unnest_identifier, exp.Identifier): 429 return orig_expression 430 431 column_source.update({c.lower(): unnest_identifier for c in unnest_columns}) 432 433 for column in scope.columns: 434 if column.table: 435 continue 436 437 table = column_source.get(column.name.lower()) 438 if ( 439 unnest_identifier 440 and not table 441 and len(scope.sources) == 1 442 and column.name.lower() != unnest_identifier.name.lower() 443 ): 444 table = unnest_identifier 445 446 column.set("table", table and table.copy()) 447 448 return expression 449 450 451def _eliminate_dot_variant_lookup(expression: exp.Expression) -> exp.Expression: 452 if isinstance(expression, exp.Select): 453 # This transformation is used to facilitate transpilation of BigQuery `UNNEST` operations 454 # to Snowflake. It should not affect roundtrip because `Unnest` nodes cannot be produced 455 # by Snowflake's parser. 456 # 457 # Additionally, at the time of writing this, BigQuery is the only dialect that produces a 458 # `TableAlias` node that only fills `columns` and not `this`, due to `UNNEST_COLUMN_ONLY`. 459 unnest_aliases = set() 460 for unnest in find_all_in_scope(expression, exp.Unnest): 461 unnest_alias = unnest.args.get("alias") 462 if ( 463 isinstance(unnest_alias, exp.TableAlias) 464 and (unnest_alias.args.get("column_only") or not unnest_alias.this) 465 and len(unnest_alias.columns) == 1 466 ): 467 unnest_aliases.add(unnest_alias.columns[0].name) 468 469 if unnest_aliases: 470 for c in find_all_in_scope(expression, exp.Column): 471 if c.table in unnest_aliases: 472 bracket_lhs = c.args["table"] 473 bracket_rhs = exp.Literal.string(c.name) 474 bracket = exp.Bracket(this=bracket_lhs, expressions=[bracket_rhs]) 475 476 if c.parent is expression: 477 # Retain column projection names by using aliases 478 c.replace(exp.alias_(bracket, c.this.copy())) 479 else: 480 c.replace(bracket) 481 482 return expression 483 484 485class Snowflake(Dialect): 486 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 487 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 488 NULL_ORDERING = "nulls_are_large" 489 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 490 SUPPORTS_USER_DEFINED_TYPES = False 491 SUPPORTS_SEMI_ANTI_JOIN = False 492 PREFER_CTE_ALIAS_COLUMN = True 493 TABLESAMPLE_SIZE_IS_PERCENT = True 494 COPY_PARAMS_ARE_CSV = False 495 ARRAY_AGG_INCLUDES_NULLS = None 496 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 497 TRY_CAST_REQUIRES_STRING = True 498 499 TIME_MAPPING = { 500 "YYYY": "%Y", 501 "yyyy": "%Y", 502 "YY": "%y", 503 "yy": "%y", 504 "MMMM": "%B", 505 "mmmm": "%B", 506 "MON": "%b", 507 "mon": "%b", 508 "MM": "%m", 509 "mm": "%m", 510 "DD": "%d", 511 "dd": "%-d", 512 "DY": "%a", 513 "dy": "%w", 514 "HH24": "%H", 515 "hh24": "%H", 516 "HH12": "%I", 517 "hh12": "%I", 518 "MI": "%M", 519 "mi": "%M", 520 "SS": "%S", 521 "ss": "%S", 522 "FF6": "%f", 523 "ff6": "%f", 524 } 525 526 DATE_PART_MAPPING = { 527 **Dialect.DATE_PART_MAPPING, 528 "ISOWEEK": "WEEKISO", 529 } 530 531 def quote_identifier(self, expression: E, identify: bool = True) -> E: 532 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 533 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 534 if ( 535 isinstance(expression, exp.Identifier) 536 and isinstance(expression.parent, exp.Table) 537 and expression.name.lower() == "dual" 538 ): 539 return expression # type: ignore 540 541 return super().quote_identifier(expression, identify=identify) 542 543 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 544 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 545 SINGLE_TOKENS.pop("$") 546 547 class Parser(parser.Parser): 548 IDENTIFY_PIVOT_STRINGS = True 549 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 550 COLON_IS_VARIANT_EXTRACT = True 551 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 552 553 ID_VAR_TOKENS = { 554 *parser.Parser.ID_VAR_TOKENS, 555 TokenType.MATCH_CONDITION, 556 } 557 558 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 559 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 560 561 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 562 563 FUNCTIONS = { 564 **parser.Parser.FUNCTIONS, 565 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 566 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 567 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 568 this=seq_get(args, 1), expression=seq_get(args, 0) 569 ), 570 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 571 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 572 start=seq_get(args, 0), 573 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 574 step=seq_get(args, 2), 575 ), 576 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 577 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 578 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 579 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 580 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 581 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 582 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 583 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 584 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 585 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 586 "DATE_TRUNC": _date_trunc_to_time, 587 "DATEADD": _build_date_time_add(exp.DateAdd), 588 "DATEDIFF": _build_datediff, 589 "DIV0": _build_if_from_div0, 590 "EDITDISTANCE": lambda args: exp.Levenshtein( 591 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 592 ), 593 "FLATTEN": exp.Explode.from_arg_list, 594 "GET_PATH": lambda args, dialect: exp.JSONExtract( 595 this=seq_get(args, 0), 596 expression=dialect.to_json_path(seq_get(args, 1)), 597 requires_json=True, 598 ), 599 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 600 "IFF": exp.If.from_arg_list, 601 "LAST_DAY": lambda args: exp.LastDay( 602 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 603 ), 604 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 605 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 606 "NULLIFZERO": _build_if_from_nullifzero, 607 "OBJECT_CONSTRUCT": _build_object_construct, 608 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 609 "REGEXP_REPLACE": _build_regexp_replace, 610 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 611 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 612 "REPLACE": build_replace_with_optional_replacement, 613 "RLIKE": exp.RegexpLike.from_arg_list, 614 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 615 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 616 "TIMEADD": _build_date_time_add(exp.TimeAdd), 617 "TIMEDIFF": _build_datediff, 618 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 619 "TIMESTAMPDIFF": _build_datediff, 620 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 621 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 622 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 623 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 624 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 625 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 626 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 627 "TRY_TO_TIMESTAMP": _build_datetime( 628 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 629 ), 630 "TO_CHAR": build_timetostr_or_tochar, 631 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 632 "TO_NUMBER": lambda args: exp.ToNumber( 633 this=seq_get(args, 0), 634 format=seq_get(args, 1), 635 precision=seq_get(args, 2), 636 scale=seq_get(args, 3), 637 ), 638 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 639 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 640 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 641 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 642 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 643 "TO_VARCHAR": exp.ToChar.from_arg_list, 644 "ZEROIFNULL": _build_if_from_zeroifnull, 645 } 646 647 FUNCTION_PARSERS = { 648 **parser.Parser.FUNCTION_PARSERS, 649 "DATE_PART": lambda self: self._parse_date_part(), 650 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 651 "LISTAGG": lambda self: self._parse_string_agg(), 652 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 653 } 654 FUNCTION_PARSERS.pop("TRIM") 655 656 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 657 658 ALTER_PARSERS = { 659 **parser.Parser.ALTER_PARSERS, 660 "UNSET": lambda self: self.expression( 661 exp.Set, 662 tag=self._match_text_seq("TAG"), 663 expressions=self._parse_csv(self._parse_id_var), 664 unset=True, 665 ), 666 } 667 668 STATEMENT_PARSERS = { 669 **parser.Parser.STATEMENT_PARSERS, 670 TokenType.GET: lambda self: self._parse_get(), 671 TokenType.PUT: lambda self: self._parse_put(), 672 TokenType.SHOW: lambda self: self._parse_show(), 673 } 674 675 PROPERTY_PARSERS = { 676 **parser.Parser.PROPERTY_PARSERS, 677 "CREDENTIALS": lambda self: self._parse_credentials_property(), 678 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 679 "LOCATION": lambda self: self._parse_location_property(), 680 "TAG": lambda self: self._parse_tag(), 681 "USING": lambda self: self._match_text_seq("TEMPLATE") 682 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 683 } 684 685 TYPE_CONVERTERS = { 686 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 687 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 688 } 689 690 SHOW_PARSERS = { 691 "DATABASES": _show_parser("DATABASES"), 692 "TERSE DATABASES": _show_parser("DATABASES"), 693 "SCHEMAS": _show_parser("SCHEMAS"), 694 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 695 "OBJECTS": _show_parser("OBJECTS"), 696 "TERSE OBJECTS": _show_parser("OBJECTS"), 697 "TABLES": _show_parser("TABLES"), 698 "TERSE TABLES": _show_parser("TABLES"), 699 "VIEWS": _show_parser("VIEWS"), 700 "TERSE VIEWS": _show_parser("VIEWS"), 701 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 702 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 703 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 704 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 705 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 706 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 707 "SEQUENCES": _show_parser("SEQUENCES"), 708 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 709 "STAGES": _show_parser("STAGES"), 710 "COLUMNS": _show_parser("COLUMNS"), 711 "USERS": _show_parser("USERS"), 712 "TERSE USERS": _show_parser("USERS"), 713 "FILE FORMATS": _show_parser("FILE FORMATS"), 714 "FUNCTIONS": _show_parser("FUNCTIONS"), 715 "PROCEDURES": _show_parser("PROCEDURES"), 716 "WAREHOUSES": _show_parser("WAREHOUSES"), 717 } 718 719 CONSTRAINT_PARSERS = { 720 **parser.Parser.CONSTRAINT_PARSERS, 721 "WITH": lambda self: self._parse_with_constraint(), 722 "MASKING": lambda self: self._parse_with_constraint(), 723 "PROJECTION": lambda self: self._parse_with_constraint(), 724 "TAG": lambda self: self._parse_with_constraint(), 725 } 726 727 STAGED_FILE_SINGLE_TOKENS = { 728 TokenType.DOT, 729 TokenType.MOD, 730 TokenType.SLASH, 731 } 732 733 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 734 735 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 736 737 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 738 739 LAMBDAS = { 740 **parser.Parser.LAMBDAS, 741 TokenType.ARROW: lambda self, expressions: self.expression( 742 exp.Lambda, 743 this=self._replace_lambda( 744 self._parse_assignment(), 745 expressions, 746 ), 747 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 748 ), 749 } 750 751 def _parse_use(self) -> exp.Use: 752 if self._match_text_seq("SECONDARY", "ROLES"): 753 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 754 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 755 return self.expression( 756 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 757 ) 758 759 return super()._parse_use() 760 761 def _negate_range( 762 self, this: t.Optional[exp.Expression] = None 763 ) -> t.Optional[exp.Expression]: 764 if not this: 765 return this 766 767 query = this.args.get("query") 768 if isinstance(this, exp.In) and isinstance(query, exp.Query): 769 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 770 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 771 # which can produce different results (most likely a SnowFlake bug). 772 # 773 # https://docs.snowflake.com/en/sql-reference/functions/in 774 # Context: https://github.com/tobymao/sqlglot/issues/3890 775 return self.expression( 776 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 777 ) 778 779 return self.expression(exp.Not, this=this) 780 781 def _parse_tag(self) -> exp.Tags: 782 return self.expression( 783 exp.Tags, 784 expressions=self._parse_wrapped_csv(self._parse_property), 785 ) 786 787 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 788 if self._prev.token_type != TokenType.WITH: 789 self._retreat(self._index - 1) 790 791 if self._match_text_seq("MASKING", "POLICY"): 792 policy = self._parse_column() 793 return self.expression( 794 exp.MaskingPolicyColumnConstraint, 795 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 796 expressions=self._match(TokenType.USING) 797 and self._parse_wrapped_csv(self._parse_id_var), 798 ) 799 if self._match_text_seq("PROJECTION", "POLICY"): 800 policy = self._parse_column() 801 return self.expression( 802 exp.ProjectionPolicyColumnConstraint, 803 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 804 ) 805 if self._match(TokenType.TAG): 806 return self._parse_tag() 807 808 return None 809 810 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 811 if self._match(TokenType.TAG): 812 return self._parse_tag() 813 814 return super()._parse_with_property() 815 816 def _parse_create(self) -> exp.Create | exp.Command: 817 expression = super()._parse_create() 818 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 819 # Replace the Table node with the enclosed Identifier 820 expression.this.replace(expression.this.this) 821 822 return expression 823 824 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 825 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 826 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 827 this = self._parse_var() or self._parse_type() 828 829 if not this: 830 return None 831 832 self._match(TokenType.COMMA) 833 expression = self._parse_bitwise() 834 this = map_date_part(this) 835 name = this.name.upper() 836 837 if name.startswith("EPOCH"): 838 if name == "EPOCH_MILLISECOND": 839 scale = 10**3 840 elif name == "EPOCH_MICROSECOND": 841 scale = 10**6 842 elif name == "EPOCH_NANOSECOND": 843 scale = 10**9 844 else: 845 scale = None 846 847 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 848 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 849 850 if scale: 851 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 852 853 return to_unix 854 855 return self.expression(exp.Extract, this=this, expression=expression) 856 857 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 858 if is_map: 859 # Keys are strings in Snowflake's objects, see also: 860 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 861 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 862 return self._parse_slice(self._parse_string()) 863 864 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 865 866 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 867 lateral = super()._parse_lateral() 868 if not lateral: 869 return lateral 870 871 if isinstance(lateral.this, exp.Explode): 872 table_alias = lateral.args.get("alias") 873 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 874 if table_alias and not table_alias.args.get("columns"): 875 table_alias.set("columns", columns) 876 elif not table_alias: 877 exp.alias_(lateral, "_flattened", table=columns, copy=False) 878 879 return lateral 880 881 def _parse_table_parts( 882 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 883 ) -> exp.Table: 884 # https://docs.snowflake.com/en/user-guide/querying-stage 885 if self._match(TokenType.STRING, advance=False): 886 table = self._parse_string() 887 elif self._match_text_seq("@", advance=False): 888 table = self._parse_location_path() 889 else: 890 table = None 891 892 if table: 893 file_format = None 894 pattern = None 895 896 wrapped = self._match(TokenType.L_PAREN) 897 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 898 if self._match_text_seq("FILE_FORMAT", "=>"): 899 file_format = self._parse_string() or super()._parse_table_parts( 900 is_db_reference=is_db_reference 901 ) 902 elif self._match_text_seq("PATTERN", "=>"): 903 pattern = self._parse_string() 904 else: 905 break 906 907 self._match(TokenType.COMMA) 908 909 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 910 else: 911 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 912 913 return table 914 915 def _parse_table( 916 self, 917 schema: bool = False, 918 joins: bool = False, 919 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 920 parse_bracket: bool = False, 921 is_db_reference: bool = False, 922 parse_partition: bool = False, 923 consume_pipe: bool = False, 924 ) -> t.Optional[exp.Expression]: 925 table = super()._parse_table( 926 schema=schema, 927 joins=joins, 928 alias_tokens=alias_tokens, 929 parse_bracket=parse_bracket, 930 is_db_reference=is_db_reference, 931 parse_partition=parse_partition, 932 ) 933 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 934 table_from_rows = table.this 935 for arg in exp.TableFromRows.arg_types: 936 if arg != "this": 937 table_from_rows.set(arg, table.args.get(arg)) 938 939 table = table_from_rows 940 941 return table 942 943 def _parse_id_var( 944 self, 945 any_token: bool = True, 946 tokens: t.Optional[t.Collection[TokenType]] = None, 947 ) -> t.Optional[exp.Expression]: 948 if self._match_text_seq("IDENTIFIER", "("): 949 identifier = ( 950 super()._parse_id_var(any_token=any_token, tokens=tokens) 951 or self._parse_string() 952 ) 953 self._match_r_paren() 954 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 955 956 return super()._parse_id_var(any_token=any_token, tokens=tokens) 957 958 def _parse_show_snowflake(self, this: str) -> exp.Show: 959 scope = None 960 scope_kind = None 961 962 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 963 # which is syntactically valid but has no effect on the output 964 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 965 966 history = self._match_text_seq("HISTORY") 967 968 like = self._parse_string() if self._match(TokenType.LIKE) else None 969 970 if self._match(TokenType.IN): 971 if self._match_text_seq("ACCOUNT"): 972 scope_kind = "ACCOUNT" 973 elif self._match_text_seq("CLASS"): 974 scope_kind = "CLASS" 975 scope = self._parse_table_parts() 976 elif self._match_text_seq("APPLICATION"): 977 scope_kind = "APPLICATION" 978 if self._match_text_seq("PACKAGE"): 979 scope_kind += " PACKAGE" 980 scope = self._parse_table_parts() 981 elif self._match_set(self.DB_CREATABLES): 982 scope_kind = self._prev.text.upper() 983 if self._curr: 984 scope = self._parse_table_parts() 985 elif self._curr: 986 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 987 scope = self._parse_table_parts() 988 989 return self.expression( 990 exp.Show, 991 **{ 992 "terse": terse, 993 "this": this, 994 "history": history, 995 "like": like, 996 "scope": scope, 997 "scope_kind": scope_kind, 998 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 999 "limit": self._parse_limit(), 1000 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1001 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1002 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1003 }, 1004 ) 1005 1006 def _parse_put(self) -> exp.Put | exp.Command: 1007 if self._curr.token_type != TokenType.STRING: 1008 return self._parse_as_command(self._prev) 1009 1010 return self.expression( 1011 exp.Put, 1012 this=self._parse_string(), 1013 target=self._parse_location_path(), 1014 properties=self._parse_properties(), 1015 ) 1016 1017 def _parse_get(self) -> t.Optional[exp.Expression]: 1018 start = self._prev 1019 1020 # If we detect GET( then we need to parse a function, not a statement 1021 if self._match(TokenType.L_PAREN): 1022 self._retreat(self._index - 2) 1023 return self._parse_expression() 1024 1025 target = self._parse_location_path() 1026 1027 # Parse as command if unquoted file path 1028 if self._curr.token_type == TokenType.URI_START: 1029 return self._parse_as_command(start) 1030 1031 return self.expression( 1032 exp.Get, 1033 this=self._parse_string(), 1034 target=target, 1035 properties=self._parse_properties(), 1036 ) 1037 1038 def _parse_location_property(self) -> exp.LocationProperty: 1039 self._match(TokenType.EQ) 1040 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1041 1042 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1043 # Parse either a subquery or a staged file 1044 return ( 1045 self._parse_select(table=True, parse_subquery_alias=False) 1046 if self._match(TokenType.L_PAREN, advance=False) 1047 else self._parse_table_parts() 1048 ) 1049 1050 def _parse_location_path(self) -> exp.Var: 1051 start = self._curr 1052 self._advance_any(ignore_reserved=True) 1053 1054 # We avoid consuming a comma token because external tables like @foo and @bar 1055 # can be joined in a query with a comma separator, as well as closing paren 1056 # in case of subqueries 1057 while self._is_connected() and not self._match_set( 1058 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1059 ): 1060 self._advance_any(ignore_reserved=True) 1061 1062 return exp.var(self._find_sql(start, self._prev)) 1063 1064 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1065 this = super()._parse_lambda_arg() 1066 1067 if not this: 1068 return this 1069 1070 typ = self._parse_types() 1071 1072 if typ: 1073 return self.expression(exp.Cast, this=this, to=typ) 1074 1075 return this 1076 1077 def _parse_foreign_key(self) -> exp.ForeignKey: 1078 # inlineFK, the REFERENCES columns are implied 1079 if self._match(TokenType.REFERENCES, advance=False): 1080 return self.expression(exp.ForeignKey) 1081 1082 # outoflineFK, explicitly names the columns 1083 return super()._parse_foreign_key() 1084 1085 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1086 self._match(TokenType.EQ) 1087 if self._match(TokenType.L_PAREN, advance=False): 1088 expressions = self._parse_wrapped_options() 1089 else: 1090 expressions = [self._parse_format_name()] 1091 1092 return self.expression( 1093 exp.FileFormatProperty, 1094 expressions=expressions, 1095 ) 1096 1097 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1098 return self.expression( 1099 exp.CredentialsProperty, 1100 expressions=self._parse_wrapped_options(), 1101 ) 1102 1103 def _parse_semantic_view(self) -> exp.SemanticView: 1104 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1105 1106 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1107 if self._match_text_seq("DIMENSIONS"): 1108 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1109 if self._match_text_seq("METRICS"): 1110 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1111 if self._match_text_seq("WHERE"): 1112 kwargs["where"] = self._parse_expression() 1113 1114 return self.expression(exp.SemanticView, **kwargs) 1115 1116 class Tokenizer(tokens.Tokenizer): 1117 STRING_ESCAPES = ["\\", "'"] 1118 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1119 RAW_STRINGS = ["$$"] 1120 COMMENTS = ["--", "//", ("/*", "*/")] 1121 NESTED_COMMENTS = False 1122 1123 KEYWORDS = { 1124 **tokens.Tokenizer.KEYWORDS, 1125 "FILE://": TokenType.URI_START, 1126 "BYTEINT": TokenType.INT, 1127 "EXCLUDE": TokenType.EXCEPT, 1128 "FILE FORMAT": TokenType.FILE_FORMAT, 1129 "GET": TokenType.GET, 1130 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1131 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1132 "MINUS": TokenType.EXCEPT, 1133 "NCHAR VARYING": TokenType.VARCHAR, 1134 "PUT": TokenType.PUT, 1135 "REMOVE": TokenType.COMMAND, 1136 "RM": TokenType.COMMAND, 1137 "SAMPLE": TokenType.TABLE_SAMPLE, 1138 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1139 "SQL_DOUBLE": TokenType.DOUBLE, 1140 "SQL_VARCHAR": TokenType.VARCHAR, 1141 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1142 "TAG": TokenType.TAG, 1143 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1144 "TOP": TokenType.TOP, 1145 "WAREHOUSE": TokenType.WAREHOUSE, 1146 "STAGE": TokenType.STAGE, 1147 "STREAMLIT": TokenType.STREAMLIT, 1148 } 1149 KEYWORDS.pop("/*+") 1150 1151 SINGLE_TOKENS = { 1152 **tokens.Tokenizer.SINGLE_TOKENS, 1153 "$": TokenType.PARAMETER, 1154 } 1155 1156 VAR_SINGLE_TOKENS = {"$"} 1157 1158 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1159 1160 class Generator(generator.Generator): 1161 PARAMETER_TOKEN = "$" 1162 MATCHED_BY_SOURCE = False 1163 SINGLE_STRING_INTERVAL = True 1164 JOIN_HINTS = False 1165 TABLE_HINTS = False 1166 QUERY_HINTS = False 1167 AGGREGATE_FILTER_SUPPORTED = False 1168 SUPPORTS_TABLE_COPY = False 1169 COLLATE_IS_FUNC = True 1170 LIMIT_ONLY_LITERALS = True 1171 JSON_KEY_VALUE_PAIR_SEP = "," 1172 INSERT_OVERWRITE = " OVERWRITE INTO" 1173 STRUCT_DELIMITER = ("(", ")") 1174 COPY_PARAMS_ARE_WRAPPED = False 1175 COPY_PARAMS_EQ_REQUIRED = True 1176 STAR_EXCEPT = "EXCLUDE" 1177 SUPPORTS_EXPLODING_PROJECTIONS = False 1178 ARRAY_CONCAT_IS_VAR_LEN = False 1179 SUPPORTS_CONVERT_TIMEZONE = True 1180 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1181 SUPPORTS_MEDIAN = True 1182 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1183 SUPPORTS_DECODE_CASE = True 1184 IS_BOOL_ALLOWED = False 1185 1186 TRANSFORMS = { 1187 **generator.Generator.TRANSFORMS, 1188 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1189 exp.ArgMax: rename_func("MAX_BY"), 1190 exp.ArgMin: rename_func("MIN_BY"), 1191 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1192 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1193 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1194 exp.AtTimeZone: lambda self, e: self.func( 1195 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1196 ), 1197 exp.BitwiseOr: rename_func("BITOR"), 1198 exp.BitwiseXor: rename_func("BITXOR"), 1199 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1200 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1201 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1202 exp.DateAdd: date_delta_sql("DATEADD"), 1203 exp.DateDiff: date_delta_sql("DATEDIFF"), 1204 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1205 exp.DatetimeDiff: timestampdiff_sql, 1206 exp.DateStrToDate: datestrtodate_sql, 1207 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1208 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1209 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1210 exp.DayOfYear: rename_func("DAYOFYEAR"), 1211 exp.Explode: rename_func("FLATTEN"), 1212 exp.Extract: lambda self, e: self.func( 1213 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1214 ), 1215 exp.FileFormatProperty: lambda self, 1216 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1217 exp.FromTimeZone: lambda self, e: self.func( 1218 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1219 ), 1220 exp.GenerateSeries: lambda self, e: self.func( 1221 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1222 ), 1223 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1224 exp.If: if_sql(name="IFF", false_value="NULL"), 1225 exp.JSONExtractArray: _json_extract_value_array_sql, 1226 exp.JSONExtractScalar: lambda self, e: self.func( 1227 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1228 ), 1229 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1230 exp.JSONPathRoot: lambda *_: "", 1231 exp.JSONValueArray: _json_extract_value_array_sql, 1232 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1233 rename_func("EDITDISTANCE") 1234 ), 1235 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1236 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1237 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1238 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1239 exp.MakeInterval: no_make_interval_sql, 1240 exp.Max: max_or_greatest, 1241 exp.Min: min_or_least, 1242 exp.ParseJSON: lambda self, e: self.func( 1243 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1244 ), 1245 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1246 exp.PercentileCont: transforms.preprocess( 1247 [transforms.add_within_group_for_percentiles] 1248 ), 1249 exp.PercentileDisc: transforms.preprocess( 1250 [transforms.add_within_group_for_percentiles] 1251 ), 1252 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1253 exp.RegexpExtract: _regexpextract_sql, 1254 exp.RegexpExtractAll: _regexpextract_sql, 1255 exp.RegexpILike: _regexpilike_sql, 1256 exp.Rand: rename_func("RANDOM"), 1257 exp.Select: transforms.preprocess( 1258 [ 1259 transforms.eliminate_window_clause, 1260 transforms.eliminate_distinct_on, 1261 transforms.explode_projection_to_unnest(), 1262 transforms.eliminate_semi_and_anti_joins, 1263 _transform_generate_date_array, 1264 _qualify_unnested_columns, 1265 _eliminate_dot_variant_lookup, 1266 ] 1267 ), 1268 exp.SHA: rename_func("SHA1"), 1269 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1270 exp.StartsWith: rename_func("STARTSWITH"), 1271 exp.EndsWith: rename_func("ENDSWITH"), 1272 exp.StrPosition: lambda self, e: strposition_sql( 1273 self, e, func_name="CHARINDEX", supports_position=True 1274 ), 1275 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1276 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1277 exp.Stuff: rename_func("INSERT"), 1278 exp.StPoint: rename_func("ST_MAKEPOINT"), 1279 exp.TimeAdd: date_delta_sql("TIMEADD"), 1280 exp.Timestamp: no_timestamp_sql, 1281 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1282 exp.TimestampDiff: lambda self, e: self.func( 1283 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1284 ), 1285 exp.TimestampTrunc: timestamptrunc_sql(), 1286 exp.TimeStrToTime: timestrtotime_sql, 1287 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1288 exp.ToArray: rename_func("TO_ARRAY"), 1289 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1290 exp.ToDouble: rename_func("TO_DOUBLE"), 1291 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1292 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1293 exp.TsOrDsToDate: lambda self, e: self.func( 1294 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1295 ), 1296 exp.TsOrDsToTime: lambda self, e: self.func( 1297 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1298 ), 1299 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1300 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1301 exp.Uuid: rename_func("UUID_STRING"), 1302 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1303 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1304 exp.Xor: rename_func("BOOLXOR"), 1305 } 1306 1307 SUPPORTED_JSON_PATH_PARTS = { 1308 exp.JSONPathKey, 1309 exp.JSONPathRoot, 1310 exp.JSONPathSubscript, 1311 } 1312 1313 TYPE_MAPPING = { 1314 **generator.Generator.TYPE_MAPPING, 1315 exp.DataType.Type.NESTED: "OBJECT", 1316 exp.DataType.Type.STRUCT: "OBJECT", 1317 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1318 } 1319 1320 TOKEN_MAPPING = { 1321 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1322 } 1323 1324 PROPERTIES_LOCATION = { 1325 **generator.Generator.PROPERTIES_LOCATION, 1326 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1327 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1328 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1329 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1330 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1331 } 1332 1333 UNSUPPORTED_VALUES_EXPRESSIONS = { 1334 exp.Map, 1335 exp.StarMap, 1336 exp.Struct, 1337 exp.VarMap, 1338 } 1339 1340 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1341 1342 def with_properties(self, properties: exp.Properties) -> str: 1343 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1344 1345 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1346 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1347 values_as_table = False 1348 1349 return super().values_sql(expression, values_as_table=values_as_table) 1350 1351 def datatype_sql(self, expression: exp.DataType) -> str: 1352 expressions = expression.expressions 1353 if ( 1354 expressions 1355 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1356 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1357 ): 1358 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1359 return "OBJECT" 1360 1361 return super().datatype_sql(expression) 1362 1363 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1364 return self.func( 1365 "TO_NUMBER", 1366 expression.this, 1367 expression.args.get("format"), 1368 expression.args.get("precision"), 1369 expression.args.get("scale"), 1370 ) 1371 1372 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1373 milli = expression.args.get("milli") 1374 if milli is not None: 1375 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1376 expression.set("nano", milli_to_nano) 1377 1378 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1379 1380 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1381 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1382 return self.func("TO_GEOGRAPHY", expression.this) 1383 if expression.is_type(exp.DataType.Type.GEOMETRY): 1384 return self.func("TO_GEOMETRY", expression.this) 1385 1386 return super().cast_sql(expression, safe_prefix=safe_prefix) 1387 1388 def trycast_sql(self, expression: exp.TryCast) -> str: 1389 value = expression.this 1390 1391 if value.type is None: 1392 from sqlglot.optimizer.annotate_types import annotate_types 1393 1394 value = annotate_types(value, dialect=self.dialect) 1395 1396 # Snowflake requires that TRY_CAST's value be a string 1397 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1398 # if we can deduce that the value is a string, then we can generate TRY_CAST 1399 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1400 return super().trycast_sql(expression) 1401 1402 return self.cast_sql(expression) 1403 1404 def log_sql(self, expression: exp.Log) -> str: 1405 if not expression.expression: 1406 return self.func("LN", expression.this) 1407 1408 return super().log_sql(expression) 1409 1410 def unnest_sql(self, expression: exp.Unnest) -> str: 1411 unnest_alias = expression.args.get("alias") 1412 offset = expression.args.get("offset") 1413 1414 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1415 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1416 1417 columns = [ 1418 exp.to_identifier("seq"), 1419 exp.to_identifier("key"), 1420 exp.to_identifier("path"), 1421 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1422 value, 1423 exp.to_identifier("this"), 1424 ] 1425 1426 if unnest_alias: 1427 unnest_alias.set("columns", columns) 1428 else: 1429 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1430 1431 table_input = self.sql(expression.expressions[0]) 1432 if not table_input.startswith("INPUT =>"): 1433 table_input = f"INPUT => {table_input}" 1434 1435 expression_parent = expression.parent 1436 1437 explode = ( 1438 f"FLATTEN({table_input})" 1439 if isinstance(expression_parent, exp.Lateral) 1440 else f"TABLE(FLATTEN({table_input}))" 1441 ) 1442 alias = self.sql(unnest_alias) 1443 alias = f" AS {alias}" if alias else "" 1444 value = ( 1445 "" 1446 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1447 else f"{value} FROM " 1448 ) 1449 1450 return f"{value}{explode}{alias}" 1451 1452 def show_sql(self, expression: exp.Show) -> str: 1453 terse = "TERSE " if expression.args.get("terse") else "" 1454 history = " HISTORY" if expression.args.get("history") else "" 1455 like = self.sql(expression, "like") 1456 like = f" LIKE {like}" if like else "" 1457 1458 scope = self.sql(expression, "scope") 1459 scope = f" {scope}" if scope else "" 1460 1461 scope_kind = self.sql(expression, "scope_kind") 1462 if scope_kind: 1463 scope_kind = f" IN {scope_kind}" 1464 1465 starts_with = self.sql(expression, "starts_with") 1466 if starts_with: 1467 starts_with = f" STARTS WITH {starts_with}" 1468 1469 limit = self.sql(expression, "limit") 1470 1471 from_ = self.sql(expression, "from") 1472 if from_: 1473 from_ = f" FROM {from_}" 1474 1475 privileges = self.expressions(expression, key="privileges", flat=True) 1476 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1477 1478 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1479 1480 def describe_sql(self, expression: exp.Describe) -> str: 1481 # Default to table if kind is unknown 1482 kind_value = expression.args.get("kind") or "TABLE" 1483 kind = f" {kind_value}" if kind_value else "" 1484 this = f" {self.sql(expression, 'this')}" 1485 expressions = self.expressions(expression, flat=True) 1486 expressions = f" {expressions}" if expressions else "" 1487 return f"DESCRIBE{kind}{this}{expressions}" 1488 1489 def generatedasidentitycolumnconstraint_sql( 1490 self, expression: exp.GeneratedAsIdentityColumnConstraint 1491 ) -> str: 1492 start = expression.args.get("start") 1493 start = f" START {start}" if start else "" 1494 increment = expression.args.get("increment") 1495 increment = f" INCREMENT {increment}" if increment else "" 1496 1497 order = expression.args.get("order") 1498 if order is not None: 1499 order_clause = " ORDER" if order else " NOORDER" 1500 else: 1501 order_clause = "" 1502 1503 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1504 1505 def cluster_sql(self, expression: exp.Cluster) -> str: 1506 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1507 1508 def struct_sql(self, expression: exp.Struct) -> str: 1509 keys = [] 1510 values = [] 1511 1512 for i, e in enumerate(expression.expressions): 1513 if isinstance(e, exp.PropertyEQ): 1514 keys.append( 1515 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1516 ) 1517 values.append(e.expression) 1518 else: 1519 keys.append(exp.Literal.string(f"_{i}")) 1520 values.append(e) 1521 1522 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1523 1524 @unsupported_args("weight", "accuracy") 1525 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1526 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1527 1528 def alterset_sql(self, expression: exp.AlterSet) -> str: 1529 exprs = self.expressions(expression, flat=True) 1530 exprs = f" {exprs}" if exprs else "" 1531 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1532 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1533 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1534 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1535 tag = self.expressions(expression, key="tag", flat=True) 1536 tag = f" TAG {tag}" if tag else "" 1537 1538 return f"SET{exprs}{file_format}{copy_options}{tag}" 1539 1540 def strtotime_sql(self, expression: exp.StrToTime): 1541 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1542 return self.func( 1543 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1544 ) 1545 1546 def timestampsub_sql(self, expression: exp.TimestampSub): 1547 return self.sql( 1548 exp.TimestampAdd( 1549 this=expression.this, 1550 expression=expression.expression * -1, 1551 unit=expression.unit, 1552 ) 1553 ) 1554 1555 def jsonextract_sql(self, expression: exp.JSONExtract): 1556 this = expression.this 1557 1558 # JSON strings are valid coming from other dialects such as BQ so 1559 # for these cases we PARSE_JSON preemptively 1560 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1561 "requires_json" 1562 ): 1563 this = exp.ParseJSON(this=this) 1564 1565 return self.func( 1566 "GET_PATH", 1567 this, 1568 expression.expression, 1569 ) 1570 1571 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1572 this = expression.this 1573 if this.is_string: 1574 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1575 1576 return self.func("TO_CHAR", this, self.format_time(expression)) 1577 1578 def datesub_sql(self, expression: exp.DateSub) -> str: 1579 value = expression.expression 1580 if value: 1581 value.replace(value * (-1)) 1582 else: 1583 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1584 1585 return date_delta_sql("DATEADD")(self, expression) 1586 1587 def select_sql(self, expression: exp.Select) -> str: 1588 limit = expression.args.get("limit") 1589 offset = expression.args.get("offset") 1590 if offset and not limit: 1591 expression.limit(exp.Null(), copy=False) 1592 return super().select_sql(expression) 1593 1594 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1595 is_materialized = expression.find(exp.MaterializedProperty) 1596 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1597 1598 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1599 # For materialized views, COPY GRANTS is located *before* the columns list 1600 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1601 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1602 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1603 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1604 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1605 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1606 1607 this_name = self.sql(expression.this, "this") 1608 copy_grants = self.sql(copy_grants_property) 1609 this_schema = self.schema_columns_sql(expression.this) 1610 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1611 1612 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1613 1614 return super().createable_sql(expression, locations) 1615 1616 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1617 this = expression.this 1618 1619 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1620 # and add it later as part of the WITHIN GROUP clause 1621 order = this if isinstance(this, exp.Order) else None 1622 if order: 1623 expression.set("this", order.this.pop()) 1624 1625 expr_sql = super().arrayagg_sql(expression) 1626 1627 if order: 1628 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1629 1630 return expr_sql 1631 1632 def array_sql(self, expression: exp.Array) -> str: 1633 expressions = expression.expressions 1634 1635 first_expr = seq_get(expressions, 0) 1636 if isinstance(first_expr, exp.Select): 1637 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1638 if first_expr.text("kind").upper() == "STRUCT": 1639 object_construct_args = [] 1640 for expr in first_expr.expressions: 1641 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1642 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1643 name = expr.this if isinstance(expr, exp.Alias) else expr 1644 1645 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1646 1647 array_agg = exp.ArrayAgg( 1648 this=_build_object_construct(args=object_construct_args) 1649 ) 1650 1651 first_expr.set("kind", None) 1652 first_expr.set("expressions", [array_agg]) 1653 1654 return self.sql(first_expr.subquery()) 1655 1656 return inline_array_sql(self, expression) 1657 1658 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1659 zone = self.sql(expression, "this") 1660 if not zone: 1661 return super().currentdate_sql(expression) 1662 1663 expr = exp.Cast( 1664 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1665 to=exp.DataType(this=exp.DataType.Type.DATE), 1666 ) 1667 return self.sql(expr) 1668 1669 def dot_sql(self, expression: exp.Dot) -> str: 1670 this = expression.this 1671 1672 if not this.type: 1673 from sqlglot.optimizer.annotate_types import annotate_types 1674 1675 this = annotate_types(this, dialect=self.dialect) 1676 1677 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1678 # Generate colon notation for the top level STRUCT 1679 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1680 1681 return super().dot_sql(expression)
486class Snowflake(Dialect): 487 # https://docs.snowflake.com/en/sql-reference/identifiers-syntax 488 NORMALIZATION_STRATEGY = NormalizationStrategy.UPPERCASE 489 NULL_ORDERING = "nulls_are_large" 490 TIME_FORMAT = "'YYYY-MM-DD HH24:MI:SS'" 491 SUPPORTS_USER_DEFINED_TYPES = False 492 SUPPORTS_SEMI_ANTI_JOIN = False 493 PREFER_CTE_ALIAS_COLUMN = True 494 TABLESAMPLE_SIZE_IS_PERCENT = True 495 COPY_PARAMS_ARE_CSV = False 496 ARRAY_AGG_INCLUDES_NULLS = None 497 ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN = False 498 TRY_CAST_REQUIRES_STRING = True 499 500 TIME_MAPPING = { 501 "YYYY": "%Y", 502 "yyyy": "%Y", 503 "YY": "%y", 504 "yy": "%y", 505 "MMMM": "%B", 506 "mmmm": "%B", 507 "MON": "%b", 508 "mon": "%b", 509 "MM": "%m", 510 "mm": "%m", 511 "DD": "%d", 512 "dd": "%-d", 513 "DY": "%a", 514 "dy": "%w", 515 "HH24": "%H", 516 "hh24": "%H", 517 "HH12": "%I", 518 "hh12": "%I", 519 "MI": "%M", 520 "mi": "%M", 521 "SS": "%S", 522 "ss": "%S", 523 "FF6": "%f", 524 "ff6": "%f", 525 } 526 527 DATE_PART_MAPPING = { 528 **Dialect.DATE_PART_MAPPING, 529 "ISOWEEK": "WEEKISO", 530 } 531 532 def quote_identifier(self, expression: E, identify: bool = True) -> E: 533 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 534 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 535 if ( 536 isinstance(expression, exp.Identifier) 537 and isinstance(expression.parent, exp.Table) 538 and expression.name.lower() == "dual" 539 ): 540 return expression # type: ignore 541 542 return super().quote_identifier(expression, identify=identify) 543 544 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 545 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 546 SINGLE_TOKENS.pop("$") 547 548 class Parser(parser.Parser): 549 IDENTIFY_PIVOT_STRINGS = True 550 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 551 COLON_IS_VARIANT_EXTRACT = True 552 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 553 554 ID_VAR_TOKENS = { 555 *parser.Parser.ID_VAR_TOKENS, 556 TokenType.MATCH_CONDITION, 557 } 558 559 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 560 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 561 562 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 563 564 FUNCTIONS = { 565 **parser.Parser.FUNCTIONS, 566 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 567 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 568 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 569 this=seq_get(args, 1), expression=seq_get(args, 0) 570 ), 571 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 572 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 573 start=seq_get(args, 0), 574 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 575 step=seq_get(args, 2), 576 ), 577 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 578 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 579 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 580 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 581 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 582 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 583 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 584 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 585 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 586 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 587 "DATE_TRUNC": _date_trunc_to_time, 588 "DATEADD": _build_date_time_add(exp.DateAdd), 589 "DATEDIFF": _build_datediff, 590 "DIV0": _build_if_from_div0, 591 "EDITDISTANCE": lambda args: exp.Levenshtein( 592 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 593 ), 594 "FLATTEN": exp.Explode.from_arg_list, 595 "GET_PATH": lambda args, dialect: exp.JSONExtract( 596 this=seq_get(args, 0), 597 expression=dialect.to_json_path(seq_get(args, 1)), 598 requires_json=True, 599 ), 600 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 601 "IFF": exp.If.from_arg_list, 602 "LAST_DAY": lambda args: exp.LastDay( 603 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 604 ), 605 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 606 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 607 "NULLIFZERO": _build_if_from_nullifzero, 608 "OBJECT_CONSTRUCT": _build_object_construct, 609 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 610 "REGEXP_REPLACE": _build_regexp_replace, 611 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 612 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 613 "REPLACE": build_replace_with_optional_replacement, 614 "RLIKE": exp.RegexpLike.from_arg_list, 615 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 616 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 617 "TIMEADD": _build_date_time_add(exp.TimeAdd), 618 "TIMEDIFF": _build_datediff, 619 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 620 "TIMESTAMPDIFF": _build_datediff, 621 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 622 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 623 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 624 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 625 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 626 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 627 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 628 "TRY_TO_TIMESTAMP": _build_datetime( 629 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 630 ), 631 "TO_CHAR": build_timetostr_or_tochar, 632 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 633 "TO_NUMBER": lambda args: exp.ToNumber( 634 this=seq_get(args, 0), 635 format=seq_get(args, 1), 636 precision=seq_get(args, 2), 637 scale=seq_get(args, 3), 638 ), 639 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 640 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 641 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 642 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 643 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 644 "TO_VARCHAR": exp.ToChar.from_arg_list, 645 "ZEROIFNULL": _build_if_from_zeroifnull, 646 } 647 648 FUNCTION_PARSERS = { 649 **parser.Parser.FUNCTION_PARSERS, 650 "DATE_PART": lambda self: self._parse_date_part(), 651 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 652 "LISTAGG": lambda self: self._parse_string_agg(), 653 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 654 } 655 FUNCTION_PARSERS.pop("TRIM") 656 657 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 658 659 ALTER_PARSERS = { 660 **parser.Parser.ALTER_PARSERS, 661 "UNSET": lambda self: self.expression( 662 exp.Set, 663 tag=self._match_text_seq("TAG"), 664 expressions=self._parse_csv(self._parse_id_var), 665 unset=True, 666 ), 667 } 668 669 STATEMENT_PARSERS = { 670 **parser.Parser.STATEMENT_PARSERS, 671 TokenType.GET: lambda self: self._parse_get(), 672 TokenType.PUT: lambda self: self._parse_put(), 673 TokenType.SHOW: lambda self: self._parse_show(), 674 } 675 676 PROPERTY_PARSERS = { 677 **parser.Parser.PROPERTY_PARSERS, 678 "CREDENTIALS": lambda self: self._parse_credentials_property(), 679 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 680 "LOCATION": lambda self: self._parse_location_property(), 681 "TAG": lambda self: self._parse_tag(), 682 "USING": lambda self: self._match_text_seq("TEMPLATE") 683 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 684 } 685 686 TYPE_CONVERTERS = { 687 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 688 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 689 } 690 691 SHOW_PARSERS = { 692 "DATABASES": _show_parser("DATABASES"), 693 "TERSE DATABASES": _show_parser("DATABASES"), 694 "SCHEMAS": _show_parser("SCHEMAS"), 695 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 696 "OBJECTS": _show_parser("OBJECTS"), 697 "TERSE OBJECTS": _show_parser("OBJECTS"), 698 "TABLES": _show_parser("TABLES"), 699 "TERSE TABLES": _show_parser("TABLES"), 700 "VIEWS": _show_parser("VIEWS"), 701 "TERSE VIEWS": _show_parser("VIEWS"), 702 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 703 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 704 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 705 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 706 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 707 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 708 "SEQUENCES": _show_parser("SEQUENCES"), 709 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 710 "STAGES": _show_parser("STAGES"), 711 "COLUMNS": _show_parser("COLUMNS"), 712 "USERS": _show_parser("USERS"), 713 "TERSE USERS": _show_parser("USERS"), 714 "FILE FORMATS": _show_parser("FILE FORMATS"), 715 "FUNCTIONS": _show_parser("FUNCTIONS"), 716 "PROCEDURES": _show_parser("PROCEDURES"), 717 "WAREHOUSES": _show_parser("WAREHOUSES"), 718 } 719 720 CONSTRAINT_PARSERS = { 721 **parser.Parser.CONSTRAINT_PARSERS, 722 "WITH": lambda self: self._parse_with_constraint(), 723 "MASKING": lambda self: self._parse_with_constraint(), 724 "PROJECTION": lambda self: self._parse_with_constraint(), 725 "TAG": lambda self: self._parse_with_constraint(), 726 } 727 728 STAGED_FILE_SINGLE_TOKENS = { 729 TokenType.DOT, 730 TokenType.MOD, 731 TokenType.SLASH, 732 } 733 734 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 735 736 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 737 738 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 739 740 LAMBDAS = { 741 **parser.Parser.LAMBDAS, 742 TokenType.ARROW: lambda self, expressions: self.expression( 743 exp.Lambda, 744 this=self._replace_lambda( 745 self._parse_assignment(), 746 expressions, 747 ), 748 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 749 ), 750 } 751 752 def _parse_use(self) -> exp.Use: 753 if self._match_text_seq("SECONDARY", "ROLES"): 754 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 755 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 756 return self.expression( 757 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 758 ) 759 760 return super()._parse_use() 761 762 def _negate_range( 763 self, this: t.Optional[exp.Expression] = None 764 ) -> t.Optional[exp.Expression]: 765 if not this: 766 return this 767 768 query = this.args.get("query") 769 if isinstance(this, exp.In) and isinstance(query, exp.Query): 770 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 771 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 772 # which can produce different results (most likely a SnowFlake bug). 773 # 774 # https://docs.snowflake.com/en/sql-reference/functions/in 775 # Context: https://github.com/tobymao/sqlglot/issues/3890 776 return self.expression( 777 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 778 ) 779 780 return self.expression(exp.Not, this=this) 781 782 def _parse_tag(self) -> exp.Tags: 783 return self.expression( 784 exp.Tags, 785 expressions=self._parse_wrapped_csv(self._parse_property), 786 ) 787 788 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 789 if self._prev.token_type != TokenType.WITH: 790 self._retreat(self._index - 1) 791 792 if self._match_text_seq("MASKING", "POLICY"): 793 policy = self._parse_column() 794 return self.expression( 795 exp.MaskingPolicyColumnConstraint, 796 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 797 expressions=self._match(TokenType.USING) 798 and self._parse_wrapped_csv(self._parse_id_var), 799 ) 800 if self._match_text_seq("PROJECTION", "POLICY"): 801 policy = self._parse_column() 802 return self.expression( 803 exp.ProjectionPolicyColumnConstraint, 804 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 805 ) 806 if self._match(TokenType.TAG): 807 return self._parse_tag() 808 809 return None 810 811 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 812 if self._match(TokenType.TAG): 813 return self._parse_tag() 814 815 return super()._parse_with_property() 816 817 def _parse_create(self) -> exp.Create | exp.Command: 818 expression = super()._parse_create() 819 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 820 # Replace the Table node with the enclosed Identifier 821 expression.this.replace(expression.this.this) 822 823 return expression 824 825 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 826 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 827 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 828 this = self._parse_var() or self._parse_type() 829 830 if not this: 831 return None 832 833 self._match(TokenType.COMMA) 834 expression = self._parse_bitwise() 835 this = map_date_part(this) 836 name = this.name.upper() 837 838 if name.startswith("EPOCH"): 839 if name == "EPOCH_MILLISECOND": 840 scale = 10**3 841 elif name == "EPOCH_MICROSECOND": 842 scale = 10**6 843 elif name == "EPOCH_NANOSECOND": 844 scale = 10**9 845 else: 846 scale = None 847 848 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 849 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 850 851 if scale: 852 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 853 854 return to_unix 855 856 return self.expression(exp.Extract, this=this, expression=expression) 857 858 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 859 if is_map: 860 # Keys are strings in Snowflake's objects, see also: 861 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 862 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 863 return self._parse_slice(self._parse_string()) 864 865 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 866 867 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 868 lateral = super()._parse_lateral() 869 if not lateral: 870 return lateral 871 872 if isinstance(lateral.this, exp.Explode): 873 table_alias = lateral.args.get("alias") 874 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 875 if table_alias and not table_alias.args.get("columns"): 876 table_alias.set("columns", columns) 877 elif not table_alias: 878 exp.alias_(lateral, "_flattened", table=columns, copy=False) 879 880 return lateral 881 882 def _parse_table_parts( 883 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 884 ) -> exp.Table: 885 # https://docs.snowflake.com/en/user-guide/querying-stage 886 if self._match(TokenType.STRING, advance=False): 887 table = self._parse_string() 888 elif self._match_text_seq("@", advance=False): 889 table = self._parse_location_path() 890 else: 891 table = None 892 893 if table: 894 file_format = None 895 pattern = None 896 897 wrapped = self._match(TokenType.L_PAREN) 898 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 899 if self._match_text_seq("FILE_FORMAT", "=>"): 900 file_format = self._parse_string() or super()._parse_table_parts( 901 is_db_reference=is_db_reference 902 ) 903 elif self._match_text_seq("PATTERN", "=>"): 904 pattern = self._parse_string() 905 else: 906 break 907 908 self._match(TokenType.COMMA) 909 910 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 911 else: 912 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 913 914 return table 915 916 def _parse_table( 917 self, 918 schema: bool = False, 919 joins: bool = False, 920 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 921 parse_bracket: bool = False, 922 is_db_reference: bool = False, 923 parse_partition: bool = False, 924 consume_pipe: bool = False, 925 ) -> t.Optional[exp.Expression]: 926 table = super()._parse_table( 927 schema=schema, 928 joins=joins, 929 alias_tokens=alias_tokens, 930 parse_bracket=parse_bracket, 931 is_db_reference=is_db_reference, 932 parse_partition=parse_partition, 933 ) 934 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 935 table_from_rows = table.this 936 for arg in exp.TableFromRows.arg_types: 937 if arg != "this": 938 table_from_rows.set(arg, table.args.get(arg)) 939 940 table = table_from_rows 941 942 return table 943 944 def _parse_id_var( 945 self, 946 any_token: bool = True, 947 tokens: t.Optional[t.Collection[TokenType]] = None, 948 ) -> t.Optional[exp.Expression]: 949 if self._match_text_seq("IDENTIFIER", "("): 950 identifier = ( 951 super()._parse_id_var(any_token=any_token, tokens=tokens) 952 or self._parse_string() 953 ) 954 self._match_r_paren() 955 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 956 957 return super()._parse_id_var(any_token=any_token, tokens=tokens) 958 959 def _parse_show_snowflake(self, this: str) -> exp.Show: 960 scope = None 961 scope_kind = None 962 963 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 964 # which is syntactically valid but has no effect on the output 965 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 966 967 history = self._match_text_seq("HISTORY") 968 969 like = self._parse_string() if self._match(TokenType.LIKE) else None 970 971 if self._match(TokenType.IN): 972 if self._match_text_seq("ACCOUNT"): 973 scope_kind = "ACCOUNT" 974 elif self._match_text_seq("CLASS"): 975 scope_kind = "CLASS" 976 scope = self._parse_table_parts() 977 elif self._match_text_seq("APPLICATION"): 978 scope_kind = "APPLICATION" 979 if self._match_text_seq("PACKAGE"): 980 scope_kind += " PACKAGE" 981 scope = self._parse_table_parts() 982 elif self._match_set(self.DB_CREATABLES): 983 scope_kind = self._prev.text.upper() 984 if self._curr: 985 scope = self._parse_table_parts() 986 elif self._curr: 987 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 988 scope = self._parse_table_parts() 989 990 return self.expression( 991 exp.Show, 992 **{ 993 "terse": terse, 994 "this": this, 995 "history": history, 996 "like": like, 997 "scope": scope, 998 "scope_kind": scope_kind, 999 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1000 "limit": self._parse_limit(), 1001 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1002 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1003 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1004 }, 1005 ) 1006 1007 def _parse_put(self) -> exp.Put | exp.Command: 1008 if self._curr.token_type != TokenType.STRING: 1009 return self._parse_as_command(self._prev) 1010 1011 return self.expression( 1012 exp.Put, 1013 this=self._parse_string(), 1014 target=self._parse_location_path(), 1015 properties=self._parse_properties(), 1016 ) 1017 1018 def _parse_get(self) -> t.Optional[exp.Expression]: 1019 start = self._prev 1020 1021 # If we detect GET( then we need to parse a function, not a statement 1022 if self._match(TokenType.L_PAREN): 1023 self._retreat(self._index - 2) 1024 return self._parse_expression() 1025 1026 target = self._parse_location_path() 1027 1028 # Parse as command if unquoted file path 1029 if self._curr.token_type == TokenType.URI_START: 1030 return self._parse_as_command(start) 1031 1032 return self.expression( 1033 exp.Get, 1034 this=self._parse_string(), 1035 target=target, 1036 properties=self._parse_properties(), 1037 ) 1038 1039 def _parse_location_property(self) -> exp.LocationProperty: 1040 self._match(TokenType.EQ) 1041 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1042 1043 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1044 # Parse either a subquery or a staged file 1045 return ( 1046 self._parse_select(table=True, parse_subquery_alias=False) 1047 if self._match(TokenType.L_PAREN, advance=False) 1048 else self._parse_table_parts() 1049 ) 1050 1051 def _parse_location_path(self) -> exp.Var: 1052 start = self._curr 1053 self._advance_any(ignore_reserved=True) 1054 1055 # We avoid consuming a comma token because external tables like @foo and @bar 1056 # can be joined in a query with a comma separator, as well as closing paren 1057 # in case of subqueries 1058 while self._is_connected() and not self._match_set( 1059 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1060 ): 1061 self._advance_any(ignore_reserved=True) 1062 1063 return exp.var(self._find_sql(start, self._prev)) 1064 1065 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1066 this = super()._parse_lambda_arg() 1067 1068 if not this: 1069 return this 1070 1071 typ = self._parse_types() 1072 1073 if typ: 1074 return self.expression(exp.Cast, this=this, to=typ) 1075 1076 return this 1077 1078 def _parse_foreign_key(self) -> exp.ForeignKey: 1079 # inlineFK, the REFERENCES columns are implied 1080 if self._match(TokenType.REFERENCES, advance=False): 1081 return self.expression(exp.ForeignKey) 1082 1083 # outoflineFK, explicitly names the columns 1084 return super()._parse_foreign_key() 1085 1086 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1087 self._match(TokenType.EQ) 1088 if self._match(TokenType.L_PAREN, advance=False): 1089 expressions = self._parse_wrapped_options() 1090 else: 1091 expressions = [self._parse_format_name()] 1092 1093 return self.expression( 1094 exp.FileFormatProperty, 1095 expressions=expressions, 1096 ) 1097 1098 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1099 return self.expression( 1100 exp.CredentialsProperty, 1101 expressions=self._parse_wrapped_options(), 1102 ) 1103 1104 def _parse_semantic_view(self) -> exp.SemanticView: 1105 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1106 1107 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1108 if self._match_text_seq("DIMENSIONS"): 1109 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1110 if self._match_text_seq("METRICS"): 1111 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1112 if self._match_text_seq("WHERE"): 1113 kwargs["where"] = self._parse_expression() 1114 1115 return self.expression(exp.SemanticView, **kwargs) 1116 1117 class Tokenizer(tokens.Tokenizer): 1118 STRING_ESCAPES = ["\\", "'"] 1119 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1120 RAW_STRINGS = ["$$"] 1121 COMMENTS = ["--", "//", ("/*", "*/")] 1122 NESTED_COMMENTS = False 1123 1124 KEYWORDS = { 1125 **tokens.Tokenizer.KEYWORDS, 1126 "FILE://": TokenType.URI_START, 1127 "BYTEINT": TokenType.INT, 1128 "EXCLUDE": TokenType.EXCEPT, 1129 "FILE FORMAT": TokenType.FILE_FORMAT, 1130 "GET": TokenType.GET, 1131 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1132 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1133 "MINUS": TokenType.EXCEPT, 1134 "NCHAR VARYING": TokenType.VARCHAR, 1135 "PUT": TokenType.PUT, 1136 "REMOVE": TokenType.COMMAND, 1137 "RM": TokenType.COMMAND, 1138 "SAMPLE": TokenType.TABLE_SAMPLE, 1139 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1140 "SQL_DOUBLE": TokenType.DOUBLE, 1141 "SQL_VARCHAR": TokenType.VARCHAR, 1142 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1143 "TAG": TokenType.TAG, 1144 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1145 "TOP": TokenType.TOP, 1146 "WAREHOUSE": TokenType.WAREHOUSE, 1147 "STAGE": TokenType.STAGE, 1148 "STREAMLIT": TokenType.STREAMLIT, 1149 } 1150 KEYWORDS.pop("/*+") 1151 1152 SINGLE_TOKENS = { 1153 **tokens.Tokenizer.SINGLE_TOKENS, 1154 "$": TokenType.PARAMETER, 1155 } 1156 1157 VAR_SINGLE_TOKENS = {"$"} 1158 1159 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW} 1160 1161 class Generator(generator.Generator): 1162 PARAMETER_TOKEN = "$" 1163 MATCHED_BY_SOURCE = False 1164 SINGLE_STRING_INTERVAL = True 1165 JOIN_HINTS = False 1166 TABLE_HINTS = False 1167 QUERY_HINTS = False 1168 AGGREGATE_FILTER_SUPPORTED = False 1169 SUPPORTS_TABLE_COPY = False 1170 COLLATE_IS_FUNC = True 1171 LIMIT_ONLY_LITERALS = True 1172 JSON_KEY_VALUE_PAIR_SEP = "," 1173 INSERT_OVERWRITE = " OVERWRITE INTO" 1174 STRUCT_DELIMITER = ("(", ")") 1175 COPY_PARAMS_ARE_WRAPPED = False 1176 COPY_PARAMS_EQ_REQUIRED = True 1177 STAR_EXCEPT = "EXCLUDE" 1178 SUPPORTS_EXPLODING_PROJECTIONS = False 1179 ARRAY_CONCAT_IS_VAR_LEN = False 1180 SUPPORTS_CONVERT_TIMEZONE = True 1181 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1182 SUPPORTS_MEDIAN = True 1183 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1184 SUPPORTS_DECODE_CASE = True 1185 IS_BOOL_ALLOWED = False 1186 1187 TRANSFORMS = { 1188 **generator.Generator.TRANSFORMS, 1189 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1190 exp.ArgMax: rename_func("MAX_BY"), 1191 exp.ArgMin: rename_func("MIN_BY"), 1192 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1193 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1194 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1195 exp.AtTimeZone: lambda self, e: self.func( 1196 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1197 ), 1198 exp.BitwiseOr: rename_func("BITOR"), 1199 exp.BitwiseXor: rename_func("BITXOR"), 1200 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1201 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1202 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1203 exp.DateAdd: date_delta_sql("DATEADD"), 1204 exp.DateDiff: date_delta_sql("DATEDIFF"), 1205 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1206 exp.DatetimeDiff: timestampdiff_sql, 1207 exp.DateStrToDate: datestrtodate_sql, 1208 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1209 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1210 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1211 exp.DayOfYear: rename_func("DAYOFYEAR"), 1212 exp.Explode: rename_func("FLATTEN"), 1213 exp.Extract: lambda self, e: self.func( 1214 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1215 ), 1216 exp.FileFormatProperty: lambda self, 1217 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1218 exp.FromTimeZone: lambda self, e: self.func( 1219 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1220 ), 1221 exp.GenerateSeries: lambda self, e: self.func( 1222 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1223 ), 1224 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1225 exp.If: if_sql(name="IFF", false_value="NULL"), 1226 exp.JSONExtractArray: _json_extract_value_array_sql, 1227 exp.JSONExtractScalar: lambda self, e: self.func( 1228 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1229 ), 1230 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1231 exp.JSONPathRoot: lambda *_: "", 1232 exp.JSONValueArray: _json_extract_value_array_sql, 1233 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1234 rename_func("EDITDISTANCE") 1235 ), 1236 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1237 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1238 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1239 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1240 exp.MakeInterval: no_make_interval_sql, 1241 exp.Max: max_or_greatest, 1242 exp.Min: min_or_least, 1243 exp.ParseJSON: lambda self, e: self.func( 1244 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1245 ), 1246 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1247 exp.PercentileCont: transforms.preprocess( 1248 [transforms.add_within_group_for_percentiles] 1249 ), 1250 exp.PercentileDisc: transforms.preprocess( 1251 [transforms.add_within_group_for_percentiles] 1252 ), 1253 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1254 exp.RegexpExtract: _regexpextract_sql, 1255 exp.RegexpExtractAll: _regexpextract_sql, 1256 exp.RegexpILike: _regexpilike_sql, 1257 exp.Rand: rename_func("RANDOM"), 1258 exp.Select: transforms.preprocess( 1259 [ 1260 transforms.eliminate_window_clause, 1261 transforms.eliminate_distinct_on, 1262 transforms.explode_projection_to_unnest(), 1263 transforms.eliminate_semi_and_anti_joins, 1264 _transform_generate_date_array, 1265 _qualify_unnested_columns, 1266 _eliminate_dot_variant_lookup, 1267 ] 1268 ), 1269 exp.SHA: rename_func("SHA1"), 1270 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1271 exp.StartsWith: rename_func("STARTSWITH"), 1272 exp.EndsWith: rename_func("ENDSWITH"), 1273 exp.StrPosition: lambda self, e: strposition_sql( 1274 self, e, func_name="CHARINDEX", supports_position=True 1275 ), 1276 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1277 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1278 exp.Stuff: rename_func("INSERT"), 1279 exp.StPoint: rename_func("ST_MAKEPOINT"), 1280 exp.TimeAdd: date_delta_sql("TIMEADD"), 1281 exp.Timestamp: no_timestamp_sql, 1282 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1283 exp.TimestampDiff: lambda self, e: self.func( 1284 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1285 ), 1286 exp.TimestampTrunc: timestamptrunc_sql(), 1287 exp.TimeStrToTime: timestrtotime_sql, 1288 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1289 exp.ToArray: rename_func("TO_ARRAY"), 1290 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1291 exp.ToDouble: rename_func("TO_DOUBLE"), 1292 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1293 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1294 exp.TsOrDsToDate: lambda self, e: self.func( 1295 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1296 ), 1297 exp.TsOrDsToTime: lambda self, e: self.func( 1298 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1299 ), 1300 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1301 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1302 exp.Uuid: rename_func("UUID_STRING"), 1303 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1304 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1305 exp.Xor: rename_func("BOOLXOR"), 1306 } 1307 1308 SUPPORTED_JSON_PATH_PARTS = { 1309 exp.JSONPathKey, 1310 exp.JSONPathRoot, 1311 exp.JSONPathSubscript, 1312 } 1313 1314 TYPE_MAPPING = { 1315 **generator.Generator.TYPE_MAPPING, 1316 exp.DataType.Type.NESTED: "OBJECT", 1317 exp.DataType.Type.STRUCT: "OBJECT", 1318 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1319 } 1320 1321 TOKEN_MAPPING = { 1322 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1323 } 1324 1325 PROPERTIES_LOCATION = { 1326 **generator.Generator.PROPERTIES_LOCATION, 1327 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1328 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1329 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1330 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1331 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1332 } 1333 1334 UNSUPPORTED_VALUES_EXPRESSIONS = { 1335 exp.Map, 1336 exp.StarMap, 1337 exp.Struct, 1338 exp.VarMap, 1339 } 1340 1341 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1342 1343 def with_properties(self, properties: exp.Properties) -> str: 1344 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1345 1346 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1347 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1348 values_as_table = False 1349 1350 return super().values_sql(expression, values_as_table=values_as_table) 1351 1352 def datatype_sql(self, expression: exp.DataType) -> str: 1353 expressions = expression.expressions 1354 if ( 1355 expressions 1356 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1357 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1358 ): 1359 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1360 return "OBJECT" 1361 1362 return super().datatype_sql(expression) 1363 1364 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1365 return self.func( 1366 "TO_NUMBER", 1367 expression.this, 1368 expression.args.get("format"), 1369 expression.args.get("precision"), 1370 expression.args.get("scale"), 1371 ) 1372 1373 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1374 milli = expression.args.get("milli") 1375 if milli is not None: 1376 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1377 expression.set("nano", milli_to_nano) 1378 1379 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1380 1381 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1382 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1383 return self.func("TO_GEOGRAPHY", expression.this) 1384 if expression.is_type(exp.DataType.Type.GEOMETRY): 1385 return self.func("TO_GEOMETRY", expression.this) 1386 1387 return super().cast_sql(expression, safe_prefix=safe_prefix) 1388 1389 def trycast_sql(self, expression: exp.TryCast) -> str: 1390 value = expression.this 1391 1392 if value.type is None: 1393 from sqlglot.optimizer.annotate_types import annotate_types 1394 1395 value = annotate_types(value, dialect=self.dialect) 1396 1397 # Snowflake requires that TRY_CAST's value be a string 1398 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1399 # if we can deduce that the value is a string, then we can generate TRY_CAST 1400 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1401 return super().trycast_sql(expression) 1402 1403 return self.cast_sql(expression) 1404 1405 def log_sql(self, expression: exp.Log) -> str: 1406 if not expression.expression: 1407 return self.func("LN", expression.this) 1408 1409 return super().log_sql(expression) 1410 1411 def unnest_sql(self, expression: exp.Unnest) -> str: 1412 unnest_alias = expression.args.get("alias") 1413 offset = expression.args.get("offset") 1414 1415 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1416 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1417 1418 columns = [ 1419 exp.to_identifier("seq"), 1420 exp.to_identifier("key"), 1421 exp.to_identifier("path"), 1422 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1423 value, 1424 exp.to_identifier("this"), 1425 ] 1426 1427 if unnest_alias: 1428 unnest_alias.set("columns", columns) 1429 else: 1430 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1431 1432 table_input = self.sql(expression.expressions[0]) 1433 if not table_input.startswith("INPUT =>"): 1434 table_input = f"INPUT => {table_input}" 1435 1436 expression_parent = expression.parent 1437 1438 explode = ( 1439 f"FLATTEN({table_input})" 1440 if isinstance(expression_parent, exp.Lateral) 1441 else f"TABLE(FLATTEN({table_input}))" 1442 ) 1443 alias = self.sql(unnest_alias) 1444 alias = f" AS {alias}" if alias else "" 1445 value = ( 1446 "" 1447 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1448 else f"{value} FROM " 1449 ) 1450 1451 return f"{value}{explode}{alias}" 1452 1453 def show_sql(self, expression: exp.Show) -> str: 1454 terse = "TERSE " if expression.args.get("terse") else "" 1455 history = " HISTORY" if expression.args.get("history") else "" 1456 like = self.sql(expression, "like") 1457 like = f" LIKE {like}" if like else "" 1458 1459 scope = self.sql(expression, "scope") 1460 scope = f" {scope}" if scope else "" 1461 1462 scope_kind = self.sql(expression, "scope_kind") 1463 if scope_kind: 1464 scope_kind = f" IN {scope_kind}" 1465 1466 starts_with = self.sql(expression, "starts_with") 1467 if starts_with: 1468 starts_with = f" STARTS WITH {starts_with}" 1469 1470 limit = self.sql(expression, "limit") 1471 1472 from_ = self.sql(expression, "from") 1473 if from_: 1474 from_ = f" FROM {from_}" 1475 1476 privileges = self.expressions(expression, key="privileges", flat=True) 1477 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1478 1479 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1480 1481 def describe_sql(self, expression: exp.Describe) -> str: 1482 # Default to table if kind is unknown 1483 kind_value = expression.args.get("kind") or "TABLE" 1484 kind = f" {kind_value}" if kind_value else "" 1485 this = f" {self.sql(expression, 'this')}" 1486 expressions = self.expressions(expression, flat=True) 1487 expressions = f" {expressions}" if expressions else "" 1488 return f"DESCRIBE{kind}{this}{expressions}" 1489 1490 def generatedasidentitycolumnconstraint_sql( 1491 self, expression: exp.GeneratedAsIdentityColumnConstraint 1492 ) -> str: 1493 start = expression.args.get("start") 1494 start = f" START {start}" if start else "" 1495 increment = expression.args.get("increment") 1496 increment = f" INCREMENT {increment}" if increment else "" 1497 1498 order = expression.args.get("order") 1499 if order is not None: 1500 order_clause = " ORDER" if order else " NOORDER" 1501 else: 1502 order_clause = "" 1503 1504 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1505 1506 def cluster_sql(self, expression: exp.Cluster) -> str: 1507 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1508 1509 def struct_sql(self, expression: exp.Struct) -> str: 1510 keys = [] 1511 values = [] 1512 1513 for i, e in enumerate(expression.expressions): 1514 if isinstance(e, exp.PropertyEQ): 1515 keys.append( 1516 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1517 ) 1518 values.append(e.expression) 1519 else: 1520 keys.append(exp.Literal.string(f"_{i}")) 1521 values.append(e) 1522 1523 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1524 1525 @unsupported_args("weight", "accuracy") 1526 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1527 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1528 1529 def alterset_sql(self, expression: exp.AlterSet) -> str: 1530 exprs = self.expressions(expression, flat=True) 1531 exprs = f" {exprs}" if exprs else "" 1532 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1533 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1534 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1535 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1536 tag = self.expressions(expression, key="tag", flat=True) 1537 tag = f" TAG {tag}" if tag else "" 1538 1539 return f"SET{exprs}{file_format}{copy_options}{tag}" 1540 1541 def strtotime_sql(self, expression: exp.StrToTime): 1542 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1543 return self.func( 1544 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1545 ) 1546 1547 def timestampsub_sql(self, expression: exp.TimestampSub): 1548 return self.sql( 1549 exp.TimestampAdd( 1550 this=expression.this, 1551 expression=expression.expression * -1, 1552 unit=expression.unit, 1553 ) 1554 ) 1555 1556 def jsonextract_sql(self, expression: exp.JSONExtract): 1557 this = expression.this 1558 1559 # JSON strings are valid coming from other dialects such as BQ so 1560 # for these cases we PARSE_JSON preemptively 1561 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1562 "requires_json" 1563 ): 1564 this = exp.ParseJSON(this=this) 1565 1566 return self.func( 1567 "GET_PATH", 1568 this, 1569 expression.expression, 1570 ) 1571 1572 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1573 this = expression.this 1574 if this.is_string: 1575 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1576 1577 return self.func("TO_CHAR", this, self.format_time(expression)) 1578 1579 def datesub_sql(self, expression: exp.DateSub) -> str: 1580 value = expression.expression 1581 if value: 1582 value.replace(value * (-1)) 1583 else: 1584 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1585 1586 return date_delta_sql("DATEADD")(self, expression) 1587 1588 def select_sql(self, expression: exp.Select) -> str: 1589 limit = expression.args.get("limit") 1590 offset = expression.args.get("offset") 1591 if offset and not limit: 1592 expression.limit(exp.Null(), copy=False) 1593 return super().select_sql(expression) 1594 1595 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1596 is_materialized = expression.find(exp.MaterializedProperty) 1597 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1598 1599 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1600 # For materialized views, COPY GRANTS is located *before* the columns list 1601 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1602 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1603 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1604 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1605 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1606 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1607 1608 this_name = self.sql(expression.this, "this") 1609 copy_grants = self.sql(copy_grants_property) 1610 this_schema = self.schema_columns_sql(expression.this) 1611 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1612 1613 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1614 1615 return super().createable_sql(expression, locations) 1616 1617 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1618 this = expression.this 1619 1620 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1621 # and add it later as part of the WITHIN GROUP clause 1622 order = this if isinstance(this, exp.Order) else None 1623 if order: 1624 expression.set("this", order.this.pop()) 1625 1626 expr_sql = super().arrayagg_sql(expression) 1627 1628 if order: 1629 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1630 1631 return expr_sql 1632 1633 def array_sql(self, expression: exp.Array) -> str: 1634 expressions = expression.expressions 1635 1636 first_expr = seq_get(expressions, 0) 1637 if isinstance(first_expr, exp.Select): 1638 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1639 if first_expr.text("kind").upper() == "STRUCT": 1640 object_construct_args = [] 1641 for expr in first_expr.expressions: 1642 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1643 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1644 name = expr.this if isinstance(expr, exp.Alias) else expr 1645 1646 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1647 1648 array_agg = exp.ArrayAgg( 1649 this=_build_object_construct(args=object_construct_args) 1650 ) 1651 1652 first_expr.set("kind", None) 1653 first_expr.set("expressions", [array_agg]) 1654 1655 return self.sql(first_expr.subquery()) 1656 1657 return inline_array_sql(self, expression) 1658 1659 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1660 zone = self.sql(expression, "this") 1661 if not zone: 1662 return super().currentdate_sql(expression) 1663 1664 expr = exp.Cast( 1665 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1666 to=exp.DataType(this=exp.DataType.Type.DATE), 1667 ) 1668 return self.sql(expr) 1669 1670 def dot_sql(self, expression: exp.Dot) -> str: 1671 this = expression.this 1672 1673 if not this.type: 1674 from sqlglot.optimizer.annotate_types import annotate_types 1675 1676 this = annotate_types(this, dialect=self.dialect) 1677 1678 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1679 # Generate colon notation for the top level STRUCT 1680 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1681 1682 return super().dot_sql(expression)
Specifies the strategy according to which identifiers should be normalized.
Default NULL ordering method to use if not explicitly set.
Possible values: "nulls_are_small", "nulls_are_large", "nulls_are_last"
Some dialects, such as Snowflake, allow you to reference a CTE column alias in the HAVING clause of the CTE. This flag will cause the CTE alias columns to override any projection aliases in the subquery.
For example, WITH y(c) AS ( SELECT SUM(a) FROM (SELECT 1 a) AS x HAVING c > 0 ) SELECT c FROM y;
will be rewritten as
WITH y(c) AS (
SELECT SUM(a) AS c FROM (SELECT 1 AS a) AS x HAVING c > 0
) SELECT c FROM y;
Associates this dialect's time formats with their equivalent Python strftime formats.
532 def quote_identifier(self, expression: E, identify: bool = True) -> E: 533 # This disables quoting DUAL in SELECT ... FROM DUAL, because Snowflake treats an 534 # unquoted DUAL keyword in a special way and does not map it to a user-defined table 535 if ( 536 isinstance(expression, exp.Identifier) 537 and isinstance(expression.parent, exp.Table) 538 and expression.name.lower() == "dual" 539 ): 540 return expression # type: ignore 541 542 return super().quote_identifier(expression, identify=identify)
Adds quotes to a given identifier.
Arguments:
- expression: The expression of interest. If it's not an
Identifier, this method is a no-op. - identify: If set to
False, the quotes will only be added if the identifier is deemed "unsafe", with respect to its characters and this dialect's normalization strategy.
Mapping of an escaped sequence (\n) to its unescaped version (
).
544 class JSONPathTokenizer(jsonpath.JSONPathTokenizer): 545 SINGLE_TOKENS = jsonpath.JSONPathTokenizer.SINGLE_TOKENS.copy() 546 SINGLE_TOKENS.pop("$")
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEX_STRINGS
- RAW_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- COMMENTS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
548 class Parser(parser.Parser): 549 IDENTIFY_PIVOT_STRINGS = True 550 DEFAULT_SAMPLING_METHOD = "BERNOULLI" 551 COLON_IS_VARIANT_EXTRACT = True 552 JSON_EXTRACT_REQUIRES_JSON_EXPRESSION = True 553 554 ID_VAR_TOKENS = { 555 *parser.Parser.ID_VAR_TOKENS, 556 TokenType.MATCH_CONDITION, 557 } 558 559 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS | {TokenType.WINDOW} 560 TABLE_ALIAS_TOKENS.discard(TokenType.MATCH_CONDITION) 561 562 COLON_PLACEHOLDER_TOKENS = ID_VAR_TOKENS | {TokenType.NUMBER} 563 564 FUNCTIONS = { 565 **parser.Parser.FUNCTIONS, 566 "APPROX_PERCENTILE": exp.ApproxQuantile.from_arg_list, 567 "ARRAY_CONSTRUCT": lambda args: exp.Array(expressions=args), 568 "ARRAY_CONTAINS": lambda args: exp.ArrayContains( 569 this=seq_get(args, 1), expression=seq_get(args, 0) 570 ), 571 "ARRAY_GENERATE_RANGE": lambda args: exp.GenerateSeries( 572 # ARRAY_GENERATE_RANGE has an exlusive end; we normalize it to be inclusive 573 start=seq_get(args, 0), 574 end=exp.Sub(this=seq_get(args, 1), expression=exp.Literal.number(1)), 575 step=seq_get(args, 2), 576 ), 577 "BITXOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 578 "BIT_XOR": _build_bitwise(exp.BitwiseXor, "BITXOR"), 579 "BITOR": _build_bitwise(exp.BitwiseOr, "BITOR"), 580 "BIT_OR": _build_bitwise(exp.BitwiseOr, "BITOR"), 581 "BITSHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BITSHIFTLEFT"), 582 "BIT_SHIFTLEFT": _build_bitwise(exp.BitwiseLeftShift, "BIT_SHIFTLEFT"), 583 "BITSHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BITSHIFTRIGHT"), 584 "BIT_SHIFTRIGHT": _build_bitwise(exp.BitwiseRightShift, "BIT_SHIFTRIGHT"), 585 "BOOLXOR": _build_bitwise(exp.Xor, "BOOLXOR"), 586 "DATE": _build_datetime("DATE", exp.DataType.Type.DATE), 587 "DATE_TRUNC": _date_trunc_to_time, 588 "DATEADD": _build_date_time_add(exp.DateAdd), 589 "DATEDIFF": _build_datediff, 590 "DIV0": _build_if_from_div0, 591 "EDITDISTANCE": lambda args: exp.Levenshtein( 592 this=seq_get(args, 0), expression=seq_get(args, 1), max_dist=seq_get(args, 2) 593 ), 594 "FLATTEN": exp.Explode.from_arg_list, 595 "GET_PATH": lambda args, dialect: exp.JSONExtract( 596 this=seq_get(args, 0), 597 expression=dialect.to_json_path(seq_get(args, 1)), 598 requires_json=True, 599 ), 600 "HEX_DECODE_BINARY": exp.Unhex.from_arg_list, 601 "IFF": exp.If.from_arg_list, 602 "LAST_DAY": lambda args: exp.LastDay( 603 this=seq_get(args, 0), unit=map_date_part(seq_get(args, 1)) 604 ), 605 "LEN": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 606 "LENGTH": lambda args: exp.Length(this=seq_get(args, 0), binary=True), 607 "NULLIFZERO": _build_if_from_nullifzero, 608 "OBJECT_CONSTRUCT": _build_object_construct, 609 "REGEXP_EXTRACT_ALL": _build_regexp_extract(exp.RegexpExtractAll), 610 "REGEXP_REPLACE": _build_regexp_replace, 611 "REGEXP_SUBSTR": _build_regexp_extract(exp.RegexpExtract), 612 "REGEXP_SUBSTR_ALL": _build_regexp_extract(exp.RegexpExtractAll), 613 "REPLACE": build_replace_with_optional_replacement, 614 "RLIKE": exp.RegexpLike.from_arg_list, 615 "SQUARE": lambda args: exp.Pow(this=seq_get(args, 0), expression=exp.Literal.number(2)), 616 "TABLE": lambda args: exp.TableFromRows(this=seq_get(args, 0)), 617 "TIMEADD": _build_date_time_add(exp.TimeAdd), 618 "TIMEDIFF": _build_datediff, 619 "TIMESTAMPADD": _build_date_time_add(exp.DateAdd), 620 "TIMESTAMPDIFF": _build_datediff, 621 "TIMESTAMPFROMPARTS": build_timestamp_from_parts, 622 "TIMESTAMP_FROM_PARTS": build_timestamp_from_parts, 623 "TIMESTAMPNTZFROMPARTS": build_timestamp_from_parts, 624 "TIMESTAMP_NTZ_FROM_PARTS": build_timestamp_from_parts, 625 "TRY_PARSE_JSON": lambda args: exp.ParseJSON(this=seq_get(args, 0), safe=True), 626 "TRY_TO_DATE": _build_datetime("TRY_TO_DATE", exp.DataType.Type.DATE, safe=True), 627 "TRY_TO_TIME": _build_datetime("TRY_TO_TIME", exp.DataType.Type.TIME, safe=True), 628 "TRY_TO_TIMESTAMP": _build_datetime( 629 "TRY_TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP, safe=True 630 ), 631 "TO_CHAR": build_timetostr_or_tochar, 632 "TO_DATE": _build_datetime("TO_DATE", exp.DataType.Type.DATE), 633 "TO_NUMBER": lambda args: exp.ToNumber( 634 this=seq_get(args, 0), 635 format=seq_get(args, 1), 636 precision=seq_get(args, 2), 637 scale=seq_get(args, 3), 638 ), 639 "TO_TIME": _build_datetime("TO_TIME", exp.DataType.Type.TIME), 640 "TO_TIMESTAMP": _build_datetime("TO_TIMESTAMP", exp.DataType.Type.TIMESTAMP), 641 "TO_TIMESTAMP_LTZ": _build_datetime("TO_TIMESTAMP_LTZ", exp.DataType.Type.TIMESTAMPLTZ), 642 "TO_TIMESTAMP_NTZ": _build_datetime("TO_TIMESTAMP_NTZ", exp.DataType.Type.TIMESTAMP), 643 "TO_TIMESTAMP_TZ": _build_datetime("TO_TIMESTAMP_TZ", exp.DataType.Type.TIMESTAMPTZ), 644 "TO_VARCHAR": exp.ToChar.from_arg_list, 645 "ZEROIFNULL": _build_if_from_zeroifnull, 646 } 647 648 FUNCTION_PARSERS = { 649 **parser.Parser.FUNCTION_PARSERS, 650 "DATE_PART": lambda self: self._parse_date_part(), 651 "OBJECT_CONSTRUCT_KEEP_NULL": lambda self: self._parse_json_object(), 652 "LISTAGG": lambda self: self._parse_string_agg(), 653 "SEMANTIC_VIEW": lambda self: self._parse_semantic_view(), 654 } 655 FUNCTION_PARSERS.pop("TRIM") 656 657 TIMESTAMPS = parser.Parser.TIMESTAMPS - {TokenType.TIME} 658 659 ALTER_PARSERS = { 660 **parser.Parser.ALTER_PARSERS, 661 "UNSET": lambda self: self.expression( 662 exp.Set, 663 tag=self._match_text_seq("TAG"), 664 expressions=self._parse_csv(self._parse_id_var), 665 unset=True, 666 ), 667 } 668 669 STATEMENT_PARSERS = { 670 **parser.Parser.STATEMENT_PARSERS, 671 TokenType.GET: lambda self: self._parse_get(), 672 TokenType.PUT: lambda self: self._parse_put(), 673 TokenType.SHOW: lambda self: self._parse_show(), 674 } 675 676 PROPERTY_PARSERS = { 677 **parser.Parser.PROPERTY_PARSERS, 678 "CREDENTIALS": lambda self: self._parse_credentials_property(), 679 "FILE_FORMAT": lambda self: self._parse_file_format_property(), 680 "LOCATION": lambda self: self._parse_location_property(), 681 "TAG": lambda self: self._parse_tag(), 682 "USING": lambda self: self._match_text_seq("TEMPLATE") 683 and self.expression(exp.UsingTemplateProperty, this=self._parse_statement()), 684 } 685 686 TYPE_CONVERTERS = { 687 # https://docs.snowflake.com/en/sql-reference/data-types-numeric#number 688 exp.DataType.Type.DECIMAL: build_default_decimal_type(precision=38, scale=0), 689 } 690 691 SHOW_PARSERS = { 692 "DATABASES": _show_parser("DATABASES"), 693 "TERSE DATABASES": _show_parser("DATABASES"), 694 "SCHEMAS": _show_parser("SCHEMAS"), 695 "TERSE SCHEMAS": _show_parser("SCHEMAS"), 696 "OBJECTS": _show_parser("OBJECTS"), 697 "TERSE OBJECTS": _show_parser("OBJECTS"), 698 "TABLES": _show_parser("TABLES"), 699 "TERSE TABLES": _show_parser("TABLES"), 700 "VIEWS": _show_parser("VIEWS"), 701 "TERSE VIEWS": _show_parser("VIEWS"), 702 "PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 703 "TERSE PRIMARY KEYS": _show_parser("PRIMARY KEYS"), 704 "IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 705 "TERSE IMPORTED KEYS": _show_parser("IMPORTED KEYS"), 706 "UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 707 "TERSE UNIQUE KEYS": _show_parser("UNIQUE KEYS"), 708 "SEQUENCES": _show_parser("SEQUENCES"), 709 "TERSE SEQUENCES": _show_parser("SEQUENCES"), 710 "STAGES": _show_parser("STAGES"), 711 "COLUMNS": _show_parser("COLUMNS"), 712 "USERS": _show_parser("USERS"), 713 "TERSE USERS": _show_parser("USERS"), 714 "FILE FORMATS": _show_parser("FILE FORMATS"), 715 "FUNCTIONS": _show_parser("FUNCTIONS"), 716 "PROCEDURES": _show_parser("PROCEDURES"), 717 "WAREHOUSES": _show_parser("WAREHOUSES"), 718 } 719 720 CONSTRAINT_PARSERS = { 721 **parser.Parser.CONSTRAINT_PARSERS, 722 "WITH": lambda self: self._parse_with_constraint(), 723 "MASKING": lambda self: self._parse_with_constraint(), 724 "PROJECTION": lambda self: self._parse_with_constraint(), 725 "TAG": lambda self: self._parse_with_constraint(), 726 } 727 728 STAGED_FILE_SINGLE_TOKENS = { 729 TokenType.DOT, 730 TokenType.MOD, 731 TokenType.SLASH, 732 } 733 734 FLATTEN_COLUMNS = ["SEQ", "KEY", "PATH", "INDEX", "VALUE", "THIS"] 735 736 SCHEMA_KINDS = {"OBJECTS", "TABLES", "VIEWS", "SEQUENCES", "UNIQUE KEYS", "IMPORTED KEYS"} 737 738 NON_TABLE_CREATABLES = {"STORAGE INTEGRATION", "TAG", "WAREHOUSE", "STREAMLIT"} 739 740 LAMBDAS = { 741 **parser.Parser.LAMBDAS, 742 TokenType.ARROW: lambda self, expressions: self.expression( 743 exp.Lambda, 744 this=self._replace_lambda( 745 self._parse_assignment(), 746 expressions, 747 ), 748 expressions=[e.this if isinstance(e, exp.Cast) else e for e in expressions], 749 ), 750 } 751 752 def _parse_use(self) -> exp.Use: 753 if self._match_text_seq("SECONDARY", "ROLES"): 754 this = self._match_texts(("ALL", "NONE")) and exp.var(self._prev.text.upper()) 755 roles = None if this else self._parse_csv(lambda: self._parse_table(schema=False)) 756 return self.expression( 757 exp.Use, kind="SECONDARY ROLES", this=this, expressions=roles 758 ) 759 760 return super()._parse_use() 761 762 def _negate_range( 763 self, this: t.Optional[exp.Expression] = None 764 ) -> t.Optional[exp.Expression]: 765 if not this: 766 return this 767 768 query = this.args.get("query") 769 if isinstance(this, exp.In) and isinstance(query, exp.Query): 770 # Snowflake treats `value NOT IN (subquery)` as `VALUE <> ALL (subquery)`, so 771 # we do this conversion here to avoid parsing it into `NOT value IN (subquery)` 772 # which can produce different results (most likely a SnowFlake bug). 773 # 774 # https://docs.snowflake.com/en/sql-reference/functions/in 775 # Context: https://github.com/tobymao/sqlglot/issues/3890 776 return self.expression( 777 exp.NEQ, this=this.this, expression=exp.All(this=query.unnest()) 778 ) 779 780 return self.expression(exp.Not, this=this) 781 782 def _parse_tag(self) -> exp.Tags: 783 return self.expression( 784 exp.Tags, 785 expressions=self._parse_wrapped_csv(self._parse_property), 786 ) 787 788 def _parse_with_constraint(self) -> t.Optional[exp.Expression]: 789 if self._prev.token_type != TokenType.WITH: 790 self._retreat(self._index - 1) 791 792 if self._match_text_seq("MASKING", "POLICY"): 793 policy = self._parse_column() 794 return self.expression( 795 exp.MaskingPolicyColumnConstraint, 796 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 797 expressions=self._match(TokenType.USING) 798 and self._parse_wrapped_csv(self._parse_id_var), 799 ) 800 if self._match_text_seq("PROJECTION", "POLICY"): 801 policy = self._parse_column() 802 return self.expression( 803 exp.ProjectionPolicyColumnConstraint, 804 this=policy.to_dot() if isinstance(policy, exp.Column) else policy, 805 ) 806 if self._match(TokenType.TAG): 807 return self._parse_tag() 808 809 return None 810 811 def _parse_with_property(self) -> t.Optional[exp.Expression] | t.List[exp.Expression]: 812 if self._match(TokenType.TAG): 813 return self._parse_tag() 814 815 return super()._parse_with_property() 816 817 def _parse_create(self) -> exp.Create | exp.Command: 818 expression = super()._parse_create() 819 if isinstance(expression, exp.Create) and expression.kind in self.NON_TABLE_CREATABLES: 820 # Replace the Table node with the enclosed Identifier 821 expression.this.replace(expression.this.this) 822 823 return expression 824 825 # https://docs.snowflake.com/en/sql-reference/functions/date_part.html 826 # https://docs.snowflake.com/en/sql-reference/functions-date-time.html#label-supported-date-time-parts 827 def _parse_date_part(self: Snowflake.Parser) -> t.Optional[exp.Expression]: 828 this = self._parse_var() or self._parse_type() 829 830 if not this: 831 return None 832 833 self._match(TokenType.COMMA) 834 expression = self._parse_bitwise() 835 this = map_date_part(this) 836 name = this.name.upper() 837 838 if name.startswith("EPOCH"): 839 if name == "EPOCH_MILLISECOND": 840 scale = 10**3 841 elif name == "EPOCH_MICROSECOND": 842 scale = 10**6 843 elif name == "EPOCH_NANOSECOND": 844 scale = 10**9 845 else: 846 scale = None 847 848 ts = self.expression(exp.Cast, this=expression, to=exp.DataType.build("TIMESTAMP")) 849 to_unix: exp.Expression = self.expression(exp.TimeToUnix, this=ts) 850 851 if scale: 852 to_unix = exp.Mul(this=to_unix, expression=exp.Literal.number(scale)) 853 854 return to_unix 855 856 return self.expression(exp.Extract, this=this, expression=expression) 857 858 def _parse_bracket_key_value(self, is_map: bool = False) -> t.Optional[exp.Expression]: 859 if is_map: 860 # Keys are strings in Snowflake's objects, see also: 861 # - https://docs.snowflake.com/en/sql-reference/data-types-semistructured 862 # - https://docs.snowflake.com/en/sql-reference/functions/object_construct 863 return self._parse_slice(self._parse_string()) 864 865 return self._parse_slice(self._parse_alias(self._parse_assignment(), explicit=True)) 866 867 def _parse_lateral(self) -> t.Optional[exp.Lateral]: 868 lateral = super()._parse_lateral() 869 if not lateral: 870 return lateral 871 872 if isinstance(lateral.this, exp.Explode): 873 table_alias = lateral.args.get("alias") 874 columns = [exp.to_identifier(col) for col in self.FLATTEN_COLUMNS] 875 if table_alias and not table_alias.args.get("columns"): 876 table_alias.set("columns", columns) 877 elif not table_alias: 878 exp.alias_(lateral, "_flattened", table=columns, copy=False) 879 880 return lateral 881 882 def _parse_table_parts( 883 self, schema: bool = False, is_db_reference: bool = False, wildcard: bool = False 884 ) -> exp.Table: 885 # https://docs.snowflake.com/en/user-guide/querying-stage 886 if self._match(TokenType.STRING, advance=False): 887 table = self._parse_string() 888 elif self._match_text_seq("@", advance=False): 889 table = self._parse_location_path() 890 else: 891 table = None 892 893 if table: 894 file_format = None 895 pattern = None 896 897 wrapped = self._match(TokenType.L_PAREN) 898 while self._curr and wrapped and not self._match(TokenType.R_PAREN): 899 if self._match_text_seq("FILE_FORMAT", "=>"): 900 file_format = self._parse_string() or super()._parse_table_parts( 901 is_db_reference=is_db_reference 902 ) 903 elif self._match_text_seq("PATTERN", "=>"): 904 pattern = self._parse_string() 905 else: 906 break 907 908 self._match(TokenType.COMMA) 909 910 table = self.expression(exp.Table, this=table, format=file_format, pattern=pattern) 911 else: 912 table = super()._parse_table_parts(schema=schema, is_db_reference=is_db_reference) 913 914 return table 915 916 def _parse_table( 917 self, 918 schema: bool = False, 919 joins: bool = False, 920 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 921 parse_bracket: bool = False, 922 is_db_reference: bool = False, 923 parse_partition: bool = False, 924 consume_pipe: bool = False, 925 ) -> t.Optional[exp.Expression]: 926 table = super()._parse_table( 927 schema=schema, 928 joins=joins, 929 alias_tokens=alias_tokens, 930 parse_bracket=parse_bracket, 931 is_db_reference=is_db_reference, 932 parse_partition=parse_partition, 933 ) 934 if isinstance(table, exp.Table) and isinstance(table.this, exp.TableFromRows): 935 table_from_rows = table.this 936 for arg in exp.TableFromRows.arg_types: 937 if arg != "this": 938 table_from_rows.set(arg, table.args.get(arg)) 939 940 table = table_from_rows 941 942 return table 943 944 def _parse_id_var( 945 self, 946 any_token: bool = True, 947 tokens: t.Optional[t.Collection[TokenType]] = None, 948 ) -> t.Optional[exp.Expression]: 949 if self._match_text_seq("IDENTIFIER", "("): 950 identifier = ( 951 super()._parse_id_var(any_token=any_token, tokens=tokens) 952 or self._parse_string() 953 ) 954 self._match_r_paren() 955 return self.expression(exp.Anonymous, this="IDENTIFIER", expressions=[identifier]) 956 957 return super()._parse_id_var(any_token=any_token, tokens=tokens) 958 959 def _parse_show_snowflake(self, this: str) -> exp.Show: 960 scope = None 961 scope_kind = None 962 963 # will identity SHOW TERSE SCHEMAS but not SHOW TERSE PRIMARY KEYS 964 # which is syntactically valid but has no effect on the output 965 terse = self._tokens[self._index - 2].text.upper() == "TERSE" 966 967 history = self._match_text_seq("HISTORY") 968 969 like = self._parse_string() if self._match(TokenType.LIKE) else None 970 971 if self._match(TokenType.IN): 972 if self._match_text_seq("ACCOUNT"): 973 scope_kind = "ACCOUNT" 974 elif self._match_text_seq("CLASS"): 975 scope_kind = "CLASS" 976 scope = self._parse_table_parts() 977 elif self._match_text_seq("APPLICATION"): 978 scope_kind = "APPLICATION" 979 if self._match_text_seq("PACKAGE"): 980 scope_kind += " PACKAGE" 981 scope = self._parse_table_parts() 982 elif self._match_set(self.DB_CREATABLES): 983 scope_kind = self._prev.text.upper() 984 if self._curr: 985 scope = self._parse_table_parts() 986 elif self._curr: 987 scope_kind = "SCHEMA" if this in self.SCHEMA_KINDS else "TABLE" 988 scope = self._parse_table_parts() 989 990 return self.expression( 991 exp.Show, 992 **{ 993 "terse": terse, 994 "this": this, 995 "history": history, 996 "like": like, 997 "scope": scope, 998 "scope_kind": scope_kind, 999 "starts_with": self._match_text_seq("STARTS", "WITH") and self._parse_string(), 1000 "limit": self._parse_limit(), 1001 "from": self._parse_string() if self._match(TokenType.FROM) else None, 1002 "privileges": self._match_text_seq("WITH", "PRIVILEGES") 1003 and self._parse_csv(lambda: self._parse_var(any_token=True, upper=True)), 1004 }, 1005 ) 1006 1007 def _parse_put(self) -> exp.Put | exp.Command: 1008 if self._curr.token_type != TokenType.STRING: 1009 return self._parse_as_command(self._prev) 1010 1011 return self.expression( 1012 exp.Put, 1013 this=self._parse_string(), 1014 target=self._parse_location_path(), 1015 properties=self._parse_properties(), 1016 ) 1017 1018 def _parse_get(self) -> t.Optional[exp.Expression]: 1019 start = self._prev 1020 1021 # If we detect GET( then we need to parse a function, not a statement 1022 if self._match(TokenType.L_PAREN): 1023 self._retreat(self._index - 2) 1024 return self._parse_expression() 1025 1026 target = self._parse_location_path() 1027 1028 # Parse as command if unquoted file path 1029 if self._curr.token_type == TokenType.URI_START: 1030 return self._parse_as_command(start) 1031 1032 return self.expression( 1033 exp.Get, 1034 this=self._parse_string(), 1035 target=target, 1036 properties=self._parse_properties(), 1037 ) 1038 1039 def _parse_location_property(self) -> exp.LocationProperty: 1040 self._match(TokenType.EQ) 1041 return self.expression(exp.LocationProperty, this=self._parse_location_path()) 1042 1043 def _parse_file_location(self) -> t.Optional[exp.Expression]: 1044 # Parse either a subquery or a staged file 1045 return ( 1046 self._parse_select(table=True, parse_subquery_alias=False) 1047 if self._match(TokenType.L_PAREN, advance=False) 1048 else self._parse_table_parts() 1049 ) 1050 1051 def _parse_location_path(self) -> exp.Var: 1052 start = self._curr 1053 self._advance_any(ignore_reserved=True) 1054 1055 # We avoid consuming a comma token because external tables like @foo and @bar 1056 # can be joined in a query with a comma separator, as well as closing paren 1057 # in case of subqueries 1058 while self._is_connected() and not self._match_set( 1059 (TokenType.COMMA, TokenType.L_PAREN, TokenType.R_PAREN), advance=False 1060 ): 1061 self._advance_any(ignore_reserved=True) 1062 1063 return exp.var(self._find_sql(start, self._prev)) 1064 1065 def _parse_lambda_arg(self) -> t.Optional[exp.Expression]: 1066 this = super()._parse_lambda_arg() 1067 1068 if not this: 1069 return this 1070 1071 typ = self._parse_types() 1072 1073 if typ: 1074 return self.expression(exp.Cast, this=this, to=typ) 1075 1076 return this 1077 1078 def _parse_foreign_key(self) -> exp.ForeignKey: 1079 # inlineFK, the REFERENCES columns are implied 1080 if self._match(TokenType.REFERENCES, advance=False): 1081 return self.expression(exp.ForeignKey) 1082 1083 # outoflineFK, explicitly names the columns 1084 return super()._parse_foreign_key() 1085 1086 def _parse_file_format_property(self) -> exp.FileFormatProperty: 1087 self._match(TokenType.EQ) 1088 if self._match(TokenType.L_PAREN, advance=False): 1089 expressions = self._parse_wrapped_options() 1090 else: 1091 expressions = [self._parse_format_name()] 1092 1093 return self.expression( 1094 exp.FileFormatProperty, 1095 expressions=expressions, 1096 ) 1097 1098 def _parse_credentials_property(self) -> exp.CredentialsProperty: 1099 return self.expression( 1100 exp.CredentialsProperty, 1101 expressions=self._parse_wrapped_options(), 1102 ) 1103 1104 def _parse_semantic_view(self) -> exp.SemanticView: 1105 kwargs: t.Dict[str, t.Any] = {"this": self._parse_table_parts()} 1106 1107 while self._curr and not self._match(TokenType.R_PAREN, advance=False): 1108 if self._match_text_seq("DIMENSIONS"): 1109 kwargs["dimensions"] = self._parse_csv(self._parse_disjunction) 1110 if self._match_text_seq("METRICS"): 1111 kwargs["metrics"] = self._parse_csv(self._parse_disjunction) 1112 if self._match_text_seq("WHERE"): 1113 kwargs["where"] = self._parse_expression() 1114 1115 return self.expression(exp.SemanticView, **kwargs)
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- NO_PAREN_FUNCTIONS
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- RESERVED_TOKENS
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- ALIAS_TOKENS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- FUNC_TOKENS
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_KINDS
- JOIN_HINTS
- COLUMN_OPERATORS
- EXPRESSION_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PLACEHOLDER_PARSERS
- RANGE_PARSERS
- PIPE_SYNTAX_TRANSFORM_PARSERS
- ALTER_ALTER_PARSERS
- SCHEMA_UNNAMED_CONSTRAINTS
- NO_PAREN_FUNCTION_PARSERS
- INVALID_FUNC_NAME_TOKENS
- FUNCTIONS_WITH_ALIASED_ARGS
- KEY_VALUE_DEFINITIONS
- QUERY_MODIFIER_PARSERS
- QUERY_MODIFIER_TOKENS
- SET_PARSERS
- TYPE_LITERAL_PARSERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- WINDOW_EXCLUDE_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- ANALYZE_STYLES
- ANALYZE_EXPRESSION_PARSERS
- PARTITION_KEYWORDS
- AMBIGUOUS_ALIAS_TOKENS
- OPERATION_MODIFIERS
- RECURSIVE_CTE_SEARCH_KIND
- MODIFIABLES
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- LOG_DEFAULTS_TO_LN
- TABLESAMPLE_CSV
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- MODIFIERS_ATTACHED_TO_SET_OP
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- INTERVAL_SPANS
- SUPPORTS_PARTITION_SELECTION
- WRAPPED_TRANSFORM_COLUMN_CONSTRAINT
- OPTIONAL_ALIAS_TOKEN_CTE
- ALTER_RENAME_REQUIRES_COLUMN
- JOINS_HAVE_EQUAL_PRECEDENCE
- ZONE_AWARE_TIMESTAMP_CONSTRUCTOR
- MAP_KEYS_ARE_ARBITRARY_EXPRESSIONS
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- parse_set_operation
- build_cast
- errors
- sql
1117 class Tokenizer(tokens.Tokenizer): 1118 STRING_ESCAPES = ["\\", "'"] 1119 HEX_STRINGS = [("x'", "'"), ("X'", "'")] 1120 RAW_STRINGS = ["$$"] 1121 COMMENTS = ["--", "//", ("/*", "*/")] 1122 NESTED_COMMENTS = False 1123 1124 KEYWORDS = { 1125 **tokens.Tokenizer.KEYWORDS, 1126 "FILE://": TokenType.URI_START, 1127 "BYTEINT": TokenType.INT, 1128 "EXCLUDE": TokenType.EXCEPT, 1129 "FILE FORMAT": TokenType.FILE_FORMAT, 1130 "GET": TokenType.GET, 1131 "MATCH_CONDITION": TokenType.MATCH_CONDITION, 1132 "MATCH_RECOGNIZE": TokenType.MATCH_RECOGNIZE, 1133 "MINUS": TokenType.EXCEPT, 1134 "NCHAR VARYING": TokenType.VARCHAR, 1135 "PUT": TokenType.PUT, 1136 "REMOVE": TokenType.COMMAND, 1137 "RM": TokenType.COMMAND, 1138 "SAMPLE": TokenType.TABLE_SAMPLE, 1139 "SEMANTIC VIEW": TokenType.SEMANTIC_VIEW, 1140 "SQL_DOUBLE": TokenType.DOUBLE, 1141 "SQL_VARCHAR": TokenType.VARCHAR, 1142 "STORAGE INTEGRATION": TokenType.STORAGE_INTEGRATION, 1143 "TAG": TokenType.TAG, 1144 "TIMESTAMP_TZ": TokenType.TIMESTAMPTZ, 1145 "TOP": TokenType.TOP, 1146 "WAREHOUSE": TokenType.WAREHOUSE, 1147 "STAGE": TokenType.STAGE, 1148 "STREAMLIT": TokenType.STREAMLIT, 1149 } 1150 KEYWORDS.pop("/*+") 1151 1152 SINGLE_TOKENS = { 1153 **tokens.Tokenizer.SINGLE_TOKENS, 1154 "$": TokenType.PARAMETER, 1155 } 1156 1157 VAR_SINGLE_TOKENS = {"$"} 1158 1159 COMMANDS = tokens.Tokenizer.COMMANDS - {TokenType.SHOW}
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BIT_STRINGS
- BYTE_STRINGS
- HEREDOC_STRINGS
- UNICODE_STRINGS
- IDENTIFIERS
- QUOTES
- IDENTIFIER_ESCAPES
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- HINT_START
- TOKENS_PRECEDING_HINT
- WHITE_SPACE
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- use_rs_tokenizer
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
1161 class Generator(generator.Generator): 1162 PARAMETER_TOKEN = "$" 1163 MATCHED_BY_SOURCE = False 1164 SINGLE_STRING_INTERVAL = True 1165 JOIN_HINTS = False 1166 TABLE_HINTS = False 1167 QUERY_HINTS = False 1168 AGGREGATE_FILTER_SUPPORTED = False 1169 SUPPORTS_TABLE_COPY = False 1170 COLLATE_IS_FUNC = True 1171 LIMIT_ONLY_LITERALS = True 1172 JSON_KEY_VALUE_PAIR_SEP = "," 1173 INSERT_OVERWRITE = " OVERWRITE INTO" 1174 STRUCT_DELIMITER = ("(", ")") 1175 COPY_PARAMS_ARE_WRAPPED = False 1176 COPY_PARAMS_EQ_REQUIRED = True 1177 STAR_EXCEPT = "EXCLUDE" 1178 SUPPORTS_EXPLODING_PROJECTIONS = False 1179 ARRAY_CONCAT_IS_VAR_LEN = False 1180 SUPPORTS_CONVERT_TIMEZONE = True 1181 EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE = False 1182 SUPPORTS_MEDIAN = True 1183 ARRAY_SIZE_NAME = "ARRAY_SIZE" 1184 SUPPORTS_DECODE_CASE = True 1185 IS_BOOL_ALLOWED = False 1186 1187 TRANSFORMS = { 1188 **generator.Generator.TRANSFORMS, 1189 exp.ApproxDistinct: rename_func("APPROX_COUNT_DISTINCT"), 1190 exp.ArgMax: rename_func("MAX_BY"), 1191 exp.ArgMin: rename_func("MIN_BY"), 1192 exp.ArrayConcat: lambda self, e: self.arrayconcat_sql(e, name="ARRAY_CAT"), 1193 exp.ArrayContains: lambda self, e: self.func("ARRAY_CONTAINS", e.expression, e.this), 1194 exp.ArrayIntersect: rename_func("ARRAY_INTERSECTION"), 1195 exp.AtTimeZone: lambda self, e: self.func( 1196 "CONVERT_TIMEZONE", e.args.get("zone"), e.this 1197 ), 1198 exp.BitwiseOr: rename_func("BITOR"), 1199 exp.BitwiseXor: rename_func("BITXOR"), 1200 exp.BitwiseLeftShift: rename_func("BITSHIFTLEFT"), 1201 exp.BitwiseRightShift: rename_func("BITSHIFTRIGHT"), 1202 exp.Create: transforms.preprocess([_flatten_structured_types_unless_iceberg]), 1203 exp.DateAdd: date_delta_sql("DATEADD"), 1204 exp.DateDiff: date_delta_sql("DATEDIFF"), 1205 exp.DatetimeAdd: date_delta_sql("TIMESTAMPADD"), 1206 exp.DatetimeDiff: timestampdiff_sql, 1207 exp.DateStrToDate: datestrtodate_sql, 1208 exp.DayOfMonth: rename_func("DAYOFMONTH"), 1209 exp.DayOfWeek: rename_func("DAYOFWEEK"), 1210 exp.DayOfWeekIso: rename_func("DAYOFWEEKISO"), 1211 exp.DayOfYear: rename_func("DAYOFYEAR"), 1212 exp.Explode: rename_func("FLATTEN"), 1213 exp.Extract: lambda self, e: self.func( 1214 "DATE_PART", map_date_part(e.this, self.dialect), e.expression 1215 ), 1216 exp.FileFormatProperty: lambda self, 1217 e: f"FILE_FORMAT=({self.expressions(e, 'expressions', sep=' ')})", 1218 exp.FromTimeZone: lambda self, e: self.func( 1219 "CONVERT_TIMEZONE", e.args.get("zone"), "'UTC'", e.this 1220 ), 1221 exp.GenerateSeries: lambda self, e: self.func( 1222 "ARRAY_GENERATE_RANGE", e.args["start"], e.args["end"] + 1, e.args.get("step") 1223 ), 1224 exp.GroupConcat: lambda self, e: groupconcat_sql(self, e, sep=""), 1225 exp.If: if_sql(name="IFF", false_value="NULL"), 1226 exp.JSONExtractArray: _json_extract_value_array_sql, 1227 exp.JSONExtractScalar: lambda self, e: self.func( 1228 "JSON_EXTRACT_PATH_TEXT", e.this, e.expression 1229 ), 1230 exp.JSONObject: lambda self, e: self.func("OBJECT_CONSTRUCT_KEEP_NULL", *e.expressions), 1231 exp.JSONPathRoot: lambda *_: "", 1232 exp.JSONValueArray: _json_extract_value_array_sql, 1233 exp.Levenshtein: unsupported_args("ins_cost", "del_cost", "sub_cost")( 1234 rename_func("EDITDISTANCE") 1235 ), 1236 exp.LocationProperty: lambda self, e: f"LOCATION={self.sql(e, 'this')}", 1237 exp.LogicalAnd: rename_func("BOOLAND_AGG"), 1238 exp.LogicalOr: rename_func("BOOLOR_AGG"), 1239 exp.Map: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1240 exp.MakeInterval: no_make_interval_sql, 1241 exp.Max: max_or_greatest, 1242 exp.Min: min_or_least, 1243 exp.ParseJSON: lambda self, e: self.func( 1244 "TRY_PARSE_JSON" if e.args.get("safe") else "PARSE_JSON", e.this 1245 ), 1246 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 1247 exp.PercentileCont: transforms.preprocess( 1248 [transforms.add_within_group_for_percentiles] 1249 ), 1250 exp.PercentileDisc: transforms.preprocess( 1251 [transforms.add_within_group_for_percentiles] 1252 ), 1253 exp.Pivot: transforms.preprocess([_unqualify_pivot_columns]), 1254 exp.RegexpExtract: _regexpextract_sql, 1255 exp.RegexpExtractAll: _regexpextract_sql, 1256 exp.RegexpILike: _regexpilike_sql, 1257 exp.Rand: rename_func("RANDOM"), 1258 exp.Select: transforms.preprocess( 1259 [ 1260 transforms.eliminate_window_clause, 1261 transforms.eliminate_distinct_on, 1262 transforms.explode_projection_to_unnest(), 1263 transforms.eliminate_semi_and_anti_joins, 1264 _transform_generate_date_array, 1265 _qualify_unnested_columns, 1266 _eliminate_dot_variant_lookup, 1267 ] 1268 ), 1269 exp.SHA: rename_func("SHA1"), 1270 exp.StarMap: rename_func("OBJECT_CONSTRUCT"), 1271 exp.StartsWith: rename_func("STARTSWITH"), 1272 exp.EndsWith: rename_func("ENDSWITH"), 1273 exp.StrPosition: lambda self, e: strposition_sql( 1274 self, e, func_name="CHARINDEX", supports_position=True 1275 ), 1276 exp.StrToDate: lambda self, e: self.func("DATE", e.this, self.format_time(e)), 1277 exp.StringToArray: rename_func("STRTOK_TO_ARRAY"), 1278 exp.Stuff: rename_func("INSERT"), 1279 exp.StPoint: rename_func("ST_MAKEPOINT"), 1280 exp.TimeAdd: date_delta_sql("TIMEADD"), 1281 exp.Timestamp: no_timestamp_sql, 1282 exp.TimestampAdd: date_delta_sql("TIMESTAMPADD"), 1283 exp.TimestampDiff: lambda self, e: self.func( 1284 "TIMESTAMPDIFF", e.unit, e.expression, e.this 1285 ), 1286 exp.TimestampTrunc: timestamptrunc_sql(), 1287 exp.TimeStrToTime: timestrtotime_sql, 1288 exp.TimeToUnix: lambda self, e: f"EXTRACT(epoch_second FROM {self.sql(e, 'this')})", 1289 exp.ToArray: rename_func("TO_ARRAY"), 1290 exp.ToChar: lambda self, e: self.function_fallback_sql(e), 1291 exp.ToDouble: rename_func("TO_DOUBLE"), 1292 exp.TsOrDsAdd: date_delta_sql("DATEADD", cast=True), 1293 exp.TsOrDsDiff: date_delta_sql("DATEDIFF"), 1294 exp.TsOrDsToDate: lambda self, e: self.func( 1295 "TRY_TO_DATE" if e.args.get("safe") else "TO_DATE", e.this, self.format_time(e) 1296 ), 1297 exp.TsOrDsToTime: lambda self, e: self.func( 1298 "TRY_TO_TIME" if e.args.get("safe") else "TO_TIME", e.this, self.format_time(e) 1299 ), 1300 exp.Unhex: rename_func("HEX_DECODE_BINARY"), 1301 exp.UnixToTime: rename_func("TO_TIMESTAMP"), 1302 exp.Uuid: rename_func("UUID_STRING"), 1303 exp.VarMap: lambda self, e: var_map_sql(self, e, "OBJECT_CONSTRUCT"), 1304 exp.WeekOfYear: rename_func("WEEKOFYEAR"), 1305 exp.Xor: rename_func("BOOLXOR"), 1306 } 1307 1308 SUPPORTED_JSON_PATH_PARTS = { 1309 exp.JSONPathKey, 1310 exp.JSONPathRoot, 1311 exp.JSONPathSubscript, 1312 } 1313 1314 TYPE_MAPPING = { 1315 **generator.Generator.TYPE_MAPPING, 1316 exp.DataType.Type.NESTED: "OBJECT", 1317 exp.DataType.Type.STRUCT: "OBJECT", 1318 exp.DataType.Type.BIGDECIMAL: "DOUBLE", 1319 } 1320 1321 TOKEN_MAPPING = { 1322 TokenType.AUTO_INCREMENT: "AUTOINCREMENT", 1323 } 1324 1325 PROPERTIES_LOCATION = { 1326 **generator.Generator.PROPERTIES_LOCATION, 1327 exp.CredentialsProperty: exp.Properties.Location.POST_WITH, 1328 exp.LocationProperty: exp.Properties.Location.POST_WITH, 1329 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 1330 exp.SetProperty: exp.Properties.Location.UNSUPPORTED, 1331 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 1332 } 1333 1334 UNSUPPORTED_VALUES_EXPRESSIONS = { 1335 exp.Map, 1336 exp.StarMap, 1337 exp.Struct, 1338 exp.VarMap, 1339 } 1340 1341 RESPECT_IGNORE_NULLS_UNSUPPORTED_EXPRESSIONS = (exp.ArrayAgg,) 1342 1343 def with_properties(self, properties: exp.Properties) -> str: 1344 return self.properties(properties, wrapped=False, prefix=self.sep(""), sep=" ") 1345 1346 def values_sql(self, expression: exp.Values, values_as_table: bool = True) -> str: 1347 if expression.find(*self.UNSUPPORTED_VALUES_EXPRESSIONS): 1348 values_as_table = False 1349 1350 return super().values_sql(expression, values_as_table=values_as_table) 1351 1352 def datatype_sql(self, expression: exp.DataType) -> str: 1353 expressions = expression.expressions 1354 if ( 1355 expressions 1356 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1357 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1358 ): 1359 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1360 return "OBJECT" 1361 1362 return super().datatype_sql(expression) 1363 1364 def tonumber_sql(self, expression: exp.ToNumber) -> str: 1365 return self.func( 1366 "TO_NUMBER", 1367 expression.this, 1368 expression.args.get("format"), 1369 expression.args.get("precision"), 1370 expression.args.get("scale"), 1371 ) 1372 1373 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1374 milli = expression.args.get("milli") 1375 if milli is not None: 1376 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1377 expression.set("nano", milli_to_nano) 1378 1379 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression) 1380 1381 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1382 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1383 return self.func("TO_GEOGRAPHY", expression.this) 1384 if expression.is_type(exp.DataType.Type.GEOMETRY): 1385 return self.func("TO_GEOMETRY", expression.this) 1386 1387 return super().cast_sql(expression, safe_prefix=safe_prefix) 1388 1389 def trycast_sql(self, expression: exp.TryCast) -> str: 1390 value = expression.this 1391 1392 if value.type is None: 1393 from sqlglot.optimizer.annotate_types import annotate_types 1394 1395 value = annotate_types(value, dialect=self.dialect) 1396 1397 # Snowflake requires that TRY_CAST's value be a string 1398 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1399 # if we can deduce that the value is a string, then we can generate TRY_CAST 1400 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1401 return super().trycast_sql(expression) 1402 1403 return self.cast_sql(expression) 1404 1405 def log_sql(self, expression: exp.Log) -> str: 1406 if not expression.expression: 1407 return self.func("LN", expression.this) 1408 1409 return super().log_sql(expression) 1410 1411 def unnest_sql(self, expression: exp.Unnest) -> str: 1412 unnest_alias = expression.args.get("alias") 1413 offset = expression.args.get("offset") 1414 1415 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1416 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1417 1418 columns = [ 1419 exp.to_identifier("seq"), 1420 exp.to_identifier("key"), 1421 exp.to_identifier("path"), 1422 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1423 value, 1424 exp.to_identifier("this"), 1425 ] 1426 1427 if unnest_alias: 1428 unnest_alias.set("columns", columns) 1429 else: 1430 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1431 1432 table_input = self.sql(expression.expressions[0]) 1433 if not table_input.startswith("INPUT =>"): 1434 table_input = f"INPUT => {table_input}" 1435 1436 expression_parent = expression.parent 1437 1438 explode = ( 1439 f"FLATTEN({table_input})" 1440 if isinstance(expression_parent, exp.Lateral) 1441 else f"TABLE(FLATTEN({table_input}))" 1442 ) 1443 alias = self.sql(unnest_alias) 1444 alias = f" AS {alias}" if alias else "" 1445 value = ( 1446 "" 1447 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1448 else f"{value} FROM " 1449 ) 1450 1451 return f"{value}{explode}{alias}" 1452 1453 def show_sql(self, expression: exp.Show) -> str: 1454 terse = "TERSE " if expression.args.get("terse") else "" 1455 history = " HISTORY" if expression.args.get("history") else "" 1456 like = self.sql(expression, "like") 1457 like = f" LIKE {like}" if like else "" 1458 1459 scope = self.sql(expression, "scope") 1460 scope = f" {scope}" if scope else "" 1461 1462 scope_kind = self.sql(expression, "scope_kind") 1463 if scope_kind: 1464 scope_kind = f" IN {scope_kind}" 1465 1466 starts_with = self.sql(expression, "starts_with") 1467 if starts_with: 1468 starts_with = f" STARTS WITH {starts_with}" 1469 1470 limit = self.sql(expression, "limit") 1471 1472 from_ = self.sql(expression, "from") 1473 if from_: 1474 from_ = f" FROM {from_}" 1475 1476 privileges = self.expressions(expression, key="privileges", flat=True) 1477 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1478 1479 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}" 1480 1481 def describe_sql(self, expression: exp.Describe) -> str: 1482 # Default to table if kind is unknown 1483 kind_value = expression.args.get("kind") or "TABLE" 1484 kind = f" {kind_value}" if kind_value else "" 1485 this = f" {self.sql(expression, 'this')}" 1486 expressions = self.expressions(expression, flat=True) 1487 expressions = f" {expressions}" if expressions else "" 1488 return f"DESCRIBE{kind}{this}{expressions}" 1489 1490 def generatedasidentitycolumnconstraint_sql( 1491 self, expression: exp.GeneratedAsIdentityColumnConstraint 1492 ) -> str: 1493 start = expression.args.get("start") 1494 start = f" START {start}" if start else "" 1495 increment = expression.args.get("increment") 1496 increment = f" INCREMENT {increment}" if increment else "" 1497 1498 order = expression.args.get("order") 1499 if order is not None: 1500 order_clause = " ORDER" if order else " NOORDER" 1501 else: 1502 order_clause = "" 1503 1504 return f"AUTOINCREMENT{start}{increment}{order_clause}" 1505 1506 def cluster_sql(self, expression: exp.Cluster) -> str: 1507 return f"CLUSTER BY ({self.expressions(expression, flat=True)})" 1508 1509 def struct_sql(self, expression: exp.Struct) -> str: 1510 keys = [] 1511 values = [] 1512 1513 for i, e in enumerate(expression.expressions): 1514 if isinstance(e, exp.PropertyEQ): 1515 keys.append( 1516 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1517 ) 1518 values.append(e.expression) 1519 else: 1520 keys.append(exp.Literal.string(f"_{i}")) 1521 values.append(e) 1522 1523 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values))) 1524 1525 @unsupported_args("weight", "accuracy") 1526 def approxquantile_sql(self, expression: exp.ApproxQuantile) -> str: 1527 return self.func("APPROX_PERCENTILE", expression.this, expression.args.get("quantile")) 1528 1529 def alterset_sql(self, expression: exp.AlterSet) -> str: 1530 exprs = self.expressions(expression, flat=True) 1531 exprs = f" {exprs}" if exprs else "" 1532 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1533 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1534 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1535 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1536 tag = self.expressions(expression, key="tag", flat=True) 1537 tag = f" TAG {tag}" if tag else "" 1538 1539 return f"SET{exprs}{file_format}{copy_options}{tag}" 1540 1541 def strtotime_sql(self, expression: exp.StrToTime): 1542 safe_prefix = "TRY_" if expression.args.get("safe") else "" 1543 return self.func( 1544 f"{safe_prefix}TO_TIMESTAMP", expression.this, self.format_time(expression) 1545 ) 1546 1547 def timestampsub_sql(self, expression: exp.TimestampSub): 1548 return self.sql( 1549 exp.TimestampAdd( 1550 this=expression.this, 1551 expression=expression.expression * -1, 1552 unit=expression.unit, 1553 ) 1554 ) 1555 1556 def jsonextract_sql(self, expression: exp.JSONExtract): 1557 this = expression.this 1558 1559 # JSON strings are valid coming from other dialects such as BQ so 1560 # for these cases we PARSE_JSON preemptively 1561 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1562 "requires_json" 1563 ): 1564 this = exp.ParseJSON(this=this) 1565 1566 return self.func( 1567 "GET_PATH", 1568 this, 1569 expression.expression, 1570 ) 1571 1572 def timetostr_sql(self, expression: exp.TimeToStr) -> str: 1573 this = expression.this 1574 if this.is_string: 1575 this = exp.cast(this, exp.DataType.Type.TIMESTAMP) 1576 1577 return self.func("TO_CHAR", this, self.format_time(expression)) 1578 1579 def datesub_sql(self, expression: exp.DateSub) -> str: 1580 value = expression.expression 1581 if value: 1582 value.replace(value * (-1)) 1583 else: 1584 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1585 1586 return date_delta_sql("DATEADD")(self, expression) 1587 1588 def select_sql(self, expression: exp.Select) -> str: 1589 limit = expression.args.get("limit") 1590 offset = expression.args.get("offset") 1591 if offset and not limit: 1592 expression.limit(exp.Null(), copy=False) 1593 return super().select_sql(expression) 1594 1595 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1596 is_materialized = expression.find(exp.MaterializedProperty) 1597 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1598 1599 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1600 # For materialized views, COPY GRANTS is located *before* the columns list 1601 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1602 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1603 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1604 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1605 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1606 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1607 1608 this_name = self.sql(expression.this, "this") 1609 copy_grants = self.sql(copy_grants_property) 1610 this_schema = self.schema_columns_sql(expression.this) 1611 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1612 1613 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1614 1615 return super().createable_sql(expression, locations) 1616 1617 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1618 this = expression.this 1619 1620 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1621 # and add it later as part of the WITHIN GROUP clause 1622 order = this if isinstance(this, exp.Order) else None 1623 if order: 1624 expression.set("this", order.this.pop()) 1625 1626 expr_sql = super().arrayagg_sql(expression) 1627 1628 if order: 1629 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1630 1631 return expr_sql 1632 1633 def array_sql(self, expression: exp.Array) -> str: 1634 expressions = expression.expressions 1635 1636 first_expr = seq_get(expressions, 0) 1637 if isinstance(first_expr, exp.Select): 1638 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1639 if first_expr.text("kind").upper() == "STRUCT": 1640 object_construct_args = [] 1641 for expr in first_expr.expressions: 1642 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1643 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1644 name = expr.this if isinstance(expr, exp.Alias) else expr 1645 1646 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1647 1648 array_agg = exp.ArrayAgg( 1649 this=_build_object_construct(args=object_construct_args) 1650 ) 1651 1652 first_expr.set("kind", None) 1653 first_expr.set("expressions", [array_agg]) 1654 1655 return self.sql(first_expr.subquery()) 1656 1657 return inline_array_sql(self, expression) 1658 1659 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1660 zone = self.sql(expression, "this") 1661 if not zone: 1662 return super().currentdate_sql(expression) 1663 1664 expr = exp.Cast( 1665 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1666 to=exp.DataType(this=exp.DataType.Type.DATE), 1667 ) 1668 return self.sql(expr) 1669 1670 def dot_sql(self, expression: exp.Dot) -> str: 1671 this = expression.this 1672 1673 if not this.type: 1674 from sqlglot.optimizer.annotate_types import annotate_types 1675 1676 this = annotate_types(this, dialect=self.dialect) 1677 1678 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1679 # Generate colon notation for the top level STRUCT 1680 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1681 1682 return super().dot_sql(expression)
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHEREclause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
1352 def datatype_sql(self, expression: exp.DataType) -> str: 1353 expressions = expression.expressions 1354 if ( 1355 expressions 1356 and expression.is_type(*exp.DataType.STRUCT_TYPES) 1357 and any(isinstance(field_type, exp.DataType) for field_type in expressions) 1358 ): 1359 # The correct syntax is OBJECT [ (<key> <value_type [NOT NULL] [, ...]) ] 1360 return "OBJECT" 1361 1362 return super().datatype_sql(expression)
1373 def timestampfromparts_sql(self, expression: exp.TimestampFromParts) -> str: 1374 milli = expression.args.get("milli") 1375 if milli is not None: 1376 milli_to_nano = milli.pop() * exp.Literal.number(1000000) 1377 expression.set("nano", milli_to_nano) 1378 1379 return rename_func("TIMESTAMP_FROM_PARTS")(self, expression)
1381 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 1382 if expression.is_type(exp.DataType.Type.GEOGRAPHY): 1383 return self.func("TO_GEOGRAPHY", expression.this) 1384 if expression.is_type(exp.DataType.Type.GEOMETRY): 1385 return self.func("TO_GEOMETRY", expression.this) 1386 1387 return super().cast_sql(expression, safe_prefix=safe_prefix)
1389 def trycast_sql(self, expression: exp.TryCast) -> str: 1390 value = expression.this 1391 1392 if value.type is None: 1393 from sqlglot.optimizer.annotate_types import annotate_types 1394 1395 value = annotate_types(value, dialect=self.dialect) 1396 1397 # Snowflake requires that TRY_CAST's value be a string 1398 # If TRY_CAST is being roundtripped (since Snowflake is the only dialect that sets "requires_string") or 1399 # if we can deduce that the value is a string, then we can generate TRY_CAST 1400 if expression.args.get("requires_string") or value.is_type(*exp.DataType.TEXT_TYPES): 1401 return super().trycast_sql(expression) 1402 1403 return self.cast_sql(expression)
1411 def unnest_sql(self, expression: exp.Unnest) -> str: 1412 unnest_alias = expression.args.get("alias") 1413 offset = expression.args.get("offset") 1414 1415 unnest_alias_columns = unnest_alias.columns if unnest_alias else [] 1416 value = seq_get(unnest_alias_columns, 0) or exp.to_identifier("value") 1417 1418 columns = [ 1419 exp.to_identifier("seq"), 1420 exp.to_identifier("key"), 1421 exp.to_identifier("path"), 1422 offset.pop() if isinstance(offset, exp.Expression) else exp.to_identifier("index"), 1423 value, 1424 exp.to_identifier("this"), 1425 ] 1426 1427 if unnest_alias: 1428 unnest_alias.set("columns", columns) 1429 else: 1430 unnest_alias = exp.TableAlias(this="_u", columns=columns) 1431 1432 table_input = self.sql(expression.expressions[0]) 1433 if not table_input.startswith("INPUT =>"): 1434 table_input = f"INPUT => {table_input}" 1435 1436 expression_parent = expression.parent 1437 1438 explode = ( 1439 f"FLATTEN({table_input})" 1440 if isinstance(expression_parent, exp.Lateral) 1441 else f"TABLE(FLATTEN({table_input}))" 1442 ) 1443 alias = self.sql(unnest_alias) 1444 alias = f" AS {alias}" if alias else "" 1445 value = ( 1446 "" 1447 if isinstance(expression_parent, (exp.From, exp.Join, exp.Lateral)) 1448 else f"{value} FROM " 1449 ) 1450 1451 return f"{value}{explode}{alias}"
1453 def show_sql(self, expression: exp.Show) -> str: 1454 terse = "TERSE " if expression.args.get("terse") else "" 1455 history = " HISTORY" if expression.args.get("history") else "" 1456 like = self.sql(expression, "like") 1457 like = f" LIKE {like}" if like else "" 1458 1459 scope = self.sql(expression, "scope") 1460 scope = f" {scope}" if scope else "" 1461 1462 scope_kind = self.sql(expression, "scope_kind") 1463 if scope_kind: 1464 scope_kind = f" IN {scope_kind}" 1465 1466 starts_with = self.sql(expression, "starts_with") 1467 if starts_with: 1468 starts_with = f" STARTS WITH {starts_with}" 1469 1470 limit = self.sql(expression, "limit") 1471 1472 from_ = self.sql(expression, "from") 1473 if from_: 1474 from_ = f" FROM {from_}" 1475 1476 privileges = self.expressions(expression, key="privileges", flat=True) 1477 privileges = f" WITH PRIVILEGES {privileges}" if privileges else "" 1478 1479 return f"SHOW {terse}{expression.name}{history}{like}{scope_kind}{scope}{starts_with}{limit}{from_}{privileges}"
1481 def describe_sql(self, expression: exp.Describe) -> str: 1482 # Default to table if kind is unknown 1483 kind_value = expression.args.get("kind") or "TABLE" 1484 kind = f" {kind_value}" if kind_value else "" 1485 this = f" {self.sql(expression, 'this')}" 1486 expressions = self.expressions(expression, flat=True) 1487 expressions = f" {expressions}" if expressions else "" 1488 return f"DESCRIBE{kind}{this}{expressions}"
1490 def generatedasidentitycolumnconstraint_sql( 1491 self, expression: exp.GeneratedAsIdentityColumnConstraint 1492 ) -> str: 1493 start = expression.args.get("start") 1494 start = f" START {start}" if start else "" 1495 increment = expression.args.get("increment") 1496 increment = f" INCREMENT {increment}" if increment else "" 1497 1498 order = expression.args.get("order") 1499 if order is not None: 1500 order_clause = " ORDER" if order else " NOORDER" 1501 else: 1502 order_clause = "" 1503 1504 return f"AUTOINCREMENT{start}{increment}{order_clause}"
1509 def struct_sql(self, expression: exp.Struct) -> str: 1510 keys = [] 1511 values = [] 1512 1513 for i, e in enumerate(expression.expressions): 1514 if isinstance(e, exp.PropertyEQ): 1515 keys.append( 1516 exp.Literal.string(e.name) if isinstance(e.this, exp.Identifier) else e.this 1517 ) 1518 values.append(e.expression) 1519 else: 1520 keys.append(exp.Literal.string(f"_{i}")) 1521 values.append(e) 1522 1523 return self.func("OBJECT_CONSTRUCT", *flatten(zip(keys, values)))
1529 def alterset_sql(self, expression: exp.AlterSet) -> str: 1530 exprs = self.expressions(expression, flat=True) 1531 exprs = f" {exprs}" if exprs else "" 1532 file_format = self.expressions(expression, key="file_format", flat=True, sep=" ") 1533 file_format = f" STAGE_FILE_FORMAT = ({file_format})" if file_format else "" 1534 copy_options = self.expressions(expression, key="copy_options", flat=True, sep=" ") 1535 copy_options = f" STAGE_COPY_OPTIONS = ({copy_options})" if copy_options else "" 1536 tag = self.expressions(expression, key="tag", flat=True) 1537 tag = f" TAG {tag}" if tag else "" 1538 1539 return f"SET{exprs}{file_format}{copy_options}{tag}"
1556 def jsonextract_sql(self, expression: exp.JSONExtract): 1557 this = expression.this 1558 1559 # JSON strings are valid coming from other dialects such as BQ so 1560 # for these cases we PARSE_JSON preemptively 1561 if not isinstance(this, (exp.ParseJSON, exp.JSONExtract)) and not expression.args.get( 1562 "requires_json" 1563 ): 1564 this = exp.ParseJSON(this=this) 1565 1566 return self.func( 1567 "GET_PATH", 1568 this, 1569 expression.expression, 1570 )
1579 def datesub_sql(self, expression: exp.DateSub) -> str: 1580 value = expression.expression 1581 if value: 1582 value.replace(value * (-1)) 1583 else: 1584 self.unsupported("DateSub cannot be transpiled if the subtracted count is unknown") 1585 1586 return date_delta_sql("DATEADD")(self, expression)
1595 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1596 is_materialized = expression.find(exp.MaterializedProperty) 1597 copy_grants_property = expression.find(exp.CopyGrantsProperty) 1598 1599 if expression.kind == "VIEW" and is_materialized and copy_grants_property: 1600 # For materialized views, COPY GRANTS is located *before* the columns list 1601 # This is in contrast to normal views where COPY GRANTS is located *after* the columns list 1602 # We default CopyGrantsProperty to POST_SCHEMA which means we need to output it POST_NAME if a materialized view is detected 1603 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-materialized-view#syntax 1604 # ref: https://docs.snowflake.com/en/sql-reference/sql/create-view#syntax 1605 post_schema_properties = locations[exp.Properties.Location.POST_SCHEMA] 1606 post_schema_properties.pop(post_schema_properties.index(copy_grants_property)) 1607 1608 this_name = self.sql(expression.this, "this") 1609 copy_grants = self.sql(copy_grants_property) 1610 this_schema = self.schema_columns_sql(expression.this) 1611 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1612 1613 return f"{this_name}{self.sep()}{copy_grants}{this_schema}" 1614 1615 return super().createable_sql(expression, locations)
1617 def arrayagg_sql(self, expression: exp.ArrayAgg) -> str: 1618 this = expression.this 1619 1620 # If an ORDER BY clause is present, we need to remove it from ARRAY_AGG 1621 # and add it later as part of the WITHIN GROUP clause 1622 order = this if isinstance(this, exp.Order) else None 1623 if order: 1624 expression.set("this", order.this.pop()) 1625 1626 expr_sql = super().arrayagg_sql(expression) 1627 1628 if order: 1629 expr_sql = self.sql(exp.WithinGroup(this=expr_sql, expression=order)) 1630 1631 return expr_sql
1633 def array_sql(self, expression: exp.Array) -> str: 1634 expressions = expression.expressions 1635 1636 first_expr = seq_get(expressions, 0) 1637 if isinstance(first_expr, exp.Select): 1638 # SELECT AS STRUCT foo AS alias_foo -> ARRAY_AGG(OBJECT_CONSTRUCT('alias_foo', foo)) 1639 if first_expr.text("kind").upper() == "STRUCT": 1640 object_construct_args = [] 1641 for expr in first_expr.expressions: 1642 # Alias case: SELECT AS STRUCT foo AS alias_foo -> OBJECT_CONSTRUCT('alias_foo', foo) 1643 # Column case: SELECT AS STRUCT foo -> OBJECT_CONSTRUCT('foo', foo) 1644 name = expr.this if isinstance(expr, exp.Alias) else expr 1645 1646 object_construct_args.extend([exp.Literal.string(expr.alias_or_name), name]) 1647 1648 array_agg = exp.ArrayAgg( 1649 this=_build_object_construct(args=object_construct_args) 1650 ) 1651 1652 first_expr.set("kind", None) 1653 first_expr.set("expressions", [array_agg]) 1654 1655 return self.sql(first_expr.subquery()) 1656 1657 return inline_array_sql(self, expression)
1659 def currentdate_sql(self, expression: exp.CurrentDate) -> str: 1660 zone = self.sql(expression, "this") 1661 if not zone: 1662 return super().currentdate_sql(expression) 1663 1664 expr = exp.Cast( 1665 this=exp.ConvertTimezone(target_tz=zone, timestamp=exp.CurrentTimestamp()), 1666 to=exp.DataType(this=exp.DataType.Type.DATE), 1667 ) 1668 return self.sql(expr)
1670 def dot_sql(self, expression: exp.Dot) -> str: 1671 this = expression.this 1672 1673 if not this.type: 1674 from sqlglot.optimizer.annotate_types import annotate_types 1675 1676 this = annotate_types(this, dialect=self.dialect) 1677 1678 if not isinstance(this, exp.Dot) and this.is_type(exp.DataType.Type.STRUCT): 1679 # Generate colon notation for the top level STRUCT 1680 return f"{self.sql(this)}:{self.sql(expression, 'expression')}" 1681 1682 return super().dot_sql(expression)
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- RENAME_TABLE_WITH_DB
- GROUPINGS_SEP
- INDEX_ON
- QUERY_HINT_SEP
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- NVL2_SUPPORTED
- VALUES_AS_TABLE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- TABLESAMPLE_REQUIRES_PARENS
- TABLESAMPLE_SIZE_IS_ROWS
- TABLESAMPLE_KEYWORDS
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- LAST_DAY_SUPPORTS_DATE_PART
- SUPPORTS_TABLE_ALIAS_COLUMNS
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- CAN_IMPLEMENT_ARRAY_ANY
- SUPPORTS_TO_NUMBER
- SUPPORTS_WINDOW_EXCLUDE
- SET_OP_MODIFIERS
- COPY_HAS_INTO_KEYWORD
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_UNIX_SECONDS
- ALTER_SET_WRAPPED
- NORMALIZE_EXTRACT_DATE_PARTS
- PARSE_JSON_NAME
- ALTER_SET_TYPE
- ARRAY_SIZE_DIM_REQUIRED
- SUPPORTS_BETWEEN_FLAGS
- SUPPORTS_LIKE_QUANTIFIERS
- TIME_PART_SINGULARS
- NAMED_PLACEHOLDER_TOKEN
- EXPRESSION_PRECEDES_PROPERTIES_CREATABLES
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- sanitize_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- create_sql
- sequenceproperties_sql
- clone_sql
- heredoc_sql
- prepend_ctes
- with_sql
- cte_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- limitoptions_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- partition_sql
- properties_sql
- root_properties
- properties
- locate_properties
- property_name
- property_sql
- likeproperty_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablefromrows_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- for_modifiers
- queryoption_sql
- offset_limit_modifiers
- after_limit_modifiers
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- placeholder_sql
- subquery_sql
- qualify_sql
- prewhere_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- formatphrase_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterindex_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- addpartition_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- safedivide_sql
- overlaps_sql
- distance_sql
- eq_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- is_sql
- like_sql
- ilike_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- neq_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- jsoncast_sql
- try_sql
- use_sql
- binary
- ceil_floor
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- whens_sql
- merge_sql
- tochar_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- uniquekeyproperty_sql
- distributedbyproperty_sql
- oncluster_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- indexcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodatetime_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonextractquote_sql
- jsonexists_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql
- string_sql
- median_sql
- overflowtruncatebehavior_sql
- unixseconds_sql
- arraysize_sql
- attach_sql
- detach_sql
- attachoption_sql
- featuresattime_sql
- watermarkcolumnconstraint_sql
- encodeproperty_sql
- includeproperty_sql
- xmlelement_sql
- xmlkeyvalueoption_sql
- partitionbyrangeproperty_sql
- partitionbyrangepropertydynamic_sql
- unpivotcolumns_sql
- analyzesample_sql
- analyzestatistics_sql
- analyzehistogram_sql
- analyzedelete_sql
- analyzelistchainedrows_sql
- analyzevalidate_sql
- analyze_sql
- xmltable_sql
- xmlnamespace_sql
- export_sql
- declare_sql
- declareitem_sql
- recursivewithsearch_sql
- parameterizedagg_sql
- anonymousaggfunc_sql
- combinedaggfunc_sql
- combinedparameterizedagg_sql
- get_put_sql
- translatecharacters_sql
- decodecase_sql
- semanticview_sql