diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index b2646505a..dc823b8e0 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -49,12 +49,27 @@ func explainLiteral(sb *strings.Builder, n *ast.Literal, indent string, depth in } hasComplexExpr := false for _, e := range exprs { - lit, isLit := e.(*ast.Literal) - // Non-literals or tuple/array literals count as complex - if !isLit || (isLit && (lit.Type == ast.LiteralTuple || lit.Type == ast.LiteralArray)) { - hasComplexExpr = true - break + // Simple literals (numbers, strings, etc.) are OK + if lit, isLit := e.(*ast.Literal); isLit { + // Nested tuples/arrays are complex + if lit.Type == ast.LiteralTuple || lit.Type == ast.LiteralArray { + hasComplexExpr = true + break + } + // Other literals are simple + continue + } + // Unary negation of numeric literals is also simple + if unary, isUnary := e.(*ast.UnaryExpr); isUnary && unary.Op == "-" { + if lit, isLit := unary.Operand.(*ast.Literal); isLit { + if lit.Type == ast.LiteralInteger || lit.Type == ast.LiteralFloat { + continue + } + } } + // Everything else is complex + hasComplexExpr = true + break } if hasComplexExpr { // Render as Function tuple instead of Literal @@ -329,6 +344,33 @@ func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) { } } } + // Check if this is an array containing specific expressions that need Function array format + if e.Type == ast.LiteralArray { + if exprs, ok := e.Value.([]ast.Expression); ok { + needsFunctionFormat := false + for _, expr := range exprs { + // Check for tuples - use Function array + if lit, ok := expr.(*ast.Literal); ok && lit.Type == ast.LiteralTuple { + needsFunctionFormat = true + break + } + // Check for identifiers - use Function array + if _, ok := expr.(*ast.Identifier); ok { + needsFunctionFormat = true + break + } + } + if needsFunctionFormat { + // Render as Function array with alias + fmt.Fprintf(sb, "%sFunction array (alias %s) (children %d)\n", indent, n.Alias, 1) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(exprs)) + for _, expr := range exprs { + Node(sb, expr, depth+2) + } + return + } + } + } fmt.Fprintf(sb, "%sLiteral %s (alias %s)\n", indent, FormatLiteral(e), n.Alias) case *ast.BinaryExpr: // Binary expressions become functions with alias @@ -450,6 +492,8 @@ func explainWithElement(sb *strings.Builder, n *ast.WithElement, indent string, fmt.Fprintf(sb, "%sSubquery (children %d)\n", indent, 1) } Node(sb, e.Query, depth+1) + case *ast.CastExpr: + explainCastExprWithAlias(sb, e, n.Name, indent, depth) default: // For other types, just output the expression (alias may be lost) Node(sb, n.Query, depth) diff --git a/internal/explain/format.go b/internal/explain/format.go index 1d9fef124..b58d8bf6d 100644 --- a/internal/explain/format.go +++ b/internal/explain/format.go @@ -21,7 +21,14 @@ func FormatFloat(val float64) string { if math.IsNaN(val) { return "nan" } - // Use 'f' format to avoid scientific notation, -1 precision for smallest representation + // Use scientific notation for extremely small numbers (< 1e-10) + // This matches ClickHouse's behavior where numbers like 0.000001 stay decimal + // but extremely small numbers like 1e-38 use scientific notation + absVal := math.Abs(val) + if absVal > 0 && absVal < 1e-10 { + return strconv.FormatFloat(val, 'e', -1, 64) + } + // Use decimal notation for normal-sized numbers return strconv.FormatFloat(val, 'f', -1, 64) } @@ -218,6 +225,9 @@ func FormatDataType(dt *ast.DataType) string { } else if ntp, ok := p.(*ast.NameTypePair); ok { // Named tuple field: "name Type" params = append(params, ntp.Name+" "+FormatDataType(ntp.Type)) + } else if binExpr, ok := p.(*ast.BinaryExpr); ok { + // Binary expression (e.g., 'hello' = 1 for Enum types) + params = append(params, formatBinaryExprForType(binExpr)) } else { params = append(params, fmt.Sprintf("%v", p)) } @@ -225,28 +235,55 @@ func FormatDataType(dt *ast.DataType) string { return fmt.Sprintf("%s(%s)", dt.Name, strings.Join(params, ", ")) } +// formatBinaryExprForType formats a binary expression for use in type parameters +func formatBinaryExprForType(expr *ast.BinaryExpr) string { + var left, right string + + // Format left side + if lit, ok := expr.Left.(*ast.Literal); ok { + if lit.Type == ast.LiteralString { + left = fmt.Sprintf("\\\\\\'%s\\\\\\'", lit.Value) + } else { + left = fmt.Sprintf("%v", lit.Value) + } + } else if ident, ok := expr.Left.(*ast.Identifier); ok { + left = ident.Name() + } else { + left = fmt.Sprintf("%v", expr.Left) + } + + // Format right side + if lit, ok := expr.Right.(*ast.Literal); ok { + right = fmt.Sprintf("%v", lit.Value) + } else if ident, ok := expr.Right.(*ast.Identifier); ok { + right = ident.Name() + } else { + right = fmt.Sprintf("%v", expr.Right) + } + + return left + " " + expr.Op + " " + right +} + // NormalizeFunctionName normalizes function names to match ClickHouse's EXPLAIN AST output func NormalizeFunctionName(name string) string { // ClickHouse normalizes certain function names in EXPLAIN AST normalized := map[string]string{ - "ltrim": "trimLeft", - "rtrim": "trimRight", - "lcase": "lower", - "ucase": "upper", - "mid": "substring", - "ceiling": "ceil", - "ln": "log", - "log10": "log10", - "log2": "log2", - "rand": "rand", - "ifnull": "ifNull", - "nullif": "nullIf", - "coalesce": "coalesce", - "greatest": "greatest", - "least": "least", - "concat_ws": "concat", - "length": "length", - "char_length": "length", + "ltrim": "trimLeft", + "rtrim": "trimRight", + "lcase": "lower", + "ucase": "upper", + "mid": "substring", + "ceiling": "ceil", + "ln": "log", + "log10": "log10", + "log2": "log2", + "rand": "rand", + "ifnull": "ifNull", + "nullif": "nullIf", + "coalesce": "coalesce", + "greatest": "greatest", + "least": "least", + "concat_ws": "concat", } if n, ok := normalized[strings.ToLower(name)]; ok { return n diff --git a/internal/explain/functions.go b/internal/explain/functions.go index 6cf4be8b9..863b7edbe 100644 --- a/internal/explain/functions.go +++ b/internal/explain/functions.go @@ -21,6 +21,10 @@ func explainFunctionCallWithAlias(sb *strings.Builder, n *ast.FunctionCall, alia } // Normalize function name fnName := NormalizeFunctionName(n.Name) + // Append "Distinct" if the function has DISTINCT modifier + if n.Distinct { + fnName = fnName + "Distinct" + } if alias != "" { fmt.Fprintf(sb, "%sFunction %s (alias %s) (children %d)\n", indent, fnName, alias, children) } else { @@ -774,7 +778,6 @@ func explainExtractExpr(sb *strings.Builder, n *ast.ExtractExpr, indent string, func explainWindowSpec(sb *strings.Builder, n *ast.WindowSpec, indent string, depth int) { // Window spec is represented as WindowDefinition // For simple cases like OVER (), just output WindowDefinition without children - // Note: ClickHouse's EXPLAIN AST does not output frame info (ROWS BETWEEN etc) children := 0 if n.Name != "" { children++ @@ -785,6 +788,10 @@ func explainWindowSpec(sb *strings.Builder, n *ast.WindowSpec, indent string, de if len(n.OrderBy) > 0 { children++ } + // Count frame offset as child if present + if n.Frame != nil && n.Frame.StartBound != nil && n.Frame.StartBound.Offset != nil { + children++ + } if children > 0 { fmt.Fprintf(sb, "%sWindowDefinition (children %d)\n", indent, children) if n.Name != "" { @@ -802,7 +809,10 @@ func explainWindowSpec(sb *strings.Builder, n *ast.WindowSpec, indent string, de explainOrderByElement(sb, o, strings.Repeat(" ", depth+2), depth+2) } } - // Frame handling would go here if needed + // Frame start offset + if n.Frame != nil && n.Frame.StartBound != nil && n.Frame.StartBound.Offset != nil { + Node(sb, n.Frame.StartBound.Offset, depth+1) + } } else { fmt.Fprintf(sb, "%sWindowDefinition\n", indent) } diff --git a/lexer/lexer.go b/lexer/lexer.go index 5889d27fd..d59ee3e7f 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -429,7 +429,19 @@ func (l *Lexer) readQuotedIdentifier() Item { var sb strings.Builder l.readChar() // skip opening quote - for !l.eof && l.ch != '"' { + for !l.eof { + if l.ch == '"' { + // Check for SQL-style doubled quote escape "" + l.readChar() + if l.ch == '"' { + // Doubled quote - add single quote and continue + sb.WriteRune('"') + l.readChar() + continue + } + // Single quote - end of identifier + break + } if l.ch == '\\' { l.readChar() if !l.eof { @@ -441,9 +453,6 @@ func (l *Lexer) readQuotedIdentifier() Item { sb.WriteRune(l.ch) l.readChar() } - if l.ch == '"' { - l.readChar() // skip closing quote - } return Item{Token: token.IDENT, Value: sb.String(), Pos: pos} } diff --git a/parser/testdata/00160_decode_xml_component/metadata.json b/parser/testdata/00160_decode_xml_component/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/00160_decode_xml_component/metadata.json +++ b/parser/testdata/00160_decode_xml_component/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/00267_tuple_array_access_operators_priority/metadata.json b/parser/testdata/00267_tuple_array_access_operators_priority/metadata.json index ef120d978..9e26dfeeb 100644 --- a/parser/testdata/00267_tuple_array_access_operators_priority/metadata.json +++ b/parser/testdata/00267_tuple_array_access_operators_priority/metadata.json @@ -1 +1 @@ -{"todo": true} +{} \ No newline at end of file diff --git a/parser/testdata/00317_in_tuples_and_out_of_range_values/metadata.json b/parser/testdata/00317_in_tuples_and_out_of_range_values/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/00317_in_tuples_and_out_of_range_values/metadata.json +++ b/parser/testdata/00317_in_tuples_and_out_of_range_values/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/00324_hashing_enums/metadata.json b/parser/testdata/00324_hashing_enums/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/00324_hashing_enums/metadata.json +++ b/parser/testdata/00324_hashing_enums/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/00471_sql_style_quoting/metadata.json b/parser/testdata/00471_sql_style_quoting/metadata.json index ef120d978..9e26dfeeb 100644 --- a/parser/testdata/00471_sql_style_quoting/metadata.json +++ b/parser/testdata/00471_sql_style_quoting/metadata.json @@ -1 +1 @@ -{"todo": true} +{} \ No newline at end of file diff --git a/parser/testdata/00502_string_concat_with_array/metadata.json b/parser/testdata/00502_string_concat_with_array/metadata.json index ef120d978..9e26dfeeb 100644 --- a/parser/testdata/00502_string_concat_with_array/metadata.json +++ b/parser/testdata/00502_string_concat_with_array/metadata.json @@ -1 +1 @@ -{"todo": true} +{} \ No newline at end of file diff --git a/parser/testdata/00511_get_size_of_enum/metadata.json b/parser/testdata/00511_get_size_of_enum/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/00511_get_size_of_enum/metadata.json +++ b/parser/testdata/00511_get_size_of_enum/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/00526_array_join_with_arrays_of_nullable/metadata.json b/parser/testdata/00526_array_join_with_arrays_of_nullable/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/00526_array_join_with_arrays_of_nullable/metadata.json +++ b/parser/testdata/00526_array_join_with_arrays_of_nullable/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/00548_slice_of_nested/metadata.json b/parser/testdata/00548_slice_of_nested/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/00548_slice_of_nested/metadata.json +++ b/parser/testdata/00548_slice_of_nested/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/00566_enum_min_max/metadata.json b/parser/testdata/00566_enum_min_max/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/00566_enum_min_max/metadata.json +++ b/parser/testdata/00566_enum_min_max/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/00642_cast/metadata.json b/parser/testdata/00642_cast/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/00642_cast/metadata.json +++ b/parser/testdata/00642_cast/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/00674_has_array_enum/metadata.json b/parser/testdata/00674_has_array_enum/metadata.json index ef120d978..9e26dfeeb 100644 --- a/parser/testdata/00674_has_array_enum/metadata.json +++ b/parser/testdata/00674_has_array_enum/metadata.json @@ -1 +1 @@ -{"todo": true} +{} \ No newline at end of file diff --git a/parser/testdata/00726_length_aliases/metadata.json b/parser/testdata/00726_length_aliases/metadata.json index ef120d978..9e26dfeeb 100644 --- a/parser/testdata/00726_length_aliases/metadata.json +++ b/parser/testdata/00726_length_aliases/metadata.json @@ -1 +1 @@ -{"todo": true} +{} \ No newline at end of file diff --git a/parser/testdata/01259_combinator_distinct/metadata.json b/parser/testdata/01259_combinator_distinct/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/01259_combinator_distinct/metadata.json +++ b/parser/testdata/01259_combinator_distinct/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/01353_topk_enum/metadata.json b/parser/testdata/01353_topk_enum/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/01353_topk_enum/metadata.json +++ b/parser/testdata/01353_topk_enum/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/01651_group_uniq_array_enum/metadata.json b/parser/testdata/01651_group_uniq_array_enum/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/01651_group_uniq_array_enum/metadata.json +++ b/parser/testdata/01651_group_uniq_array_enum/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/01700_point_in_polygon_ubsan/metadata.json b/parser/testdata/01700_point_in_polygon_ubsan/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/01700_point_in_polygon_ubsan/metadata.json +++ b/parser/testdata/01700_point_in_polygon_ubsan/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/01717_global_with_subquery_fix/metadata.json b/parser/testdata/01717_global_with_subquery_fix/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/01717_global_with_subquery_fix/metadata.json +++ b/parser/testdata/01717_global_with_subquery_fix/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/01914_ubsan_quantile_timing/metadata.json b/parser/testdata/01914_ubsan_quantile_timing/metadata.json index ef120d978..9e26dfeeb 100644 --- a/parser/testdata/01914_ubsan_quantile_timing/metadata.json +++ b/parser/testdata/01914_ubsan_quantile_timing/metadata.json @@ -1 +1 @@ -{"todo": true} +{} \ No newline at end of file diff --git a/parser/testdata/02158_interval_length_sum/metadata.json b/parser/testdata/02158_interval_length_sum/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/02158_interval_length_sum/metadata.json +++ b/parser/testdata/02158_interval_length_sum/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/02306_window_move_row_number_fix/metadata.json b/parser/testdata/02306_window_move_row_number_fix/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/02306_window_move_row_number_fix/metadata.json +++ b/parser/testdata/02306_window_move_row_number_fix/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/02467_cross_join_three_table_functions/metadata.json b/parser/testdata/02467_cross_join_three_table_functions/metadata.json index ccffb5b94..ef120d978 100644 --- a/parser/testdata/02467_cross_join_three_table_functions/metadata.json +++ b/parser/testdata/02467_cross_join_three_table_functions/metadata.json @@ -1 +1 @@ -{"todo": true} \ No newline at end of file +{"todo": true} diff --git a/parser/testdata/02476_fix_cast_parser_bug/metadata.json b/parser/testdata/02476_fix_cast_parser_bug/metadata.json index d10cf5963..37dda2fb7 100644 --- a/parser/testdata/02476_fix_cast_parser_bug/metadata.json +++ b/parser/testdata/02476_fix_cast_parser_bug/metadata.json @@ -1 +1 @@ -{"todo": true, "parse_error": true} \ No newline at end of file +{"todo": true, "parse_error": true} diff --git a/parser/testdata/02958_transform_enum/metadata.json b/parser/testdata/02958_transform_enum/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/02958_transform_enum/metadata.json +++ b/parser/testdata/02958_transform_enum/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/03003_enum_and_string_compatible/metadata.json b/parser/testdata/03003_enum_and_string_compatible/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/03003_enum_and_string_compatible/metadata.json +++ b/parser/testdata/03003_enum_and_string_compatible/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/03747_float_parsing_subnormal/metadata.json b/parser/testdata/03747_float_parsing_subnormal/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/03747_float_parsing_subnormal/metadata.json +++ b/parser/testdata/03747_float_parsing_subnormal/metadata.json @@ -1 +1 @@ -{"todo": true} +{}