From 660782d4e8ee27a03b06b23e1393f1c97b3ee31f Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 26 Dec 2025 09:43:34 +0000 Subject: [PATCH 1/2] Fix 4 parser tests with hex literals, REPLACE transformer, and SAMPLE BY - Add hex string literal parsing (x'...') in lexer for 02590_bson_duplicate_column - Add REPLACE/EXCEPT transformer support in asterisk explain output for 02813_system_licenses_base - Fix 01942_untuple_transformers_msan (enabled by REPLACE fix) - Add SAMPLE BY clause output in CREATE TABLE storage definition for 00975_sample_prewhere_distributed - Only show SAMPLE BY when it's a function (not simple identifier) - Don't show when SAMPLE BY equals ORDER BY (ClickHouse behavior) --- internal/explain/explain.go | 2 +- internal/explain/expressions.go | 56 +++++++++++++++++-- internal/explain/statements.go | 40 ++++++++++++- lexer/lexer.go | 31 +++++++++- .../metadata.json | 2 +- .../metadata.json | 2 +- .../02590_bson_duplicate_column/metadata.json | 2 +- .../02813_system_licenses_base/metadata.json | 2 +- 8 files changed, 126 insertions(+), 11 deletions(-) diff --git a/internal/explain/explain.go b/internal/explain/explain.go index 49afa0c09..893f1cbe9 100644 --- a/internal/explain/explain.go +++ b/internal/explain/explain.go @@ -72,7 +72,7 @@ func Node(sb *strings.Builder, node interface{}, depth int) { case *ast.WithElement: explainWithElement(sb, n, indent, depth) case *ast.Asterisk: - explainAsterisk(sb, n, indent) + explainAsterisk(sb, n, indent, depth) case *ast.ColumnsMatcher: fmt.Fprintf(sb, "%sColumnsRegexpMatcher\n", indent) diff --git a/internal/explain/expressions.go b/internal/explain/expressions.go index dd058b158..13bf0587e 100644 --- a/internal/explain/expressions.go +++ b/internal/explain/expressions.go @@ -471,12 +471,60 @@ func explainAliasedExpr(sb *strings.Builder, n *ast.AliasedExpr, depth int) { } } -func explainAsterisk(sb *strings.Builder, n *ast.Asterisk, indent string) { +func explainAsterisk(sb *strings.Builder, n *ast.Asterisk, indent string, depth int) { + // Check if there are any column transformers (EXCEPT, REPLACE) + hasTransformers := len(n.Except) > 0 || len(n.Replace) > 0 + if n.Table != "" { - fmt.Fprintf(sb, "%sQualifiedAsterisk (children %d)\n", indent, 1) - fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Table) + if hasTransformers { + fmt.Fprintf(sb, "%sQualifiedAsterisk (children %d)\n", indent, 2) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Table) + explainColumnsTransformers(sb, n, indent+" ", depth+1) + } else { + fmt.Fprintf(sb, "%sQualifiedAsterisk (children %d)\n", indent, 1) + fmt.Fprintf(sb, "%s Identifier %s\n", indent, n.Table) + } } else { - fmt.Fprintf(sb, "%sAsterisk\n", indent) + if hasTransformers { + fmt.Fprintf(sb, "%sAsterisk (children %d)\n", indent, 1) + explainColumnsTransformers(sb, n, indent+" ", depth+1) + } else { + fmt.Fprintf(sb, "%sAsterisk\n", indent) + } + } +} + +func explainColumnsTransformers(sb *strings.Builder, n *ast.Asterisk, indent string, depth int) { + transformerCount := 0 + if len(n.Except) > 0 { + transformerCount++ + } + if len(n.Replace) > 0 { + transformerCount++ + } + + fmt.Fprintf(sb, "%sColumnsTransformerList (children %d)\n", indent, transformerCount) + + if len(n.Except) > 0 { + fmt.Fprintf(sb, "%s ColumnsExceptTransformer (children %d)\n", indent, len(n.Except)) + for _, col := range n.Except { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, col) + } + } + + if len(n.Replace) > 0 { + fmt.Fprintf(sb, "%s ColumnsReplaceTransformer (children %d)\n", indent, len(n.Replace)) + for _, replace := range n.Replace { + children := 0 + if replace.Expr != nil { + children = 1 + } + fmt.Fprintf(sb, "%s ColumnsReplaceTransformer::Replacement (children %d)\n", indent, children) + if replace.Expr != nil { + // Output the expression without alias - the replacement name is implied + Node(sb, replace.Expr, depth+3) + } + } } } diff --git a/internal/explain/statements.go b/internal/explain/statements.go index 06de66ffc..1ae2c01f9 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -147,7 +147,7 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, } } } - if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 || n.PartitionBy != nil || len(n.Settings) > 0 { + if n.Engine != nil || len(n.OrderBy) > 0 || len(n.PrimaryKey) > 0 || n.PartitionBy != nil || n.SampleBy != nil || len(n.Settings) > 0 { storageChildren := 0 if n.Engine != nil { storageChildren++ @@ -161,6 +161,25 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, if len(n.PrimaryKey) > 0 { storageChildren++ } + // SAMPLE BY is only shown in EXPLAIN AST when it's a function (not a simple identifier) + // and when it's different from ORDER BY + if n.SampleBy != nil { + if _, isIdent := n.SampleBy.(*ast.Identifier); !isIdent { + // Check if SAMPLE BY equals ORDER BY - if so, don't show it + showSampleBy := true + if len(n.OrderBy) == 1 { + var orderBySb, sampleBySb strings.Builder + Node(&orderBySb, n.OrderBy[0], 0) + Node(&sampleBySb, n.SampleBy, 0) + if orderBySb.String() == sampleBySb.String() { + showSampleBy = false + } + } + if showSampleBy { + storageChildren++ + } + } + } if len(n.Settings) > 0 { storageChildren++ } @@ -241,6 +260,25 @@ func explainCreateQuery(sb *strings.Builder, n *ast.CreateQuery, indent string, } } } + // SAMPLE BY is only shown in EXPLAIN AST when it's a function (not a simple identifier) + // and when it's different from ORDER BY + if n.SampleBy != nil { + if _, isIdent := n.SampleBy.(*ast.Identifier); !isIdent { + // Check if SAMPLE BY equals ORDER BY - if so, don't show it + showSampleBy := true + if len(n.OrderBy) == 1 { + var orderBySb, sampleBySb strings.Builder + Node(&orderBySb, n.OrderBy[0], 0) + Node(&sampleBySb, n.SampleBy, 0) + if orderBySb.String() == sampleBySb.String() { + showSampleBy = false + } + } + if showSampleBy { + Node(sb, n.SampleBy, depth+2) + } + } + } if len(n.Settings) > 0 { fmt.Fprintf(sb, "%s Set\n", indent) } diff --git a/lexer/lexer.go b/lexer/lexer.go index eb98e1ee1..bb5f01ec8 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -425,6 +425,35 @@ func (l *Lexer) readString(quote rune) Item { return Item{Token: token.STRING, Value: sb.String(), Pos: pos} } +func (l *Lexer) readHexString() Item { + pos := l.pos + var sb strings.Builder + l.readChar() // skip opening quote + + for !l.eof { + if l.ch == '\'' { + l.readChar() // skip closing quote + break + } + // Read two hex digits and convert to byte + hex1 := l.ch + l.readChar() + if l.eof || l.ch == '\'' { + // Odd number of hex digits - write single value + sb.WriteByte(byte(hexValue(hex1))) + if l.ch == '\'' { + l.readChar() // skip closing quote + } + break + } + hex2 := l.ch + val := hexValue(hex1)*16 + hexValue(hex2) + sb.WriteByte(byte(val)) + l.readChar() + } + return Item{Token: token.STRING, Value: sb.String(), Pos: pos} +} + func (l *Lexer) readQuotedIdentifier() Item { pos := l.pos var sb strings.Builder @@ -841,7 +870,7 @@ func (l *Lexer) readIdentifier() Item { // Check for hex string literal: x'...' or X'...' if (l.ch == 'x' || l.ch == 'X') && l.peekChar() == '\'' { l.readChar() // skip x - return l.readString('\'') // read as regular string + return l.readHexString() // read as hex-decoded string } // Check for binary string literal: b'...' or B'...' diff --git a/parser/testdata/00975_sample_prewhere_distributed/metadata.json b/parser/testdata/00975_sample_prewhere_distributed/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/00975_sample_prewhere_distributed/metadata.json +++ b/parser/testdata/00975_sample_prewhere_distributed/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/01942_untuple_transformers_msan/metadata.json b/parser/testdata/01942_untuple_transformers_msan/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/01942_untuple_transformers_msan/metadata.json +++ b/parser/testdata/01942_untuple_transformers_msan/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/02590_bson_duplicate_column/metadata.json b/parser/testdata/02590_bson_duplicate_column/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/02590_bson_duplicate_column/metadata.json +++ b/parser/testdata/02590_bson_duplicate_column/metadata.json @@ -1 +1 @@ -{"todo": true} +{} diff --git a/parser/testdata/02813_system_licenses_base/metadata.json b/parser/testdata/02813_system_licenses_base/metadata.json index ef120d978..0967ef424 100644 --- a/parser/testdata/02813_system_licenses_base/metadata.json +++ b/parser/testdata/02813_system_licenses_base/metadata.json @@ -1 +1 @@ -{"todo": true} +{} From f4644f205df876d3304c5487c15553988dd51aad Mon Sep 17 00:00:00 2001 From: Claude Date: Fri, 26 Dec 2025 11:05:25 +0000 Subject: [PATCH 2/2] Trigger CI rebuild