diff --git a/ast/ast.go b/ast/ast.go index 7fc2ca021b..3304569abd 100644 --- a/ast/ast.go +++ b/ast/ast.go @@ -291,6 +291,7 @@ type ColumnDeclaration struct { Default Expression `json:"default,omitempty"` DefaultKind string `json:"default_kind,omitempty"` // DEFAULT, MATERIALIZED, ALIAS, EPHEMERAL Codec *CodecExpr `json:"codec,omitempty"` + Statistics []*FunctionCall `json:"statistics,omitempty"` // STATISTICS clause TTL Expression `json:"ttl,omitempty"` PrimaryKey bool `json:"primary_key,omitempty"` // PRIMARY KEY constraint Comment string `json:"comment,omitempty"` @@ -522,6 +523,8 @@ type AlterCommand struct { Assignments []*Assignment `json:"assignments,omitempty"` // For UPDATE Projection *Projection `json:"projection,omitempty"` // For ADD PROJECTION ProjectionName string `json:"projection_name,omitempty"` // For DROP/MATERIALIZE/CLEAR PROJECTION + StatisticsColumns []string `json:"statistics_columns,omitempty"` // For ADD/DROP/CLEAR/MATERIALIZE STATISTICS + StatisticsTypes []*FunctionCall `json:"statistics_types,omitempty"` // For ADD/MODIFY STATISTICS TYPE } // Projection represents a projection definition. @@ -585,6 +588,11 @@ const ( AlterDropProjection AlterCommandType = "DROP_PROJECTION" AlterMaterializeProjection AlterCommandType = "MATERIALIZE_PROJECTION" AlterClearProjection AlterCommandType = "CLEAR_PROJECTION" + AlterAddStatistics AlterCommandType = "ADD_STATISTICS" + AlterModifyStatistics AlterCommandType = "MODIFY_STATISTICS" + AlterDropStatistics AlterCommandType = "DROP_STATISTICS" + AlterClearStatistics AlterCommandType = "CLEAR_STATISTICS" + AlterMaterializeStatistics AlterCommandType = "MATERIALIZE_STATISTICS" ) // TruncateQuery represents a TRUNCATE statement. diff --git a/internal/explain/explain.go b/internal/explain/explain.go index 6975d8d12a..d2fd2c982d 100644 --- a/internal/explain/explain.go +++ b/internal/explain/explain.go @@ -234,6 +234,9 @@ func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) { if col.Type != nil { children++ } + if len(col.Statistics) > 0 { + children++ + } // EPHEMERAL columns without explicit default get defaultValueOfTypeName hasEphemeralDefault := col.DefaultKind == "EPHEMERAL" && col.Default == nil if col.Default != nil || hasEphemeralDefault { @@ -246,6 +249,9 @@ func Column(sb *strings.Builder, col *ast.ColumnDeclaration, depth int) { if col.Type != nil { Node(sb, col.Type, depth+1) } + if len(col.Statistics) > 0 { + explainStatisticsExpr(sb, col.Statistics, indent+" ", depth+1) + } if col.Default != nil { Node(sb, col.Default, depth+1) } else if hasEphemeralDefault { @@ -282,6 +288,31 @@ func explainCodecFunction(sb *strings.Builder, fn *ast.FunctionCall, indent stri } } +// explainStatisticsExpr handles STATISTICS expressions in column declarations +func explainStatisticsExpr(sb *strings.Builder, stats []*ast.FunctionCall, indent string, depth int) { + // STATISTICS is rendered as a Function with one child (ExpressionList of statistics types) + fmt.Fprintf(sb, "%sFunction STATISTICS (children 1)\n", indent) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(stats)) + for _, s := range stats { + explainStatisticsFunction(sb, s, indent+" ", depth+2) + } +} + +// explainStatisticsFunction handles individual statistics functions (e.g., tdigest, uniq, countmin) +func explainStatisticsFunction(sb *strings.Builder, fn *ast.FunctionCall, indent string, depth int) { + if len(fn.Arguments) == 0 { + // Statistics type without parameters: just the function name + fmt.Fprintf(sb, "%sFunction %s\n", indent, fn.Name) + } else { + // Statistics type with parameters: function with ExpressionList of arguments + fmt.Fprintf(sb, "%sFunction %s (children 1)\n", indent, fn.Name) + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(fn.Arguments)) + for _, arg := range fn.Arguments { + Node(sb, arg, depth+2) + } + } +} + func Index(sb *strings.Builder, idx *ast.IndexDefinition, depth int) { indent := strings.Repeat(" ", depth) children := 0 diff --git a/internal/explain/statements.go b/internal/explain/statements.go index 014d96f423..68644bb4a1 100644 --- a/internal/explain/statements.go +++ b/internal/explain/statements.go @@ -829,7 +829,12 @@ func explainAlterQuery(sb *strings.Builder, n *ast.AlterQuery, indent string, de func explainAlterCommand(sb *strings.Builder, cmd *ast.AlterCommand, indent string, depth int) { children := countAlterCommandChildren(cmd) - fmt.Fprintf(sb, "%sAlterCommand %s (children %d)\n", indent, cmd.Type, children) + // CLEAR_STATISTICS is normalized to DROP_STATISTICS in EXPLAIN AST output + cmdType := cmd.Type + if cmdType == ast.AlterClearStatistics { + cmdType = ast.AlterDropStatistics + } + fmt.Fprintf(sb, "%sAlterCommand %s (children %d)\n", indent, cmdType, children) switch cmd.Type { case ast.AlterAddColumn: @@ -917,6 +922,10 @@ func explainAlterCommand(sb *strings.Builder, cmd *ast.AlterCommand, indent stri if cmd.ProjectionName != "" { fmt.Fprintf(sb, "%s Identifier %s\n", indent, cmd.ProjectionName) } + case ast.AlterAddStatistics, ast.AlterModifyStatistics: + explainStatisticsCommand(sb, cmd, indent, depth) + case ast.AlterDropStatistics, ast.AlterClearStatistics, ast.AlterMaterializeStatistics: + explainStatisticsCommand(sb, cmd, indent, depth) default: if cmd.Partition != nil { Node(sb, cmd.Partition, depth+1) @@ -964,6 +973,49 @@ func explainProjectionSelectQuery(sb *strings.Builder, q *ast.ProjectionSelectQu } } +func explainStatisticsCommand(sb *strings.Builder, cmd *ast.AlterCommand, indent string, depth int) { + // Stat node has 1 child (columns only) or 2 children (columns + types) + statChildren := 0 + if len(cmd.StatisticsColumns) > 0 { + statChildren++ + } + if len(cmd.StatisticsTypes) > 0 { + statChildren++ + } + + fmt.Fprintf(sb, "%s Stat (children %d)\n", indent, statChildren) + + // First: column names as ExpressionList of Identifiers + if len(cmd.StatisticsColumns) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(cmd.StatisticsColumns)) + for _, col := range cmd.StatisticsColumns { + fmt.Fprintf(sb, "%s Identifier %s\n", indent, col) + } + } + + // Second: statistics types as ExpressionList of Functions + if len(cmd.StatisticsTypes) > 0 { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(cmd.StatisticsTypes)) + for _, t := range cmd.StatisticsTypes { + explainStatisticsTypeFunction(sb, t, indent+" ", depth+3) + } + } +} + +func explainStatisticsTypeFunction(sb *strings.Builder, fn *ast.FunctionCall, indent string, depth int) { + // Statistics type functions always have (children 1) even if no actual arguments + // because ClickHouse shows them with an empty ExpressionList + fmt.Fprintf(sb, "%sFunction %s (children 1)\n", indent, fn.Name) + if len(fn.Arguments) == 0 { + fmt.Fprintf(sb, "%s ExpressionList\n", indent) + } else { + fmt.Fprintf(sb, "%s ExpressionList (children %d)\n", indent, len(fn.Arguments)) + for _, arg := range fn.Arguments { + Node(sb, arg, depth+1) + } + } +} + func countAlterCommandChildren(cmd *ast.AlterCommand) int { children := 0 switch cmd.Type { @@ -1036,6 +1088,16 @@ func countAlterCommandChildren(cmd *ast.AlterCommand) int { if cmd.ProjectionName != "" { children++ } + case ast.AlterAddStatistics, ast.AlterModifyStatistics: + // Statistics commands with TYPE have one child (Stat node) + if len(cmd.StatisticsColumns) > 0 || len(cmd.StatisticsTypes) > 0 { + children = 1 + } + case ast.AlterDropStatistics, ast.AlterClearStatistics, ast.AlterMaterializeStatistics: + // Statistics commands without TYPE have one child (Stat node with just columns) + if len(cmd.StatisticsColumns) > 0 { + children = 1 + } default: if cmd.Partition != nil { children++ diff --git a/parser/parser.go b/parser/parser.go index d36bec8aff..ca37f8a0b5 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -2509,6 +2509,12 @@ func (p *Parser) parseColumnDeclaration() *ast.ColumnDeclaration { col.Type = p.parseDataType() } + // Parse STATISTICS clause (e.g., STATISTICS(tdigest, uniq)) + if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" { + p.nextToken() + col.Statistics = p.parseStatisticsExpr() + } + // Handle COLLATE clause (MySQL compatibility, e.g., varchar(255) COLLATE binary) if p.currentIs(token.COLLATE) { p.nextToken() @@ -2759,6 +2765,100 @@ func (p *Parser) parseCodecExpr() *ast.CodecExpr { return codec } +func (p *Parser) parseStatisticsExpr() []*ast.FunctionCall { + var stats []*ast.FunctionCall + + if !p.expect(token.LPAREN) { + return nil + } + + for !p.currentIs(token.RPAREN) && !p.currentIs(token.EOF) { + if p.currentIs(token.IDENT) { + name := p.current.Value + pos := p.current.Pos + p.nextToken() + + fn := &ast.FunctionCall{ + Position: pos, + Name: name, + } + + // Statistics types can have optional parameters: e.g., tdigest(100) + if p.currentIs(token.LPAREN) { + p.nextToken() + if !p.currentIs(token.RPAREN) { + fn.Arguments = p.parseExpressionList() + } + p.expect(token.RPAREN) + } + + stats = append(stats, fn) + } + + if p.currentIs(token.COMMA) { + p.nextToken() + } else { + break + } + } + + p.expect(token.RPAREN) + return stats +} + +// parseStatisticsColumnList parses comma-separated column names for ALTER STATISTICS commands +func (p *Parser) parseStatisticsColumnList() []string { + var columns []string + + for p.currentIs(token.IDENT) || p.current.Token.IsKeyword() { + columns = append(columns, p.current.Value) + p.nextToken() + + if p.currentIs(token.COMMA) { + p.nextToken() + } else { + break + } + } + + return columns +} + +// parseStatisticsTypeList parses comma-separated statistics type names for ALTER STATISTICS TYPE clause +func (p *Parser) parseStatisticsTypeList() []*ast.FunctionCall { + var types []*ast.FunctionCall + + for p.currentIs(token.IDENT) { + name := p.current.Value + pos := p.current.Pos + p.nextToken() + + fn := &ast.FunctionCall{ + Position: pos, + Name: name, + } + + // Statistics types can have optional parameters + if p.currentIs(token.LPAREN) { + p.nextToken() + if !p.currentIs(token.RPAREN) { + fn.Arguments = p.parseExpressionList() + } + p.expect(token.RPAREN) + } + + types = append(types, fn) + + if p.currentIs(token.COMMA) { + p.nextToken() + } else { + break + } + } + + return types +} + func (p *Parser) parseEngineClause() *ast.EngineClause { engine := &ast.EngineClause{ Position: p.current.Pos, @@ -3188,6 +3288,27 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand { cmd.Type = ast.AlterAddProjection p.nextToken() cmd.Projection = p.parseProjection() + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" { + cmd.Type = ast.AlterAddStatistics + p.nextToken() + // Handle IF NOT EXISTS + if p.currentIs(token.IF) { + p.nextToken() + if p.currentIs(token.NOT) { + p.nextToken() + if p.currentIs(token.EXISTS) { + cmd.IfNotExists = true + p.nextToken() + } + } + } + // Parse column list (comma-separated identifiers) + cmd.StatisticsColumns = p.parseStatisticsColumnList() + // Parse TYPE clause + if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "TYPE" { + p.nextToken() + cmd.StatisticsTypes = p.parseStatisticsTypeList() + } } case token.DROP: p.nextToken() @@ -3233,6 +3354,15 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand { cmd.ProjectionName = p.current.Value p.nextToken() } + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" { + cmd.Type = ast.AlterDropStatistics + p.nextToken() + if p.currentIs(token.IF) { + p.nextToken() + p.expect(token.EXISTS) + cmd.IfExists = true + } + cmd.StatisticsColumns = p.parseStatisticsColumnList() } case token.IDENT: // Handle CLEAR, MATERIALIZE @@ -3260,6 +3390,15 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand { cmd.ProjectionName = p.current.Value p.nextToken() } + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" { + cmd.Type = ast.AlterClearStatistics + p.nextToken() + if p.currentIs(token.IF) { + p.nextToken() + p.expect(token.EXISTS) + cmd.IfExists = true + } + cmd.StatisticsColumns = p.parseStatisticsColumnList() } } else if upper == "MATERIALIZE" { p.nextToken() @@ -3277,6 +3416,15 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand { cmd.ProjectionName = p.current.Value p.nextToken() } + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" { + cmd.Type = ast.AlterMaterializeStatistics + p.nextToken() + if p.currentIs(token.IF) { + p.nextToken() + p.expect(token.EXISTS) + cmd.IfExists = true + } + cmd.StatisticsColumns = p.parseStatisticsColumnList() } } else { return nil @@ -3299,6 +3447,16 @@ func (p *Parser) parseAlterCommand() *ast.AlterCommand { cmd.Type = ast.AlterModifySetting p.nextToken() cmd.Settings = p.parseSettingsList() + } else if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "STATISTICS" { + cmd.Type = ast.AlterModifyStatistics + p.nextToken() + // Parse column list (comma-separated identifiers) + cmd.StatisticsColumns = p.parseStatisticsColumnList() + // Parse TYPE clause + if p.currentIs(token.IDENT) && strings.ToUpper(p.current.Value) == "TYPE" { + p.nextToken() + cmd.StatisticsTypes = p.parseStatisticsTypeList() + } } case token.RENAME: p.nextToken() diff --git a/parser/testdata/02864_statistics_ddl/metadata.json b/parser/testdata/02864_statistics_ddl/metadata.json index ed95752596..0967ef424b 100644 --- a/parser/testdata/02864_statistics_ddl/metadata.json +++ b/parser/testdata/02864_statistics_ddl/metadata.json @@ -1,69 +1 @@ -{ - "explain_todo": { - "stmt100": true, - "stmt101": true, - "stmt102": true, - "stmt103": true, - "stmt104": true, - "stmt105": true, - "stmt106": true, - "stmt107": true, - "stmt108": true, - "stmt109": true, - "stmt110": true, - "stmt111": true, - "stmt112": true, - "stmt115": true, - "stmt116": true, - "stmt119": true, - "stmt120": true, - "stmt123": true, - "stmt124": true, - "stmt127": true, - "stmt128": true, - "stmt131": true, - "stmt133": true, - "stmt135": true, - "stmt137": true, - "stmt139": true, - "stmt21": true, - "stmt22": true, - "stmt23": true, - "stmt24": true, - "stmt25": true, - "stmt26": true, - "stmt27": true, - "stmt28": true, - "stmt4": true, - "stmt44": true, - "stmt45": true, - "stmt46": true, - "stmt47": true, - "stmt48": true, - "stmt6": true, - "stmt64": true, - "stmt65": true, - "stmt66": true, - "stmt67": true, - "stmt68": true, - "stmt7": true, - "stmt82": true, - "stmt83": true, - "stmt84": true, - "stmt85": true, - "stmt86": true, - "stmt87": true, - "stmt88": true, - "stmt89": true, - "stmt90": true, - "stmt91": true, - "stmt92": true, - "stmt93": true, - "stmt94": true, - "stmt95": true, - "stmt96": true, - "stmt97": true, - "stmt98": true, - "stmt99": true - } -} +{}