diff --git a/parser/ast.go b/parser/ast.go index 8cb13c9..6d6e204 100644 --- a/parser/ast.go +++ b/parser/ast.go @@ -1770,19 +1770,19 @@ func (c *CreateDatabase) Accept(visitor ASTVisitor) error { } type CreateTable struct { - CreatePos Pos // position of CREATE|ATTACH keyword - StatementEnd Pos - OrReplace bool - Name *TableIdentifier - IfNotExists bool - UUID *UUID - OnCluster *ClusterClause - TableSchema *TableSchemaClause - Engine *EngineExpr - SubQuery *SubQuery - TableFunction *TableFunctionExpr - HasTemporary bool - Comment *StringLiteral + CreatePos Pos // position of CREATE|ATTACH keyword + StatementEnd Pos + OrReplace bool + Name *TableIdentifier + IfNotExists bool + UUID *UUID + OnCluster *ClusterClause + TableSchema *TableSchemaClause + Engine *EngineExpr + SubQuery *SubQuery + TableFunction *TableFunctionExpr + HasTemporary bool + Comment *StringLiteral } func (c *CreateTable) Pos() Pos { @@ -7633,11 +7633,41 @@ func (g *GlobalInOperation) Accept(visitor ASTVisitor) error { return visitor.VisitGlobalInExpr(g) } +type IntervalFrom struct { + Interval *Ident + FromPos Pos + FromExpr Expr +} + +func (i *IntervalFrom) Pos() Pos { + return i.Interval.NamePos +} + +func (i *IntervalFrom) End() Pos { + return i.FromExpr.End() +} + +func (i *IntervalFrom) String() string { + var builder strings.Builder + builder.WriteString(i.Interval.String()) + builder.WriteString(" FROM ") + builder.WriteString(i.FromExpr.String()) + return builder.String() +} + +func (i *IntervalFrom) Accept(visitor ASTVisitor) error { + visitor.Enter(i) + defer visitor.Leave(i) + if err := i.FromExpr.Accept(visitor); err != nil { + return err + } + return visitor.VisitIntervalFrom(i) +} + type ExtractExpr struct { ExtractPos Pos - Interval *Ident - FromPos Pos - FromExpr Expr + ExtractEnd Pos + Parameters []Expr } func (e *ExtractExpr) Pos() Pos { @@ -7645,15 +7675,18 @@ func (e *ExtractExpr) Pos() Pos { } func (e *ExtractExpr) End() Pos { - return e.FromExpr.End() + return e.ExtractEnd } func (e *ExtractExpr) String() string { var builder strings.Builder builder.WriteString("EXTRACT(") - builder.WriteString(e.Interval.String()) - builder.WriteString(" FROM ") - builder.WriteString(e.FromExpr.String()) + for i, param := range e.Parameters { + if i > 0 { + builder.WriteString(", ") + } + builder.WriteString(param.String()) + } builder.WriteByte(')') return builder.String() } @@ -7661,8 +7694,10 @@ func (e *ExtractExpr) String() string { func (e *ExtractExpr) Accept(visitor ASTVisitor) error { visitor.Enter(e) defer visitor.Leave(e) - if err := e.FromExpr.Accept(visitor); err != nil { - return err + for _, param := range e.Parameters { + if err := param.Accept(visitor); err != nil { + return err + } } return visitor.VisitExtractExpr(e) } diff --git a/parser/ast_visitor.go b/parser/ast_visitor.go index ee8f066..527c618 100644 --- a/parser/ast_visitor.go +++ b/parser/ast_visitor.go @@ -160,6 +160,7 @@ type ASTVisitor interface { VisitNegateExpr(expr *NegateExpr) error VisitGlobalInExpr(expr *GlobalInOperation) error VisitExtractExpr(expr *ExtractExpr) error + VisitIntervalFrom(expr *IntervalFrom) error VisitDropDatabase(expr *DropDatabase) error VisitDropStmt(expr *DropStmt) error VisitDropUserOrRole(expr *DropUserOrRole) error @@ -1318,6 +1319,13 @@ func (v *DefaultASTVisitor) VisitExtractExpr(expr *ExtractExpr) error { return nil } +func (v *DefaultASTVisitor) VisitIntervalFrom(expr *IntervalFrom) error { + if v.Visit != nil { + return v.Visit(expr) + } + return nil +} + func (v *DefaultASTVisitor) VisitDropDatabase(expr *DropDatabase) error { if v.Visit != nil { return v.Visit(expr) diff --git a/parser/parser_column.go b/parser/parser_column.go index c68d63f..4225060 100644 --- a/parser/parser_column.go +++ b/parser/parser_column.go @@ -261,40 +261,70 @@ func (p *Parser) parseTernaryExpr(condition Expr) (*TernaryOperation, error) { }, nil } -func (p *Parser) parseColumnExtractExpr(pos Pos) (*ExtractExpr, error) { - if err := p.expectKeyword(KeywordExtract); err != nil { +func (p *Parser) parseExtractFrom(ident *Ident) (*IntervalFrom, error) { + fromPos := p.Pos() + if err := p.expectKeyword(KeywordFrom); err != nil { return nil, err } - if err := p.expectTokenKind(TokenKindLParen); err != nil { + + expr, err := p.parseExpr(p.Pos()) + if err != nil { return nil, err } + return &IntervalFrom{ + Interval: ident, + FromPos: fromPos, + FromExpr: expr, + }, nil +} - // parse interval - ident, err := p.parseIdent() - if err != nil { +func (p *Parser) parseColumnExtractExpr(pos Pos) (*ExtractExpr, error) { + if err := p.expectKeyword(KeywordExtract); err != nil { return nil, err } - if !intervalUnits.Contains(strings.ToUpper(ident.Name)) { - return nil, fmt.Errorf("unknown interval type: <%q>", ident.Name) + if err := p.expectTokenKind(TokenKindLParen); err != nil { + return nil, err } - fromPos := p.Pos() - if err := p.expectKeyword(KeywordFrom); err != nil { - return nil, err + parameters := make([]Expr, 0) + for !p.lexer.isEOF() { + expr, err := p.parseExpr(p.Pos()) + if err != nil { + return nil, err + } + + var param Expr + if ident, ok := expr.(*Ident); ok { + if intervalUnits.Contains(strings.ToUpper(ident.Name)) && p.matchKeyword(KeywordFrom) { + param, err = p.parseExtractFrom(ident) + if err != nil { + return nil, err + } + parameters = append(parameters, param) + } else { + parameters = append(parameters, expr) + } + } else { + parameters = append(parameters, expr) + } + + if p.tryConsumeTokenKind(TokenKindComma) == nil { + break + } } - expr, err := p.parseExpr(p.Pos()) - if err != nil { - return nil, err + if len(parameters) == 0 { + return nil, fmt.Errorf("EXTRACT requires at least one parameter") } + + extractEnd := p.Pos() if err := p.expectTokenKind(TokenKindRParen); err != nil { return nil, err } return &ExtractExpr{ ExtractPos: pos, - Interval: ident, - FromPos: fromPos, - FromExpr: expr, + ExtractEnd: extractEnd, + Parameters: parameters, }, nil } diff --git a/parser/testdata/query/format/select_extract_with_regex.sql b/parser/testdata/query/format/select_extract_with_regex.sql new file mode 100644 index 0000000..02f15a7 --- /dev/null +++ b/parser/testdata/query/format/select_extract_with_regex.sql @@ -0,0 +1,27 @@ +-- Origin SQL: +SELECT + COUNT(1), SRC_TYPE, NODE_CLASS, PORT, CLIENT_PORT +FROM + test.table +WHERE + app_id = 999118646 + AND toUnixTimestamp(timestamp) >= 1740366695 + AND toUnixTimestamp(timestamp) <= 1740377495 +GROUP BY + CASE + WHEN length(extract(instance, '((\\d+\\.){3}\\d+)')) > 0 THEN instance + ELSE '空' + END, + CASE + WHEN length(extract(client_ip, '((\\d+\\.){3}\\d+)')) > 0 THEN client_ip + ELSE '空' + END, + src_type, + node_class, + port, + client_port +LIMIT 10000 + + +-- Format SQL: +SELECT COUNT(1), SRC_TYPE, NODE_CLASS, PORT, CLIENT_PORT FROM test.table WHERE app_id = 999118646 AND toUnixTimestamp(timestamp) >= 1740366695 AND toUnixTimestamp(timestamp) <= 1740377495 GROUP BY CASE WHEN length(EXTRACT(instance, '((\\d+\\.){3}\\d+)')) > 0 THEN instance ELSE '空' END, CASE WHEN length(EXTRACT(client_ip, '((\\d+\\.){3}\\d+)')) > 0 THEN client_ip ELSE '空' END, src_type, node_class, port, client_port LIMIT 10000; diff --git a/parser/testdata/query/output/select_extract_with_regex.sql.golden.json b/parser/testdata/query/output/select_extract_with_regex.sql.golden.json new file mode 100644 index 0000000..07102a0 --- /dev/null +++ b/parser/testdata/query/output/select_extract_with_regex.sql.golden.json @@ -0,0 +1,454 @@ +[ + { + "SelectPos": 0, + "StatementEnd": 476, + "With": null, + "Top": null, + "HasDistinct": false, + "DistinctOn": null, + "SelectItems": [ + { + "Expr": { + "Name": { + "Name": "COUNT", + "QuoteType": 1, + "NamePos": 9, + "NameEnd": 14 + }, + "Params": { + "LeftParenPos": 14, + "RightParenPos": 16, + "Items": { + "ListPos": 15, + "ListEnd": 16, + "HasDistinct": false, + "Items": [ + { + "Expr": { + "NumPos": 15, + "NumEnd": 16, + "Literal": "1", + "Base": 10 + }, + "Alias": null + } + ] + }, + "ColumnArgList": null + } + }, + "Modifiers": [], + "Alias": null + }, + { + "Expr": { + "Name": "SRC_TYPE", + "QuoteType": 1, + "NamePos": 19, + "NameEnd": 27 + }, + "Modifiers": [], + "Alias": null + }, + { + "Expr": { + "Name": "NODE_CLASS", + "QuoteType": 1, + "NamePos": 29, + "NameEnd": 39 + }, + "Modifiers": [], + "Alias": null + }, + { + "Expr": { + "Name": "PORT", + "QuoteType": 1, + "NamePos": 41, + "NameEnd": 45 + }, + "Modifiers": [], + "Alias": null + }, + { + "Expr": { + "Name": "CLIENT_PORT", + "QuoteType": 1, + "NamePos": 47, + "NameEnd": 58 + }, + "Modifiers": [], + "Alias": null + } + ], + "From": { + "FromPos": 59, + "Expr": { + "Table": { + "TablePos": 66, + "TableEnd": 76, + "Alias": null, + "Expr": { + "Database": { + "Name": "test", + "QuoteType": 1, + "NamePos": 66, + "NameEnd": 70 + }, + "Table": { + "Name": "table", + "QuoteType": 1, + "NamePos": 71, + "NameEnd": 76 + } + }, + "HasFinal": false + }, + "StatementEnd": 76, + "SampleRatio": null, + "HasFinal": false + } + }, + "ArrayJoin": null, + "Window": null, + "Prewhere": null, + "Where": { + "WherePos": 77, + "Expr": { + "LeftExpr": { + "LeftExpr": { + "LeftExpr": { + "Name": "app_id", + "QuoteType": 1, + "NamePos": 85, + "NameEnd": 91 + }, + "Operation": "=", + "RightExpr": { + "NumPos": 94, + "NumEnd": 103, + "Literal": "999118646", + "Base": 10 + }, + "HasGlobal": false, + "HasNot": false + }, + "Operation": "AND", + "RightExpr": { + "LeftExpr": { + "Name": { + "Name": "toUnixTimestamp", + "QuoteType": 1, + "NamePos": 110, + "NameEnd": 125 + }, + "Params": { + "LeftParenPos": 125, + "RightParenPos": 135, + "Items": { + "ListPos": 126, + "ListEnd": 135, + "HasDistinct": false, + "Items": [ + { + "Expr": { + "Name": "timestamp", + "QuoteType": 1, + "NamePos": 126, + "NameEnd": 135 + }, + "Alias": null + } + ] + }, + "ColumnArgList": null + } + }, + "Operation": "\u003e=", + "RightExpr": { + "NumPos": 140, + "NumEnd": 150, + "Literal": "1740366695", + "Base": 10 + }, + "HasGlobal": false, + "HasNot": false + }, + "HasGlobal": false, + "HasNot": false + }, + "Operation": "AND", + "RightExpr": { + "LeftExpr": { + "Name": { + "Name": "toUnixTimestamp", + "QuoteType": 1, + "NamePos": 157, + "NameEnd": 172 + }, + "Params": { + "LeftParenPos": 172, + "RightParenPos": 182, + "Items": { + "ListPos": 173, + "ListEnd": 182, + "HasDistinct": false, + "Items": [ + { + "Expr": { + "Name": "timestamp", + "QuoteType": 1, + "NamePos": 173, + "NameEnd": 182 + }, + "Alias": null + } + ] + }, + "ColumnArgList": null + } + }, + "Operation": "\u003c=", + "RightExpr": { + "NumPos": 187, + "NumEnd": 197, + "Literal": "1740377495", + "Base": 10 + }, + "HasGlobal": false, + "HasNot": false + }, + "HasGlobal": false, + "HasNot": false + } + }, + "GroupBy": { + "GroupByPos": 198, + "GroupByEnd": 465, + "AggregateType": "", + "Expr": { + "ListPos": 209, + "ListEnd": 464, + "HasDistinct": false, + "Items": [ + { + "Expr": { + "CasePos": 209, + "EndPos": 0, + "Expr": null, + "Whens": [ + { + "WhenPos": 218, + "ThenPos": 275, + "When": { + "LeftExpr": { + "Name": { + "Name": "length", + "QuoteType": 1, + "NamePos": 223, + "NameEnd": 229 + }, + "Params": { + "LeftParenPos": 229, + "RightParenPos": 269, + "Items": { + "ListPos": 230, + "ListEnd": 268, + "HasDistinct": false, + "Items": [ + { + "Expr": { + "ExtractPos": 230, + "ExtractEnd": 268, + "Parameters": [ + { + "Name": "instance", + "QuoteType": 1, + "NamePos": 238, + "NameEnd": 246 + }, + { + "LiteralPos": 249, + "LiteralEnd": 267, + "Literal": "((\\\\d+\\\\.){3}\\\\d+)" + } + ] + }, + "Alias": null + } + ] + }, + "ColumnArgList": null + } + }, + "Operation": "\u003e", + "RightExpr": { + "NumPos": 273, + "NumEnd": 274, + "Literal": "0", + "Base": 10 + }, + "HasGlobal": false, + "HasNot": false + }, + "Then": { + "Name": "instance", + "QuoteType": 1, + "NamePos": 280, + "NameEnd": 288 + }, + "ElsePos": 0, + "Else": null + } + ], + "ElsePos": 293, + "Else": { + "LiteralPos": 299, + "LiteralEnd": 302, + "Literal": "空" + } + }, + "Alias": null + }, + { + "Expr": { + "CasePos": 313, + "EndPos": 0, + "Expr": null, + "Whens": [ + { + "WhenPos": 322, + "ThenPos": 380, + "When": { + "LeftExpr": { + "Name": { + "Name": "length", + "QuoteType": 1, + "NamePos": 327, + "NameEnd": 333 + }, + "Params": { + "LeftParenPos": 333, + "RightParenPos": 374, + "Items": { + "ListPos": 334, + "ListEnd": 373, + "HasDistinct": false, + "Items": [ + { + "Expr": { + "ExtractPos": 334, + "ExtractEnd": 373, + "Parameters": [ + { + "Name": "client_ip", + "QuoteType": 1, + "NamePos": 342, + "NameEnd": 351 + }, + { + "LiteralPos": 354, + "LiteralEnd": 372, + "Literal": "((\\\\d+\\\\.){3}\\\\d+)" + } + ] + }, + "Alias": null + } + ] + }, + "ColumnArgList": null + } + }, + "Operation": "\u003e", + "RightExpr": { + "NumPos": 378, + "NumEnd": 379, + "Literal": "0", + "Base": 10 + }, + "HasGlobal": false, + "HasNot": false + }, + "Then": { + "Name": "client_ip", + "QuoteType": 1, + "NamePos": 385, + "NameEnd": 394 + }, + "ElsePos": 0, + "Else": null + } + ], + "ElsePos": 399, + "Else": { + "LiteralPos": 405, + "LiteralEnd": 408, + "Literal": "空" + } + }, + "Alias": null + }, + { + "Expr": { + "Name": "src_type", + "QuoteType": 1, + "NamePos": 419, + "NameEnd": 427 + }, + "Alias": null + }, + { + "Expr": { + "Name": "node_class", + "QuoteType": 1, + "NamePos": 431, + "NameEnd": 441 + }, + "Alias": null + }, + { + "Expr": { + "Name": "port", + "QuoteType": 1, + "NamePos": 445, + "NameEnd": 449 + }, + "Alias": null + }, + { + "Expr": { + "Name": "client_port", + "QuoteType": 1, + "NamePos": 453, + "NameEnd": 464 + }, + "Alias": null + } + ] + }, + "WithCube": false, + "WithRollup": false, + "WithTotals": false + }, + "WithTotal": false, + "Having": null, + "OrderBy": null, + "LimitBy": null, + "Limit": { + "LimitPos": 465, + "Limit": { + "NumPos": 471, + "NumEnd": 476, + "Literal": "10000", + "Base": 10 + }, + "Offset": null + }, + "Settings": null, + "Format": null, + "UnionAll": null, + "UnionDistinct": null, + "Except": null + } +] \ No newline at end of file diff --git a/parser/testdata/query/output/select_window_comprehensive.sql.golden.json b/parser/testdata/query/output/select_window_comprehensive.sql.golden.json index 325d6c7..d59cd2d 100644 --- a/parser/testdata/query/output/select_window_comprehensive.sql.golden.json +++ b/parser/testdata/query/output/select_window_comprehensive.sql.golden.json @@ -1884,19 +1884,24 @@ "OrderPos": 3780, "Expr": { "ExtractPos": 3780, - "Interval": { - "Name": "HOUR", - "QuoteType": 1, - "NamePos": 3797, - "NameEnd": 3801 - }, - "FromPos": 3802, - "FromExpr": { - "Name": "timestamp", - "QuoteType": 1, - "NamePos": 3807, - "NameEnd": 3816 - } + "ExtractEnd": 3816, + "Parameters": [ + { + "Interval": { + "Name": "HOUR", + "QuoteType": 1, + "NamePos": 3797, + "NameEnd": 3801 + }, + "FromPos": 3802, + "FromExpr": { + "Name": "timestamp", + "QuoteType": 1, + "NamePos": 3807, + "NameEnd": 3816 + } + } + ] }, "Alias": null, "Direction": "", diff --git a/parser/testdata/query/select_extract_with_regex.sql b/parser/testdata/query/select_extract_with_regex.sql new file mode 100644 index 0000000..8accf10 --- /dev/null +++ b/parser/testdata/query/select_extract_with_regex.sql @@ -0,0 +1,22 @@ +SELECT + COUNT(1), SRC_TYPE, NODE_CLASS, PORT, CLIENT_PORT +FROM + test.table +WHERE + app_id = 999118646 + AND toUnixTimestamp(timestamp) >= 1740366695 + AND toUnixTimestamp(timestamp) <= 1740377495 +GROUP BY + CASE + WHEN length(extract(instance, '((\\d+\\.){3}\\d+)')) > 0 THEN instance + ELSE '空' + END, + CASE + WHEN length(extract(client_ip, '((\\d+\\.){3}\\d+)')) > 0 THEN client_ip + ELSE '空' + END, + src_type, + node_class, + port, + client_port +LIMIT 10000 diff --git a/parser/walk.go b/parser/walk.go index a69fde3..41f2037 100644 --- a/parser/walk.go +++ b/parser/walk.go @@ -1353,6 +1353,15 @@ func Walk(node Expr, fn WalkFunc) bool { return false } case *ExtractExpr: + for _, param := range n.Parameters { + if !Walk(param, fn) { + return false + } + } + case *IntervalFrom: + if !Walk(n.Interval, fn) { + return false + } if !Walk(n.FromExpr, fn) { return false }