@@ -45,9 +45,6 @@ pub enum ParserError {
4545 TokenizerError(String),
4646 ParserError(String),
4747 RecursionLimitExceeded,
48- /// Error indicating that the parsing branch taken
49- /// did not yield a meaningful result
50- BranchAbandoned,
5148}
5249
5350// avoid clippy type_complexity warnings
@@ -177,7 +174,6 @@ impl fmt::Display for ParserError {
177174 ParserError::TokenizerError(s) => s,
178175 ParserError::ParserError(s) => s,
179176 ParserError::RecursionLimitExceeded => "recursion limit exceeded",
180- ParserError::BranchAbandoned => "branch abandoned",
181177 }
182178 )
183179 }
@@ -1013,45 +1009,48 @@ impl<'a> Parser<'a> {
10131009 Ok(Statement::NOTIFY { channel, payload })
10141010 }
10151011
1016- fn parse_expr_by_keyword(&mut self, w: &Word) -> Result<Expr, ParserError> {
1012+ fn parse_expr_prefix_by_reserved_word(
1013+ &mut self,
1014+ w: &Word,
1015+ ) -> Result<Option<Expr>, ParserError> {
10171016 match w.keyword {
10181017 Keyword::TRUE | Keyword::FALSE if self.dialect.supports_boolean_literals() => {
10191018 self.prev_token();
1020- Ok(Expr::Value(self.parse_value()?))
1019+ Ok(Some( Expr::Value(self.parse_value()?) ))
10211020 }
10221021 Keyword::NULL => {
10231022 self.prev_token();
1024- Ok(Expr::Value(self.parse_value()?))
1023+ Ok(Some( Expr::Value(self.parse_value()?) ))
10251024 }
10261025 Keyword::CURRENT_CATALOG
10271026 | Keyword::CURRENT_USER
10281027 | Keyword::SESSION_USER
10291028 | Keyword::USER
10301029 if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
10311030 {
1032- Ok(Expr::Function(Function {
1031+ Ok(Some( Expr::Function(Function {
10331032 name: ObjectName(vec![w.to_ident()]),
10341033 parameters: FunctionArguments::None,
10351034 args: FunctionArguments::None,
10361035 null_treatment: None,
10371036 filter: None,
10381037 over: None,
10391038 within_group: vec![],
1040- }))
1039+ })))
10411040 }
10421041 Keyword::CURRENT_TIMESTAMP
10431042 | Keyword::CURRENT_TIME
10441043 | Keyword::CURRENT_DATE
10451044 | Keyword::LOCALTIME
10461045 | Keyword::LOCALTIMESTAMP => {
1047- self.parse_time_functions(ObjectName(vec![w.to_ident()]))
1048- }
1049- Keyword::CASE => self.parse_case_expr(),
1050- Keyword::CONVERT => self.parse_convert_expr(false),
1051- Keyword::TRY_CONVERT if self.dialect.supports_try_convert() => self.parse_convert_expr(true),
1052- Keyword::CAST => self.parse_cast_expr(CastKind::Cast),
1053- Keyword::TRY_CAST => self.parse_cast_expr(CastKind::TryCast),
1054- Keyword::SAFE_CAST => self.parse_cast_expr(CastKind::SafeCast),
1046+ Ok(Some( self.parse_time_functions(ObjectName(vec![w.to_ident()]))? ))
1047+ }
1048+ Keyword::CASE => Ok(Some( self.parse_case_expr()?) ),
1049+ Keyword::CONVERT => Ok(Some( self.parse_convert_expr(false)?) ),
1050+ Keyword::TRY_CONVERT if self.dialect.supports_try_convert() => Ok(Some( self.parse_convert_expr(true)?) ),
1051+ Keyword::CAST => Ok(Some( self.parse_cast_expr(CastKind::Cast)?) ),
1052+ Keyword::TRY_CAST => Ok(Some( self.parse_cast_expr(CastKind::TryCast)?) ),
1053+ Keyword::SAFE_CAST => Ok(Some( self.parse_cast_expr(CastKind::SafeCast)?) ),
10551054 Keyword::EXISTS
10561055 // Support parsing Databricks has a function named `exists`.
10571056 if !dialect_of!(self is DatabricksDialect)
@@ -1063,22 +1062,22 @@ impl<'a> Parser<'a> {
10631062 })
10641063 ) =>
10651064 {
1066- self.parse_exists_expr(false)
1065+ Ok(Some( self.parse_exists_expr(false)?) )
10671066 }
1068- Keyword::EXTRACT => self.parse_extract_expr(),
1069- Keyword::CEIL => self.parse_ceil_floor_expr(true),
1070- Keyword::FLOOR => self.parse_ceil_floor_expr(false),
1067+ Keyword::EXTRACT => Ok(Some( self.parse_extract_expr()?) ),
1068+ Keyword::CEIL => Ok(Some( self.parse_ceil_floor_expr(true)?) ),
1069+ Keyword::FLOOR => Ok(Some( self.parse_ceil_floor_expr(false)?) ),
10711070 Keyword::POSITION if self.peek_token().token == Token::LParen => {
1072- self.parse_position_expr(w.to_ident())
1071+ Ok(Some( self.parse_position_expr(w.to_ident())? ))
10731072 }
1074- Keyword::SUBSTRING => self.parse_substring_expr(),
1075- Keyword::OVERLAY => self.parse_overlay_expr(),
1076- Keyword::TRIM => self.parse_trim_expr(),
1077- Keyword::INTERVAL => self.parse_interval(),
1073+ Keyword::SUBSTRING => Ok(Some( self.parse_substring_expr()?) ),
1074+ Keyword::OVERLAY => Ok(Some( self.parse_overlay_expr()?) ),
1075+ Keyword::TRIM => Ok(Some( self.parse_trim_expr()?) ),
1076+ Keyword::INTERVAL => Ok(Some( self.parse_interval()?) ),
10781077 // Treat ARRAY[1,2,3] as an array [1,2,3], otherwise try as subquery or a function call
10791078 Keyword::ARRAY if self.peek_token() == Token::LBracket => {
10801079 self.expect_token(&Token::LBracket)?;
1081- self.parse_array_expr(true)
1080+ Ok(Some( self.parse_array_expr(true)?) )
10821081 }
10831082 Keyword::ARRAY
10841083 if self.peek_token() == Token::LParen
@@ -1087,37 +1086,36 @@ impl<'a> Parser<'a> {
10871086 self.expect_token(&Token::LParen)?;
10881087 let query = self.parse_query()?;
10891088 self.expect_token(&Token::RParen)?;
1090- Ok(Expr::Function(Function {
1089+ Ok(Some( Expr::Function(Function {
10911090 name: ObjectName(vec![w.to_ident()]),
10921091 parameters: FunctionArguments::None,
10931092 args: FunctionArguments::Subquery(query),
10941093 filter: None,
10951094 null_treatment: None,
10961095 over: None,
10971096 within_group: vec![],
1098- }))
1097+ })))
10991098 }
1100- Keyword::NOT => self.parse_not(),
1099+ Keyword::NOT => Ok(Some( self.parse_not()?) ),
11011100 Keyword::MATCH if dialect_of!(self is MySqlDialect | GenericDialect) => {
1102- self.parse_match_against()
1101+ Ok(Some( self.parse_match_against()?) )
11031102 }
11041103 Keyword::STRUCT if dialect_of!(self is BigQueryDialect | GenericDialect) => {
11051104 self.prev_token();
1106- self.parse_bigquery_struct_literal()
1105+ Ok(Some( self.parse_bigquery_struct_literal()?) )
11071106 }
11081107 Keyword::PRIOR if matches!(self.state, ParserState::ConnectBy) => {
11091108 let expr = self.parse_subexpr(self.dialect.prec_value(Precedence::PlusMinus))?;
1110- Ok(Expr::Prior(Box::new(expr)))
1109+ Ok(Some( Expr::Prior(Box::new(expr) )))
11111110 }
11121111 Keyword::MAP if self.peek_token() == Token::LBrace && self.dialect.support_map_literal_syntax() => {
1113- self.parse_duckdb_map_literal()
1112+ Ok(Some( self.parse_duckdb_map_literal()?) )
11141113 }
1115- Keyword::DEFAULT => Ok(Expr::Default),
1116- _ => Err(ParserError::BranchAbandoned)
1114+ _ => Ok(None)
11171115 }
11181116 }
11191117
1120- fn parse_ident_expr (&mut self, w: &Word) -> Result<Expr, ParserError> {
1118+ fn parse_expr_prefix_by_nonreserved_word (&mut self, w: &Word) -> Result<Expr, ParserError> {
11211119 match self.peek_token().token {
11221120 Token::LParen | Token::Period => {
11231121 let mut id_parts: Vec<Ident> = vec![w.to_ident()];
@@ -1233,23 +1231,33 @@ impl<'a> Parser<'a> {
12331231
12341232 let next_token = self.next_token();
12351233 let expr = match next_token.token {
1236- // We first try to parse the word as the prefix of an expression.
1237- // For example, the word INTERVAL in: SELECT INTERVAL '7' DAY
1238- Token::Word(w) => match self.try_parse(|parser| parser.parse_expr_by_keyword(&w)) {
1239- Ok(expr) => Ok(expr),
1240- // Word does not indicate the start of a complex expression, try to parse as identifier
1241- Err(ParserError::BranchAbandoned) => Ok(self.parse_ident_expr(&w)?),
1242- // Word indicates the start of a complex expression, try to parse as identifier if the
1243- // dialect does not reserve it, otherwise return the original error
1244- Err(e) => {
1245- if !self.dialect.is_reserved_for_identifier(w.keyword) {
1246- if let Ok(expr) = self.try_parse(|parser| parser.parse_ident_expr(&w)) {
1247- return Ok(expr);
1234+ Token::Word(w) => {
1235+ // Save the parser index so we can rollback
1236+ let index_before = self.index;
1237+ // We first try to parse the word as the prefix of an expression.
1238+ // For example, the word INTERVAL in: SELECT INTERVAL '7' DAY
1239+ match self.parse_expr_prefix_by_reserved_word(&w) {
1240+ // No expression prefix associated with this word
1241+ Ok(None) => Ok(self.parse_expr_prefix_by_nonreserved_word(&w)?),
1242+ // This word indicated an expression prefix and parsing was successful
1243+ Ok(Some(expr)) => Ok(expr),
1244+ // This word indicated an expression prefix but parsing failed. Two options:
1245+ // 1. Malformed statement
1246+ // 2. The dialect may allow this word as identifier as well as indicating an expression
1247+ Err(e) => {
1248+ let index_after_error = self.index;
1249+ if !self.dialect.is_reserved_for_identifier(w.keyword) {
1250+ // Rollback before trying to parse using a different approach
1251+ self.index = index_before;
1252+ if let Ok(expr) = self.parse_expr_prefix_by_nonreserved_word(&w) {
1253+ return Ok(expr);
1254+ }
12481255 }
1256+ self.index = index_after_error;
1257+ return Err(e);
12491258 }
1250- return Err(e);
12511259 }
1252- }, // End of Token::Word
1260+ } // End of Token::Word
12531261 // array `[1, 2, 3]`
12541262 Token::LBracket => self.parse_array_expr(false),
12551263 tok @ Token::Minus | tok @ Token::Plus => {
@@ -3680,24 +3688,6 @@ impl<'a> Parser<'a> {
36803688 }
36813689 }
36823690
3683- /// Run a parser method `f`, reverting back to the current position if unsuccessful
3684- /// but retaining the error message if such was raised by `f`
3685- pub fn try_parse<T, F>(&mut self, mut f: F) -> Result<T, ParserError>
3686- where
3687- F: FnMut(&mut Parser) -> Result<T, ParserError>,
3688- {
3689- let index = self.index;
3690- match f(self) {
3691- Ok(t) => Ok(t),
3692- // Unwind stack if limit exceeded
3693- Err(ParserError::RecursionLimitExceeded) => Err(ParserError::RecursionLimitExceeded),
3694- Err(e) => {
3695- self.index = index;
3696- Err(e)
3697- }
3698- }
3699- }
3700-
37013691 /// Parse either `ALL`, `DISTINCT` or `DISTINCT ON (...)`. Returns [`None`] if `ALL` is parsed
37023692 /// and results in a [`ParserError`] if both `ALL` and `DISTINCT` are found.
37033693 pub fn parse_all_or_distinct(&mut self) -> Result<Option<Distinct>, ParserError> {