@@ -2107,8 +2107,9 @@ impl<'a> Tokenizer<'a> {
2107
2107
) -> Result < Option < Token > , TokenizerError > {
2108
2108
let mut s = String :: new ( ) ;
2109
2109
let mut nested = 1 ;
2110
+ let mut c_style_comments = false ;
2110
2111
let supports_nested_comments = self . dialect . supports_nested_comments ( ) ;
2111
-
2112
+ let supports_c_style_comments = self . dialect . supports_c_style_comments ( ) ;
2112
2113
loop {
2113
2114
match chars. next ( ) {
2114
2115
Some ( '/' ) if matches ! ( chars. peek( ) , Some ( '*' ) ) && supports_nested_comments => {
@@ -2117,10 +2118,40 @@ impl<'a> Tokenizer<'a> {
2117
2118
s. push ( '*' ) ;
2118
2119
nested += 1 ;
2119
2120
}
2121
+ Some ( '!' ) if supports_c_style_comments => {
2122
+ c_style_comments = true ;
2123
+ // consume the optional version digits and whitespace
2124
+ while let Some ( & c) = chars. peek ( ) {
2125
+ if c. is_ascii_digit ( ) || c. is_whitespace ( ) {
2126
+ chars. next ( ) ;
2127
+ } else {
2128
+ break ;
2129
+ }
2130
+ }
2131
+ }
2132
+ // consume all leading whitespaces until the '*/' character if in a C-style comment
2133
+ Some ( ch) if ch. is_whitespace ( ) && c_style_comments => {
2134
+ let mut tmp_s = String :: new ( ) ;
2135
+ while let Some ( c) = chars. next ( ) {
2136
+ if c. is_whitespace ( ) {
2137
+ tmp_s. push ( c) ;
2138
+ } else if c == '*' && chars. peek ( ) == Some ( & '/' ) {
2139
+ chars. next ( ) ; // consume the '/'
2140
+ return Ok ( Some ( Token :: make_word ( & s, None ) ) ) ;
2141
+ } else {
2142
+ tmp_s. push ( c) ;
2143
+ s. push_str ( & tmp_s) ;
2144
+ break ;
2145
+ }
2146
+ }
2147
+ }
2120
2148
Some ( '*' ) if matches ! ( chars. peek( ) , Some ( '/' ) ) => {
2121
2149
chars. next ( ) ; // consume the '/'
2122
2150
nested -= 1 ;
2123
2151
if nested == 0 {
2152
+ if c_style_comments {
2153
+ break Ok ( Some ( Token :: make_word ( & s, None ) ) ) ;
2154
+ }
2124
2155
break Ok ( Some ( Token :: Whitespace ( Whitespace :: MultiLineComment ( s) ) ) ) ;
2125
2156
}
2126
2157
s. push ( '*' ) ;
@@ -4070,4 +4101,39 @@ mod tests {
4070
4101
panic ! ( "Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}" ) ;
4071
4102
}
4072
4103
}
4104
#[test]
fn tokenize_multiline_comment_with_c_style_comment() {
    // MySQL "executable" comments (`/*! ... */`) are not plain whitespace:
    // the dialect runs their contents, so the tokenizer is expected to emit
    // `word` as an ordinary Word token sandwiched between the two numbers.
    let sql = "0/*! word */1";
    let dialect = MySqlDialect {};

    let tokenized = Tokenizer::new(&dialect, sql).tokenize().unwrap();

    let word = Token::Word(Word {
        value: "word".to_string(),
        quote_style: None,
        keyword: Keyword::NoKeyword,
    });
    let expected = vec![
        Token::Number("0".to_string(), false),
        word,
        Token::Number("1".to_string(), false),
    ];
    compare(expected, tokenized);
}
4121
+
4122
#[test]
fn tokenize_multiline_comment_with_c_style_comment_and_version() {
    // Same as the plain C-style comment case, but with MySQL's optional
    // version marker (`/*!8000000 ... */`). The digits after `!` are
    // expected to be discarded, leaving only `word` as a token.
    let sql = "0/*!8000000 word */1";
    let dialect = MySqlDialect {};

    let tokenized = Tokenizer::new(&dialect, sql).tokenize().unwrap();

    let word = Token::Word(Word {
        value: "word".to_string(),
        quote_style: None,
        keyword: Keyword::NoKeyword,
    });
    let expected = vec![
        Token::Number("0".to_string(), false),
        word,
        Token::Number("1".to_string(), false),
    ];
    compare(expected, tokenized);
}
4073
4139
}
0 commit comments