Skip to content

Commit e5de267

Browse files
Add support for C-style comments
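This commit adds support for the C-style comments (`/*! ... */`) supported by MySQL. Inside such a comment, the tokenizer consumes the optional version number that follows the `!` character, together with any whitespace after it, and emits the remaining comment body as a word token instead of treating it as a multi-line comment.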
1 parent f642dd5 commit e5de267

File tree

4 files changed

+81
-1
lines changed

4 files changed

+81
-1
lines changed

src/dialect/generic.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,10 @@ impl Dialect for GenericDialect {
156156
true
157157
}
158158

159+
fn supports_c_style_comments(&self) -> bool {
160+
true
161+
}
162+
159163
fn supports_user_host_grantee(&self) -> bool {
160164
true
161165
}

src/dialect/mod.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -898,6 +898,11 @@ pub trait Dialect: Debug + Any {
898898
false
899899
}
900900

901+
/// Returns true if the dialect supports hint and C-style comments
902+
fn supports_c_style_comments(&self) -> bool {
903+
false
904+
}
905+
901906
/// Returns true if this dialect supports treating the equals operator `=` within a `SelectItem`
902907
/// as an alias assignment operator, rather than a boolean expression.
903908
/// For example: the following statements are equivalent for such a dialect:

src/dialect/mysql.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,11 @@ impl Dialect for MySqlDialect {
8484
true
8585
}
8686

87+
/// MySQL supports C-style `/*! ... */` comments, see <https://dev.mysql.com/doc/refman/8.4/en/comments.html>
88+
fn supports_c_style_comments(&self) -> bool {
89+
true
90+
}
91+
8792
fn parse_infix(
8893
&self,
8994
parser: &mut crate::parser::Parser,

src/tokenizer.rs

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2107,8 +2107,9 @@ impl<'a> Tokenizer<'a> {
21072107
) -> Result<Option<Token>, TokenizerError> {
21082108
let mut s = String::new();
21092109
let mut nested = 1;
2110+
let mut c_style_comments = false;
21102111
let supports_nested_comments = self.dialect.supports_nested_comments();
2111-
2112+
let supports_c_style_comments = self.dialect.supports_c_style_comments();
21122113
loop {
21132114
match chars.next() {
21142115
Some('/') if matches!(chars.peek(), Some('*')) && supports_nested_comments => {
@@ -2117,10 +2118,40 @@ impl<'a> Tokenizer<'a> {
21172118
s.push('*');
21182119
nested += 1;
21192120
}
2121+
Some('!') if supports_c_style_comments => {
2122+
c_style_comments = true;
2123+
// skip the optional version digits and any whitespace that follow the '!'
2124+
while let Some(&c) = chars.peek() {
2125+
if c.is_ascii_digit() || c.is_whitespace() {
2126+
chars.next();
2127+
} else {
2128+
break;
2129+
}
2130+
}
2131+
}
2132+
// inside a C-style comment: whitespace that runs up to the closing '*/' is dropped and the word is emitted
2133+
Some(ch) if ch.is_whitespace() && c_style_comments => {
2134+
let mut tmp_s = String::new();
2135+
while let Some(c) = chars.next() {
2136+
if c.is_whitespace() {
2137+
tmp_s.push(c);
2138+
} else if c == '*' && chars.peek() == Some(&'/') {
2139+
chars.next(); // consume the '/'
2140+
return Ok(Some(Token::make_word(&s, None)));
2141+
} else {
2142+
tmp_s.push(c);
2143+
s.push_str(&tmp_s);
2144+
break;
2145+
}
2146+
}
2147+
}
21202148
Some('*') if matches!(chars.peek(), Some('/')) => {
21212149
chars.next(); // consume the '/'
21222150
nested -= 1;
21232151
if nested == 0 {
2152+
if c_style_comments {
2153+
break Ok(Some(Token::make_word(&s, None)));
2154+
}
21242155
break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
21252156
}
21262157
s.push('*');
@@ -4070,4 +4101,39 @@ mod tests {
40704101
panic!("Tokenizer should have failed on {sql}, but it succeeded with {tokens:?}");
40714102
}
40724103
}
4104+
#[test]
4105+
fn tokenize_multiline_comment_with_c_style_comment() {
4106+
let sql = String::from("0/*! word */1");
4107+
4108+
let dialect = MySqlDialect {};
4109+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4110+
let expected = vec![
4111+
Token::Number("0".to_string(), false),
4112+
Token::Word(Word {
4113+
value: "word".to_string(),
4114+
quote_style: None,
4115+
keyword: Keyword::NoKeyword,
4116+
}),
4117+
Token::Number("1".to_string(), false),
4118+
];
4119+
compare(expected, tokens);
4120+
}
4121+
4122+
#[test]
4123+
fn tokenize_multiline_comment_with_c_style_comment_and_version() {
4124+
let sql = String::from("0/*!8000000 word */1");
4125+
4126+
let dialect = MySqlDialect {};
4127+
let tokens = Tokenizer::new(&dialect, &sql).tokenize().unwrap();
4128+
let expected = vec![
4129+
Token::Number("0".to_string(), false),
4130+
Token::Word(Word {
4131+
value: "word".to_string(),
4132+
quote_style: None,
4133+
keyword: Keyword::NoKeyword,
4134+
}),
4135+
Token::Number("1".to_string(), false),
4136+
];
4137+
compare(expected, tokens);
4138+
}
40734139
}

0 commit comments

Comments
 (0)