diff --git a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java
index 97783227..52774c40 100644
--- a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java
+++ b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java
@@ -476,6 +476,8 @@ private static enum State {
COMMENT,
COMMENT_DASH,
COMMENT_DASH_DASH,
+ COMMENT_DASH_DASH_BANG, // a '!' was read right after "--" inside a comment; a following '>' completes "--!>"
+ COMMENT_DASH_AFTER_BANG, // the second '-' of a comment opener was just read; a following '>' closes the comment at once
DIRECTIVE,
DONE,
BOGUS_COMMENT,
@@ -640,20 +642,35 @@ && canonicalElementName(start + 2, end)
case BANG:
if ('-' == ch) {
state = State.BANG_DASH;
+ } else if('>' == ch) { // '>' directly after the bang: finish the token as a COMMENT (presumably "<!>")
+ state = State.DONE;
+ type = HtmlTokenType.COMMENT;
} else {
state = State.DIRECTIVE;
}
break;
case BANG_DASH:
if ('-' == ch) {
- state = State.COMMENT;
+ state = State.COMMENT_DASH_AFTER_BANG; // dedicated state so that a '>' arriving next can close the comment at once
} else {
state = State.DIRECTIVE;
}
break;
+ case COMMENT_DASH_AFTER_BANG:
+ if ('>' == ch) { // '>' right after the opening dashes closes the comment immediately
+ state = State.DONE;
+ type = HtmlTokenType.COMMENT;
+ } else if ('-' == ch) { // further '-' right after the opener: remain in this state
+ state = State.COMMENT_DASH_AFTER_BANG;
+ } else { // NOTE(review): a '!' after extra dashes also lands here, so "<!----!>" is not closed - confirm intended
+ state = State.COMMENT;
+ }
+ break;
case COMMENT:
if ('-' == ch) {
state = State.COMMENT_DASH;
+ } else {
+ state = State.COMMENT; // NOTE(review): looks like a no-op, since this arm handles the COMMENT state - confirm
}
break;
case COMMENT_DASH:
@@ -665,12 +682,24 @@ && canonicalElementName(start + 2, end)
if ('>' == ch) {
state = State.DONE;
type = HtmlTokenType.COMMENT;
+ } else if ('!' == ch) { // --!> is also valid closing sequence
+ state = State.COMMENT_DASH_DASH_BANG;
} else if ('-' == ch) {
state = State.COMMENT_DASH_DASH;
} else {
state = State.COMMENT_DASH;
}
break;
+ case COMMENT_DASH_DASH_BANG: // entered when '!' follows "--" inside a comment
+ if ('>' == ch) { // "--!>" ends the comment
+ state = State.DONE;
+ type = HtmlTokenType.COMMENT;
+ }else if ('-' == ch) { // a '-' may begin a new closing sequence
+ state = State.COMMENT_DASH;
+ }else { // anything else: back to plain comment content
+ state = State.COMMENT;
+ }
+ break;
case DIRECTIVE:
if ('>' == ch) {
type = HtmlTokenType.DIRECTIVE;
diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
index 2ebf55ea..c8ca186a 100644
--- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
+++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java
@@ -117,6 +117,65 @@ public static final void testShortTags() {
"TAGEND: >");
}
+ @Test
+ public static final void testCommentDeclarationWith0CommentsAndXss() throws Exception
+ {
+ // See RFC 1866 section 3.2.5 (Comments): https://datatracker.ietf.org/doc/html/rfc1866#section-3.2.5
+ assertTokens("
",
+ "COMMENT: ",
+ "TAGBEGIN:
"
+ );
+ }
+
+ @Test
+ public static final void testTextEndingWithTagOpenAndBang() throws Exception
+ {
+ // Taken from the WHATWG HTML Standard, comments section: https://html.spec.whatwg.org/#comments
+ assertTokens("",
+ "COMMENT: ",
+ "TAGBEGIN: ",
+ "TAGBEGIN: "
+ );
+ }
+
+ @Test
+ public static final void testDashDashBangComment() throws Exception
+ { // presumably exercises the "--!>" comment terminator; expects a single COMMENT token
+ assertTokens("",
+ "COMMENT: "
+ );
+ }
+ @Test
+ public static final void testAbruptClosingOfEmptyComment() throws Exception
+ {
+ assertTokens("
abc",
+ "COMMENT: ",
+ "TAGBEGIN:
",
+ "TEXT: a",
+ "COMMENT: ",
+ "TEXT: b",
+ "SERVERCODE: c"
+ );
+ }
+
+ @Test
+ public static final void testIncorrectlyClosedComment() throws Exception
+ {
+ assertTokens("
",
+ "COMMENT: ",
+ "TAGBEGIN:
"
+ );
+ }
+
private static void lex(String input, Appendable out) throws Exception {
HtmlLexer lexer = new HtmlLexer(input);
int maxTypeLength = 0;