diff --git a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java index 97783227..52774c40 100644 --- a/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java +++ b/owasp-java-html-sanitizer/src/main/java/org/owasp/html/HtmlLexer.java @@ -476,6 +476,8 @@ private static enum State { COMMENT, COMMENT_DASH, COMMENT_DASH_DASH, + COMMENT_DASH_DASH_BANG, + COMMENT_DASH_AFTER_BANG, DIRECTIVE, DONE, BOGUS_COMMENT, @@ -640,20 +642,35 @@ && canonicalElementName(start + 2, end) case BANG: if ('-' == ch) { state = State.BANG_DASH; + } else if('>' == ch) { // '>' ends the token here as a COMMENT + state = State.DONE; + type = HtmlTokenType.COMMENT; } else { state = State.DIRECTIVE; } break; case BANG_DASH: if ('-' == ch) { - state = State.COMMENT; + state = State.COMMENT_DASH_AFTER_BANG; } else { state = State.DIRECTIVE; } break; + case COMMENT_DASH_AFTER_BANG: + if ('>' == ch) { // '>' ends the token here as a COMMENT + state = State.DONE; + type = HtmlTokenType.COMMENT; + } else if ('-' == ch) { // another dash: remain in this state + state = State.COMMENT_DASH_AFTER_BANG; + } else { + state = State.COMMENT; + } + break; case COMMENT: if ('-' == ch) { state = State.COMMENT_DASH; + } else { + state = State.COMMENT; } break; case COMMENT_DASH: @@ -665,12 +682,24 @@ && canonicalElementName(start + 2, end) if ('>' == ch) { state = State.DONE; type = HtmlTokenType.COMMENT; + } else if ('!' 
== ch) { // --!> is also a valid closing sequence + state = State.COMMENT_DASH_DASH_BANG; } else if ('-' == ch) { state = State.COMMENT_DASH_DASH; } else { state = State.COMMENT_DASH; } break; + case COMMENT_DASH_DASH_BANG: + if ('>' == ch) { + state = State.DONE; + type = HtmlTokenType.COMMENT; + }else if ('-' == ch) { + state = State.COMMENT_DASH; + }else { + state = State.COMMENT; + } + break; case DIRECTIVE: if ('>' == ch) { type = HtmlTokenType.DIRECTIVE; diff --git a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java index 2ebf55ea..c8ca186a 100644 --- a/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java +++ b/owasp-java-html-sanitizer/src/test/java/org/owasp/html/HtmlLexerTest.java @@ -117,6 +117,65 @@ public static final void testShortTags() { "TAGEND: >"); } + @Test + public static final void testCommentDeclarationWith0CommentsAndXss() throws Exception + { + // see https://datatracker.ietf.org/doc/html/rfc1866#section-3.2.5 + assertTokens("", + "COMMENT: ", + "TAGBEGIN: " + ); + } + + @Test + public static final void testTextEndingWithTagOpenAndBang() throws Exception + { + // taken from https://html.spec.whatwg.org/#comments + assertTokens("", + "COMMENT: ", + "TAGBEGIN: ", + "TAGBEGIN: " + ); + } + + + public static final void testDashDashBangComment() throws Exception + { + assertTokens("", + "COMMENT: " + ); + } + @Test + public static final void testAbruptClosingOfEmptyComment() throws Exception + { + assertTokens("abc", + "COMMENT: ", + "TAGBEGIN: ", + "TEXT: a", + "COMMENT: ", + "TEXT: b", + "SERVERCODE: c" + ); + } + + @Test + public static final void testIncorrectlyClosedComment() throws Exception + { + assertTokens("", + "COMMENT: ", + "TAGBEGIN: " + ); + } + private static void lex(String input, Appendable out) throws Exception { HtmlLexer lexer = new HtmlLexer(input); int maxTypeLength = 0;