From e16b2d88a49a64f716ca33ffadf699d1ef7ac9fd Mon Sep 17 00:00:00 2001 From: Adrian Hesketh Date: Mon, 30 Dec 2024 13:49:03 +0000 Subject: [PATCH] feat: support unquoted HTML attributes, closes #963 (#1028) Co-authored-by: Joe Davidson --- README.md | 7 +++ parser/v2/elementparser.go | 78 +++++++++++++++--------- parser/v2/elementparser_test.go | 102 ++++++++++++++++++++++++++++++++ parser/v2/fuzz.sh | 2 + 4 files changed, 161 insertions(+), 28 deletions(-) create mode 100755 parser/v2/fuzz.sh diff --git a/README.md b/README.md index 9957ac2dc..c477b7c3a 100644 --- a/README.md +++ b/README.md @@ -118,6 +118,13 @@ go tool cover -func coverage.out | grep total gotestsum --watch -- -coverprofile=coverage.out ``` +### test-fuzz + +```sh +./parser/v2/fuzz.sh +./parser/v2/goexpression/fuzz.sh +``` + ### benchmark Run benchmarks. diff --git a/parser/v2/elementparser.go b/parser/v2/elementparser.go index 3c3e73d3f..b9ecf4b48 100644 --- a/parser/v2/elementparser.go +++ b/parser/v2/elementparser.go @@ -98,11 +98,41 @@ var ( }) ) +type attributeValueParser struct { + EqualsAndQuote parse.Parser[string] + Suffix parse.Parser[string] + UseSingleQuote bool +} + +func (avp attributeValueParser) Parse(pi *parse.Input) (value string, ok bool, err error) { + start := pi.Index() + if _, ok, err = avp.EqualsAndQuote.Parse(pi); err != nil || !ok { + return + } + if value, ok, err = parse.StringUntil(avp.Suffix).Parse(pi); err != nil || !ok { + pi.Seek(start) + return + } + if _, ok, err = avp.Suffix.Parse(pi); err != nil || !ok { + pi.Seek(start) + return + } + return value, true, nil +} + // Constant attribute. var ( - attributeConstantValueParser = parse.StringUntil(parse.Rune('"')) - attributeConstantValueSingleQuoteParser = parse.StringUntil(parse.Rune('\'')) - constantAttributeParser = parse.Func(func(pi *parse.Input) (attr ConstantAttribute, ok bool, err error) { + attributeValueParsers = []attributeValueParser{ + // Double quoted. + {EqualsAndQuote: parse.String(`="`), Suffix: parse.String(`"`), UseSingleQuote: false}, + // Single quoted. + {EqualsAndQuote: parse.String(`='`), Suffix: parse.String(`'`), UseSingleQuote: true}, + // Unquoted. + // A valid unquoted attribute value in HTML is any string of text that is not an empty string, + // and that doesn’t contain spaces, tabs, line feeds, form feeds, carriage returns, ", ', `, =, <, or >. + {EqualsAndQuote: parse.String("="), Suffix: parse.Any(parse.RuneIn(" \t\n\r\"'`=<>/"), parse.EOF[string]()), UseSingleQuote: false}, + } + constantAttributeParser = parse.Func(func(pi *parse.Input) (attr ConstantAttribute, ok bool, err error) { start := pi.Index() // Optional whitespace leader. @@ -117,38 +147,30 @@ var ( } attr.NameRange = NewRange(pi.PositionAt(pi.Index()-len(attr.Name)), pi.Position()) - // =" - result, ok, err := parse.Or(parse.String(`="`), parse.String(`='`)).Parse(pi) - if err != nil || !ok { - pi.Seek(start) - return - } - - valueParser := attributeConstantValueParser - closeParser := parse.String(`"`) - if result.B.OK { - valueParser = attributeConstantValueSingleQuoteParser - closeParser = parse.String(`'`) - attr.SingleQuote = true + for _, p := range attributeValueParsers { + attr.Value, ok, err = p.Parse(pi) + if err != nil { + pos := pi.Position() + if pErr, isParseError := err.(parse.ParseError); isParseError { + pos = pErr.Pos + } + return attr, false, parse.Error(fmt.Sprintf("%s: %v", attr.Name, err), pos) + } + if ok { + attr.SingleQuote = p.UseSingleQuote + break + } } - // Attribute value. - if attr.Value, ok, err = valueParser.Parse(pi); err != nil || !ok { + if !ok { pi.Seek(start) - return + return attr, false, nil } attr.Value = html.UnescapeString(attr.Value) - // Only use single quotes if actually required, due to double quote in the value (prefer double quotes). - if attr.SingleQuote && !strings.Contains(attr.Value, "\"") { - attr.SingleQuote = false - } - // " - closing quote. - if _, ok, err = closeParser.Parse(pi); err != nil || !ok { - err = parse.Error(fmt.Sprintf("missing closing quote on attribute %q", attr.Name), pi.Position()) - return - } + // Only use single quotes if actually required, due to double quote in the value (prefer double quotes). + attr.SingleQuote = attr.SingleQuote && strings.Contains(attr.Value, "\"") return attr, true, nil }) diff --git a/parser/v2/elementparser_test.go b/parser/v2/elementparser_test.go index 0c7796dc7..19050c18a 100644 --- a/parser/v2/elementparser_test.go +++ b/parser/v2/elementparser_test.go @@ -531,6 +531,19 @@ if test { }, }, }, + { + name: "unquoted attributes are supported", + input: ` data=123`, + parser: StripType(constantAttributeParser), + expected: ConstantAttribute{ + Name: "data", + Value: "123", + NameRange: Range{ + From: Position{Index: 1, Line: 0, Col: 1}, + To: Position{Index: 5, Line: 0, Col: 5}, + }, + }, + }, } for _, tt := range tests { tt := tt @@ -1569,6 +1582,77 @@ amount is charged`, }, }, }, + { + name: "element: self-closing with unquoted attribute", + input: `
`, + expected: Element{ + Name: "hr", + NameRange: Range{ + From: Position{Index: 1, Line: 0, Col: 1}, + To: Position{Index: 3, Line: 0, Col: 3}, + }, + Attributes: []Attribute{ + ConstantAttribute{ + Name: "noshade", + Value: "noshade", + NameRange: Range{ + From: Position{Index: 4, Line: 0, Col: 4}, + To: Position{Index: 11, Line: 0, Col: 11}, + }, + }, + }, + }, + }, + { + name: "element: self-closing with unquoted and other attributes", + input: `
`, + expected: Element{ + Name: "hr", + NameRange: Range{ + From: Position{Index: 1, Line: 0, Col: 1}, + To: Position{Index: 3, Line: 0, Col: 3}, + }, + Attributes: []Attribute{ + ConstantAttribute{ + Name: "noshade", + Value: "noshade", + NameRange: Range{ + From: Position{Index: 4, Line: 0, Col: 4}, + To: Position{Index: 11, Line: 0, Col: 11}, + }, + }, + BoolConstantAttribute{ + Name: "disabled", + NameRange: Range{ + From: Position{Index: 20, Line: 0, Col: 20}, + To: Position{Index: 28, Line: 0, Col: 28}, + }, + }, + ExpressionAttribute{ + Name: "other-attribute", + NameRange: Range{ + From: Position{Index: 29, Line: 0, Col: 29}, + To: Position{Index: 44, Line: 0, Col: 44}, + }, + Expression: Expression{ + Value: "false", + Range: Range{ + From: Position{ + Index: 47, + Line: 0, + Col: 47, + }, + To: Position{ + Index: 52, + Line: 0, + Col: 52, + }, + }, + }, + }, + }, + }, + }, } for _, tt := range tests { tt := tt @@ -1702,3 +1786,21 @@ func TestBigElement(t *testing.T) { t.Errorf("unexpected failure to parse") } } + +func FuzzElement(f *testing.F) { + seeds := []string{ + `
`, + ``, + ``, + `
{ "test" }
`, + `
Test`, + } + + for _, tc := range seeds { + f.Add(tc) + } + + f.Fuzz(func(t *testing.T, input string) { + _, _, _ = element.Parse(parse.NewInput(input)) + }) +} diff --git a/parser/v2/fuzz.sh b/parser/v2/fuzz.sh new file mode 100755 index 000000000..cee8f72de --- /dev/null +++ b/parser/v2/fuzz.sh @@ -0,0 +1,2 @@ +echo Element +go test -fuzz=FuzzElement -fuzztime=120s