Skip to content

Commit

Permalink
feat: support unquoted HTML attributes, closes #963 (#1028)
Browse files Browse the repository at this point in the history
Co-authored-by: Joe Davidson <[email protected]>
  • Loading branch information
a-h and joerdav authored Dec 30, 2024
1 parent 9058914 commit e16b2d8
Show file tree
Hide file tree
Showing 4 changed files with 161 additions and 28 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,13 @@ go tool cover -func coverage.out | grep total
gotestsum --watch -- -coverprofile=coverage.out
```

### test-fuzz

```sh
./parser/v2/fuzz.sh
./parser/v2/goexpression/fuzz.sh
```

### benchmark

Run benchmarks.
Expand Down
78 changes: 50 additions & 28 deletions parser/v2/elementparser.go
Original file line number Diff line number Diff line change
Expand Up @@ -98,11 +98,41 @@ var (
})
)

// attributeValueParser describes one way of delimiting a constant attribute value.
// Its Parse method matches EqualsAndQuote, reads text up to Suffix, and then consumes Suffix.
type attributeValueParser struct {
// EqualsAndQuote matches the text that introduces the value: the equals sign plus
// any opening quote (the parsers defined in this file use `="`, `='` and a bare `=`).
EqualsAndQuote parse.Parser[string]
// Suffix matches whatever terminates the value. It is used both as the stop
// condition while reading the value and as the parser that consumes the terminator.
Suffix parse.Parser[string]
// UseSingleQuote is copied to ConstantAttribute.SingleQuote by the caller when this
// parser is the one that matched.
UseSingleQuote bool
}

// Parse reads a single attribute value delimited by avp.EqualsAndQuote and avp.Suffix.
//
// It returns the text found between the two delimiters. If the opening delimiter
// does not match, the result of that parser is returned as-is. If the value or the
// closing delimiter cannot be read, the input is moved back to where Parse started
// before the failing result (ok == false and/or a non-nil err) is returned.
func (avp attributeValueParser) Parse(pi *parse.Input) (value string, ok bool, err error) {
	origin := pi.Index()

	// Opening delimiter: "=" followed by an optional quote character.
	_, ok, err = avp.EqualsAndQuote.Parse(pi)
	if err != nil || !ok {
		return value, ok, err
	}

	// Read everything up to the terminator, then consume the terminator itself.
	value, ok, err = parse.StringUntil(avp.Suffix).Parse(pi)
	if err == nil && ok {
		_, ok, err = avp.Suffix.Parse(pi)
	}
	if err != nil || !ok {
		// Either step failed: rewind to the position Parse was entered at.
		pi.Seek(origin)
		return value, ok, err
	}

	return value, true, nil
}

// Constant attribute.
var (
attributeConstantValueParser = parse.StringUntil(parse.Rune('"'))
attributeConstantValueSingleQuoteParser = parse.StringUntil(parse.Rune('\''))
constantAttributeParser = parse.Func(func(pi *parse.Input) (attr ConstantAttribute, ok bool, err error) {
attributeValueParsers = []attributeValueParser{
// Double quoted.
{EqualsAndQuote: parse.String(`="`), Suffix: parse.String(`"`), UseSingleQuote: false},
// Single quoted.
{EqualsAndQuote: parse.String(`='`), Suffix: parse.String(`'`), UseSingleQuote: true},
// Unquoted.
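// A valid unquoted attribute value in HTML is any string of text that is not an empty string,
// and that doesn't contain spaces, tabs, line feeds, form feeds, carriage returns, ", ', `, =, <, or >.
// Note: the terminator set below also includes / (so `noshade=noshade/>` yields `noshade`),
// and it does not list the form feed character.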
{EqualsAndQuote: parse.String("="), Suffix: parse.Any(parse.RuneIn(" \t\n\r\"'`=<>/"), parse.EOF[string]()), UseSingleQuote: false},
}
constantAttributeParser = parse.Func(func(pi *parse.Input) (attr ConstantAttribute, ok bool, err error) {
start := pi.Index()

// Optional whitespace leader.
Expand All @@ -117,38 +147,30 @@ var (
}
attr.NameRange = NewRange(pi.PositionAt(pi.Index()-len(attr.Name)), pi.Position())

// ="
result, ok, err := parse.Or(parse.String(`="`), parse.String(`='`)).Parse(pi)
if err != nil || !ok {
pi.Seek(start)
return
}

valueParser := attributeConstantValueParser
closeParser := parse.String(`"`)
if result.B.OK {
valueParser = attributeConstantValueSingleQuoteParser
closeParser = parse.String(`'`)
attr.SingleQuote = true
for _, p := range attributeValueParsers {
attr.Value, ok, err = p.Parse(pi)
if err != nil {
pos := pi.Position()
if pErr, isParseError := err.(parse.ParseError); isParseError {
pos = pErr.Pos
}
return attr, false, parse.Error(fmt.Sprintf("%s: %v", attr.Name, err), pos)
}
if ok {
attr.SingleQuote = p.UseSingleQuote
break
}
}

// Attribute value.
if attr.Value, ok, err = valueParser.Parse(pi); err != nil || !ok {
if !ok {
pi.Seek(start)
return
return attr, false, nil
}

attr.Value = html.UnescapeString(attr.Value)
// Only use single quotes if actually required, due to double quote in the value (prefer double quotes).
if attr.SingleQuote && !strings.Contains(attr.Value, "\"") {
attr.SingleQuote = false
}

// " - closing quote.
if _, ok, err = closeParser.Parse(pi); err != nil || !ok {
err = parse.Error(fmt.Sprintf("missing closing quote on attribute %q", attr.Name), pi.Position())
return
}
// Only use single quotes if actually required, due to double quote in the value (prefer double quotes).
attr.SingleQuote = attr.SingleQuote && strings.Contains(attr.Value, "\"")

return attr, true, nil
})
Expand Down
102 changes: 102 additions & 0 deletions parser/v2/elementparser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -531,6 +531,19 @@ if test {
},
},
},
{
name: "unquoted attributes are supported",
input: ` data=123`,
parser: StripType(constantAttributeParser),
expected: ConstantAttribute{
Name: "data",
Value: "123",
NameRange: Range{
From: Position{Index: 1, Line: 0, Col: 1},
To: Position{Index: 5, Line: 0, Col: 5},
},
},
},
}
for _, tt := range tests {
tt := tt
Expand Down Expand Up @@ -1569,6 +1582,77 @@ amount is charged</div>`,
},
},
},
{
name: "element: self-closing with unquoted attribute",
input: `<hr noshade=noshade/>`,
expected: Element{
Name: "hr",
NameRange: Range{
From: Position{Index: 1, Line: 0, Col: 1},
To: Position{Index: 3, Line: 0, Col: 3},
},
Attributes: []Attribute{
ConstantAttribute{
Name: "noshade",
Value: "noshade",
NameRange: Range{
From: Position{Index: 4, Line: 0, Col: 4},
To: Position{Index: 11, Line: 0, Col: 11},
},
},
},
},
},
{
name: "element: self-closing with unquoted and other attributes",
input: `<hr noshade=noshade disabled other-attribute={ false } />`,
expected: Element{
Name: "hr",
NameRange: Range{
From: Position{Index: 1, Line: 0, Col: 1},
To: Position{Index: 3, Line: 0, Col: 3},
},
Attributes: []Attribute{
ConstantAttribute{
Name: "noshade",
Value: "noshade",
NameRange: Range{
From: Position{Index: 4, Line: 0, Col: 4},
To: Position{Index: 11, Line: 0, Col: 11},
},
},
BoolConstantAttribute{
Name: "disabled",
NameRange: Range{
From: Position{Index: 20, Line: 0, Col: 20},
To: Position{Index: 28, Line: 0, Col: 28},
},
},
ExpressionAttribute{
Name: "other-attribute",
NameRange: Range{
From: Position{Index: 29, Line: 0, Col: 29},
To: Position{Index: 44, Line: 0, Col: 44},
},
Expression: Expression{
Value: "false",
Range: Range{
From: Position{
Index: 47,
Line: 0,
Col: 47,
},
To: Position{
Index: 52,
Line: 0,
Col: 52,
},
},
},
},
},
},
},
}
for _, tt := range tests {
tt := tt
Expand Down Expand Up @@ -1702,3 +1786,21 @@ func TestBigElement(t *testing.T) {
t.Errorf("unexpected failure to parse")
}
}

// FuzzElement runs the element parser against fuzzer-generated input.
//
// The seed corpus covers a void element, quoted and unquoted constant attributes,
// an expression attribute, and an element with an expression child. The parse
// results are deliberately discarded and the target never reports a failure
// itself, so the fuzzer is only looking for crashes in the parser.
func FuzzElement(f *testing.F) {
	for _, seed := range [...]string{
		`<br>`,
		`<a href="test" unquoted=unquoted/>`,
		`<input value={ "test" }/>`,
		`<div>{ "test" }</div>`,
		`<a unquoted=unquoted href="test" unquoted=unquoted>Test</a>`,
	} {
		f.Add(seed)
	}

	f.Fuzz(func(_ *testing.T, src string) {
		// Value, ok and err are all ignored on purpose.
		_, _, _ = element.Parse(parse.NewInput(src))
	})
}
2 changes: 2 additions & 0 deletions parser/v2/fuzz.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
#!/bin/sh
# Fuzz the element parser (FuzzElement) for 120 seconds.
#
# The README runs this script as ./parser/v2/fuzz.sh from the repository root,
# so change to the script's own directory first; otherwise `go test` would
# target whichever package happens to be in the caller's working directory.
set -e
cd "$(dirname "$0")"

echo Element
go test -fuzz=FuzzElement -fuzztime=120s

0 comments on commit e16b2d8

Please sign in to comment.