From 4ebc52d6dfc17ef2bee1fbb7657ef41619ab51d7 Mon Sep 17 00:00:00 2001 From: JP Hastings-Spital Date: Sun, 9 Jul 2023 07:37:24 +0100 Subject: [PATCH] feat!: Matches required query parameters The parser and `MatchAndExpandPlaceholders` now handle query parameters as well as paths. Placeholders are shared between the two. This commit codifies some of the previously implicit edge cases, particularly around duplicate placeholders. Closes #20 --- README.md | 8 +- redirects.go | 149 ++++++++++++++++--- redirects_example_test.go | 70 +++++++++ redirects_test.go | 292 +++++++++++++++++++++++++++++++++++++- 4 files changed, 492 insertions(+), 27 deletions(-) diff --git a/README.md b/README.md index b63aa79..eed5a4a 100644 --- a/README.md +++ b/README.md @@ -7,10 +7,10 @@ This is a parser for the IPFS Web Gateway's `_redirects` file format. Follow specification work at https://github.com/ipfs/specs/pull/290 ## Format -Currently only supports `from`, `to` and `status`. +Currently only supports `from`, `fromQuery`, `to` and `status`. ``` -from to [status] +from [fromQuery [fromQuery]] to [status] ``` ## Example @@ -39,6 +39,10 @@ from to [status] # Single page app rewrite (SPA, PWA) /* /index.html 200 + +# Query parameter rewrite +/thing type=:type /thing-:type.html 200 +/thing /things.html 200 ``` ## Notes for contributors diff --git a/redirects.go b/redirects.go index 02c0494..e0dd509 100644 --- a/redirects.go +++ b/redirects.go @@ -6,6 +6,7 @@ import ( "fmt" "io" "net/url" + "regexp" "strconv" "strings" @@ -21,6 +22,12 @@ type Rule struct { // From is the path which is matched to perform the rule. From string + // FromQuery is the set of required query parameters which + // must be present to perform the rule. + // A string without a preceding colon requires that query parameter is this exact value. + // A string with a preceding colon will match any value, and provide it as a placeholder. 
+ FromQuery map[string]string + // To is the destination which may be relative, or absolute // in order to proxy the request to another URL. To string @@ -51,37 +58,70 @@ func (r *Rule) IsProxy() bool { // MatchAndExpandPlaceholders expands placeholders in `r.To` and returns true if the provided path matches. // Otherwise it returns false. -func (r *Rule) MatchAndExpandPlaceholders(urlPath string) bool { +func (r *Rule) MatchAndExpandPlaceholders(urlPath string, urlParams url.Values) bool { // get rule.From, trim trailing slash, ... fromPath := urlpath.New(strings.TrimSuffix(r.From, "/")) match, ok := fromPath.Match(urlPath) - if !ok { return false } - // We have a match! Perform substitution and return the updated rule + placeholders := match.Params + placeholders["splat"] = match.Trailing + if !matchParams(r.FromQuery, urlParams, placeholders) { + return false + } + + // We have a match! Perform substitution and return the updated rule toPath := r.To - toPath = replacePlaceholders(toPath, match) - toPath = replaceSplat(toPath, match) + toPath = replacePlaceholders(toPath, placeholders) + + // There's a placeholder unsupplied somewhere + if strings.Contains(toPath, ":") { + return false + } r.To = toPath return true } -func replacePlaceholders(to string, match urlpath.Match) string { - if len(match.Params) > 0 { - for key, value := range match.Params { - to = strings.ReplaceAll(to, ":"+key, value) - } +func replacePlaceholders(to string, placeholders map[string]string) string { + if len(placeholders) == 0 { + return to + } + + for key, value := range placeholders { + to = strings.ReplaceAll(to, ":"+key, value) } return to } -func replaceSplat(to string, match urlpath.Match) string { - return strings.ReplaceAll(to, ":splat", match.Trailing) +func replaceSplat(to string, splat string) string { + return strings.ReplaceAll(to, ":splat", splat) +} + +func matchParams(fromQuery map[string]string, urlParams url.Values, placeholders map[string]string) bool { + for 
neededK, neededV := range fromQuery { + haveVs, ok := urlParams[neededK] + if !ok { + return false + } + + if isPlaceholder(neededV) { + if _, ok := placeholders[neededV[1:]]; !ok { + placeholders[neededV[1:]] = haveVs[0] + } + continue + } + + if !contains(haveVs, neededV) { + return false + } + } + + return true } // Must parse utility. @@ -124,10 +164,6 @@ func Parse(r io.Reader) (rules []Rule, err error) { return nil, fmt.Errorf("missing 'to' path") } - if len(fields) > 3 { - return nil, fmt.Errorf("must match format 'from to [status]'") - } - // implicit status rule := Rule{Status: 301} @@ -138,16 +174,22 @@ func Parse(r io.Reader) (rules []Rule, err error) { } rule.From = from + hasStatus := isLikelyStatusCode(fields[len(fields)-1]) + toIndex := len(fields) - 1 + if hasStatus { + toIndex = len(fields) - 2 + } + // to (must parse as an absolute path or an URL) - to, err := parseTo(fields[1]) + to, err := parseTo(fields[toIndex]) if err != nil { return nil, errors.Wrapf(err, "parsing 'to'") } rule.To = to // status - if len(fields) > 2 { - code, err := parseStatus(fields[2]) + if hasStatus { + code, err := parseStatus(fields[len(fields)-1]) if err != nil { return nil, errors.Wrapf(err, "parsing status %q", fields[2]) } @@ -155,6 +197,19 @@ func Parse(r io.Reader) (rules []Rule, err error) { rule.Status = code } + // from query + if toIndex > 1 { + rule.FromQuery = make(map[string]string) + + for i := 1; i < toIndex; i++ { + key, value, err := parseFromQuery(fields[i]) + if err != nil { + return nil, errors.Wrapf(err, "parsing 'fromQuery'") + } + rule.FromQuery[key] = value + } + } + rules = append(rules, rule) } @@ -194,6 +249,46 @@ func parseFrom(s string) (string, error) { return s, nil } +func parseFromQuery(s string) (string, string, error) { + params, err := url.ParseQuery(s) + if err != nil { + return "", "", err + } + if len(params) != 1 { + return "", "", fmt.Errorf("separate different fromQuery arguments with a space") + } + + var key string + var val 
[]string + // We know there's only 1, but we don't know the key to access it + for k, v := range params { + key = k + val = v + } + + if url.QueryEscape(key) != key { + return "", "", fmt.Errorf("fromQuery key must be URL encoded") + } + + if len(val) > 1 { + return "", "", fmt.Errorf("separate different fromQuery arguments with a space") + } + + ignorePlaceholders := val[0] + if isPlaceholder(val[0]) { + ignorePlaceholders = ignorePlaceholders[1:] + } + + if url.QueryEscape(ignorePlaceholders) != ignorePlaceholders { + return "", "", fmt.Errorf("fromQuery val must be URL encoded") + } + return key, val[0], nil +} + +func isPlaceholder(s string) bool { + return strings.HasPrefix(s, ":") +} + func parseTo(s string) (string, error) { // confirm value is within URL path spec u, err := url.Parse(s) @@ -211,6 +306,13 @@ func parseTo(s string) (string, error) { return s, nil } +var likeStatusCode = regexp.MustCompile(`^\d{1,3}!?$`) + +// isLikelyStatusCode returns true if the given string is likely to be a status code. +func isLikelyStatusCode(s string) bool { + return likeStatusCode.MatchString(s) +} + // parseStatus returns the status code. 
func parseStatus(s string) (code int, err error) { if strings.HasSuffix(s, "!") { @@ -237,3 +339,12 @@ func isValidStatusCode(status int) bool { } return false } + +func contains(arr []string, s string) bool { + for _, a := range arr { + if a == s { + return true + } + } + return false +} diff --git a/redirects_example_test.go b/redirects_example_test.go index 2d464a6..77ab601 100644 --- a/redirects_example_test.go +++ b/redirects_example_test.go @@ -30,6 +30,18 @@ func Example() { # Proxying /api/* https://api.example.com/:splat 200 + + # Query parameters + /things type=photos /photos.html 200 + /things type= /empty.html 200 + /things type=:thing /thing-:thing.html 200 + /things /things.html 200 + + # Multiple query parameters + /stuff type=lost name=:name other=:ignore /other-stuff/:name.html 200 + + # Query parameters with implicit 301 + /items id=:id /items/:id.html `)) enc := json.NewEncoder(os.Stdout) @@ -39,53 +51,111 @@ func Example() { // [ // { // "From": "/home", + // "FromQuery": null, // "To": "/", // "Status": 301 // }, // { // "From": "/blog/my-post.php", + // "FromQuery": null, // "To": "/blog/my-post", // "Status": 301 // }, // { // "From": "/news", + // "FromQuery": null, // "To": "/blog", // "Status": 301 // }, // { // "From": "/google", + // "FromQuery": null, // "To": "https://www.google.com", // "Status": 301 // }, // { // "From": "/home", + // "FromQuery": null, // "To": "/", // "Status": 301 // }, // { // "From": "/my-redirect", + // "FromQuery": null, // "To": "/", // "Status": 302 // }, // { // "From": "/pass-through", + // "FromQuery": null, // "To": "/index.html", // "Status": 200 // }, // { // "From": "/ecommerce", + // "FromQuery": null, // "To": "/store-closed", // "Status": 404 // }, // { // "From": "/*", + // "FromQuery": null, // "To": "/index.html", // "Status": 200 // }, // { // "From": "/api/*", + // "FromQuery": null, // "To": "https://api.example.com/:splat", // "Status": 200 + // }, + // { + // "From": "/things", + // 
"FromQuery": { + // "type": "photos" + // }, + // "To": "/photos.html", + // "Status": 200 + // }, + // { + // "From": "/things", + // "FromQuery": { + // "type": "" + // }, + // "To": "/empty.html", + // "Status": 200 + // }, + // { + // "From": "/things", + // "FromQuery": { + // "type": ":thing" + // }, + // "To": "/thing-:thing.html", + // "Status": 200 + // }, + // { + // "From": "/things", + // "FromQuery": null, + // "To": "/things.html", + // "Status": 200 + // }, + // { + // "From": "/stuff", + // "FromQuery": { + // "name": ":name", + // "other": ":ignore", + // "type": "lost" + // }, + // "To": "/other-stuff/:name.html", + // "Status": 200 + // }, + // { + // "From": "/items", + // "FromQuery": { + // "id": ":id" + // }, + // "To": "/items/:id.html", + // "Status": 301 // } // ] } diff --git a/redirects_test.go b/redirects_test.go index 6262c41..e228822 100644 --- a/redirects_test.go +++ b/redirects_test.go @@ -98,6 +98,28 @@ func TestParse(t *testing.T) { assert.Error(t, err) assert.Contains(t, err.Error(), "redirects file size cannot exceed") }) + + t.Run("with fromQuery arguments", func(t *testing.T) { + rules, err := ParseString(` + /fixed type=type /type.html + /dynamic type=:type /type-:type.html + /empty type= /empty-type.html + /any type=:ignore /any-type.html + /multi a=a b=:b c= d /multi-:b.html + /fixed200 type=type /type.html 200 + /dynamic200 type=:type /type-:type.html 200 + /empty200 type= /empty-type.html 200 + /any200 type=:ignore /any-type.html 200 + /multi200 a=a b=:b c= d /multi-:b.html 200 + `) + + assert.NoError(t, err) + assert.Len(t, rules, 10) + assert.Equal(t, "type", rules[0].FromQuery["type"]) + assert.Equal(t, ":type", rules[1].FromQuery["type"]) + assert.Equal(t, "", rules[2].FromQuery["type"]) + assert.Equal(t, ":ignore", rules[3].FromQuery["type"]) + }) } func FuzzParse(f *testing.F) { @@ -108,7 +130,12 @@ func FuzzParse(f *testing.F) { "/%C4%85 /ę 301\n", "#/a \n\n/b", "/a200 /b200 200\n/a301 /b301 301\n/a302 /b302 
302\n/a303 /b303 303\n/a307 /b307 307\n/a308 /b308 308\n/a404 /b404 404\n/a410 /b410 410\n/a451 /b451 451\n", - "hello\n", "/redirect-one /one.html\r\n/200-index /index.html 200\r\n", "a b 2\nc d 42", "/a/*/b blah", "/from https://example.com 200\n/a/:blah/yeah /b/:blah/yeah"} + "hello\n", "/redirect-one /one.html\r\n/200-index /index.html 200\r\n", "a b 2\nc d 42", "/a/*/b blah", "/from https://example.com 200\n/a/:blah/yeah /b/:blah/yeah", + "/fixed-val val=val /to\n", "/dynamic-val val=:val /to/:val\n", "/empty-val val= /to\n", "/any-val val /to\n", + "/fixed-val val=val /to 200\n/dynamic-val val=:val /to/:val 301\n/empty-val val= /to 404\n/any-val val /to 302\n", + "/multi-query val1=val1 val2=:val2 val3= val4 /to/:val2\n/multi-query2 val1=val1 val2=:val2 val3= val4 /to/:val2 302\n", + "/bad-syntax1 val=a&val=b /to\n", "/bad-syntax2 val=a&val2=b /to 302\n", "/a ^¬params /b\n", "/bad-status type=:type /to 3oo\n", "/bad-chars :type=whatever /to\n", "/bad-chars type=what:ever /to\n", + } for _, tc := range testcases { f.Add([]byte(tc)) } @@ -154,6 +181,21 @@ func FuzzParse(f *testing.F) { t.Errorf("should error for 'to' URL with scheme other than safelisted ones: url=%q, scheme=%q, orig=%q", to, to.Scheme, orig) } } + + for key, val := range r.FromQuery { + if url.QueryEscape(key) != key { + t.Errorf("should error for 'fromQuery' keys being unacceptable URL characters. orig=%q", orig) + } + + // Colons should only be present in values right at the start (they're invalid characters otherwise). + if len(val) > 0 && val[0] == ':' { + val = val[1:] + } + + if url.QueryEscape(val) != val { + t.Errorf("should error for 'fromQuery' values containing unacceptable URL characters. orig=%q", orig) + } + } } s := bufio.NewScanner(bytes.NewReader(orig)) @@ -172,11 +214,6 @@ func FuzzParse(f *testing.F) { continue } - if len(fields) > 3 { - t.Errorf("should error with more than 3 fields. 
orig=%q", orig) - continue - } - if len(fields) > 0 && !strings.HasPrefix(fields[0], "/") { t.Errorf("should error for from path not starting with '/'. orig=%q", orig) continue @@ -195,3 +232,246 @@ func FuzzParse(f *testing.F) { } }) } + +func TestMatchAndExpandPlaceholders(t *testing.T) { + testcases := []struct { + name string + rule *Rule + inPath string + inParams string + success bool + expectedTo string + }{ + { + name: "No expansion", + rule: &Rule{ + From: "/from", + To: "/to", + }, + inPath: "/from", + inParams: "", + success: true, + expectedTo: "/to", + }, + { + name: "No expansion, but trailing slash", + rule: &Rule{ + From: "/from/", + To: "/to", + }, + inPath: "/from", + inParams: "", + success: true, + expectedTo: "/to", + }, + { + name: "Splat matching", + rule: &Rule{ + From: "/*", + To: "/to", + }, + inPath: "/from", + inParams: "", + success: true, + expectedTo: "/to", + }, + { + name: "Splat substitution", + rule: &Rule{ + From: "/*", + To: "/other/:splat", + }, + inPath: "/from", + inParams: "", + success: true, + expectedTo: "/other/from", + }, + { + name: "Named substitution", + rule: &Rule{ + From: "/:thing", + To: "/:thing.html", + }, + inPath: "/from", + inParams: "", + success: true, + expectedTo: "/from.html", + }, + { + name: "Missing placeholder", + rule: &Rule{ + From: "/:this", + To: "/:that.html", + }, + inPath: "/from", + inParams: "", + success: false, + }, + { + name: "Static query parameter, match", + rule: &Rule{ + From: "/from", + FromQuery: map[string]string{ + "a": "b", + }, + To: "/to", + }, + inPath: "/from", + inParams: "a=b", + success: true, + expectedTo: "/to", + }, + { + name: "Static query parameter, muli-match first", + rule: &Rule{ + From: "/from", + FromQuery: map[string]string{ + "a": "b", + }, + To: "/to", + }, + inPath: "/from", + inParams: "a=b&a=c", + success: true, + expectedTo: "/to", + }, + { + name: "Static query parameter, muli-match second", + rule: &Rule{ + From: "/from", + FromQuery: 
map[string]string{ + "a": "b", + }, + To: "/to", + }, + inPath: "/from", + inParams: "a=c&a=b", + success: true, + expectedTo: "/to", + }, + { + name: "Static query parameter, no match", + rule: &Rule{ + From: "/from", + FromQuery: map[string]string{ + "a": "b", + }, + To: "/to", + }, + inPath: "/from", + inParams: "", + success: false, + }, + { + name: "Dynamic query parameter, match", + rule: &Rule{ + From: "/from", + FromQuery: map[string]string{ + "a": ":a", + }, + To: "/to/:a.html", + }, + inPath: "/from", + inParams: "a=b", + success: true, + expectedTo: "/to/b.html", + }, + { + name: "Dynamic query parameter, multi-match", + rule: &Rule{ + From: "/from", + FromQuery: map[string]string{ + "a": ":a", + }, + To: "/:a.html", + }, + inPath: "/from", + inParams: "a=b&a=c", + success: true, + expectedTo: "/b.html", + }, + { + name: "Dynamic query parameter, no match", + rule: &Rule{ + From: "/from", + FromQuery: map[string]string{ + "a": "b", + }, + To: "/to", + }, + inPath: "/from", + inParams: "", + success: false, + }, + { + name: "Repeated placeholder in path", + rule: &Rule{ + From: "/:from/:from", + To: "/:from.html", + }, + inPath: "/a/b", + inParams: "", + success: true, + expectedTo: "/b.html", + }, + { + name: "Repeated placeholder in params", + rule: &Rule{ + From: "/from", + FromQuery: map[string]string{ + "q": ":val", + "r": ":val", + }, + To: "/:val.html", + }, + inPath: "/from", + inParams: "q=qq&r=rr", + success: true, + expectedTo: "/qq.html", + }, + { + name: "Repeated placeholder in path then params", + rule: &Rule{ + From: "/:val", + FromQuery: map[string]string{ + "q": ":val", + }, + To: "/:val.html", + }, + inPath: "/path", + inParams: "q=query", + success: true, + expectedTo: "/path.html", + }, + { + name: "Repeated placeholder splat", + rule: &Rule{ + From: "/*", + FromQuery: map[string]string{ + "q": ":splat", + }, + To: "/:splat.html", + }, + inPath: "/path", + inParams: "q=query", + success: true, + expectedTo: "/path.html", + }, + } + + 
for _, tc := range testcases { + t.Run(tc.name, func(t *testing.T) { + params, err := url.ParseQuery(tc.inParams) + if err != nil { + t.Errorf("Invalid inParams given (%s): %v", tc.inParams, err) + } + + ok := tc.rule.MatchAndExpandPlaceholders(tc.inPath, params) + assert.Equal(t, tc.success, ok, "Expected success to be %v, but was %v", tc.success, ok) + + if tc.success { + assert.Equal(t, tc.expectedTo, tc.rule.To, "Expected the To property to be changed to %q, but was %q", tc.expectedTo, tc.rule.To) + } + }) + } +}