diff --git a/src/Playwright.Tests/InterceptionTests.cs b/src/Playwright.Tests/InterceptionTests.cs index 936dff8a2..1ec94ee47 100644 --- a/src/Playwright.Tests/InterceptionTests.cs +++ b/src/Playwright.Tests/InterceptionTests.cs @@ -23,6 +23,7 @@ */ using System.Net; +using System.Text.RegularExpressions; using Microsoft.Playwright.Helpers; namespace Microsoft.Playwright.Tests; @@ -32,32 +33,85 @@ public class InterceptionTests : PageTestEx [PlaywrightTest("interception.spec.ts", "should work with glob")] public void ShouldWorkWithGlob() { - Assert.That("https://localhost:8080/foo.js", Does.Match(StringExtensions.GlobToRegex("**/*.js"))); - Assert.That("https://localhost:8080/foo.js", Does.Not.Match(StringExtensions.GlobToRegex("**/*.css"))); - Assert.That("https://localhost:8080/foo.js", Does.Not.Match(StringExtensions.GlobToRegex("*.js"))); - Assert.That("https://localhost:8080/foo.js", Does.Match(StringExtensions.GlobToRegex("https://**/*.js"))); - Assert.That("http://localhost:8080/simple/path.js", Does.Match(StringExtensions.GlobToRegex("http://localhost:8080/simple/path.js"))); - Assert.That("http://localhost:8080/Simple/path.js", Does.Match(StringExtensions.GlobToRegex("http://localhost:8080/?imple/path.js"))); - Assert.That("https://localhost:8080/a.js", Does.Match(StringExtensions.GlobToRegex("**/{a,b}.js"))); - Assert.That("https://localhost:8080/b.js", Does.Match(StringExtensions.GlobToRegex("**/{a,b}.js"))); - Assert.That("https://localhost:8080/c.js", Does.Not.Match(StringExtensions.GlobToRegex("**/{a,b}.js"))); - Assert.That("https://localhost:8080/c.jpg", Does.Match(StringExtensions.GlobToRegex("**/*.{png,jpg,jpeg}"))); - Assert.That("https://localhost:8080/c.jpeg", Does.Match(StringExtensions.GlobToRegex("**/*.{png,jpg,jpeg}"))); - Assert.That("https://localhost:8080/c.png", Does.Match(StringExtensions.GlobToRegex("**/*.{png,jpg,jpeg}"))); - Assert.That("https://localhost:8080/c.css", 
Does.Not.Match(StringExtensions.GlobToRegex("**/*.{png,jpg,jpeg}"))); - Assert.That("foo.js", Does.Match(StringExtensions.GlobToRegex("foo*"))); - Assert.That("foo/bar.js", Does.Not.Match(StringExtensions.GlobToRegex("foo*"))); - Assert.That("http://localhost:3000/signin-oidc/foo", Does.Not.Match(StringExtensions.GlobToRegex("http://localhost:3000/signin-oidc*"))); - Assert.That("http://localhost:3000/signin-oidcnice", Does.Match(StringExtensions.GlobToRegex("http://localhost:3000/signin-oidc*"))); - - Assert.That("http://mydomain:8080/blah/blah/three-columns/settings.html?id=settings-e3c58efe-02e9-44b0-97ac-dd138100cf7c&blah", Does.Match(StringExtensions.GlobToRegex("**/three-columns/settings.html?**id=[a-z]**"))); - - Assert.AreEqual("^\\?$", StringExtensions.GlobToRegex("\\?")); - Assert.AreEqual("^\\\\$", StringExtensions.GlobToRegex("\\")); - Assert.AreEqual("^\\\\$", StringExtensions.GlobToRegex("\\\\")); - Assert.AreEqual("^\\[$", StringExtensions.GlobToRegex("\\[")); - Assert.AreEqual("^[a-z]$", StringExtensions.GlobToRegex("[a-z]")); - Assert.AreEqual(@"^\$\^\+\.\*\(\)\|\?\{\}\[\]$", StringExtensions.GlobToRegex("$^+.\\*()|\\?\\{\\}\\[\\]")); + Regex GlobToRegex(string glob) + { + return new Regex(URLMatch.GlobToRegexPattern(glob)); + } + + bool URLMatches(string baseURL, string url, string glob) + { + return new URLMatch() + { + baseURL = baseURL, + glob = glob, + }.Match(url); + } + + Assert.That("https://localhost:8080/foo.js", Does.Match(GlobToRegex("**/*.js"))); + Assert.That("https://localhost:8080/foo.js", Does.Not.Match(GlobToRegex("**/*.css"))); + Assert.That("https://localhost:8080/foo.js", Does.Not.Match(GlobToRegex("*.js"))); + Assert.That("https://localhost:8080/foo.js", Does.Match(GlobToRegex("https://**/*.js"))); + Assert.That("http://localhost:8080/simple/path.js", Does.Match(GlobToRegex("http://localhost:8080/simple/path.js"))); + Assert.That("https://localhost:8080/a.js", Does.Match(GlobToRegex("**/{a,b}.js"))); + 
Assert.That("https://localhost:8080/b.js", Does.Match(GlobToRegex("**/{a,b}.js"))); + Assert.That("https://localhost:8080/c.js", Does.Not.Match(GlobToRegex("**/{a,b}.js"))); + Assert.That("https://localhost:8080/c.jpg", Does.Match(GlobToRegex("**/*.{png,jpg,jpeg}"))); + Assert.That("https://localhost:8080/c.jpeg", Does.Match(GlobToRegex("**/*.{png,jpg,jpeg}"))); + Assert.That("https://localhost:8080/c.png", Does.Match(GlobToRegex("**/*.{png,jpg,jpeg}"))); + Assert.That("https://localhost:8080/c.css", Does.Not.Match(GlobToRegex("**/*.{png,jpg,jpeg}"))); + Assert.That("foo.js", Does.Match(GlobToRegex("foo*"))); + Assert.That("foo/bar.js", Does.Not.Match(GlobToRegex("foo*"))); + Assert.That("http://localhost:3000/signin-oidc/foo", Does.Not.Match(GlobToRegex("http://localhost:3000/signin-oidc*"))); + Assert.That("http://localhost:3000/signin-oidcnice", Does.Match(GlobToRegex("http://localhost:3000/signin-oidc*"))); + + // range [] is NOT supported + Assert.That("http://example.com/api/v[0-9]", Does.Match(GlobToRegex("**/api/v[0-9]"))); + Assert.That("http://example.com/api/version", Does.Not.Match(GlobToRegex("**/api/v[0-9]"))); + + // query params + Assert.That("http://example.com/api?param", Does.Match(GlobToRegex("**/api\\?param"))); + Assert.That("http://example.com/api-param", Does.Not.Match(GlobToRegex("**/api\\?param"))); + Assert.That("http://mydomain:8080/blah/blah/three-columns/settings.html?id=settings-e3c58efe-02e9-44b0-97ac-dd138100cf7c&blah", Does.Match(GlobToRegex("**/three-columns/settings.html\\?**id=settings-**"))); + + Assert.AreEqual("^\\?$", URLMatch.GlobToRegexPattern("\\?")); + Assert.AreEqual("^\\\\$", URLMatch.GlobToRegexPattern("\\")); + Assert.AreEqual("^\\\\$", URLMatch.GlobToRegexPattern("\\\\")); + Assert.AreEqual("^\\[$", URLMatch.GlobToRegexPattern("\\[")); + Assert.AreEqual("^\\[a-z\\]$", URLMatch.GlobToRegexPattern("[a-z]")); + Assert.AreEqual(@"^\$\^\+\.\*\(\)\|\?\{\}\[\]$", URLMatch.GlobToRegexPattern("$^+.\\*()|\\?\\{\\}\\[\\]")); + 
+ Assert.True(URLMatches(null, "http://playwright.dev/", "http://playwright.dev")); + Assert.True(URLMatches(null, "http://playwright.dev/?a=b", "http://playwright.dev?a=b")); + Assert.True(URLMatches(null, "http://playwright.dev/", "h*://playwright.dev")); + Assert.True(URLMatches(null, "http://api.playwright.dev/?x=y", "http://*.playwright.dev?x=y")); + Assert.True(URLMatches(null, "http://playwright.dev/foo/bar", "**/foo/**")); + Assert.True(URLMatches("http://playwright.dev", "http://playwright.dev/?x=y", "?x=y")); + Assert.True(URLMatches("http://playwright.dev/foo/", "http://playwright.dev/foo/bar?x=y", "./bar?x=y")); + + // This is not supported, we treat ? as a query separator. + Assert.That("http://localhost:8080/Simple/path.js", Does.Not.Match(GlobToRegex("http://localhost:8080/?imple/path.js"))); + Assert.False(URLMatches(null, "http://playwright.dev/", "http://playwright.?ev")); + Assert.True(URLMatches(null, "http://playwright./?ev", "http://playwright.?ev")); + Assert.False(URLMatches(null, "http://playwright.dev/foo", "http://playwright.dev/f??")); + Assert.True(URLMatches(null, "http://playwright.dev/f??", "http://playwright.dev/f??")); + Assert.True(URLMatches(null, "http://playwright.dev/?x=y", "http://playwright.dev\\?x=y")); + Assert.True(URLMatches(null, "http://playwright.dev/?x=y", "http://playwright.dev/\\?x=y")); + Assert.True(URLMatches("http://playwright.dev/foo", "http://playwright.dev/foo?bar", "?bar")); + Assert.True(URLMatches("http://playwright.dev/foo", "http://playwright.dev/foo?bar", "\\\\?bar")); + Assert.True(URLMatches("http://first.host/", "http://second.host/foo", "**/foo")); + Assert.True(URLMatches("http://playwright.dev/", "http://localhost/", "*//localhost/")); + } + + [PlaywrightTest("interception.spec.ts", "should intercept by glob")] + public async Task ShouldInterceptByGlob() + { + await Page.GotoAsync(Server.EmptyPage); + await Page.RouteAsync("http://localhos**?*oo", (route) => + { + return route.FulfillAsync(new() 
{ Status = (int)HttpStatusCode.OK, Body = "intercepted" }); + }); + var result = await Page.EvaluateAsync("url => fetch(url).then(r => r.text())", Server.Prefix + "/?foo"); + Assert.AreEqual("intercepted", result); } [PlaywrightTest("interception.spec.ts", "should work with ignoreHTTPSErrors")] diff --git a/src/Playwright/Core/BrowserContext.cs b/src/Playwright/Core/BrowserContext.cs index 748ec4f2a..6d5941f00 100644 --- a/src/Playwright/Core/BrowserContext.cs +++ b/src/Playwright/Core/BrowserContext.cs @@ -765,7 +765,7 @@ private async Task UnrouteAsync(string globMatch, Regex reMatch, Func(); foreach (var routeHandler in _routes) { - if (routeHandler.urlMatcher.Equals(globMatch, reMatch, funcMatch, Options.BaseURL) && (handler == null || routeHandler.Handler == handler)) + if (routeHandler.urlMatcher.Equals(globMatch, reMatch, funcMatch, Options.BaseURL, false) && (handler == null || routeHandler.Handler == handler)) { removed.Add(routeHandler); } @@ -934,6 +934,7 @@ private Task RouteWebSocketAsync(string globMatch, Regex reMatch, Func(); foreach (var routeHandler in _routes) { - if (routeHandler.urlMatcher.Equals(globMatch, reMatch, funcMatch, Context.Options.BaseURL) && (handler == null || routeHandler.Handler == handler)) + if (routeHandler.urlMatcher.Equals(globMatch, reMatch, funcMatch, Context.Options.BaseURL, false) && (handler == null || routeHandler.Handler == handler)) { removed.Add(routeHandler); } @@ -1610,6 +1610,7 @@ private Task RouteWebSocketAsync(string globMatch, Regex urlRegex, Func internal static class StringExtensions { - // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#escaping - private static readonly char[] _escapeGlobChars = new[] { '$', '^', '+', '.', '*', '(', ')', '|', '\\', '?', '{', '}', '[', ']' }; - private static readonly Dictionary _mappings = new Dictionary(StringComparer.InvariantCultureIgnoreCase) { { ".323", "text/h323" }, @@ -629,94 +626,6 @@ public static Dictionary 
ParseQueryString(this string query) return result; } - /// - /// Converts an url glob expression to a regex. - /// - /// Input url. - /// A Regex with the glob expression. - public static string GlobToRegex(this string glob) - { - if (string.IsNullOrEmpty(glob)) - { - return null; - } - - List tokens = new() { "^" }; - bool inGroup = false; - - for (int i = 0; i < glob.Length; ++i) - { - var c = glob[i]; - if (c == '\\' && i + 1 < glob.Length) - { - var @char = glob[++i]; - tokens.Add(_escapeGlobChars.Contains(@char) ? "\\" + @char : @char.ToString()); - continue; - } - if (c == '*') - { - char? beforeDeep = i == 0 ? null : glob[i - 1]; - int starCount = 1; - while (i < glob.Length - 1 && glob[i + 1] == '*') - { - starCount++; - i++; - } - - char? afterDeep = i >= glob.Length - 1 ? null : glob[i + 1]; - var isDeep = starCount > 1 && - (beforeDeep == '/' || beforeDeep == null) && - (afterDeep == '/' || afterDeep == null); - if (isDeep) - { - tokens.Add("((?:[^/]*(?:\\/|$))*)"); - i++; - } - else - { - tokens.Add("([^/]*)"); - } - continue; - } - - switch (c) - { - case '?': - tokens.Add("."); - break; - case '[': - tokens.Add("["); - break; - case ']': - tokens.Add("]"); - break; - case '{': - inGroup = true; - tokens.Add("("); - break; - case '}': - inGroup = false; - tokens.Add(")"); - break; - case ',': - if (inGroup) - { - tokens.Add("|"); - break; - } - - tokens.Add("\\" + c); - break; - default: - tokens.Add(_escapeGlobChars.Contains(c) ? 
"\\" + c : c.ToString()); - break; - } - } - - tokens.Add("$"); - return string.Concat(tokens.ToArray()); - } - internal static string GetContentType(this string path) { const string defaultContentType = "application/octet-stream"; diff --git a/src/Playwright/Helpers/URLMatch.cs b/src/Playwright/Helpers/URLMatch.cs index 4622a0281..0ac605955 100644 --- a/src/Playwright/Helpers/URLMatch.cs +++ b/src/Playwright/Helpers/URLMatch.cs @@ -23,12 +23,17 @@ */ using System; +using System.Collections.Generic; +using System.Linq; using System.Text.RegularExpressions; namespace Microsoft.Playwright.Helpers; public class URLMatch { + // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_expressions#escaping + private static readonly char[] _escapeGlobChars = new[] { '$', '^', '+', '.', '*', '(', ')', '|', '\\', '?', '{', '}', '[', ']' }; + public Regex re { get; set; } public Func func { get; set; } @@ -37,12 +42,14 @@ public class URLMatch public string baseURL { get; set; } + public bool isWebSocketUrl { get; set; } + public bool Match(string url) { - return MatchImpl(url, re, func, glob, baseURL); + return MatchImpl(url, re, func, glob, baseURL, isWebSocketUrl); } - private static bool MatchImpl(string url, Regex re, Func func, string glob, string baseURL) + private static bool MatchImpl(string url, Regex re, Func func, string glob, string baseURL, bool isWebSocketUrl) { if (re != null) { @@ -60,29 +67,34 @@ private static bool MatchImpl(string url, Regex re, Func func, str { return true; } - if (!glob.StartsWith("*", StringComparison.InvariantCultureIgnoreCase)) - { - // Allow http(s) baseURL to match ws(s) urls. 
- if (!string.IsNullOrEmpty(baseURL) && new Regex("^https?://").IsMatch(baseURL) && new Regex("^wss?://").IsMatch(url)) - { - baseURL = new Regex("^http").Replace(baseURL, "ws"); - } - glob = ConstructURLBasedOnBaseURL(baseURL, glob); - } - return new Regex(glob.GlobToRegex()).IsMatch(url); + string match = ResolveGlobToRegexPattern(baseURL, glob, isWebSocketUrl); + return new Regex(match).IsMatch(url); } + return true; } + // In Node.js, new URL('http://localhost') returns 'http://localhost/'. + // To ensure the same url matching behavior, do the same. + internal static Uri FixupTrailingSlash(Uri uri) + { + var builder = new UriBuilder(uri); + if (string.IsNullOrEmpty(builder.Path)) + { + builder.Path = "/"; + } + return builder.Uri; + } + internal static string ConstructURLBasedOnBaseURL(string baseUrl, string url) { try { if (string.IsNullOrEmpty(baseUrl)) { - return new Uri(url, UriKind.Absolute).ToString(); + return FixupTrailingSlash(new Uri(url, UriKind.Absolute)).ToString(); } - return new Uri(new Uri(baseUrl), new Uri(url, UriKind.RelativeOrAbsolute)).ToString(); + return FixupTrailingSlash(new Uri(new Uri(baseUrl), new Uri(url, UriKind.RelativeOrAbsolute))).ToString(); } catch { @@ -90,11 +102,168 @@ internal static string ConstructURLBasedOnBaseURL(string baseUrl, string url) } } - public bool Equals(string globMatch, Regex reMatch, Func funcMatch, string baseURL) + public static string GlobToRegexPattern(string glob) + { + if (string.IsNullOrEmpty(glob)) + { + return null; + } + + List tokens = new() { "^" }; + bool inGroup = false; + + for (int i = 0; i < glob.Length; ++i) + { + var c = glob[i]; + if (c == '\\' && i + 1 < glob.Length) + { + var @char = glob[++i]; + tokens.Add(_escapeGlobChars.Contains(@char) ? "\\" + @char : @char.ToString()); + continue; + } + if (c == '*') + { + char? beforeDeep = i == 0 ? null : glob[i - 1]; + int starCount = 1; + while (i < glob.Length - 1 && glob[i + 1] == '*') + { + starCount++; + i++; + } + + char? 
afterDeep = i >= glob.Length - 1 ? null : glob[i + 1]; + var isDeep = starCount > 1 && + (beforeDeep == '/' || beforeDeep == null) && + (afterDeep == '/' || afterDeep == null); + if (isDeep) + { + tokens.Add("((?:[^/]*(?:\\/|$))*)"); + i++; + } + else + { + tokens.Add("([^/]*)"); + } + continue; + } + + switch (c) + { + case '{': + inGroup = true; + tokens.Add("("); + break; + case '}': + inGroup = false; + tokens.Add(")"); + break; + case ',': + if (inGroup) + { + tokens.Add("|"); + break; + } + + tokens.Add("\\" + c); + break; + default: + tokens.Add(_escapeGlobChars.Contains(c) ? "\\" + c : c.ToString()); + break; + } + } + + tokens.Add("$"); + return string.Concat(tokens.ToArray()); + } + + internal static string ToWebSocketBaseURL(string baseURL) + { + if (string.IsNullOrEmpty(baseURL)) + { + return baseURL; + } + // Allow http(s) baseURL to match ws(s) urls: swap only the leading scheme, never later occurrences. + if (baseURL.StartsWith("http://", StringComparison.Ordinal)) + { + return "ws://" + baseURL.Substring("http://".Length); + } + if (baseURL.StartsWith("https://", StringComparison.Ordinal)) + { + return "wss://" + baseURL.Substring("https://".Length); + } + return baseURL; + } + + internal static string ResolveGlobToRegexPattern(string baseURL, string glob, bool isWebSocketUrl) + { + if (isWebSocketUrl) + { + baseURL = ToWebSocketBaseURL(baseURL); + } + glob = ResolveGlobBase(baseURL, glob); + return GlobToRegexPattern(glob); + } + + internal static string ResolveGlobBase(string baseURL, string match) + { + // NOTE: Node.js version uses "$" in mapped tokens, but C# cannot swallow that. + // So we use "playwright-pw-" instead. It is also important that this string is lowercase. + if (!match.StartsWith("*")) + { + var tokenMap = new Dictionary(); + + string MapToken(string original, string replacement) + { + if (string.IsNullOrEmpty(original)) + { + return string.Empty; + } + tokenMap[replacement] = original; + return replacement; + } + + // Escaped `\\?` behaves the same as `?` in our glob patterns. 
+ match = match.Replace("\\\\?", "?"); + // Glob symbols may be escaped in the URL and some of them such as ? affect resolution, + // so we replace them with safe components first. + var relativePath = string.Join("/", match.Split('/').Select((token, index) => + { + if (token == "." || token == ".." || token == string.Empty) + { + return token; + } + // Handle special case of http*://, note that the new scheme has to be + // a web scheme so that slashes are properly inserted after domain. + if (index == 0 && token.EndsWith(":")) + { + return MapToken(token, "http:"); + } + int questionIndex = token.IndexOf('?'); + if (questionIndex == -1) + { + return MapToken(token, $"playwright-pw-{index}-pw-playwright"); + } + string newPrefix = MapToken(token.Substring(0, questionIndex), $"playwright-pw-{index}-pw-playwright"); + string newSuffix = MapToken(token.Substring(questionIndex), $"?playwright-pw2-{index}-pw2-playwright"); + return newPrefix + newSuffix; + })); + + string resolved = ConstructURLBasedOnBaseURL(baseURL, relativePath); + foreach (var kvp in tokenMap) + { + resolved = resolved.Replace(kvp.Key, kvp.Value); + } + match = resolved; + } + return match; + } + + public bool Equals(string globMatch, Regex reMatch, Func funcMatch, string baseURL, bool isWebSocketUrl) { return this.re?.ToString() == reMatch?.ToString() && this.re?.Options == reMatch?.Options && this.func == funcMatch && this.glob == globMatch - && this.baseURL == baseURL; + && this.baseURL == baseURL + && this.isWebSocketUrl == isWebSocketUrl; } }