From e3bc4adc947a5e50edfeeba67ab0ee3002127b31 Mon Sep 17 00:00:00 2001 From: Tobias Bales Date: Thu, 23 Jan 2025 15:51:34 +0100 Subject: [PATCH] Add parsing without raising exceptions Similar to what Integer(value, exception: false) does but for URI.parse(value, exception: false). The change is implemented for the default parser (RFC3986) and also RFC2396. The goal is to allow parsing invalid/user input without having to have control flow via exceptions or by having to wrap URI.parse in a method per project. This change can be tried via `~/.rubies/ruby-master/bin/ruby -r 'uri' -e 'URI.parse("https://example.com/\[\]", exception: false)'` and `~/.rubies/ruby-master/bin/ruby -r 'uri' -e 'URI.parse("https://example.com/\[\]")'` respectively (or with `exception: true`) and is also covered by tests. Or by just changing the tests to observe the behavior. --- lib/uri/common.rb | 4 ++-- lib/uri/rfc2396_parser.rb | 15 ++++++++++++--- lib/uri/rfc3986_parser.rb | 17 +++++++++++++---- test/uri/test_parser.rb | 11 +++++++++++ 4 files changed, 38 insertions(+), 9 deletions(-) diff --git a/lib/uri/common.rb b/lib/uri/common.rb index c3fe0b4..1350368 100644 --- a/lib/uri/common.rb +++ b/lib/uri/common.rb @@ -204,8 +204,8 @@ def self.split(uri) # It's recommended to first ::escape string +uri+ # if it may contain invalid URI characters. # - def self.parse(uri) - DEFAULT_PARSER.parse(uri) + def self.parse(uri, exception: true) + DEFAULT_PARSER.parse(uri, exception: exception) end # Merges the given URI strings +str+ diff --git a/lib/uri/rfc2396_parser.rb b/lib/uri/rfc2396_parser.rb index 0336366..700f5a3 100644 --- a/lib/uri/rfc2396_parser.rb +++ b/lib/uri/rfc2396_parser.rb @@ -117,7 +117,7 @@ def initialize(opts = {}) attr_reader :regexp # Returns a split URI against +regexp[:ABS_URI]+. 
- def split(uri) + def split(uri, exception: true) case uri when '' # null uri @@ -139,10 +139,14 @@ def split(uri) # server = [ [ userinfo "@" ] hostport ] if !scheme + return unless exception + raise InvalidURIError, "bad URI (absolute but no scheme): #{uri}" end if !opaque && (!path && (!host && !registry)) + return unless exception + raise InvalidURIError, "bad URI (absolute but no path): #{uri}" end @@ -173,6 +177,8 @@ def split(uri) # server = [ [ userinfo "@" ] hostport ] else + return unless exception + raise InvalidURIError, "bad URI (is not URI?): #{uri}" end @@ -206,8 +212,11 @@ def split(uri) # p.parse("ldap://ldap.example.com/dc=example?user=john") # #=> # # - def parse(uri) - URI.for(*self.split(uri), self) + def parse(uri, exception: true) + scheme = self.split(uri, exception: exception) + return if scheme.nil? + + URI.for(*scheme, self) end # diff --git a/lib/uri/rfc3986_parser.rb b/lib/uri/rfc3986_parser.rb index 0b5f0c4..f933b2e 100644 --- a/lib/uri/rfc3986_parser.rb +++ b/lib/uri/rfc3986_parser.rb @@ -74,14 +74,18 @@ def initialize @regexp = default_regexp.each_value(&:freeze).freeze end - def split(uri) #:nodoc: + def split(uri, exception: true) #:nodoc: begin uri = uri.to_str rescue NoMethodError + return unless exception raise InvalidURIError, "bad URI (is not URI?): #{uri.inspect}" end - uri.ascii_only? or + unless uri.ascii_only? + return unless exception + raise InvalidURIError, "URI must be ascii only #{uri.dump}" + end if m = RFC3986_URI.match(uri) query = m["query"] scheme = m["scheme"] @@ -127,12 +131,17 @@ def split(uri) #:nodoc: m["fragment"] ] else + return unless exception + raise InvalidURIError, "bad URI (is not URI?): #{uri.inspect}" end end - def parse(uri) # :nodoc: - URI.for(*self.split(uri), self) + def parse(uri, exception: true) # :nodoc: + scheme = self.split(uri, exception: exception) + return if scheme.nil? 
+ + URI.for(*scheme, self) end def join(*uris) # :nodoc: diff --git a/test/uri/test_parser.rb b/test/uri/test_parser.rb index f455a5c..e2e41ec 100644 --- a/test/uri/test_parser.rb +++ b/test/uri/test_parser.rb @@ -92,6 +92,17 @@ def test_split end end + def test_split_without_exception + assert_equal(["http", nil, "example.com", nil, nil, "", nil, nil, nil], URI.split("http://example.com")) + assert_equal(["http", nil, "[0::0]", nil, nil, "", nil, nil, nil], URI.split("http://[0::0]")) + assert_equal([nil, nil, "example.com", nil, nil, "", nil, nil, nil], URI.split("//example.com")) + assert_equal([nil, nil, "[0::0]", nil, nil, "", nil, nil, nil], URI.split("//[0::0]")) + + assert_equal(["a", nil, nil, nil, nil, "", nil, nil, nil], URI.split("a:")) + assert_nil URI.parse("::", exception: false) + assert_nil URI.parse("foo@example:foo", exception: false) + end + def test_rfc2822_parse_relative_uri pre = ->(length) { " " * length + "\0"