Skip to content

Commit 1cff181

Browse files
authored
Add internal URI handling API (#19073)
Part of #14461. Related to https://wiki.php.net/rfc/url_parsing_api.
1 parent 504a633 commit 1cff181

File tree

94 files changed

+1239
-317
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

94 files changed

+1239
-317
lines changed

NEWS

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,14 @@ PHP NEWS
1212
error in PHP 9. (alexandre-daubois)
1313
. Fixed OSS-Fuzz #439125710 (Pipe cannot be used in write context).
1414
(nielsdos)
15+
. Added support for configuring the URI parser for the FTP/FTPS as well as
16+
the SSL/TLS stream wrappers as described in
17+
https://wiki.php.net/rfc/url_parsing_api#plugability. (kocsismate)
18+
19+
- Filter:
20+
. Added support for configuring the URI parser for FILTER_VALIDATE_URL
21+
as described in https://wiki.php.net/rfc/url_parsing_api#plugability.
22+
(kocsismate)
1523

1624
- ODBC:
1725
. Remove ODBCVER and assume ODBC 3.5. (Calvin Buckley)
@@ -26,6 +34,11 @@ PHP NEWS
2634
- Session:
2735
. Added support for partitioned cookies. (nielsdos)
2836

37+
- SOAP:
38+
. Added support for configuring the URI parser for SoapClient::__doRequest()
39+
as described in https://wiki.php.net/rfc/url_parsing_api#plugability.
40+
(kocsismate)
41+
2942
- SPL:
3043
. Deprecate ArrayObject and ArrayIterator with objects. (Girgias)
3144

UPGRADING.INTERNALS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ PHP 8.5 INTERNALS UPGRADE NOTES
8585
. ext/standard/php_smart_string.h and ext/standard/php_smart_string_public.h
8686
were removed. Use the corresponding headers in Zend/ instead.
8787

88+
- URI
89+
. Internal API for URI handling was added via the php_uri_*() functions.
90+
8891
========================
8992
2. Build system changes
9093
========================

ext/filter/logical_filters.c

Lines changed: 40 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,11 @@
1616
+----------------------------------------------------------------------+
1717
*/
1818

19+
#include "zend_exceptions.h"
1920
#include "php_filter.h"
2021
#include "filter_private.h"
21-
#include "ext/standard/url.h"
2222
#include "ext/pcre/php_pcre.h"
23+
#include "ext/uri/php_uri.h"
2324

2425
#include "zend_multiply.h"
2526

@@ -89,6 +90,8 @@
8990
#define FORMAT_IPV4 4
9091
#define FORMAT_IPV6 6
9192

93+
#define URL_OPTION_URI_PARSER_CLASS "uri_parser_class"
94+
9295
static bool _php_filter_validate_ipv6(const char *str, size_t str_len, int ip[8]);
9396

9497
static bool php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
@@ -591,7 +594,6 @@ static bool php_filter_is_valid_ipv6_hostname(const zend_string *s)
591594

592595
void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
593596
{
594-
php_url *url;
595597
size_t old_len = Z_STRLEN_P(value);
596598

597599
php_filter_url(value, flags, option_array, charset);
@@ -600,52 +602,66 @@ void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
600602
RETURN_VALIDATION_FAILED
601603
}
602604

603-
/* Use parse_url - if it returns false, we return NULL */
604-
url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
605+
/* Parse options */
606+
zval *option_val;
607+
zend_string *parser_name;
608+
int parser_name_set;
609+
FETCH_STR_OPTION(parser_name, URL_OPTION_URI_PARSER_CLASS);
610+
611+
uri_handler_t *uri_handler = php_uri_get_handler(parser_name_set ? parser_name : NULL);
612+
if (uri_handler == NULL) {
613+
zend_value_error("%s(): \"uri_parser_class\" option has invalid value", get_active_function_name());
614+
RETURN_VALIDATION_FAILED
615+
}
605616

606-
if (url == NULL) {
617+
/* Parse the URI - if it fails, we return NULL */
618+
php_uri *uri = php_uri_parse_to_struct(uri_handler, Z_STRVAL_P(value), Z_STRLEN_P(value), URI_COMPONENT_READ_RAW, true);
619+
if (uri == NULL) {
607620
RETURN_VALIDATION_FAILED
608621
}
609622

610-
if (url->scheme != NULL &&
611-
(zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
623+
if (uri->scheme != NULL &&
624+
(zend_string_equals_literal_ci(uri->scheme, "http") || zend_string_equals_literal_ci(uri->scheme, "https"))) {
612625

613-
if (url->host == NULL) {
614-
goto bad_url;
626+
if (uri->host == NULL) {
627+
php_uri_struct_free(uri);
628+
RETURN_VALIDATION_FAILED
615629
}
616630

617631
if (
632+
/* Skipping these checks is possible because the new URI implementations perform comprehensive validations. */
633+
strcmp(uri_handler->name, URI_PARSER_PHP) == 0 &&
618634
/* An IPv6 enclosed by square brackets is a valid hostname.*/
619-
!php_filter_is_valid_ipv6_hostname(url->host) &&
635+
!php_filter_is_valid_ipv6_hostname(uri->host) &&
620636
/* Validate domain.
621637
* This includes a loose check for an IPv4 address. */
622-
!php_filter_validate_domain_ex(url->host, FILTER_FLAG_HOSTNAME)
638+
!php_filter_validate_domain_ex(uri->host, FILTER_FLAG_HOSTNAME)
623639
) {
624-
php_url_free(url);
640+
php_uri_struct_free(uri);
625641
RETURN_VALIDATION_FAILED
626642
}
627643
}
628644

629-
if (
630-
url->scheme == NULL ||
631-
/* some schemas allow the host to be empty */
632-
(url->host == NULL && (!zend_string_equals_literal(url->scheme, "mailto") && !zend_string_equals_literal(url->scheme, "news") && !zend_string_equals_literal(url->scheme, "file"))) ||
633-
((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
645+
if (uri->scheme == NULL ||
646+
/* some schemes allow the host to be empty */
647+
(uri->host == NULL && (!zend_string_equals_literal(uri->scheme, "mailto") && !zend_string_equals_literal(uri->scheme, "news") && !zend_string_equals_literal(uri->scheme, "file"))) ||
648+
((flags & FILTER_FLAG_PATH_REQUIRED) && uri->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && uri->query == NULL)
634649
) {
635-
bad_url:
636-
php_url_free(url);
650+
php_uri_struct_free(uri);
637651
RETURN_VALIDATION_FAILED
638652
}
639653

640-
if ((url->user != NULL && !is_userinfo_valid(url->user))
641-
|| (url->pass != NULL && !is_userinfo_valid(url->pass))
654+
if (strcmp(uri_handler->name, URI_PARSER_PHP) == 0 &&
655+
(
656+
(uri->user != NULL && !is_userinfo_valid(uri->user)) ||
657+
(uri->password != NULL && !is_userinfo_valid(uri->password))
658+
)
642659
) {
643-
php_url_free(url);
660+
php_uri_struct_free(uri);
644661
RETURN_VALIDATION_FAILED
645-
646662
}
647663

648-
php_url_free(url);
664+
php_uri_struct_free(uri);
649665
}
650666
/* }}} */
651667

ext/filter/tests/062.phpt

Lines changed: 184 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,184 @@
1+
--TEST--
2+
filter_var() and FILTER_VALIDATE_URL with different URI parsers
3+
--EXTENSIONS--
4+
filter
5+
--FILE--
6+
<?php
7+
8+
function validateUrls(string $parserName)
9+
{
10+
$values = [
11+
'http://example.com/index.html',
12+
'http://www.example.com/index.php',
13+
'http://www.example/img/test.png',
14+
'http://www.example/img/dir/',
15+
'http://www.example/img/dir',
16+
'http://www.thelongestdomainnameintheworldandthensomeandthensomemoreandmore.com/',
17+
'http://toolongtoolongtoolongtoolongtoolongtoolongtoolongtoolongtoolongtoolong.com',
18+
'http://eauBcFReEmjLcoZwI0RuONNnwU4H9r151juCaqTI5VeIP5jcYIqhx1lh5vV00l2rTs6y7hOp7rYw42QZiq6VIzjcYrRm8gFRMk9U9Wi1grL8Mr5kLVloYLthHgyA94QK3SaXCATklxgo6XvcbXIqAGG7U0KxTr8hJJU1p2ZQ2mXHmp4DhYP8N9SRuEKzaCPcSIcW7uj21jZqBigsLsNAXEzU8SPXZjmVQVtwQATPWeWyGW4GuJhjP4Q8o0.com',
19+
'http://kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.CQ1oT5Uq3jJt6Uhy3VH9u3Gi5YhfZCvZVKgLlaXNFhVKB1zJxvunR7SJa.com.',
20+
'http://kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58R.example.com',
21+
'http://[2001:0db8:0000:85a3:0000:0000:ac1f:8001]',
22+
'http://[2001:db8:0:85a3:0:0:ac1f:8001]:123/me.html',
23+
'http://[2001:db8:0:85a3::ac1f:8001]/',
24+
'http://[::1]',
25+
'http://cont-ains.h-yph-en-s.com',
26+
'http://..com',
27+
'http://a.-bc.com',
28+
'http://ab.cd-.com',
29+
'http://-.abc.com',
30+
'http://abc.-.abc.com',
31+
'http://underscore_.example.com',
32+
'http//www.example/wrong/url/',
33+
'http:/www.example',
34+
'file:///tmp/test.c',
35+
'ftp://ftp.example.com/tmp/',
36+
'/tmp/test.c',
37+
'/',
38+
'http://',
39+
'http:/',
40+
'http:',
41+
'http',
42+
'',
43+
-1,
44+
[],
45+
'mailto:foo@bar.com',
46+
'news:news.php.net',
47+
'file://foo/bar',
48+
"http://\r\n/bar",
49+
"http://example.com:qq",
50+
"http://example.com:-2",
51+
"http://example.com:65536",
52+
"http://example.com:65537",
53+
];
54+
55+
foreach ($values as $value) {
56+
var_dump(filter_var($value, FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
57+
}
58+
59+
var_dump(filter_var("qwe", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
60+
var_dump(filter_var("http://qwe", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
61+
var_dump(filter_var("http://", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
62+
var_dump(filter_var("/tmp/test", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
63+
var_dump(filter_var("http://www.example.com", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName]));
64+
var_dump(filter_var("http://www.example.com", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName, "flags" => FILTER_FLAG_PATH_REQUIRED]));
65+
var_dump(filter_var("http://www.example.com/path/at/the/server/", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName, "flags" => FILTER_FLAG_PATH_REQUIRED]));
66+
var_dump(filter_var("http://www.example.com/index.html", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName, "flags" => FILTER_FLAG_QUERY_REQUIRED]));
67+
var_dump(filter_var("http://www.example.com/index.php?a=b&c=d", FILTER_VALIDATE_URL, ["uri_parser_class" => $parserName, "flags" => FILTER_FLAG_QUERY_REQUIRED]));
68+
}
69+
70+
echo "RFC3986:\n";
71+
validateUrls(Uri\Rfc3986Uri::class);
72+
73+
echo "\nWHATWG:\n";
74+
validateUrls(Uri\WhatWgUri::class);
75+
76+
echo "Done\n";
77+
?>
78+
--EXPECT--
79+
RFC3986:
80+
string(29) "http://example.com/index.html"
81+
string(32) "http://www.example.com/index.php"
82+
string(31) "http://www.example/img/test.png"
83+
string(27) "http://www.example/img/dir/"
84+
string(26) "http://www.example/img/dir"
85+
string(79) "http://www.thelongestdomainnameintheworldandthensomeandthensomemoreandmore.com/"
86+
bool(false)
87+
bool(false)
88+
string(261) "http://kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.CQ1oT5Uq3jJt6Uhy3VH9u3Gi5YhfZCvZVKgLlaXNFhVKB1zJxvunR7SJa.com."
89+
bool(false)
90+
string(48) "http://[2001:0db8:0000:85a3:0000:0000:ac1f:8001]"
91+
string(50) "http://[2001:db8:0:85a3:0:0:ac1f:8001]:123/me.html"
92+
string(36) "http://[2001:db8:0:85a3::ac1f:8001]/"
93+
string(12) "http://[::1]"
94+
string(31) "http://cont-ains.h-yph-en-s.com"
95+
bool(false)
96+
bool(false)
97+
bool(false)
98+
bool(false)
99+
bool(false)
100+
bool(false)
101+
bool(false)
102+
bool(false)
103+
string(18) "file:///tmp/test.c"
104+
string(26) "ftp://ftp.example.com/tmp/"
105+
bool(false)
106+
bool(false)
107+
bool(false)
108+
bool(false)
109+
bool(false)
110+
bool(false)
111+
bool(false)
112+
bool(false)
113+
bool(false)
114+
string(18) "mailto:foo@bar.com"
115+
string(17) "news:news.php.net"
116+
string(14) "file://foo/bar"
117+
bool(false)
118+
bool(false)
119+
bool(false)
120+
bool(false)
121+
bool(false)
122+
bool(false)
123+
string(10) "http://qwe"
124+
bool(false)
125+
bool(false)
126+
string(22) "http://www.example.com"
127+
bool(false)
128+
string(42) "http://www.example.com/path/at/the/server/"
129+
bool(false)
130+
string(40) "http://www.example.com/index.php?a=b&c=d"
131+
132+
WHATWG:
133+
string(29) "http://example.com/index.html"
134+
string(32) "http://www.example.com/index.php"
135+
string(31) "http://www.example/img/test.png"
136+
string(27) "http://www.example/img/dir/"
137+
string(26) "http://www.example/img/dir"
138+
string(79) "http://www.thelongestdomainnameintheworldandthensomeandthensomemoreandmore.com/"
139+
bool(false)
140+
bool(false)
141+
string(261) "http://kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.kDTvHt1PPDgX5EiP2MwiXjcoWNOhhTuOVAUWJ3TmpBYCC9QoJV114LMYrV3Zl58.CQ1oT5Uq3jJt6Uhy3VH9u3Gi5YhfZCvZVKgLlaXNFhVKB1zJxvunR7SJa.com."
142+
bool(false)
143+
string(48) "http://[2001:0db8:0000:85a3:0000:0000:ac1f:8001]"
144+
string(50) "http://[2001:db8:0:85a3:0:0:ac1f:8001]:123/me.html"
145+
string(36) "http://[2001:db8:0:85a3::ac1f:8001]/"
146+
string(12) "http://[::1]"
147+
string(31) "http://cont-ains.h-yph-en-s.com"
148+
bool(false)
149+
bool(false)
150+
bool(false)
151+
bool(false)
152+
bool(false)
153+
bool(false)
154+
bool(false)
155+
bool(false)
156+
string(18) "file:///tmp/test.c"
157+
string(26) "ftp://ftp.example.com/tmp/"
158+
bool(false)
159+
bool(false)
160+
bool(false)
161+
bool(false)
162+
bool(false)
163+
bool(false)
164+
bool(false)
165+
bool(false)
166+
bool(false)
167+
string(18) "mailto:foo@bar.com"
168+
string(17) "news:news.php.net"
169+
string(14) "file://foo/bar"
170+
bool(false)
171+
bool(false)
172+
bool(false)
173+
bool(false)
174+
bool(false)
175+
bool(false)
176+
string(10) "http://qwe"
177+
bool(false)
178+
bool(false)
179+
string(22) "http://www.example.com"
180+
bool(false)
181+
string(42) "http://www.example.com/path/at/the/server/"
182+
bool(false)
183+
string(40) "http://www.example.com/index.php?a=b&c=d"
184+
Done

0 commit comments

Comments
 (0)