Skip to content

Commit 3e8ced0

Browse files
committed
[PHP 8.4][Intl] Add grapheme_str_split
Add a polyfill for the `grapheme_str_split` function added in PHP 8.4. It requires PHP 7.3 or later because the polyfill relies on the `\X` regex escape, which only works properly with PCRE2, and PCRE2 [only ships with PHP 7.3+](https://php.watch/versions/7.3/pcre2). Further, there are some cases in which the polyfill cannot split complex characters correctly (such as two consecutive country-flag emojis). This is now fixed upstream in [PCRE2Project/pcre2#410](https://github.com/PCRE2Project/pcre2/issues/410). However, that fix will likely only make it into PHP 8.4. References: - [RFC: Grapheme cluster for `str_split` function: `grapheme_str_split`](https://wiki.php.net/rfc/grapheme_str_split) - [PHP.Watch: PHP 8.4: New `grapheme_str_split` function](https://php.watch/versions/8.4/grapheme_str_split)
1 parent e85ab80 commit 3e8ced0

File tree

12 files changed

+160
-0
lines changed

12 files changed

+160
-0
lines changed

README.md

+1
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ Polyfills are provided for:
6868
- the `Date*Exception/Error` classes introduced in PHP 8.3;
6969
- the `SQLite3Exception` class introduced in PHP 8.3;
7070
- the `mb_ucfirst` and `mb_lcfirst` functions introduced in PHP 8.4;
71+
- the `grapheme_str_split` function introduced in PHP 8.4 (requires PHP >= 7.3);
7172

7273
It is strongly recommended to upgrade your PHP version and/or install the missing
7374
extensions whenever possible. This polyfill should be used only when there is no

src/Intl/Grapheme/Grapheme.php

+33
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
* - grapheme_strrpos - Find position (in grapheme units) of last occurrence of a string
2727
* - grapheme_strstr - Returns part of haystack string from the first occurrence of needle to the end of haystack
2828
* - grapheme_substr - Return part of a string
29+
* - grapheme_str_split - Splits a string into an array of individual graphemes or chunks of graphemes
2930
*
3031
* @author Nicolas Grekas <[email protected]>
3132
*
@@ -191,6 +192,38 @@ public static function grapheme_strstr($s, $needle, $beforeNeedle = false)
191192
return mb_strstr($s, $needle, $beforeNeedle, 'UTF-8');
192193
}
193194

195+
/**
 * Splits a string into grapheme clusters, optionally grouped into chunks.
 *
 * Grapheme boundaries are located with PCRE's `\X` escape in UTF-8 mode.
 *
 * @param string $s   The string to split
 * @param int    $len Number of grapheme clusters per chunk, from 1 to 1073741823
 *
 * @return array|false The list of chunks (an empty array for an empty string), or false
 *                     when nothing could be matched or, on PHP < 8, when $len is out of range
 *
 * @throws \ValueError On PHP >= 8 when $len is out of range
 */
public static function grapheme_str_split($s, $len = 1)
{
    // Zero is rejected as well: the error message promises "greater than 0",
    // and a zero length would otherwise reach array_chunk() below.
    if ($len < 1 || $len > 1073741823) {
        if (80000 > \PHP_VERSION_ID) {
            return false;
        }

        throw new \ValueError('grapheme_str_split(): Argument #2 ($length) must be greater than 0 and less than or equal to 1073741823.');
    }

    if ('' === $s) {
        return [];
    }

    // preg_match_all() yields false on failure and 0 when nothing matched.
    if (!preg_match_all('/\X/u', $s, $matches)) {
        return false;
    }

    if (1 === $len) {
        return $matches[0];
    }

    // Glue each group of $len clusters back into a single string.
    $chunks = [];
    foreach (array_chunk($matches[0], $len) as $group) {
        $chunks[] = implode('', $group);
    }

    return $chunks;
}
226+
194227
private static function grapheme_position($s, $needle, $offset, $mode)
195228
{
196229
$needle = (string) $needle;

src/Intl/Grapheme/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ This component provides a partial, native PHP implementation of the
2121
- [`grapheme_strstr`](https://php.net/grapheme_strstr): Returns part of haystack string from
2222
the first occurrence of needle to the end of haystack
2323
- [`grapheme_substr`](https://php.net/grapheme_substr): Return part of a string
24+
- [`grapheme_str_split`](https://php.net/grapheme_str_split): Splits a string into an array of individual graphemes or chunks of graphemes
2425

2526
More information can be found in the
2627
[main Polyfill README](https://github.com/symfony/polyfill/blob/main/README.md).

src/Intl/Grapheme/bootstrap.php

+4
Original file line numberDiff line numberDiff line change
@@ -56,3 +56,7 @@ function grapheme_strstr($haystack, $needle, $beforeNeedle = false) { return p\G
5656
// Declare grapheme_substr() only when no such function exists yet.
if (!function_exists('grapheme_substr')) {
    function grapheme_substr($string, $offset, $length = null)
    {
        return p\Grapheme::grapheme_substr($string, $offset, $length);
    }
}
59+
60+
// The additional bootstrap file is only loaded on PHP 7.3 and newer.
if (70300 <= \PHP_VERSION_ID) {
    require __DIR__.'/bootstrap73.php';
}

src/Intl/Grapheme/bootstrap73.php

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\Polyfill\Intl\Grapheme as p;

// Delegate to this package's own Grapheme class (as bootstrap80.php does) rather than
// to the Php84 polyfill class, which lives in a separate package.
if (!function_exists('grapheme_str_split')) {
    function grapheme_str_split(string $string, int $length = 1) { return p\Grapheme::grapheme_str_split($string, $length); }
}
17+

src/Intl/Grapheme/bootstrap80.php

+3
Original file line numberDiff line numberDiff line change
@@ -48,3 +48,6 @@ function grapheme_strstr(?string $haystack, ?string $needle, ?bool $beforeNeedle
4848
// Declare grapheme_substr() only when no such function exists yet.
if (!function_exists('grapheme_substr')) {
    function grapheme_substr(?string $string, ?int $offset, ?int $length = null): string|false
    {
        // Null arguments are coerced to '' and 0 before delegating.
        return p\Grapheme::grapheme_substr((string) $string, (int) $offset, $length);
    }
}
51+
// Declare grapheme_str_split() only when no such function exists yet.
if (!function_exists('grapheme_str_split')) {
    function grapheme_str_split(string $string, int $length = 1): array|false
    {
        return p\Grapheme::grapheme_str_split($string, $length);
    }
}

src/Php84/Php84.php

+29
Original file line numberDiff line numberDiff line change
@@ -63,4 +63,33 @@ public static function mb_lcfirst(string $string, ?string $encoding = null): str
6363

6464
return $firstChar . mb_substr($string, 1, null, $encoding);
6565
}
66+
67+
/**
 * Splits a string into grapheme clusters, optionally grouped into chunks.
 *
 * Grapheme boundaries are located with PCRE's `\X` escape in UTF-8 mode.
 *
 * @param string $string The string to split
 * @param int    $length Number of grapheme clusters per chunk, from 1 to 1073741823
 *
 * @return array|false The list of chunks (an empty array for an empty string),
 *                     or false when nothing could be matched
 *
 * @throws \ValueError When $length is out of range
 */
public static function grapheme_str_split(string $string, int $length)
{
    // Zero is rejected as well: the error message promises "greater than 0",
    // and a zero length would otherwise reach array_chunk() below.
    if ($length < 1 || $length > 1073741823) {
        throw new \ValueError('grapheme_str_split(): Argument #2 ($length) must be greater than 0 and less than or equal to 1073741823.');
    }

    if ('' === $string) {
        return [];
    }

    // preg_match_all() yields false on failure and 0 when nothing matched.
    if (!preg_match_all('/\X/u', $string, $matches)) {
        return false;
    }

    if (1 === $length) {
        return $matches[0];
    }

    // Glue each group of $length clusters back into a single string.
    $chunks = [];
    foreach (array_chunk($matches[0], $length) as $group) {
        $chunks[] = implode('', $group);
    }

    return $chunks;
}
6695
}

src/Php84/README.md

+1
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ Symfony Polyfill / Php84
44
This component provides features added to PHP 8.4 core:
55

66
- [`mb_ucfirst` and `mb_lcfirst`](https://wiki.php.net/rfc/mb_ucfirst)
7+
- [`grapheme_str_split`](https://wiki.php.net/rfc/grapheme_str_split)
78

89
More information can be found in the
910
[main Polyfill README](https://github.com/symfony/polyfill/blob/main/README.md).

src/Php84/bootstrap.php

+4
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,7 @@ function mb_ucfirst($string, ?string $encoding = null): string { return p\Php84:
2323
// Declare mb_lcfirst() only when no such function exists yet.
if (!function_exists('mb_lcfirst')) {
    function mb_lcfirst($string, ?string $encoding = null): string
    {
        return p\Php84::mb_lcfirst($string, $encoding);
    }
}
26+
27+
// The additional bootstrap file is only loaded on PHP 7.3 and newer.
if (70300 <= \PHP_VERSION_ID) {
    require __DIR__.'/bootstrap73.php';
}

src/Php84/bootstrap73.php

+21
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
<?php
2+
3+
/*
4+
* This file is part of the Symfony package.
5+
*
6+
* (c) Fabien Potencier <[email protected]>
7+
*
8+
* For the full copyright and license information, please view the LICENSE
9+
* file that was distributed with this source code.
10+
*/
11+
12+
use Symfony\Polyfill\Php84 as p;
13+
14+
// Nothing in this file is declared when running on PHP 8.4 or newer.
if (80400 <= \PHP_VERSION_ID) {
    return;
}

// Declared only when grapheme_substr() exists and grapheme_str_split() does not.
if (function_exists('grapheme_substr') && !function_exists('grapheme_str_split')) {
    function grapheme_str_split(string $string, int $length = 1)
    {
        return p\Php84::grapheme_str_split($string, $length);
    }
}
21+

tests/Intl/Grapheme/GraphemeTest.php

+26
Original file line numberDiff line numberDiff line change
@@ -207,4 +207,30 @@ public function testGraphemeStrstr()
207207
$this->assertSame('국어', grapheme_strstr('한국어', ''));
208208
$this->assertSame('ÉJÀ', grapheme_stristr('DÉJÀ', 'é'));
209209
}
210+
211+
/**
212+
* @dataProvider graphemeStrSplitDataProvider
213+
* @requires PHP 7.3
214+
*/
215+
public function testGraphemeStrSplit(string $string, int $length, array $expectedValues)
{
    // Split first, then compare strictly against the expected chunk list.
    $actualValues = grapheme_str_split($string, $length);

    $this->assertSame($expectedValues, $actualValues);
}
218+
219+
/**
 * Provides [input string, chunk length, expected chunks] rows for testGraphemeStrSplit().
 *
 * The expected chunks of every row concatenate back to the input string.
 */
public static function graphemeStrSplitDataProvider(): array
{
    $return = [
        ['', 1, []],
        ['PHP', 1, ['P', 'H', 'P']],
        ['你好', 1, ['你', '好']],
        ['අයේෂ්', 1, ['අ', 'යේ', 'ෂ්']],
        ['สวัสดี', 2, ['สวั', 'สดี']],
        ['土下座🙇‍♀を', 1, ['土', '下', '座', '🙇‍♀', 'を']],
    ];

    // https://github.com/PCRE2Project/pcre2/issues/410
    // Gate on PCRE2 >= 10.44: the minor version must be compared, not the major one twice.
    if (PCRE_VERSION_MAJOR > 10 || (10 === PCRE_VERSION_MAJOR && PCRE_VERSION_MINOR >= 44)) {
        // NOTE(review): this row duplicates the unconditional row above; it was presumably
        // meant to be an emoji-sequence case affected by the PCRE2 issue - confirm.
        $return[] = ['土下座🙇‍♀を', 1, ['土', '下', '座', '🙇‍♀', 'を']];
    }

    return $return;
}
210236
}

tests/Php84/Php84Test.php

+20
Original file line numberDiff line numberDiff line change
@@ -68,4 +68,24 @@ public static function lcFirstDataProvider(): array {
6868
["ß", "ß"],
6969
];
7070
}
71+
72+
/**
73+
* @dataProvider graphemeStrSplitDataProvider
74+
* @requires PHP 7.3
75+
*/
76+
public function testGraphemeStrSplit(string $string, int $length, array $expectedValues)
{
    // Split first, then compare strictly against the expected chunk list.
    $actualValues = grapheme_str_split($string, $length);

    $this->assertSame($expectedValues, $actualValues);
}
79+
80+
/**
 * Provides [input string, chunk length, expected chunks] rows for testGraphemeStrSplit().
 *
 * The expected chunks of every row concatenate back to the input string.
 */
public static function graphemeStrSplitDataProvider(): array
{
    return [
        ['', 1, []],
        ['PHP', 1, ['P', 'H', 'P']],
        ['你好', 1, ['你', '好']],
        ['අයේෂ්', 1, ['අ', 'යේ', 'ෂ්']],
        ['สวัสดี', 2, ['สวั', 'สดี']],
        ['土下座🙇‍♀を', 1, ['土', '下', '座', '🙇‍♀', 'を']],
        // TODO: cover '👭🏻👰🏿‍♂️' (two consecutive emoji sequences) once the PCRE2 fix is
        // available: https://github.com/PCRE2Project/pcre2/issues/410
    ];
}
7191
}

0 commit comments

Comments
 (0)