Skip to content

Commit 651aeff

Browse files
authored
Merge pull request #3 from PhantPHP/HtmlToText
Html to text
2 parents d1f5c69 + e3c41f8 commit 651aeff

File tree

4 files changed

+394
-5
lines changed

4 files changed

+394
-5
lines changed

README.md

+40-2
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ PHP >= 8.1
1414
### E-mail address validity checker
1515

1616
```php
17-
use Phant\EmailAddress\Service\EmailAddressValidator;
17+
use Phant\EmailSender\Service\EmailAddressValidator;
1818

1919
$emailAddressValidator = new EmailAddressValidator();
2020

@@ -31,11 +31,49 @@ if (!$emailAddressValidator->checkMxServer('john.doe@domain.ext')) {
3131
### E-mail Sender via Sendinblue
3232

3333
```php
34-
use Phant\Email\Service\SendinblueEmailSender;
34+
use Phant\EmailSender\Service\SendinblueEmailSender;
3535

3636
// @todo : Create e-mail with [phant/data-structure](https://github.com/PhantPHP/data-structure)
3737

3838
$apiKey = '*****.*****';
3939

4040
$sent = (new SendinblueEmailSender($apiKey))->send($email);
4141
```
42+
43+
### Html to text
44+
45+
Transform HTML
46+
```html
47+
<section>
48+
<h1>Lorem ipsum dolor sit amet.</h1>
49+
<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>
50+
<ul>
51+
<li>Foo</li>
52+
<li>Bar</li>
53+
</ul>
54+
<p><a href="https://domain.ext/path">Action</a></p>
55+
</section>
56+
```
57+
58+
To TEXT
59+
```text
60+
Lorem ipsum dolor sit amet.
61+
62+
Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.
63+
64+
- Foo
65+
- Bar
66+
67+
Action : https://domain.ext/path
68+
```
69+
70+
71+
```php
72+
use Phant\EmailSender\Service\HtmlToText;
73+
74+
$html = '';
75+
76+
$text = (new HtmlToText())($html);
77+
78+
79+
```

component/Service/HtmlToText.php

+109
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
<?php
2+
3+
declare(strict_types=1);
4+
5+
namespace Phant\EmailSender\Service;
6+
7+
/**
 * Converts an HTML fragment into a plain-text rendition.
 *
 * The instance is callable: `$text = (new HtmlToText())($html);`.
 * Every step is also exposed as a public static method so it can be used
 * on its own.
 */
final class HtmlToText
{
    /**
     * Runs the whole conversion pipeline on an HTML string.
     *
     * Order matters: source formatting is flattened first, inline tags that
     * carry information (links, line breaks, list items) are rewritten as
     * text, and only then are the remaining tags stripped.
     *
     * @throws \RuntimeException when a regular-expression step fails
     */
    public function __invoke(
        string $html
    ): string {
        $html = self::removeCodeIndent($html);
        $html = self::removeCodeFormating($html);

        $html = self::transformTagA($html);
        $html = self::transformTagBr($html);
        $html = self::transformTagLi($html);

        $html = self::removeNonTextStructuringTags($html);
        $html = self::removeTextStructuringTags($html);

        // Entities are decoded only once every tag is gone, so an escaped
        // "&lt;b&gt;" ends up as literal text instead of being stripped.
        $html = html_entity_decode($html, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        return self::cleanText($html);
    }

    /**
     * @deprecated Former single-underscore spelling, kept so existing callers
     *             keep working. Call the instance directly instead.
     */
    public function _invoke(
        string $html
    ): string {
        return $this->__invoke($html);
    }

    /**
     * Trims leading and trailing whitespace from every "\n"-separated line.
     */
    public static function removeCodeIndent(
        string $html
    ): string {
        $lines = array_map(
            static fn (string $line): string => trim($line),
            explode("\n", $html),
        );

        return implode("\n", $lines);
    }

    /**
     * Flattens the markup onto a single line and collapses runs of tabs and
     * spaces into one space.
     *
     * @throws \RuntimeException when a regular-expression step fails
     */
    public static function removeCodeFormating(
        string $html
    ): string {
        $html = trim($html);

        // A line break sitting between two tags is pure source formatting.
        $html = self::replace('/(?<=>)[\r\n]+(?=<)/', '', $html);
        // Any other line break is inter-word whitespace in HTML: keep one
        // space so wrapped text ("Hello\nworld") and wrapped tags
        // ("<a\nhref=...") are not glued together.
        $html = self::replace('/[\r\n]+/', ' ', $html);
        $html = self::replace('/\t+/', ' ', $html);

        return self::replace('/ +/', ' ', $html);
    }

    /**
     * Rewrites `<a href="url">label</a>` as `label : url`.
     *
     * Anchors without an `href` attribute are left untouched (their tags are
     * removed by the later stripping steps).
     *
     * @throws \RuntimeException when the regular expression fails
     */
    public static function transformTagA(
        string $html
    ): string {
        // [^>] keeps the attribute scan inside one opening tag, so an anchor
        // without href cannot swallow the text up to the next anchor.
        return self::replace(
            '/<a\s+(?:[^>]*?\s)?href\s*=\s*(["\'])(.*?)\1[^>]*>\s*(.*?)\s*<\/a>/is',
            '$3 : $2',
            $html,
        );
    }

    /**
     * Replaces every line-break tag (`<br>`, `<br/>`, `<br />`, `</br>`,
     * with or without attributes) by a newline.
     *
     * @throws \RuntimeException when the regular expression fails
     */
    public static function transformTagBr(
        string $html
    ): string {
        return self::replace('/<\/?br\b[^>]*>/i', "\n", $html);
    }

    /**
     * Prefixes the content of every `<li>` with "- ".
     *
     * @throws \RuntimeException when the regular expression fails
     */
    public static function transformTagLi(
        string $html
    ): string {
        // The tag name must be followed by whitespace or ">" so that
        // "<link ...>" is not mistaken for a list item.
        return self::replace('/(<li(?:\s[^>]*)?>)/i', '$1- ', $html);
    }

    /**
     * Strips every tag except headings, paragraphs and lists, putting a
     * newline after the closing tag of block-level containers.
     *
     * @throws \RuntimeException when a regular-expression step fails
     */
    public static function removeNonTextStructuringTags(
        string $html
    ): string {
        $html = self::removeCodeIndent($html);
        // strip_tags() removes tags but keeps their content, so script and
        // style bodies have to be dropped explicitly beforehand.
        $html = self::replace('/<(script|style)\b[^>]*>.*?<\/\1\s*>/is', '', $html);
        $html = self::replace(
            '/(<\/(address|article|aside|blockquote|div|fieldset|footer|form|header|main|nav|pre|section)>)/is',
            "$1\n",
            $html,
        );
        $html = strip_tags($html, ['<h1>', '<h2>', '<h3>', '<h4>', '<h5>', '<h6>', '<p>', '<ol>', '<ul>', '<li>']);
        $html = self::replace("/\n{2,}/", "\n", $html);

        return trim($html);
    }

    /**
     * Turns headings, paragraphs and list tags into line breaks, then strips
     * all remaining tags.
     *
     * @throws \RuntimeException when a regular-expression step fails
     */
    public static function removeTextStructuringTags(
        string $html
    ): string {
        $html = self::removeCodeIndent($html);
        $html = self::replace('/(<\/(dd|dl|dt|li|ol|ul)>)/is', "$1\n", $html);
        $html = self::replace('/(<(h([1-6]))(.*?)>)/is', "\n\n$1", $html);
        $html = self::replace('/(<\/(h([1-6]))(.*?)>)/is', "$1\n\n", $html);
        $html = self::replace('/(<\/p(.*?)>)/is', "$1\n\n", $html);
        $html = strip_tags($html);
        $html = self::replace("/\n{3,}/", "\n\n\n", $html);

        return trim($html);
    }

    /**
     * Trims every line, collapses runs of spaces and trims the whole text.
     *
     * @throws \RuntimeException when the regular expression fails
     */
    public static function cleanText(
        string $text
    ): string {
        $text = self::removeCodeIndent($text);
        $text = self::replace('/ +/', ' ', $text);

        return trim($text);
    }

    /**
     * preg_replace() wrapper that never returns null.
     *
     * Under strict_types a null result would otherwise surface as an opaque
     * TypeError further down the pipeline.
     *
     * @throws \RuntimeException with the PCRE error message on failure
     */
    private static function replace(
        string $pattern,
        string $replacement,
        string $subject
    ): string {
        $result = preg_replace($pattern, $replacement, $subject);

        if ($result === null) {
            throw new \RuntimeException('HTML to text conversion failed: ' . preg_last_error_msg());
        }

        return $result;
    }
}

test/Service/EmailAddressValidatorTest.php

+1-3
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@
44

55
namespace Test\Service;
66

7-
use PHPUnit\Framework\TestCase;
8-
97
use Phant\DataStructure\Web\EmailAddress;
108
use Phant\EmailSender\Service\EmailAddressValidator;
119

12-
final class EmailAddressValidatorTest extends TestCase
10+
final class EmailAddressValidatorTest extends \PHPUnit\Framework\TestCase
1311
{
1412
public function testCheckTrashMailBoxService(): void
1513
{

0 commit comments

Comments
 (0)