Skip to content

Commit 1a06173

Browse files
javiertroosan
javier
authored and committed
Add parsing of html image in base64 format (PHPOffice#1382)
* increased test coverage of new lines * added exception handling for file_get_contents errors * updated changelog
1 parent 82f3a2a commit 1a06173

File tree

5 files changed

+152
-2
lines changed

5 files changed

+152
-2
lines changed

CHANGELOG.md

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ v0.15.0 (?? ??? 2018)
2323
- Add support for table indent (tblInd) @Trainmaster #1343
2424
- Added parsing of internal links in HTML reader @lalop #1336
2525
- Several improvements to charts @JAEK-S #1332
26+
- Add parsing of html image in base64 format @jgpATs2w #1382
2627

2728
### Fixed
2829
- Fix reading of docx default style - @troosan #1238

samples/resources/Sample_30_ReadHTML.html

+10
Original file line numberDiff line numberDiff line change
@@ -11,5 +11,15 @@ <h1>Adding element via HTML</h1>
1111
<ul><li>Item 1</li><li>Item 2</li><ul><li>Item 2.1</li><li>Item 2.1</li></ul></ul>
1212
<p>Ordered (numbered) list:</p>
1313
<ol><li>Item 1</li><li>Item 2</li></ol>
14+
15+
<p style="line-height:2">Double height</p>
16+
17+
<h2>Includes images</h2>
18+
<img src="https://phpword.readthedocs.io/en/latest/_images/phpword.png" alt=""/>
19+
20+
<img src="https://localhost/gev/desarrollo/actividades/pruebas_14/5b064503587f7.jpeg" name="Imagen 12" align="bottom" width="208" height="183" border="0"/>
21+
<img src="http://localhost/gev/desarrollo/actividades/pruebas_14/5b064503589db.png" name="Imagen 13" align="bottom" width="143" height="202" border="0"/>
22+
<img src="http://localhost/gev/desarrollo/actividades/pruebas_14/5b0645035aac8.jpeg" name="Imagen 14" align="bottom" width="194" height="188" border="0"/>
23+
1424
</body>
1525
</html>

samples/results/.gitignore

100644100755
File mode changed.

src/PhpWord/Shared/Html.php

+54-2
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
use PhpOffice\PhpWord\Element\AbstractContainer;
2121
use PhpOffice\PhpWord\Element\Row;
2222
use PhpOffice\PhpWord\Element\Table;
23+
use PhpOffice\PhpWord\Settings;
2324
use PhpOffice\PhpWord\SimpleType\Jc;
2425
use PhpOffice\PhpWord\SimpleType\NumberFormat;
2526

@@ -32,6 +33,7 @@ class Html
3233
{
3334
private static $listIndex = 0;
3435
private static $xpath;
36+
private static $options;
3537

3638
/**
3739
* Add HTML parts.
@@ -44,13 +46,17 @@ class Html
4446
* @param string $html The code to parse
4547
* @param bool $fullHTML If it's a full HTML, no need to add 'body' tag
4648
* @param bool $preserveWhiteSpace If false, the whitespaces between nodes will be removed
49+
* @param array $options:
50+
* + IMG_SRC_SEARCH: optional substring to search for in an image src that is not a local file; used together with IMG_SRC_REPLACE to speed up loading of remote images when the files can be found locally
51+
* + IMG_SRC_REPLACE: optional replacement for the IMG_SRC_SEARCH substring in the image src (e.g. a local path); the substitution is applied only when both options are set
4752
*/
48-
public static function addHtml($element, $html, $fullHTML = false, $preserveWhiteSpace = true)
53+
public static function addHtml($element, $html, $fullHTML = false, $preserveWhiteSpace = true, $options = null)
4954
{
5055
/*
5156
* @todo parse $stylesheet for default styles. Should result in an array based on id, class and element,
5257
* which could be applied when such an element occurs in the parseNode function.
5358
*/
59+
self::$options = $options;
5460

5561
// Preprocess: remove all line ends, decode HTML entity,
5662
// fix ampersand and angle brackets and add body tag for HTML fragments
@@ -141,6 +147,7 @@ protected static function parseNode($node, $element, $styles = array(), $data =
141147
'sup' => array('Property', null, null, $styles, null, 'superScript', true),
142148
'sub' => array('Property', null, null, $styles, null, 'subScript', true),
143149
'span' => array('Span', $node, null, $styles, null, null, null),
150+
'font' => array('Span', $node, null, $styles, null, null, null),
144151
'table' => array('Table', $node, $element, $styles, null, null, null),
145152
'tr' => array('Row', $node, $element, $styles, null, null, null),
146153
'td' => array('Cell', $node, $element, $styles, null, null, null),
@@ -648,7 +655,52 @@ private static function parseImage($node, $element)
648655
break;
649656
}
650657
}
651-
$newElement = $element->addImage($src, $style);
658+
$originSrc = $src;
659+
if (strpos($src, 'data:image') !== false) {
660+
$tmpDir = Settings::getTempDir() . '/';
661+
662+
$match = array();
663+
preg_match('/data:image\/(\w+);base64,(.+)/', $src, $match);
664+
665+
$src = $imgFile = $tmpDir . uniqid() . '.' . $match[1];
666+
667+
$ifp = fopen($imgFile, 'wb');
668+
669+
if ($ifp !== false) {
670+
fwrite($ifp, base64_decode($match[2]));
671+
fclose($ifp);
672+
}
673+
}
674+
$src = urldecode($src);
675+
676+
if (!is_file($src)
677+
&& !is_null(self::$options)
678+
&& isset(self::$options['IMG_SRC_SEARCH'])
679+
&& isset(self::$options['IMG_SRC_REPLACE'])) {
680+
$src = str_replace(self::$options['IMG_SRC_SEARCH'], self::$options['IMG_SRC_REPLACE'], $src);
681+
}
682+
683+
if (!is_file($src)) {
684+
if ($imgBlob = @file_get_contents($src)) {
685+
$tmpDir = Settings::getTempDir() . '/';
686+
$match = array();
687+
preg_match('/.+\.(\w+)$/', $src, $match);
688+
$src = $tmpDir . uniqid() . '.' . $match[1];
689+
690+
$ifp = fopen($src, 'wb');
691+
692+
if ($ifp !== false) {
693+
fwrite($ifp, $imgBlob);
694+
fclose($ifp);
695+
}
696+
}
697+
}
698+
699+
if (is_file($src)) {
700+
$newElement = $element->addImage($src, $style);
701+
} else {
702+
throw new \Exception("Could not load image $originSrc");
703+
}
652704

653705
return $newElement;
654706
}

tests/PhpWord/Shared/HtmlTest.php

+87
Large diffs are not rendered by default.

0 commit comments

Comments
 (0)