Skip to content

Commit b0ed3db

Browse files
authored Aug 30, 2024
MsDoc Reader: Support for UTF-8 characters (#2664)
1 parent 9f755a4 commit b0ed3db

File tree

6 files changed

+68
-9
lines changed

6 files changed

+68
-9
lines changed
 

‎docs/changes/1.x/1.3.0.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
- Word2007 Reader : Support for FormFields by [@vincentKool](https://github.com/vincentKool) in [#2653](https://github.com/PHPOffice/PHPWord/pull/2653)
1212
- RTF Writer : Support for Table Border Style fixing [#345](https://github.com/PHPOffice/PHPWord/issues/345) by [@Progi1984](https://github.com/Progi1984) in [#2656](https://github.com/PHPOffice/PHPWord/pull/2656)
1313
- Word2007 Reader: Support the page break (`<w:lastRenderedPageBreak/>`) by [@stanolacko](https://github.com/stanolacko) in [#2662](https://github.com/PHPOffice/PHPWord/pull/2662)
14+
- MsDoc Reader: Support for UTF-8 characters by [@Progi1984](https://github.com/Progi1984) fixing [#881](https://github.com/PHPOffice/PHPWord/issues/881), [#1454](https://github.com/PHPOffice/PHPWord/issues/1454), [#1817](https://github.com/PHPOffice/PHPWord/issues/1817), [#1927](https://github.com/PHPOffice/PHPWord/issues/1927), [#2383](https://github.com/PHPOffice/PHPWord/issues/2383), [#2565](https://github.com/PHPOffice/PHPWord/issues/2565) in [#2664](https://github.com/PHPOffice/PHPWord/pull/2664)
1415

1516
### Bug fixes
1617

‎src/PhpWord/Reader/MsDoc.php

+6-4
Original file line numberDiff line numberDiff line change
@@ -1279,10 +1279,12 @@ private function readRecordPlcfBtePapx(): void
12791279
break;
12801280
}
12811281
$strLen = $arrayRGFC[$key + 1] - $arrayRGFC[$key] - 1;
1282-
for ($inc = 0; $inc < $strLen; ++$inc) {
1283-
$byte = self::getInt1d($this->dataWorkDocument, $arrayRGFC[$key] + $inc);
1282+
for ($inc = 0; $inc < ($strLen * 2); ++$inc) {
1283+
$byte = self::getInt2d($this->dataWorkDocument, $arrayRGFC[$key] + ($inc * 2));
12841284
if ($byte > 0) {
1285-
$string .= chr($byte);
1285+
$string .= mb_chr($byte, 'UTF-8');
1286+
} else {
1287+
break;
12861288
}
12871289
}
12881290
}
@@ -2331,7 +2333,7 @@ private function generatePhpWord(): void
23312333
foreach ($this->arrayParagraphs as $itmParagraph) {
23322334
$textPara = $itmParagraph;
23332335
foreach ($this->arrayCharacters as $oCharacters) {
2334-
$subText = substr($textPara, $oCharacters->pos_start, $oCharacters->pos_len);
2336+
$subText = mb_substr($textPara, $oCharacters->pos_start, $oCharacters->pos_len);
23352337
$subText = str_replace(chr(13), PHP_EOL, $subText);
23362338
$arrayText = explode(PHP_EOL, $subText);
23372339
if (end($arrayText) == '') {

‎tests/PhpWordTests/Reader/MsDocTest.php

+61-5
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@
1818
namespace PhpOffice\PhpWordTests\Reader;
1919

2020
use Exception;
21+
use PhpOffice\PhpWord\Element\Text;
2122
use PhpOffice\PhpWord\IOFactory;
23+
use PhpOffice\PhpWord\PhpWord;
2224
use PhpOffice\PhpWord\Reader\MsDoc;
2325

2426
/**
@@ -50,14 +52,20 @@ public function testCanReadFailed(): void
5052
self::assertFalse($object->canRead($filename));
5153
}
5254

53-
/**
54-
* Load.
55-
*/
56-
public function testLoad(): void
55+
public function testLoadBasic(): void
5756
{
5857
$filename = __DIR__ . '/../_files/documents/reader.doc';
5958
$phpWord = IOFactory::load($filename, 'MsDoc');
60-
self::assertInstanceOf('PhpOffice\\PhpWord\\PhpWord', $phpWord);
59+
self::assertInstanceOf(PhpWord::class, $phpWord);
60+
61+
$sections = $phpWord->getSections();
62+
self::assertCount(1, $sections);
63+
$elements = $sections[0]->getElements();
64+
self::assertArrayHasKey(0, $elements);
65+
/** @var Text $element0 */
66+
$element0 = $elements[0];
67+
self::assertInstanceOf(Text::class, $element0);
68+
self::assertEquals('Welcome to PhpWord', $element0->getText());
6169
}
6270

6371
public function testLoadHalfPointFont(): void
@@ -76,6 +84,54 @@ public function testLoadHalfPointFont(): void
7684
}
7785
}
7886

87+
public function testLoadChinese(): void
88+
{
89+
$filename = __DIR__ . '/../_files/documents/docChinese.doc';
90+
$phpWord = IOFactory::load($filename, 'MsDoc');
91+
self::assertInstanceOf(PhpWord::class, $phpWord);
92+
93+
$sections = $phpWord->getSections();
94+
self::assertCount(1, $sections);
95+
$elements = $sections[0]->getElements();
96+
self::assertArrayHasKey(0, $elements);
97+
/** @var Text $element0 */
98+
$element0 = $elements[0];
99+
self::assertInstanceOf(Text::class, $element0);
100+
self::assertEquals('OKKI AI 客户案例', $element0->getText());
101+
}
102+
103+
public function testLoadCzech(): void
104+
{
105+
$filename = __DIR__ . '/../_files/documents/docCzech.doc';
106+
$phpWord = IOFactory::load($filename, 'MsDoc');
107+
self::assertInstanceOf(PhpWord::class, $phpWord);
108+
109+
$sections = $phpWord->getSections();
110+
self::assertCount(1, $sections);
111+
$elements = $sections[0]->getElements();
112+
self::assertArrayHasKey(0, $elements);
113+
/** @var Text $element0 */
114+
$element0 = $elements[0];
115+
self::assertInstanceOf(Text::class, $element0);
116+
self::assertEquals('Příliš žluťoučký kůň pěl ďábelské ódy', $element0->getText());
117+
}
118+
119+
public function testLoadSlovak(): void
120+
{
121+
$filename = __DIR__ . '/../_files/documents/docSlovak.doc';
122+
$phpWord = IOFactory::load($filename, 'MsDoc');
123+
self::assertInstanceOf(PhpWord::class, $phpWord);
124+
125+
$sections = $phpWord->getSections();
126+
self::assertCount(1, $sections);
127+
$elements = $sections[0]->getElements();
128+
self::assertArrayHasKey(0, $elements);
129+
/** @var Text $element0 */
130+
$element0 = $elements[0];
131+
self::assertInstanceOf(Text::class, $element0);
132+
self::assertEquals('Pondelok', $element0->getText());
133+
}
134+
79135
/**
80136
* Test exception on not existing file.
81137
*/
Binary file not shown.
10.5 KB
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)
Please sign in to comment.