Skip to content

Commit 8ba8d43

Browse files
committed
MsDoc Reader: Support for UTF-8 characters
1 parent 9f755a4 commit 8ba8d43

File tree

6 files changed

+59
-9
lines changed

6 files changed

+59
-9
lines changed

docs/changes/1.x/1.3.0.md

+1
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
- Word2007 Reader : Support for FormFields by [@vincentKool](https://github.com/vincentKool) in [#2653](https://github.com/PHPOffice/PHPWord/pull/2653)
1212
- RTF Writer : Support for Table Border Style fixing [#345](https://github.com/PHPOffice/PHPWord/issues/345) by [@Progi1984](https://github.com/Progi1984) in [#2656](https://github.com/PHPOffice/PHPWord/pull/2656)
1313
- Word2007 Reader: Support the page break (`<w:lastRenderedPageBreak/>`) by [@stanolacko](https://github.com/stanolacko) in [#2662](https://github.com/PHPOffice/PHPWord/pull/2662)
14+
- MsDoc Reader: Support for UTF-8 characters by [@Progi1984](https://github.com/Progi1984) fixing [#881](https://github.com/PHPOffice/PHPWord/issues/881), [#1454](https://github.com/PHPOffice/PHPWord/issues/1454), [#1817](https://github.com/PHPOffice/PHPWord/issues/1817), [#1927](https://github.com/PHPOffice/PHPWord/issues/1927), [#2383](https://github.com/PHPOffice/PHPWord/issues/2383), [#2565](https://github.com/PHPOffice/PHPWord/issues/2565) in [#2664](https://github.com/PHPOffice/PHPWord/pull/2664)
1415

1516
### Bug fixes
1617

src/PhpWord/Reader/MsDoc.php

+6-4
Original file line numberDiff line numberDiff line change
@@ -1279,10 +1279,12 @@ private function readRecordPlcfBtePapx(): void
12791279
break;
12801280
}
12811281
$strLen = $arrayRGFC[$key + 1] - $arrayRGFC[$key] - 1;
1282-
for ($inc = 0; $inc < $strLen; ++$inc) {
1283-
$byte = self::getInt1d($this->dataWorkDocument, $arrayRGFC[$key] + $inc);
1282+
for ($inc = 0; $inc < ($strLen * 2); ++$inc) {
1283+
$byte = self::getInt2d($this->dataWorkDocument, $arrayRGFC[$key] + ($inc * 2));
12841284
if ($byte > 0) {
1285-
$string .= chr($byte);
1285+
$string .= mb_chr($byte, 'UTF-8');
1286+
} else {
1287+
break;
12861288
}
12871289
}
12881290
}
@@ -2331,7 +2333,7 @@ private function generatePhpWord(): void
23312333
foreach ($this->arrayParagraphs as $itmParagraph) {
23322334
$textPara = $itmParagraph;
23332335
foreach ($this->arrayCharacters as $oCharacters) {
2334-
$subText = substr($textPara, $oCharacters->pos_start, $oCharacters->pos_len);
2336+
$subText = mb_substr($textPara, $oCharacters->pos_start, $oCharacters->pos_len);
23352337
$subText = str_replace(chr(13), PHP_EOL, $subText);
23362338
$arrayText = explode(PHP_EOL, $subText);
23372339
if (end($arrayText) == '') {

tests/PhpWordTests/Reader/MsDocTest.php

+52-5
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
use Exception;
2121
use PhpOffice\PhpWord\IOFactory;
22+
use PhpOffice\PhpWord\PhpWord;
2223
use PhpOffice\PhpWord\Reader\MsDoc;
2324

2425
/**
@@ -50,14 +51,18 @@ public function testCanReadFailed(): void
5051
self::assertFalse($object->canRead($filename));
5152
}
5253

53-
/**
54-
* Load.
55-
*/
56-
public function testLoad(): void
54+
public function testLoadBasic(): void
5755
{
5856
$filename = __DIR__ . '/../_files/documents/reader.doc';
5957
$phpWord = IOFactory::load($filename, 'MsDoc');
60-
self::assertInstanceOf('PhpOffice\\PhpWord\\PhpWord', $phpWord);
58+
self::assertInstanceOf(PhpWord::class, $phpWord);
59+
60+
$sections = $phpWord->getSections();
61+
self::assertCount(1, $sections);
62+
$elements = $sections[0]->getElements();
63+
self::assertArrayHasKey(0, $elements);
64+
$element0 = $elements[0];
65+
self::assertEquals('Welcome to PhpWord', $element0->getText());
6166
}
6267

6368
public function testLoadHalfPointFont(): void
@@ -76,6 +81,48 @@ public function testLoadHalfPointFont(): void
7681
}
7782
}
7883

84+
public function testLoadChinese(): void
85+
{
86+
$filename = __DIR__ . '/../_files/documents/docChinese.doc';
87+
$phpWord = IOFactory::load($filename, 'MsDoc');
88+
self::assertInstanceOf(PhpWord::class, $phpWord);
89+
90+
$sections = $phpWord->getSections();
91+
self::assertCount(1, $sections);
92+
$elements = $sections[0]->getElements();
93+
self::assertArrayHasKey(0, $elements);
94+
$element0 = $elements[0];
95+
self::assertEquals('OKKI AI 客户案例', $element0->getText());
96+
}
97+
98+
public function testLoadCzech(): void
99+
{
100+
$filename = __DIR__ . '/../_files/documents/docCzech.doc';
101+
$phpWord = IOFactory::load($filename, 'MsDoc');
102+
self::assertInstanceOf(PhpWord::class, $phpWord);
103+
104+
$sections = $phpWord->getSections();
105+
self::assertCount(1, $sections);
106+
$elements = $sections[0]->getElements();
107+
self::assertArrayHasKey(0, $elements);
108+
$element0 = $elements[0];
109+
self::assertEquals('Příliš žluťoučký kůň pěl ďábelské ódy', $element0->getText());
110+
}
111+
112+
public function testLoadSlovak(): void
113+
{
114+
$filename = __DIR__ . '/../_files/documents/docSlovak.doc';
115+
$phpWord = IOFactory::load($filename, 'MsDoc');
116+
self::assertInstanceOf(PhpWord::class, $phpWord);
117+
118+
$sections = $phpWord->getSections();
119+
self::assertCount(1, $sections);
120+
$elements = $sections[0]->getElements();
121+
self::assertArrayHasKey(0, $elements);
122+
$element0 = $elements[0];
123+
self::assertEquals('Pondelok', $element0->getText());
124+
}
125+
79126
/**
80127
* Test exception on not existing file.
81128
*/
Binary file not shown.
10.5 KB
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)