Skip to content

[WIP] Feature: WPS Writer Support #2769

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 10 commits into
base: master
Choose a base branch
from
2 changes: 1 addition & 1 deletion src/PhpWord/IOFactory.php
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ abstract class IOFactory
*/
public static function createWriter(PhpWord $phpWord, $name = 'Word2007')
{
if ($name !== 'WriterInterface' && !in_array($name, ['ODText', 'RTF', 'Word2007', 'HTML', 'PDF', 'EPub3'], true)) {
if ($name !== 'WriterInterface' && !in_array($name, ['ODText', 'RTF', 'Word2007', 'HTML', 'PDF', 'EPub3', 'WPS'], true)) {
throw new Exception("\"{$name}\" is not a valid writer.");
}

Expand Down
132 changes: 132 additions & 0 deletions src/PhpWord/Reader/WPS.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
<?php

/**
* This file is part of PHPWord - A pure PHP library for reading and writing
* word processing documents.
*
* PHPWord is free software distributed under the terms of the GNU Lesser
* General Public License version 3 as published by the Free Software Foundation.
*
* For the full copyright and license information, please read the LICENSE
* file that was distributed with this source code. For the full list of
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
*
* @see https://github.com/PHPOffice/PHPWord
*
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/

namespace PhpOffice\PhpWord\Reader;

use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Shared\XMLReader;

/**
 * Reader for WPS documents.
 *
 * Two flavours are handled: files whose leading bytes contain a binary WPS
 * chunk marker are delegated to WPSBinaryReader; everything else is treated
 * as a zip package containing META-INF/manifest.xml, content.xml and meta.xml.
 */
class WPS extends AbstractReader implements ReaderInterface
{
    /**
     * Magic pattern to identify WPS binary format files.
     */
    const WPS_MAGIC_PATTERN = '/(CHNKWKS|CHNKINK)/';

    /**
     * Loads PhpWord from file.
     *
     * @param string $docFile Path of the document to read
     *
     * @return PhpWord
     */
    public function load($docFile)
    {
        $phpWord = new PhpWord();

        // Binary files are handled by a dedicated reader and never touch the zip code path.
        if ($this->isBinaryWpsFile($docFile)) {
            return $this->loadBinaryWps($docFile, $phpWord);
        }

        // Otherwise process as XML-based WPS file: the manifest is read first
        // because every part reader receives the relationship list.
        $relationships = $this->readRelationships($docFile);
        $readerParts = [
            'content.xml' => 'Content',
            'meta.xml' => 'Meta',
        ];
        foreach ($readerParts as $xmlFile => $partName) {
            $this->readPart($phpWord, $relationships, $partName, $docFile, $xmlFile);
        }

        return $phpWord;
    }

    /**
     * Check if the file is a binary WPS file.
     *
     * Only the first 1024 bytes are inspected; the file counts as binary when
     * WPS_MAGIC_PATTERN matches anywhere inside that window.
     *
     * @param string $docFile Path of the document to probe
     *
     * @return bool False when the file cannot be read or no marker is found
     */
    private function isBinaryWpsFile($docFile)
    {
        $header = file_get_contents($docFile, false, null, 0, 1024);
        if (!is_string($header)) {
            return false;
        }

        return preg_match(self::WPS_MAGIC_PATTERN, $header) === 1;
    }

    /**
     * Load a binary WPS file.
     *
     * The text returned by WPSBinaryReader::extractText() is added as a single
     * text element inside one new section. When no text is extracted the
     * document is returned without any section.
     *
     * @param string $docFile Path of the binary document
     * @param PhpWord $phpWord Document that receives the extracted text
     *
     * @return PhpWord The same instance that was passed in
     */
    private function loadBinaryWps($docFile, PhpWord $phpWord)
    {
        $reader = new WPSBinaryReader();
        $text = $reader->extractText($docFile);

        // Compare against '' instead of using empty(): empty('0') is true, which
        // would silently discard a document whose entire text is "0".
        // NOTE(review): assumes extractText() returns a string - confirm against WPSBinaryReader.
        if (is_string($text) && $text !== '') {
            $section = $phpWord->addSection();
            $section->addText($text);
        }

        return $phpWord;
    }

    /**
     * Read document part.
     *
     * Resolves the part reader class from its short name inside the
     * Reader\WPS namespace; a part without a matching class is skipped.
     */
    private function readPart(PhpWord $phpWord, array $relationships, string $partName, string $docFile, string $xmlFile): void
    {
        $partClass = "PhpOffice\\PhpWord\\Reader\\WPS\\{$partName}";
        if (!class_exists($partClass)) {
            return;
        }

        /** @var WPS\AbstractPart $part Type hint */
        $part = new $partClass($docFile, $xmlFile);
        $part->setRels($relationships);
        $part->read($phpWord);
    }

    /**
     * Read all relationship files.
     *
     * @return array List of ['type' => media type, 'target' => full path] entries,
     *               one per manifest:file-entry found in META-INF/manifest.xml
     */
    private function readRelationships(string $docFile): array
    {
        $rels = [];
        $xmlFile = 'META-INF/manifest.xml';
        $xmlReader = new XMLReader();
        $xmlReader->getDomFromZip($docFile, $xmlFile);
        $nodes = $xmlReader->getElements('manifest:file-entry');
        foreach ($nodes as $node) {
            $rels[] = [
                'type' => $xmlReader->getAttribute('manifest:media-type', $node),
                'target' => $xmlReader->getAttribute('manifest:full-path', $node),
            ];
        }

        return $rels;
    }
}
30 changes: 30 additions & 0 deletions src/PhpWord/Reader/WPS/AbstractPart.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
<?php

/**
* This file is part of PHPWord - A pure PHP library for reading and writing
* word processing documents.
*
* PHPWord is free software distributed under the terms of the GNU Lesser
* General Public License version 3 as published by the Free Software Foundation.
*
* For the full copyright and license information, please read the LICENSE
* file that was distributed with this source code. For the full list of
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
*
* @see https://github.com/PHPOffice/PHPWord
*
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/

namespace PhpOffice\PhpWord\Reader\WPS;

use PhpOffice\PhpWord\Reader\Word2007\AbstractPart as Word2007AbstractPart;

/**
 * Abstract part reader for WPS format.
 *
 * Adds nothing of its own: it only re-exports the Word2007 part reader under
 * the Reader\WPS namespace so that WPS part readers have a common base type.
 *
 * @since 0.18.0
 */
abstract class AbstractPart extends Word2007AbstractPart
{
}
125 changes: 125 additions & 0 deletions src/PhpWord/Reader/WPS/Content.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
<?php

/**
* This file is part of PHPWord - A pure PHP library for reading and writing
* word processing documents.
*
* PHPWord is free software distributed under the terms of the GNU Lesser
* General Public License version 3 as published by the Free Software Foundation.
*
* For the full copyright and license information, please read the LICENSE
* file that was distributed with this source code. For the full list of
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
*
* @see https://github.com/PHPOffice/PHPWord
*
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/

namespace PhpOffice\PhpWord\Reader\WPS;

use DOMElement;
use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Shared\XMLReader;

/**
 * WPS content reader.
 *
 * Converts the children of office:body/office:text in content.xml into
 * elements of a single section: text:p becomes a text run, text:h becomes a
 * title, and table:table is recognised but not imported yet.
 *
 * @since 0.18.0
 */
class Content extends AbstractPart
{
    /**
     * Read content.xml.
     *
     * A section is only created when the body contains at least one element.
     */
    public function read(PhpWord $phpWord): void
    {
        // Prevent using an empty file as ZipArchive to avoid deprecation warnings
        if (filesize($this->docFile) === 0) {
            return;
        }

        $xmlReader = new XMLReader();
        $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);
        $nodes = $xmlReader->getElements('office:body/office:text/*');
        if ($nodes->length === 0) {
            return;
        }

        $section = $phpWord->addSection();
        foreach ($nodes as $node) {
            $this->readElement($xmlReader, $node, $section);
        }
    }

    /**
     * Read element based on node name.
     *
     * Node names without a case below are ignored.
     */
    private function readElement(XMLReader $xmlReader, DOMElement $node, \PhpOffice\PhpWord\Element\Section $parent): void
    {
        switch ($node->nodeName) {
            case 'text:p':
                $this->readParagraph($xmlReader, $node, $parent);

                break;
            case 'text:h':
                $this->readHeading($xmlReader, $node, $parent);

                break;
            case 'table:table':
                // Implement table reading as needed
                break;
        }
    }

    /**
     * Read paragraph.
     *
     * Children are visited in document order so that text written directly
     * inside the paragraph keeps its position relative to text:span and
     * text:line-break children (e.g. "Hello <span>world</span> again").
     * A text run is always added, even for an empty paragraph.
     *
     * @param XMLReader $xmlReader Reader used to extract span text
     * @param DOMElement $domNode The text:p element
     * @param mixed $parent Container that receives the text run
     * @param string $docPart Unused here; kept so the signature matches the parent class
     */
    protected function readParagraph(XMLReader $xmlReader, DOMElement $domNode, $parent, $docPart = 'document'): void
    {
        $textRun = $parent->addTextRun();

        foreach ($domNode->childNodes as $child) {
            if ($child->nodeType === XML_TEXT_NODE) {
                $text = (string) $child->nodeValue;
                // Whitespace-only nodes (typically indentation between elements) are skipped;
                // any other text is kept verbatim so spacing around spans survives.
                if (trim($text) !== '') {
                    $textRun->addText($text);
                }

                continue;
            }

            if (!$child instanceof DOMElement) {
                continue;
            }

            if ($child->nodeName === 'text:span') {
                $text = $xmlReader->getValue('.', $child);
                // Explicit comparison instead of empty(): a span containing "0" is real content.
                if ($text !== null && $text !== '') {
                    $textRun->addText($text);
                }
            } elseif ($child->nodeName === 'text:line-break') {
                $textRun->addTextBreak();
            }
        }
    }

    /**
     * Read heading.
     *
     * The title depth comes from text:outline-level; a missing, non-numeric
     * or non-positive value falls back to level 1.
     */
    private function readHeading(XMLReader $xmlReader, DOMElement $node, \PhpOffice\PhpWord\Element\Section $parent): void
    {
        $text = (string) $xmlReader->getValue('.', $node);
        $level = max(1, (int) $xmlReader->getAttribute('text:outline-level', $node));

        $parent->addTitle($text, $level);
    }
}
83 changes: 83 additions & 0 deletions src/PhpWord/Reader/WPS/Meta.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
<?php

/**
* This file is part of PHPWord - A pure PHP library for reading and writing
* word processing documents.
*
* PHPWord is free software distributed under the terms of the GNU Lesser
* General Public License version 3 as published by the Free Software Foundation.
*
* For the full copyright and license information, please read the LICENSE
* file that was distributed with this source code. For the full list of
* contributors, visit https://github.com/PHPOffice/PHPWord/contributors.
*
* @see https://github.com/PHPOffice/PHPWord
*
* @license http://www.gnu.org/licenses/lgpl.txt LGPL version 3
*/

namespace PhpOffice\PhpWord\Reader\WPS;

use PhpOffice\PhpWord\PhpWord;
use PhpOffice\PhpWord\Shared\XMLReader;

/**
 * WPS meta reader.
 *
 * Copies the document properties found in meta.xml into the DocInfo object
 * of the PhpWord instance being built.
 *
 * @since 0.18.0
 */
class Meta extends AbstractPart
{
    /**
     * Read meta.xml.
     *
     * Each supported entry is looked up by XPath and handed to the matching
     * DocInfo setter; entries that are missing or empty leave the property
     * untouched.
     */
    public function read(PhpWord $phpWord): void
    {
        // Same guard as the content reader: opening an empty file as a zip
        // archive triggers deprecation warnings.
        if (filesize($this->docFile) === 0) {
            return;
        }

        $xmlReader = new XMLReader();
        $xmlReader->getDomFromZip($this->docFile, $this->xmlFile);

        $docProps = $phpWord->getDocInfo();

        // XPath of the meta.xml entry => DocInfo setter that stores its value.
        $setters = [
            'office:meta/dc:title' => 'setTitle',
            'office:meta/dc:subject' => 'setSubject',
            'office:meta/meta:initial-creator' => 'setCreator',
            'office:meta/meta:keyword' => 'setKeywords',
            'office:meta/dc:description' => 'setDescription',
            'office:meta/meta:user-defined[@meta:name="Category"]' => 'setCategory',
            'office:meta/meta:user-defined[@meta:name="Company"]' => 'setCompany',
        ];

        foreach ($setters as $path => $setter) {
            $value = $xmlReader->getValue($path);
            // Explicit comparison instead of empty(): a property whose value is "0" is kept.
            if ($value !== null && $value !== '') {
                $docProps->{$setter}($value);
            }
        }
    }
}
Loading
Loading