Skip to content

Commit 53901a3

Browse files
committedApr 12, 2018
Work on #463.
1 parent e7d7ece commit 53901a3

File tree

4 files changed

+300
-1
lines changed

4 files changed

+300
-1
lines changed
 

‎mik

+1-1
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ foreach ($records as $record) {
247247
// Create metadata to return an XML file that can be passed
248248
// on to the writer.
249249
try {
250-
$metadata = $parser->metadata($record_key) . "\n";
250+
$metadata = $parser->metadata($record_key);
251251
} catch (Exception $exception) {
252252
$log->addError(
253253
'ErrorException',

‎src/metadataparsers/csv/Csv.php

+28
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
<?php
2+
// src/metadataparsers/csv/Csv.php
3+
4+
namespace mik\metadataparsers\csv;
5+
6+
use mik\metadataparsers\MetadataParser;
7+
8+
/**
9+
* Templated metadata parser - Generates CSV metadata.
10+
*/
11+
abstract class Csv extends MetadataParser
{
    /**
     * Prepares the state shared by CSV metadata parsers.
     *
     * Instantiates the fetcher class named in the FETCHER section, records
     * the writer's output file path and picks up the optional list of
     * metadata manipulators.
     *
     * @param array $settings
     *   Configuration settings. This constructor reads
     *   $settings['FETCHER']['class'], ['WRITER']['output_file'] and,
     *   when present, ['MANIPULATORS']['metadatamanipulators'].
     */
    public function __construct($settings)
    {
        parent::__construct($settings);

        // The fetcher class name is given relative to the mik\fetchers namespace.
        $fetcherName = 'mik\\fetchers\\' . $settings['FETCHER']['class'];
        $this->fetcher = new $fetcherName($settings);

        $this->outputFile = $this->settings['WRITER']['output_file'];

        // Null means "no manipulators configured".
        $this->metadatamanipulators = isset($this->settings['MANIPULATORS']['metadatamanipulators'])
            ? $this->settings['MANIPULATORS']['metadatamanipulators']
            : null;
    }
}

‎src/metadataparsers/csv/DcToCsv.php

+136
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
<?php
2+
// src/metadataparsers/csv/DcToCsv.php
3+
4+
/**
5+
* Metadata parser class that writes metadata out to a CSV file.
6+
*/
7+
8+
namespace mik\metadataparsers\csv;
9+
10+
// use mik\metadataparsers\MetadataParser;
11+
use League\Csv\Writer;
12+
use Monolog\Logger;
13+
14+
class DcToCsv extends Csv
{
    /**
     * Create a new metadata parser instance.
     *
     * Besides the base class setup, this appends the CSV heading row
     * (the record key heading followed by the configured DC element names)
     * to the output file.
     *
     * @param array $settings
     *   Configuration settings. Reads ['METADATA_PARSER']['dc_elements'],
     *   ['METADATA_PARSER']['record_key'] and ['WRITER']['output_file'].
     */
    public function __construct($settings)
    {
        // Call the Csv base class constructor.
        parent::__construct($settings);

        // Only the heading row is written here; the CSV writer that writes
        // out object metadata is instantiated in the writer.
        // NOTE(review): the file is opened in append mode, so a heading row
        // is added every time a parser is constructed against an existing
        // file - confirm that this is intended.
        $headings = $this->settings['METADATA_PARSER']['dc_elements'];
        array_unshift($headings, $this->settings['METADATA_PARSER']['record_key']);
        $output_file_path = $this->settings['WRITER']['output_file'];
        $output_csv_writer = Writer::createFromPath($output_file_path, 'a');
        $output_csv_writer->insertOne($headings);
    }

    /**
     * {@inheritdoc}
     *
     * Returns the CSV row (as an array) for the given record.
     */
    public function metadata($record_key)
    {
        $objectInfo = $this->fetcher->getItemInfo($record_key);
        return $this->populateRow($record_key, $objectInfo);
    }

    /**
     * Converts the item's metadata into an array.
     *
     * The row always has one cell for the record key followed by exactly one
     * cell per configured DC element, so that it lines up with the heading
     * row written by the constructor. Elements that are missing from the
     * record produce an empty cell.
     *
     * @param string $record_key
     *   The current item's record_key.
     * @param string $objectInfo
     *   The current item's metadata as generated by the fetcher. It is
     *   passed to getDcValues(), which loads it as a DC XML document.
     *
     * @return array
     *   The row, as an array.
     */
    public function populateRow($record_key, $objectInfo)
    {
        $record = $this->getDcValues($objectInfo);

        // Field will be named in metadata parser's 'record_key' config setting.
        $row = [$record_key];

        foreach ($this->settings['METADATA_PARSER']['dc_elements'] as $element) {
            // @todo: parse out multiple values and add them to the CSV separated
            // by a character; what does Migrate Plus want?
            if (isset($record[$element]) && count($record[$element]) > 0) {
                $row[] = $record[$element][0];
            } else {
                // Keep the column even when the record lacks this element;
                // otherwise every following cell shifts left of its heading.
                $row[] = '';
            }
        }

        if (isset($this->metadatamanipulators)) {
            $row = $this->applyMetadatamanipulators($record_key, $row);
        }

        return $row;
    }

    /**
     * Applies metadatamanipulators listed in the config to the provided row.
     *
     * Each configured entry has the form "ClassName|param1|param2...". The
     * manipulators are applied in order, each one receiving the row returned
     * by the previous one.
     *
     * @param string $record_key
     *   The current item's record_key.
     * @param array $row
     *   An array containing the object's metadata.
     *
     * @return array
     *   The modified array containing the object's metadata.
     */
    public function applyMetadatamanipulators($record_key, $row)
    {
        foreach ($this->metadatamanipulators as $manipulatorSpec) {
            $manipulatorParams = explode('|', $manipulatorSpec);
            // The first segment is the class name, the rest are its parameters.
            $manipulatorClassName = array_shift($manipulatorParams);
            $manipulatorClass = 'mik\\metadatamanipulators\\' . $manipulatorClassName;
            $manipulator = new $manipulatorClass($this->settings, $manipulatorParams, $record_key);
            $row = $manipulator->manipulate($row);
        }

        return $row;
    }

    /**
     * Parses a DC XML document into an array.
     *
     * Only elements in the http://purl.org/dc/elements/1.1/ namespace are
     * collected. Repeated elements are accumulated in document order.
     *
     * @param string $xml
     *   The DC XML document.
     *
     * @return array
     *   An associative array containing element name => element values.
     *   Empty when $xml is empty or cannot be parsed.
     */
    public function getDcValues($xml)
    {
        $dc_values = [];

        // DOMDocument::loadXML() rejects empty input, so bail out early.
        if ($xml === null || $xml === false || $xml === '') {
            return $dc_values;
        }

        $dom = new \DOMDocument();
        if (!$dom->loadXML($xml)) {
            return $dc_values;
        }

        $elements = $dom->getElementsByTagNameNS('http://purl.org/dc/elements/1.1/', '*');
        foreach ($elements as $e) {
            // Appending to a missing key creates the list for that element.
            $dc_values[$e->localName][] = $e->nodeValue;
        }

        return $dc_values;
    }
}

‎src/writers/OaipmhCsv.php

+135
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
<?php
2+
3+
namespace mik\writers;
4+
5+
use GuzzleHttp\Client;
6+
use mik\exceptions\MikErrorException;
7+
use Monolog\Logger;
8+
use League\Csv\Writer;
9+
10+
class OaipmhCsv extends \mik\writers\Writer
{
    /**
     * @var array $settings - configuration settings from configuration class.
     */
    public $settings;

    /**
     * @var object $fetcher - OAI-PMH fetcher instance.
     */
    private $fetcher;

    /**
     * @var object File getter registered in .ini file.
     */
    private $fileGetter;

    /**
     * @var string Output directory taken from the WRITER settings.
     */
    public $output_directory;

    /**
     * @var string Path to the CSV file that metadata rows are appended to.
     */
    public $output_file_path;

    /**
     * @var object League CSV writer opened on $output_file_path.
     */
    public $output_csv_writer;

    /**
     * @var int HTTP timeout, in seconds, used when retrieving content files.
     */
    public $httpTimeout;

    /**
     * @var bool When true, only the CSV row is written and no content file is retrieved.
     */
    public $metadata_only;

    /**
     * @var bool Whether the HTTP client verifies the server's CA certificate.
     */
    public $verifyCA;

    /**
     * Create a new OAI-PMH CSV writer instance.
     *
     * @param array $settings configuration settings.
     */
    public function __construct($settings)
    {
        parent::__construct($settings);
        $this->fetcher = new \mik\fetchers\Oaipmh($settings);
        $fileGetterClass = 'mik\\filegetters\\' . $settings['FILE_GETTER']['class'];
        $this->fileGetter = new $fileGetterClass($settings);
        $this->output_directory = $settings['WRITER']['output_directory'];

        // Rows are appended, so the heading row written elsewhere is preserved.
        $this->output_file_path = $this->settings['WRITER']['output_file'];
        $this->output_csv_writer = Writer::createFromPath($this->output_file_path, 'a');

        // Seconds.
        $this->httpTimeout = isset($this->settings['WRITER']['http_timeout'])
            ? $this->settings['WRITER']['http_timeout']
            : 60;

        $this->metadata_only = isset($this->settings['WRITER']['metadata_only'])
            ? $this->settings['WRITER']['metadata_only']
            : false;

        // Default Mac PHP setups may use Apple's Secure Transport
        // rather than OpenSSL, causing issues with CA verification.
        // Allow configuration override of CA verification at users own risk.
        // Verification stays on unless verify_ca is explicitly set to a falsy
        // value; the loose comparison is deliberate so that falsy values of
        // any type disable it.
        $this->verifyCA = !(
            isset($this->settings['SYSTEM']['verify_ca'])
            && $this->settings['SYSTEM']['verify_ca'] == false
        );
    }

    /**
     * Write the metadata row and, unless metadata_only is set, the content file.
     *
     * @param array $metadata
     *   The CSV row for the current record.
     * @param mixed $pages
     *   Not used by this writer.
     * @param string $record_id
     *   The current record's identifier; used to look up the content file
     *   and to name it in the output directory.
     */
    public function writePackages($metadata, $pages, $record_id)
    {
        // Create root output folder
        $this->createOutputDirectory();
        $this->writeMetadataFile($metadata);

        if ($this->metadata_only) {
            return;
        }

        // Retrieve the file associated with the document and write it to the output
        // folder using the filename or record_id identifier
        $source_file_url = $this->fileGetter->getFilePath($record_id);
        if (!$source_file_url) {
            $this->log->addWarning(
                "No content file found in OAI-PMH record",
                array('record' => $record_id)
            );
            return;
        }

        // Prefer the outputDirectory property when one is available to this
        // class (presumably provided by the parent writer); otherwise fall
        // back to the directory recorded by this class's constructor.
        $output_root = isset($this->outputDirectory)
            ? $this->outputDirectory
            : $this->output_directory;
        $output_path = $output_root . DIRECTORY_SEPARATOR;

        // Retrieve the PDF, etc. using Guzzle.
        $client = new Client();
        $response = $client->get(
            $source_file_url,
            ['stream' => true,
            'timeout' => $this->httpTimeout,
            'connect_timeout' => $this->httpTimeout,
            'verify' => $this->verifyCA]
        );

        $extension = $this->extensionFromContentType($response->getHeader('Content-Type'));
        $content_file_path = $output_path . $this->normalizeFilename($record_id) . '.' . $extension;

        // Open once in write mode so that a file left over from an earlier
        // run is replaced rather than appended to.
        $handle = fopen($content_file_path, 'wb');
        if ($handle === false) {
            $this->log->addWarning(
                "Could not open content file for writing",
                array('record' => $record_id, 'path' => $content_file_path)
            );
            return;
        }

        try {
            $body = $response->getBody();
            while (!$body->eof()) {
                fwrite($handle, $body->read(2048));
            }
        } finally {
            fclose($handle);
        }
    }

    /**
     * Derives a file extension from a response's Content-Type header values.
     *
     * Lazy MimeType => extension mapping: use the second segment of the
     * MimeType, with any parameters (e.g. "; charset=...") removed.
     *
     * @param array $content_types
     *   The Content-Type header values of the response.
     *
     * @return string
     *   The extension, or 'bin' when none can be derived.
     */
    private function extensionFromContentType($content_types)
    {
        if (empty($content_types[0])) {
            return 'bin';
        }

        $parts = explode('/', $content_types[0]);
        if (!isset($parts[1])) {
            return 'bin';
        }

        $extension = preg_replace('/;.*$/', '', $parts[1]);
        // The header comes from a remote server and ends up in a filename,
        // so keep only characters that are safe there.
        $extension = preg_replace('/[^A-Za-z0-9.+_-]/', '', $extension);

        return $extension === '' ? 'bin' : $extension;
    }

    /**
     * Adds a row to the CSV file (unlike other Writers' writeMetadataFile(),
     * which writes out an entire metadata XML file).
     *
     * @param array $metadata
     *   The row to append.
     * @param string $output_file_path
     *   Not used; the row goes to the CSV writer opened in the constructor.
     */
    public function writeMetadataFile($metadata, $output_file_path = '')
    {
        $this->output_csv_writer->insertOne($metadata);
    }

    /**
     * Convert %3A (:) in filenames into underscores (_).
     *
     * @param string $string
     *   The (possibly URL-encoded) identifier.
     *
     * @return string
     *   The URL-decoded identifier with every colon replaced by an underscore.
     */
    public function normalizeFilename($string)
    {
        return str_replace(':', '_', urldecode($string));
    }
}

0 commit comments

Comments
 (0)
Please sign in to comment.