|
| 1 | +<?php |
| 2 | + |
| 3 | +namespace mik\writers; |
| 4 | + |
| 5 | +use GuzzleHttp\Client; |
| 6 | +use mik\exceptions\MikErrorException; |
| 7 | +use Monolog\Logger; |
| 8 | +use League\Csv\Writer; |
| 9 | + |
/**
 * Writer that appends one CSV row per harvested OAI-PMH record and, unless
 * configured for metadata only, downloads the record's content file into the
 * output directory.
 */
class OaipmhCsv extends \mik\writers\Writer
{
    /**
     * @var array $settings - configuration settings from configuration class.
     */
    public $settings;

    /**
     * @var object $fetcher - OAI-PMH fetcher built from the settings.
     */
    private $fetcher;

    /**
     * @var object $fileGetter - instance of the class named by
     *   [FILE_GETTER] class in the settings.
     */
    private $fileGetter;

    /**
     * @var string $output_directory - value of [WRITER] output_directory.
     *   Not read inside this class (writePackages() uses $outputDirectory);
     *   kept because external code may read it.
     */
    public $output_directory;

    /**
     * @var string $output_file_path - value of [WRITER] output_file; the CSV
     *   file that rows are appended to.
     */
    public $output_file_path;

    /**
     * @var object $output_csv_writer - League\Csv\Writer opened on
     *   $output_file_path in append mode.
     */
    public $output_csv_writer;

    /**
     * @var int $httpTimeout - timeout, in seconds, used both for connecting
     *   and for the whole request when downloading content files.
     */
    public $httpTimeout = 60;

    /**
     * @var bool $metadata_only - when truthy, only the CSV row is written and
     *   no content file is downloaded.
     */
    public $metadata_only = false;

    /**
     * @var bool $verifyCA - passed to Guzzle's 'verify' request option.
     */
    public $verifyCA = true;

    /**
     * Create a new OAI-PMH CSV writer instance.
     *
     * @param array $settings configuration settings.
     */
    public function __construct($settings)
    {
        parent::__construct($settings);
        $this->fetcher = new \mik\fetchers\Oaipmh($settings);
        $fileGetterClass = 'mik\\filegetters\\' . $settings['FILE_GETTER']['class'];
        $this->fileGetter = new $fileGetterClass($settings);
        $this->output_directory = $settings['WRITER']['output_directory'];

        $this->output_file_path = $this->settings['WRITER']['output_file'];
        // Mode 'a': rows are appended, so an existing CSV file is extended.
        $this->output_csv_writer = Writer::createFromPath($this->output_file_path, 'a');

        $writerSettings = $this->settings['WRITER'];

        // Seconds.
        $this->httpTimeout = isset($writerSettings['http_timeout'])
            ? $writerSettings['http_timeout']
            : 60;

        $this->metadata_only = isset($writerSettings['metadata_only'])
            ? $writerSettings['metadata_only']
            : false;

        // Default Mac PHP setups may use Apple's Secure Transport
        // rather than OpenSSL, causing issues with CA verification.
        // Allow configuration override of CA verification at users own risk.
        // Verification stays on unless verify_ca is explicitly set to a falsy
        // value; a truthy setting must also yield true (previously it left the
        // property unset, so Guzzle was handed null).
        $this->verifyCA = !isset($this->settings['SYSTEM']['verify_ca'])
            || (bool) $this->settings['SYSTEM']['verify_ca'];
    }

    /**
     * Write the CSV row for a record and, unless metadata_only is set,
     * download its content file into the output directory.
     *
     * Exceptions thrown by the HTTP client are not caught here and propagate
     * to the caller.
     *
     * @param array $metadata row to append to the CSV file.
     * @param mixed $pages not used by this writer.
     * @param string $record_id identifier of the record; also used (after
     *   normalizeFilename()) as the base name of the downloaded file.
     */
    public function writePackages($metadata, $pages, $record_id)
    {
        // Create root output folder
        $this->createOutputDirectory();
        $output_path = $this->outputDirectory . DIRECTORY_SEPARATOR;

        $normalized_record_id = $this->normalizeFilename($record_id);
        $this->writeMetadataFile($metadata);

        if ($this->metadata_only) {
            return;
        }

        // Retrieve the file associated with the document and write it to the output
        // folder using the filename or record_id identifier
        $source_file_url = $this->fileGetter->getFilePath($record_id);
        if (!$source_file_url) {
            $this->log->addWarning(
                "No content file found in OAI-PMH record",
                array('record' => $record_id)
            );
            return;
        }

        // Retrieve the PDF, etc. using Guzzle.
        $client = new Client();
        $response = $client->get(
            $source_file_url,
            array(
                'stream' => true,
                'timeout' => $this->httpTimeout,
                'connect_timeout' => $this->httpTimeout,
                'verify' => $this->verifyCA,
            )
        );

        $extension = $this->extensionFromContentType($response->getHeader('Content-Type'));
        $content_file_path = $output_path . $normalized_record_id . '.' . $extension;

        // Open once in truncating mode: appending chunk by chunk to a file left
        // over from an earlier run would concatenate the old and new downloads.
        $handle = fopen($content_file_path, 'wb');
        if ($handle === false) {
            $this->log->addError(
                "Could not open content file for writing",
                array('record' => $record_id, 'path' => $content_file_path)
            );
            return;
        }

        try {
            $body = $response->getBody();
            while (!$body->eof()) {
                fwrite($handle, $body->read(2048));
            }
        } finally {
            fclose($handle);
        }
    }

    /**
     * Adds a row to CSV file (unlike other Writers' writeMetadataFile(),
     * which writes out an entire metadata XML file).
     *
     * @param array $metadata row to append.
     * @param string $output_file_path not used by this writer; the row always
     *   goes to the CSV writer opened in the constructor.
     */
    public function writeMetadataFile($metadata, $output_file_path = '')
    {
        $this->output_csv_writer->insertOne($metadata);
    }

    /**
     * URL-decode a string and replace every colon (including ones that were
     * encoded as %3A) with an underscore.
     *
     * @param string $string raw identifier or filename.
     * @return string the normalized name.
     */
    public function normalizeFilename($string)
    {
        return str_replace(':', '_', urldecode($string));
    }

    /**
     * Lazy MimeType => extension mapping: use the last part of the MimeType.
     *
     * Parameters such as "; charset=utf-8" are dropped. When the header is
     * missing or has no subtype, 'bin' is returned so the file name never
     * ends in a bare dot.
     *
     * @param array $content_types values of the Content-Type response header.
     * @return string extension without the leading dot.
     */
    private function extensionFromContentType($content_types)
    {
        if (empty($content_types[0])) {
            return 'bin';
        }

        $media_type = preg_replace('/;.*$/', '', $content_types[0]);
        $parts = explode('/', $media_type);
        $extension = isset($parts[1]) ? trim($parts[1]) : '';

        return $extension !== '' ? $extension : 'bin';
    }
}
0 commit comments