diff --git a/.env.example b/.env.example index 45142831..96780ed0 100644 --- a/.env.example +++ b/.env.example @@ -24,6 +24,17 @@ COOL_OFF_PERIOD=10 SCHEMA_VERSION=beta BACKUP_KEEP_DAYS=7 +# Spectra Parsing Configuration +NMRKIT_API_URL=https://nmrkit.nmrxiv.org/latest/spectra/parse/url +BIOSCHEMA_API_URL=https://nmrxiv.org/api/v1/schemas/bioschemas +SPECTRA_STORAGE_DISK=local +SPECTRA_STORAGE_PATH=spectra_parse +SPECTRA_JOB_TRIES=3 +SPECTRA_JOB_TIMEOUT=600 +SPECTRA_RETRY_COUNT=3 +SPECTRA_DOWNLOAD_TIMEOUT=300 +SPECTRA_API_TIMEOUT=300 + # ============================================================================ # Logging Configuration # ============================================================================ diff --git a/.gitignore b/.gitignore index ccda3c5a..b262e1c1 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ /public/vendor/swagger-api /storage/*.key /storage/pail +/storage/app/spectra_parse /vendor .env .env.production diff --git a/app/Console/Commands/ExtractSpectra.php b/app/Console/Commands/ExtractSpectra.php deleted file mode 100644 index caf108f9..00000000 --- a/app/Console/Commands/ExtractSpectra.php +++ /dev/null @@ -1,182 +0,0 @@ -get(); - - foreach ($projects as $project) { - echo "\r\n"; - echo $project->identifier; - echo "\r\n"; - $studies = $project->studies; - foreach ($studies as $study) { - echo $study->identifier; - echo "\r\n"; - try { - if (! 
$study->has_nmrium) { - DB::transaction(function () use ($study) { - $download_url = $study->download_url; - if ($download_url) { - $nmrium_ = $this->processSpectra($download_url); - $parsedSpectra = $nmrium_['data']; - foreach ($parsedSpectra['spectra'] as $spectra) { - unset($spectra['data']); - unset($spectra['meta']); - unset($spectra['originalData']); - unset($spectra['originalInfo']); - } - - $version = $parsedSpectra['version']; - unset($parsedSpectra['version']); - - $nmriumJSON = [ - 'data' => $parsedSpectra, - 'version' => $version, - ]; - - $nmrium = $study->nmrium; - - if ($nmrium) { - $nmrium->nmrium_info = json_encode($nmriumJSON, JSON_UNESCAPED_UNICODE); - $nmrium->save(); - } else { - $nmrium = NMRium::create([ - 'nmrium_info' => json_encode($nmriumJSON, JSON_UNESCAPED_UNICODE), - ]); - $study->nmrium()->save($nmrium); - $study->has_nmrium = true; - $study->save(); - } - } - }); - } - $study = $study->fresh(); - if ($study->has_nmrium) { - $nmriumInfo = $study->nmrium->nmrium_info; - if (count($nmriumInfo['data']['spectra']) == 0) { - echo '--MISSING SPECTRA INFO (NMRIUM JSON)--'; - echo "\r\n"; - } else { - foreach ($study->datasets as $dataset) { - echo $dataset->identifier; - echo "\r\n"; - // echo $dataset->type; - // echo "\r\n"; - if (! 
$dataset->has_nmrium) { - $nmriumInfo = $study->nmrium->nmrium_info; - $_nmriumJSON = $nmriumInfo; - $fsObject = $dataset->fsObject; - - $studyFSObject = $study->fsObject; - $datasetFSObject = $dataset->fsObject; - - $draft = $study->draft; - - if ($draft && $draft->eln == 'chemotion') { - $path = '/'.$studyFSObject->name.'/'.$datasetFSObject->parent->name.'/'.$datasetFSObject->name; - } else { - $path = '/'.$studyFSObject->name.'/'.$datasetFSObject->name; - } - - $fType = $studyFSObject->type; - - $pathsMatch = false; - $spectrum = []; - $type = []; - foreach ($nmriumInfo['data']['spectra'] as $spectra) { - unset($_nmriumJSON['data']['spectra']); - $files = $spectra['sourceSelector']['files']; - if ($files) { - foreach ($files as $file) { - if (str_contains($file, $fType == 'file' ? $path : $path.'/')) { - $pathsMatch = true; - } - } - } - if ($pathsMatch) { - array_push($spectrum, $spectra); - $experimentDetailsExists = array_key_exists('experiment', $spectra['info']); - if ($experimentDetailsExists) { - $experiment = $spectra['info']['experiment']; - $nucleus = $spectra['info']['nucleus']; - if (is_array($nucleus)) { - $nucleus = implode('-', $nucleus); - } - array_push($type, $experiment.' 
- '.$nucleus); - } - $pathsMatch = false; - } - } - if (count($spectrum) > 0) { - $_nmriumJSON['data']['spectra'] = $spectrum; - $_nmrium = $dataset->nmrium; - if ($_nmrium) { - $_nmrium->nmrium_info = json_encode($_nmriumJSON, JSON_UNESCAPED_UNICODE); - $dataset->has_nmrium = true; - $_nmrium->save(); - } else { - $_nmrium = NMRium::create([ - 'nmrium_info' => json_encode($_nmriumJSON, JSON_UNESCAPED_UNICODE), - ]); - $dataset->nmrium()->save($_nmrium); - $dataset->has_nmrium = true; - } - } - $uType = array_unique($type); - if (count($uType) == 1) { - $dataset->type = $uType[0]; - } - $dataset->save(); - } - } - } - } - } catch (Exception $e) { - echo 'Caught exception: ', $e->getMessage(), "\n"; - } - - } - } - } - - public function processSpectra($url) - { - $url = urlencode($url); - $response = Http::timeout(300)->post('https://nodejs.nmrxiv.org/spectra-parser', [ - 'urls' => [$url], - 'snapshot' => false, - ]); - - return $response->json(); - } -} diff --git a/app/Console/Commands/QueueMetadataExtractionBagitGenerationJobs.php b/app/Console/Commands/QueueMetadataExtractionBagitGenerationJobs.php new file mode 100644 index 00000000..78dc66b6 --- /dev/null +++ b/app/Console/Commands/QueueMetadataExtractionBagitGenerationJobs.php @@ -0,0 +1,126 @@ +option('fresh')) { + if ($this->confirm('This will reset all BagIt status for all studies. Continue?', false)) { + Study::query()->update([ + 'metadata_bagit_generation_status' => null, + 'metadata_bagit_generation_logs' => null, + ]); + $this->info('✓ Cleared all existing BagIt status data'); + } else { + return self::SUCCESS; + } + } + + $query = Study::query() + ->where('has_nmrium', true) + ->where('is_public', true) + ->whereNotNull('download_url'); + + // Filter by specific study IDs if provided + if ($ids = $this->option('ids')) { + $studyIds = array_map('trim', explode(',', $ids)); + $query->whereIn('id', $studyIds); + $this->info('Processing '.count($studyIds).' 
specific study IDs...'); + } + + if ($this->option('retry-failed')) { + // Get failed study IDs from database + $failedStudies = Study::where('metadata_bagit_generation_status', 'failed')->get(); + + if ($failedStudies->isEmpty()) { + $this->warn('No failed jobs to retry.'); + + return self::SUCCESS; + } + + $query->whereIn('id', $failedStudies->pluck('id')); + $this->info('Retrying '.$failedStudies->count().' failed jobs...'); + + // Reset status to pending + Study::where('metadata_bagit_generation_status', 'failed') + ->whereIn('id', $failedStudies->pluck('id')) + ->update(['metadata_bagit_generation_status' => 'pending']); + } elseif (! $this->option('ids')) { + // Only exclude already processed studies when not targeting specific IDs + $query->where(function ($q) { + $q->whereNull('metadata_bagit_generation_status') + ->orWhereIn('metadata_bagit_generation_status', ['failed']); + }); + } + + if ($limit = $this->option('limit')) { + $query->limit((int) $limit); + } + + $studies = $query->get(); + + if ($studies->isEmpty()) { + $this->warn('No eligible studies found to process.'); + + return self::SUCCESS; + } + + $this->info("Found {$studies->count()} studies to process"); + + $bar = $this->output->createProgressBar($studies->count()); + $bar->setFormat('verbose'); + + $jobsDispatched = 0; + + foreach ($studies as $study) { + // Mark as pending with queued timestamp + $study->update([ + 'metadata_bagit_generation_status' => 'pending', + 'metadata_bagit_generation_logs' => [ + 'queued_at' => now()->toIso8601String(), + 'study_identifier' => str_replace('NMRXIV:', '', $study->identifier), + ], + ]); + + // Dispatch job to queue + ProcessMetadataExtractionBagitGenerationJob::dispatch($study->id); + + $jobsDispatched++; + $bar->advance(); + } + + $bar->finish(); + $this->newLine(2); + + $this->info("✓ Successfully dispatched {$jobsDispatched} jobs to the queue"); + + return self::SUCCESS; + } +} diff --git a/app/Jobs/ProcessMetadataExtractionBagitGenerationJob.php 
b/app/Jobs/ProcessMetadataExtractionBagitGenerationJob.php new file mode 100644 index 00000000..d71c923e --- /dev/null +++ b/app/Jobs/ProcessMetadataExtractionBagitGenerationJob.php @@ -0,0 +1,475 @@
+tries = config('nmrxiv.spectra_parsing.job_tries', 3);
+        $this->timeout = config('nmrxiv.spectra_parsing.job_timeout', 600);
+        $this->retries = config('nmrxiv.spectra_parsing.retry_count', 3);
+    }
+
+    /**
+     * Execute the job.
+     *
+     * Loads the study, marks it "processing", runs processStudy() and stores the
+     * outcome ("completed" or "failed") with timestamps in
+     * metadata_bagit_generation_logs. Failures are logged and recorded on the
+     * study but never rethrown.
+     */
+    public function handle(): void
+    {
+        try {
+            $study = Study::with('datasets')->find($this->studyId);
+
+            if (! $study) {
+                throw new \Exception("Study {$this->studyId} not found");
+            }
+
+            // Mark as processing
+            $study->update([
+                'metadata_bagit_generation_status' => 'processing',
+                'metadata_bagit_generation_logs' => array_merge((array) ($study->metadata_bagit_generation_logs ?: []), [
+                    'started_at' => now()->toIso8601String(),
+                ]),
+            ]);
+
+            Log::info("Processing metadata extraction for study {$study->id} ({$study->identifier})");
+
+            // Process the study with BagIt structure
+            $result = $this->processStudy($study);
+
+            // Mark as completed with metadata
+            $study->update([
+                'metadata_bagit_generation_status' => 'completed',
+                'metadata_bagit_generation_logs' => array_merge((array) ($study->metadata_bagit_generation_logs ?: []), [
+                    'completed_at' => now()->toIso8601String(),
+                    'storage_path' => $result['location'],
+                    'image_count' => $result['imageCount'],
+                ]),
+            ]);
+
+            Log::info("Successfully processed study {$study->id} ({$study->identifier}): {$result['imageCount']} images saved to {$result['location']}");
+        } catch (\Throwable $e) {
+            // \Throwable (not just \Exception): a TypeError or other \Error raised
+            // during processing must also end in "failed", otherwise the study
+            // stays in "processing" with no error recorded.
+            Log::error("Failed to process study {$this->studyId}: {$e->getMessage()}");
+
+            // Mark as failed with error message
+            $study = Study::find($this->studyId);
+            if ($study) {
+                $study->update([
+                    'metadata_bagit_generation_status' => 'failed',
+                    'metadata_bagit_generation_logs' => array_merge((array) ($study->metadata_bagit_generation_logs ?: []), [
+                        'failed_at' => now()->toIso8601String(),
+                        'error_message' => $e->getMessage(),
+                    ]),
+                ]);
+            }
+
+            // Don't rethrow - let the job complete so it doesn't retry infinitely
+        }
+    }
+
+    /**
+     * Process a single study with BagIt structure.
+     *
+     * Downloads the study archive, extracts it under
+     * {storage_path}/{identifier}/data, sends the same download URL to NMRKit,
+     * writes the nmrxiv-meta folder (PNG images, the .nmrium file and, when
+     * available, bio-schema.json) and finally generates the BagIt manifests.
+     * The temporary ZIP is removed whether or not processing succeeds.
+     *
+     * @return array{imageCount: int, location: string}
+     *
+     * @throws \Exception when the download, extraction or NMRKit call fails
+     */
+    protected function processStudy(Study $study): array
+    {
+        // Remove NMRXIV: prefix if present (e.g., "NMRXIV:S1295" -> "S1295")
+        $studyIdentifier = str_replace('NMRXIV:', '', $study->identifier);
+        $disk = Storage::disk(config('nmrxiv.spectra_parsing.storage_disk', 'local'));
+        $basePath = config('nmrxiv.spectra_parsing.storage_path', 'spectra_parse');
+        $baseDir = "{$basePath}/{$studyIdentifier}";
+        $dataDir = "{$baseDir}/data";
+        $zipPath = null;
+
+        try {
+            // Step 1: Download ZIP file
+            Log::info("Step 1/7: Downloading ZIP file for study {$study->id}");
+            $zipPath = $this->downloadWithRetry($study->download_url, $this->retries);
+
+            // Step 2: Extract ZIP to data directory
+            Log::info('Step 2/7: Extracting ZIP archive...');
+            $studyName = $this->extractZip($zipPath, $disk->path($dataDir));
+
+            // Step 3: Call NMRKit API
+            Log::info('Step 3/7: Calling NMRKit API...');
+            $jsonData = $this->callNMRKitAPI($study->download_url, $this->retries);
+
+            // Step 4: Fetch Bio-Schema (optional: any failure here is logged and skipped)
+            Log::info('Step 4/7: Fetching bio-schema...');
+            $bioSchema = null;
+            try {
+                $bioSchema = $this->fetchBioSchema($studyIdentifier, $this->retries);
+            } catch (\Throwable $e) {
+                Log::warning("Bio-schema fetch failed: {$e->getMessage()}. Continuing without bio-schema...");
+            }
+
+            // Step 5: Create nmrxiv-meta structure
+            Log::info('Step 5/7: Creating nmrxiv-meta structure...');
+            $metaDir = "{$dataDir}/{$studyName}/nmrxiv-meta";
+            $imagesDir = "{$metaDir}/images";
+
+            if (! $disk->exists($metaDir)) {
+                $disk->makeDirectory($metaDir, 0755, true);
+            }
+
+            // Clean up old images directory to prevent duplicates from previous runs
+            if ($disk->exists($imagesDir)) {
+                // Delete every file currently in the images directory
+                $oldImages = $disk->files($imagesDir);
+                foreach ($oldImages as $oldImage) {
+                    $disk->delete($oldImage);
+                }
+                Log::info('Cleaned up '.count($oldImages).' old image files');
+            } else {
+                $disk->makeDirectory($imagesDir, 0755, true);
+            }
+
+            // Drop the bulky per-spectrum fields before the response is written to disk
+            if (isset($jsonData['data']['spectra']) && is_array($jsonData['data']['spectra'])) {
+                foreach ($jsonData['data']['spectra'] as &$spectra) {
+                    unset($spectra['data']);
+                    unset($spectra['meta']);
+                    unset($spectra['originalData']);
+                    unset($spectra['originalInfo']);
+                }
+                // Break the reference left behind by the by-reference foreach
+                unset($spectra);
+            }
+
+            // Extract and save images as PNG files
+            $imageCount = 0;
+            if (isset($jsonData['images']) && is_array($jsonData['images'])) {
+                foreach ($jsonData['images'] as $imageData) {
+                    if (isset($imageData['id']) && isset($imageData['image'])) {
+                        $imageId = $imageData['id'];
+                        $base64Data = $imageData['image'];
+
+                        // Save PNG file
+                        $pngPath = "{$imagesDir}/{$imageId}.png";
+                        $this->savePNGFromBase64($base64Data, $disk->path($pngPath));
+                        $imageCount++;
+                    }
+                }
+            }
+
+            // Save {identifier}.nmrium: the API response with the per-spectrum fields
+            // removed above; the base64 images are left in place
+            $nmriumPath = "{$metaDir}/{$studyIdentifier}.nmrium";
+            $formattedJson = json_encode($jsonData, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
+            $disk->put($nmriumPath, $formattedJson);
+
+            // Save bio-schema.json
+            if ($bioSchema !== null) {
+                $bioSchemaPath = "{$metaDir}/bio-schema.json";
+                $bioSchemaJson = json_encode($bioSchema, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES);
+                $disk->put($bioSchemaPath, $bioSchemaJson);
+            }
+
+            // Step 6: Generate BagIt manifests
+            Log::info('Step 6/7: Generating BagIt manifests...');
+            $this->generateBagItManifests($disk->path($baseDir));
+
+            return [
+                'imageCount' => $imageCount,
+                'location' => $disk->path($baseDir),
+            ];
+        } finally {
+            // Step 7: Cleanup temporary files (always runs, even on exception)
+            if ($zipPath && file_exists($zipPath)) {
+                Log::info('Step 7/7: Cleaning up temporary ZIP file...');
+                @unlink($zipPath);
+            }
+        }
+    }
+
+    /**
+     * Download file with retry logic.
+     *
+     * Fetches $url and stores the response body in a uniquely named temporary
+     * ZIP under storage/app. At least one attempt is always made, with a
+     * two-second pause between attempts.
+     *
+     * @param  string  $url  Location of the archive to download.
+     * @param  int  $retries  Maximum number of attempts; values below 1 are treated as 1.
+     * @return string Absolute path of the downloaded temporary file.
+     *
+     * @throws \Exception when every attempt fails
+     */
+    protected function downloadWithRetry(string $url, int $retries): string
+    {
+        // Guarantee one attempt so the final throw always has a recorded cause.
+        $maxAttempts = max(1, $retries);
+        $timeout = config('nmrxiv.spectra_parsing.download_timeout', 300);
+        $lastException = null;
+
+        for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
+            $tempPath = storage_path('app/temp_'.uniqid().'.zip');
+
+            try {
+                Log::debug("Download attempt {$attempt}/{$maxAttempts}...");
+
+                $response = Http::timeout($timeout)->get($url);
+
+                if (! $response->successful()) {
+                    throw new \Exception("Download failed with status {$response->status()}");
+                }
+
+                if (file_put_contents($tempPath, $response->body()) === false) {
+                    throw new \Exception("Could not write downloaded archive to {$tempPath}");
+                }
+
+                return $tempPath;
+            } catch (\Exception $e) {
+                $lastException = $e;
+
+                // Do not leave a partially written archive behind.
+                if (file_exists($tempPath)) {
+                    @unlink($tempPath);
+                }
+
+                if ($attempt < $maxAttempts) {
+                    Log::warning("Download failed: {$e->getMessage()}. Retrying...");
+                    sleep(2);
+                }
+            }
+        }
+
+        throw new \Exception("Download failed after {$maxAttempts} attempts: ".$lastException->getMessage(), 0, $lastException);
+    }
+
+    /**
+     * Extract ZIP file and return the study name.
+     *
+     * The study name is the first path segment of the archive's first entry.
+     * The archive handle is closed on every path, including failures.
+     *
+     * @param  string  $zipPath  Path of the ZIP archive to read.
+     * @param  string  $extractTo  Directory the archive is extracted into.
+     * @return string First path segment of the first archive entry.
+     *
+     * @throws \Exception when the archive cannot be opened, has no usable first entry, or extraction fails
+     */
+    protected function extractZip(string $zipPath, string $extractTo): string
+    {
+        $zip = new ZipArchive;
+
+        if ($zip->open($zipPath) !== true) {
+            throw new \Exception("Failed to open ZIP file: {$zipPath}");
+        }
+
+        try {
+            // Get the root folder name from first entry
+            $firstEntry = $zip->numFiles > 0 ? $zip->getNameIndex(0) : false;
+            $studyName = is_string($firstEntry) ? explode('/', $firstEntry)[0] : null;
+
+            if (! $studyName) {
+                throw new \Exception('Could not determine study name from ZIP');
+            }
+
+            // Extract all files; extractTo() reports failure through its return value
+            if (! $zip->extractTo($extractTo)) {
+                throw new \Exception("Failed to extract ZIP file: {$zipPath}");
+            }
+
+            return $studyName;
+        } finally {
+            $zip->close();
+        }
+    }
+
+    /**
+     * Call NMRKit API with retry logic.
+     *
+     * Posts the download URL to the endpoint configured under
+     * nmrxiv.spectra_parsing.nmrkit_api_url and returns the decoded JSON body.
+     * At least one attempt is always made, with a two-second pause between attempts.
+     *
+     * @param  string  $url  Download URL of the study archive to be parsed.
+     * @param  int  $retries  Maximum number of attempts; values below 1 are treated as 1.
+     * @return array Decoded JSON response.
+     *
+     * @throws \Exception when every attempt fails or the response is not a JSON array/object
+     */
+    protected function callNMRKitAPI(string $url, int $retries): array
+    {
+        // Guarantee one attempt so the final throw always has a recorded cause.
+        $maxAttempts = max(1, $retries);
+        $timeout = config('nmrxiv.spectra_parsing.api_timeout', 300);
+        $apiUrl = config('nmrxiv.spectra_parsing.nmrkit_api_url');
+        $lastException = null;
+
+        for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
+            try {
+                Log::debug("NMRKit API attempt {$attempt}/{$maxAttempts}...");
+
+                $response = Http::timeout($timeout)
+                    ->post($apiUrl, [
+                        'url' => $url,
+                        'capture_snapshot' => true,
+                        'auto_processing' => true,
+                        'auto_detection' => true,
+                    ]);
+
+                if (! $response->successful()) {
+                    throw new \Exception("API request failed with status {$response->status()}: {$response->body()}");
+                }
+
+                $payload = $response->json();
+
+                // A body that does not decode to an array would otherwise violate the
+                // return type with a TypeError, which the catch below does not handle.
+                if (! is_array($payload)) {
+                    throw new \Exception('API response was not a JSON object or array');
+                }
+
+                return $payload;
+            } catch (\Exception $e) {
+                $lastException = $e;
+                if ($attempt < $maxAttempts) {
+                    Log::warning("API call failed: {$e->getMessage()}. Retrying...");
+                    sleep(2);
+                }
+            }
+        }
+
+        throw new \Exception("API call failed after {$maxAttempts} attempts: ".$lastException->getMessage(), 0, $lastException);
+    }
+
+    /**
+     * Fetch bio-schema from nmrxiv.org API with retry logic.
+     *
+     * Requests {bioschema_api_url}/{studyIdentifier} with a 60-second timeout and
+     * returns the decoded JSON body. At least one attempt is always made, with a
+     * two-second pause between attempts.
+     *
+     * @param  string  $studyIdentifier  Identifier appended to the configured base URL.
+     * @param  int  $retries  Maximum number of attempts; values below 1 are treated as 1.
+     * @return array Decoded JSON response.
+     *
+     * @throws \Exception when every attempt fails or the response is not a JSON array/object
+     */
+    protected function fetchBioSchema(string $studyIdentifier, int $retries): array
+    {
+        // Guarantee one attempt so the final throw always has a recorded cause.
+        $maxAttempts = max(1, $retries);
+        $lastException = null;
+        $baseUrl = config('nmrxiv.spectra_parsing.bioschema_api_url');
+        $url = "{$baseUrl}/{$studyIdentifier}";
+
+        for ($attempt = 1; $attempt <= $maxAttempts; $attempt++) {
+            try {
+                Log::debug("Bio-schema attempt {$attempt}/{$maxAttempts}...");
+
+                $response = Http::timeout(60)->get($url);
+
+                if (! $response->successful()) {
+                    throw new \Exception("Bio-schema request failed with status {$response->status()}");
+                }
+
+                $payload = $response->json();
+
+                // Same guard as the NMRKit call: keep a non-array body inside the retry loop.
+                if (! is_array($payload)) {
+                    throw new \Exception('Bio-schema response was not a JSON object or array');
+                }
+
+                return $payload;
+            } catch (\Exception $e) {
+                $lastException = $e;
+                if ($attempt < $maxAttempts) {
+                    Log::warning("Bio-schema fetch failed: {$e->getMessage()}. Retrying...");
+                    sleep(2);
+                }
+            }
+        }
+
+        throw new \Exception("Bio-schema fetch failed after {$maxAttempts} attempts: ".$lastException->getMessage(), 0, $lastException);
+    }
+
+    /**
+     * Save PNG image from base64 data.
+     *
+     * @param  string  $base64Data  Base64 payload, optionally prefixed with a "data:image/<type>;base64," header.
+     * @param  string  $outputPath  Path of the file to write.
+     *
+     * @throws \Exception when the payload is not valid base64 or the file cannot be written
+     */
+    protected function savePNGFromBase64(string $base64Data, string $outputPath): void
+    {
+        // Remove data:image/png;base64, prefix if present
+        $base64Data = preg_replace('/^data:image\/[a-z]+;base64,/', '', $base64Data) ?? $base64Data;
+
+        // Strict mode: without it base64_decode() silently drops invalid characters
+        // instead of returning false, so the check below could never fire for them.
+        $imageData = base64_decode($base64Data, true);
+
+        if ($imageData === false) {
+            throw new \Exception('Failed to decode base64 image data');
+        }
+
+        if (file_put_contents($outputPath, $imageData) === false) {
+            throw new \Exception("Failed to write image file: {$outputPath}");
+        }
+    }
+
+    /**
+     * Generate BagIt manifests using whikloj/BagItTools library.
+     *
+     * Tries the library first; if it throws, the failure is logged as a warning
+     * and generateBagItManually() writes the tag and manifest files instead.
+     *
+     * NOTE(review): BagItTools appears to reserve Bag::create() for directories that
+     * do not exist yet (Bag::load() for existing ones) and package() for writing an
+     * archive file. $bagPath already exists when this runs, so the library branch may
+     * always throw and fall through to the manual generator - confirm against the
+     * library documentation.
+     *
+     * @param  string  $bagPath  Root directory of the bag.
+     */
+    protected function generateBagItManifests(string $bagPath): void
+    {
+        try {
+            // Create bag using BagItTools library
+            $bag = Bag::create($bagPath);
+
+            // Update bag with checksums
+            $bag->update();
+
+            // Package the bag (this generates manifests)
+            $bag->package($bagPath);
+
+            Log::debug('Used BagItTools library for manifest generation');
+        } catch (\Exception $e) {
+            Log::warning("BagIt library failed: {$e->getMessage()}. Falling back to manual generation...");
+
+            // Fallback: Generate manually
+            $this->generateBagItManually($bagPath);
+        }
+    }
+
+    /**
+     * Manually generate BagIt manifests.
+     *
+     * Writes bagit.txt, manifest-sha256.txt, bag-info.txt and
+     * tagmanifest-sha256.txt directly into $bagPath. The payload manifest lists a
+     * SHA-256 hash for every file found under $bagPath/data, with paths relative
+     * to the bag root.
+     *
+     * @param  string  $bagPath  Root directory of the bag.
+     */
+    protected function generateBagItManually(string $bagPath): void
+    {
+        // Create bagit.txt
+        $bagitContent = "BagIt-Version: 1.0\nTag-File-Character-Encoding: UTF-8\n";
+        file_put_contents($bagPath.'/bagit.txt', $bagitContent);
+
+        // Create manifest-sha256.txt
+        $manifestLines = [];
+        $dataPath = $bagPath.'/data';
+        $bagPrefix = $bagPath.'/';
+
+        if (is_dir($dataPath)) {
+            $files = new \RecursiveIteratorIterator(
+                new \RecursiveDirectoryIterator($dataPath),
+                \RecursiveIteratorIterator::LEAVES_ONLY
+            );
+
+            foreach ($files as $file) {
+                if ($file->isFile()) {
+                    $pathname = $file->getPathname();
+
+                    // Strip the bag root only where it is the leading prefix; a global
+                    // replace would also remove the same text if it recurs deeper in
+                    // the path and so corrupt the manifest entry.
+                    $relativePath = str_starts_with($pathname, $bagPrefix)
+                        ? substr($pathname, strlen($bagPrefix))
+                        : $pathname;
+                    $hash = hash_file('sha256', $pathname);
+                    $manifestLines[] = "{$hash} {$relativePath}";
+                }
+            }
+        }
+
+        file_put_contents($bagPath.'/manifest-sha256.txt', implode("\n", $manifestLines)."\n");
+
+        // Create bag-info.txt
+        $bagInfoContent = 'Payload-Oxum: '.$this->calculatePayloadOxum($dataPath)."\n";
+        $bagInfoContent .= 'Bagging-Date: '.date('Y-m-d')."\n";
+        $bagInfoContent .= "Bag-Software-Agent: Laravel-Queue-ProcessStudySpectraJob/1.0\n";
+        file_put_contents($bagPath.'/bag-info.txt', $bagInfoContent);
+
+        // Create tagmanifest-sha256.txt (hashes of the tag files written above)
+        $tagManifestLines = [];
+        foreach (['bagit.txt', 'bag-info.txt', 'manifest-sha256.txt'] as $tagFile) {
+            $tagFilePath = $bagPath.'/'.$tagFile;
+            if (file_exists($tagFilePath)) {
+                $hash = hash_file('sha256', $tagFilePath);
+                $tagManifestLines[] = "{$hash} {$tagFile}";
+            }
+        }
+        file_put_contents($bagPath.'/tagmanifest-sha256.txt', implode("\n", $tagManifestLines)."\n");
+
+        Log::debug('Manual BagIt generation complete');
+    }
+
+    /**
+     * Calculate Payload-Oxum (total bytes.total files).
+     *
+     * @param  string  $dataPath  Directory whose files are counted recursively.
+     * @return string "<total bytes>.<file count>"; "0.0" when $dataPath is not a directory.
+     */
+    protected function calculatePayloadOxum(string $dataPath): string
+    {
+        $byteCount = 0;
+        $fileCount = 0;
+
+        // Nothing to walk: report the zero totals.
+        if (! is_dir($dataPath)) {
+            return "{$byteCount}.{$fileCount}";
+        }
+
+        $entries = new \RecursiveIteratorIterator(
+            new \RecursiveDirectoryIterator($dataPath),
+            \RecursiveIteratorIterator::LEAVES_ONLY
+        );
+
+        foreach ($entries as $entry) {
+            // Only regular files contribute to the totals.
+            if (! $entry->isFile()) {
+                continue;
+            }
+
+            $byteCount += $entry->getSize();
+            $fileCount++;
+        }
+
+        return "{$byteCount}.{$fileCount}";
+    }
+}
diff --git a/app/Models/Study.php b/app/Models/Study.php index ef9c464f..e3af0657 100644 --- a/app/Models/Study.php +++ b/app/Models/Study.php @@ -64,6 +64,8 @@ class Study extends Model implements Auditable 'doi', 'identifier', 'validation_id', + 'metadata_bagit_generation_status', + 'metadata_bagit_generation_logs', ]; /** @@ -75,6 +77,7 @@ protected function casts(): array 'citations' => 'array', 'molecules' => 'array', 'processing_logs' => 'array', + 'metadata_bagit_generation_logs' => 'array', 'starred' => 'boolean', 'is_public' => 'boolean', 'is_archived' => 'boolean', diff --git a/composer.json b/composer.json index 689d1c71..9345bd8a 100644 --- a/composer.json +++ b/composer.json @@ -42,7 +42,8 @@ "spatie/laravel-tags": "^4.9", "spatie/laravel-welcome-notification": "^2.4", "spatie/schema-org": "^3.11", - "tightenco/ziggy": "^2.5" + "tightenco/ziggy": "^2.5", + "whikloj/bagittools": "^6.0" }, "require-dev": { "brianium/paratest": "^7.8", diff --git a/composer.lock b/composer.lock index e9e3a2ac..cb134d7a 100644 --- a/composer.lock +++ b/composer.lock @@ -4,7 +4,7 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#installing-dependencies", "This file is @generated automatically" ], - "content-hash": "77eb045659db2df85edae58563f57e29", + "content-hash": "9ba55c5bd2dcd41688398ab7e30420a2", "packages": [ { "name": "aws/aws-crt-php", @@ -4868,6 +4868,235 @@ }, "time": "2020-10-15T08:29:30+00:00" }, + { + "name": "pear/archive_tar", + "version": "1.6.0", + "source": { + "type": "git", + "url": 
"https://github.com/pear/Archive_Tar.git", + "reference": "dc3285537f1832da8ddbbe45f5a007248b6cc00e" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/pear/Archive_Tar/zipball/dc3285537f1832da8ddbbe45f5a007248b6cc00e", + "reference": "dc3285537f1832da8ddbbe45f5a007248b6cc00e", + "shasum": "" + }, + "require": { + "pear/pear-core-minimal": "^1.10.0alpha2", + "php": ">=5.4.0" + }, + "require-dev": { + "phpunit/phpunit": "*" + }, + "suggest": { + "ext-bz2": "Bz2 compression support.", + "ext-xz": "Lzma2 compression support.", + "ext-zlib": "Gzip compression support." + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "1.4.x-dev" + } + }, + "autoload": { + "psr-0": { + "Archive_Tar": "" + } + }, + "notification-url": "https://packagist.org/downloads/", + "include-path": [ + "./" + ], + "license": [ + "BSD-2-Clause" + ], + "authors": [ + { + "name": "Vincent Blavet", + "email": "vincent@phpconcept.net" + }, + { + "name": "Greg Beaver", + "email": "greg@chiaraquartet.net" + }, + { + "name": "Michiel Rook", + "email": "mrook@php.net" + } + ], + "description": "Tar file management class with compression support (gzip, bzip2, lzma2)", + "homepage": "https://github.com/pear/Archive_Tar", + "keywords": [ + "archive", + "tar" + ], + "support": { + "issues": "http://pear.php.net/bugs/search.php?cmd=display&package_name[]=Archive_Tar", + "source": "https://github.com/pear/Archive_Tar" + }, + "time": "2025-07-19T14:49:16+00:00" + }, + { + "name": "pear/console_getopt", + "version": "v1.4.3", + "source": { + "type": "git", + "url": "https://github.com/pear/Console_Getopt.git", + "reference": "a41f8d3e668987609178c7c4a9fe48fecac53fa0" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/pear/Console_Getopt/zipball/a41f8d3e668987609178c7c4a9fe48fecac53fa0", + "reference": "a41f8d3e668987609178c7c4a9fe48fecac53fa0", + "shasum": "" + }, + "type": "library", + "autoload": { + "psr-0": { + "Console": "./" + } + }, + 
"notification-url": "https://packagist.org/downloads/", + "include-path": [ + "./" + ], + "license": [ + "BSD-2-Clause" + ], + "authors": [ + { + "name": "Andrei Zmievski", + "email": "andrei@php.net", + "role": "Lead" + }, + { + "name": "Stig Bakken", + "email": "stig@php.net", + "role": "Developer" + }, + { + "name": "Greg Beaver", + "email": "cellog@php.net", + "role": "Helper" + } + ], + "description": "More info available on: http://pear.php.net/package/Console_Getopt", + "support": { + "issues": "http://pear.php.net/bugs/search.php?cmd=display&package_name[]=Console_Getopt", + "source": "https://github.com/pear/Console_Getopt" + }, + "time": "2019-11-20T18:27:48+00:00" + }, + { + "name": "pear/pear-core-minimal", + "version": "v1.10.18", + "source": { + "type": "git", + "url": "https://github.com/pear/pear-core-minimal.git", + "reference": "c7b55789d01de0ce090d289b73f1bbd6a2f113b1" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/pear/pear-core-minimal/zipball/c7b55789d01de0ce090d289b73f1bbd6a2f113b1", + "reference": "c7b55789d01de0ce090d289b73f1bbd6a2f113b1", + "shasum": "" + }, + "require": { + "pear/console_getopt": "~1.4", + "pear/pear_exception": "~1.0", + "php": ">=5.4" + }, + "replace": { + "rsky/pear-core-min": "self.version" + }, + "type": "library", + "autoload": { + "classmap": [ + "src/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "include-path": [ + "src/" + ], + "license": [ + "BSD-3-Clause" + ], + "authors": [ + { + "name": "Christian Weiske", + "email": "cweiske@php.net", + "role": "Lead" + } + ], + "description": "Minimal set of PEAR core files to be used as composer dependency", + "support": { + "issues": "http://pear.php.net/bugs/search.php?cmd=display&package_name[]=PEAR", + "source": "https://github.com/pear/pear-core-minimal" + }, + "time": "2025-12-14T20:37:07+00:00" + }, + { + "name": "pear/pear_exception", + "version": "v1.0.2", + "source": { + "type": "git", + "url": 
"https://github.com/pear/PEAR_Exception.git", + "reference": "b14fbe2ddb0b9f94f5b24cf08783d599f776fff0" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/pear/PEAR_Exception/zipball/b14fbe2ddb0b9f94f5b24cf08783d599f776fff0", + "reference": "b14fbe2ddb0b9f94f5b24cf08783d599f776fff0", + "shasum": "" + }, + "require": { + "php": ">=5.2.0" + }, + "require-dev": { + "phpunit/phpunit": "<9" + }, + "type": "class", + "extra": { + "branch-alias": { + "dev-master": "1.0.x-dev" + } + }, + "autoload": { + "classmap": [ + "PEAR/" + ] + }, + "notification-url": "https://packagist.org/downloads/", + "include-path": [ + "." + ], + "license": [ + "BSD-2-Clause" + ], + "authors": [ + { + "name": "Helgi Thormar", + "email": "dufuz@php.net" + }, + { + "name": "Greg Beaver", + "email": "cellog@php.net" + } + ], + "description": "The PEAR Exception base class.", + "homepage": "https://github.com/pear/PEAR_Exception", + "keywords": [ + "exception" + ], + "support": { + "issues": "http://pear.php.net/bugs/search.php?cmd=display&package_name[]=PEAR_Exception", + "source": "https://github.com/pear/PEAR_Exception" + }, + "time": "2021-03-21T15:43:46+00:00" + }, { "name": "php-http/discovery", "version": "1.20.0", @@ -11040,6 +11269,72 @@ ], "time": "2024-11-21T01:49:47+00:00" }, + { + "name": "whikloj/bagittools", + "version": "6.0.0", + "source": { + "type": "git", + "url": "https://github.com/whikloj/BagItTools.git", + "reference": "30697b4692cc874bd115b1b7d031001e7fe77f30" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/whikloj/BagItTools/zipball/30697b4692cc874bd115b1b7d031001e7fe77f30", + "reference": "30697b4692cc874bd115b1b7d031001e7fe77f30", + "shasum": "" + }, + "require": { + "ext-curl": "*", + "ext-intl": "*", + "ext-mbstring": "*", + "ext-zip": "*", + "pear/archive_tar": "^1.4.14", + "php": ">=8.2", + "symfony/console": ">7" + }, + "require-dev": { + "donatj/mock-webserver": "^2.6", + "phpstan/phpstan": "^1.4", + "phpunit/phpunit": 
"^9.6", + "squizlabs/php_codesniffer": "^3.5" + }, + "type": "library", + "extra": { + "symfony": { + "allow-contrib": false + } + }, + "autoload": { + "psr-4": { + "whikloj\\BagItTools\\": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Jared Whiklo", + "email": "jwhiklo@gmail.com", + "role": "Developer" + } + ], + "description": "A PHP library to manipulate and verify BagIt bags.", + "homepage": "https://github.com/whikloj/bagittools", + "keywords": [ + "bagit", + "bags", + "data", + "integrity", + "transmission" + ], + "support": { + "issues": "https://github.com/whikloj/BagItTools/issues", + "source": "https://github.com/whikloj/BagItTools/tree/6.0.0" + }, + "time": "2026-01-07T22:17:43+00:00" + }, { "name": "zircote/swagger-php", "version": "5.8.1", @@ -13285,5 +13580,5 @@ "php": "^8.4" }, "platform-dev": {}, - "plugin-api-version": "2.6.0" + "plugin-api-version": "2.9.0" } diff --git a/config/nmrxiv.php b/config/nmrxiv.php index 59a88466..2d8d5ae3 100644 --- a/config/nmrxiv.php +++ b/config/nmrxiv.php @@ -14,4 +14,33 @@ 'cool_off_period' => (int) env('COOL_OFF_PERIOD', 30), + /* + |-------------------------------------------------------------------------- + | Spectra Parsing Configuration + |-------------------------------------------------------------------------- + | + | Configuration for the spectra parsing queue system including API endpoints, + | storage locations, retry logic, and timeout values. 
+ | + */ + + 'spectra_parsing' => [ + // API Endpoints + 'nmrkit_api_url' => env('NMRKIT_API_URL', 'https://nmrkit.nmrxiv.org/latest/spectra/parse/url'), + 'bioschema_api_url' => env('BIOSCHEMA_API_URL', 'https://nmrxiv.org/api/v1/schemas/bioschemas'), + + // Storage Configuration + 'storage_disk' => env('SPECTRA_STORAGE_DISK', 'local'), + 'storage_path' => env('SPECTRA_STORAGE_PATH', 'spectra_parse'), + + // Job Configuration + 'job_tries' => (int) env('SPECTRA_JOB_TRIES', 3), + 'job_timeout' => (int) env('SPECTRA_JOB_TIMEOUT', 600), + + // Network Configuration + 'retry_count' => (int) env('SPECTRA_RETRY_COUNT', 3), + 'download_timeout' => (int) env('SPECTRA_DOWNLOAD_TIMEOUT', 300), + 'api_timeout' => (int) env('SPECTRA_API_TIMEOUT', 300), + ], + ]; diff --git a/database/migrations/2026_02_19_120638_add_metadata_extraction_bagit_generation_columns_to_studies_table.php b/database/migrations/2026_02_19_120638_add_metadata_extraction_bagit_generation_columns_to_studies_table.php new file mode 100644 index 00000000..d9f1e450 --- /dev/null +++ b/database/migrations/2026_02_19_120638_add_metadata_extraction_bagit_generation_columns_to_studies_table.php @@ -0,0 +1,29 @@ +string('metadata_bagit_generation_status')->nullable()->after('is_public'); + $table->json('metadata_bagit_generation_logs')->nullable()->after('metadata_bagit_generation_status'); + }); + } + + /** + * Reverse the migrations. + */ + public function down(): void + { + Schema::table('studies', function (Blueprint $table) { + $table->dropColumn(['metadata_bagit_generation_status', 'metadata_bagit_generation_logs']); + }); + } +};