diff --git a/docs/source/index.md b/docs/source/index.md
index 85546af3..41e0d429 100644
--- a/docs/source/index.md
+++ b/docs/source/index.md
@@ -12,6 +12,7 @@ quick_tutorial.md
 :caption: Using the Tools
 migration_process.md
 migration_tasks.md
+migration_reports.md
 mapping_file_based_mapping.md
 marc_rule_based_mapping.md
 Transforming inventory
diff --git a/docs/source/migration_reports.md b/docs/source/migration_reports.md
new file mode 100644
index 00000000..c9c1f425
--- /dev/null
+++ b/docs/source/migration_reports.md
@@ -0,0 +1,320 @@
+# Migration Reports
+
+Migration reports are a key output of the FOLIO Migration Tools, providing detailed statistics and diagnostics for each migration task. These reports help you understand what happened during a transformation or data load, identify data quality issues, and track progress across migration iterations.
+
+## Overview
+
+### Purpose
+
+Every migration task generates a migration report upon completion. These reports serve several purposes:
+
+- **Transparency**: See exactly what the tools did with your data
+- **Data Quality**: Identify records that failed validation or mapping
+- **Debugging**: Understand why certain records may not have migrated as expected
+- **Documentation**: Create an audit trail of each migration run
+- **Iteration Planning**: Use statistics to prioritize data cleaning efforts
+
+### Report Formats
+
+Each task generates its report in two formats:
+
+| Format | File Location | Purpose |
+|--------|---------------|---------|
+| **Markdown** | `reports/report_<date>_<task_name>.md` | Human-readable report for review and documentation |
+| **JSON** | `reports/.raw/raw_report_<date>_<task_name>.json` | Machine-readable format for automated processing and analysis |
+
+```{note}
+The raw JSON reports were introduced to enable downstream processing such as aggregating statistics across multiple runs, building dashboards, or integrating with data quality monitoring tools.
+```
+
+### When Reports Are Generated
+
+Reports are generated at the end of each task's execution, during the "wrap-up" phase. This means:
+
+- For **transformation tasks**: The report reflects all records processed and any mapping issues encountered
+- For **posting/loading tasks**: The report reflects API responses, successes, and failures
+- For **migration tasks** (loans, requests, reserves): The report reflects transaction processing results
+
+## Report Structure
+
+### Markdown Report Sections
+
+A typical markdown migration report contains the following sections:
+
+#### 1. Header and Introduction
+The report begins with a title and a brief introduction explaining what the report contains.
+
+#### 2. Timings
+A timing breakdown showing:
+
+| Measure | Description |
+|---------|-------------|
+| Time Started | UTC timestamp when the task began |
+| Time Finished | UTC timestamp when the task completed |
+| Elapsed time | Total duration of the task |
+
+#### 3. General Statistics
+High-level counts such as:
+
+- Total records processed
+- Records successfully transformed/posted
+- Records that failed
+- Files processed
+
+#### 4. Task-Specific Sections
+Each task type adds its own statistical sections. These appear as expandable `<details>` blocks containing tables of measures and counts. Examples include:
+
+- **Mapping statistics**: Which fields were mapped and how often
+- **Validation results**: Records failing validation rules
+- **Reference data mapping**: How legacy values mapped to FOLIO reference data
+- **Error breakdowns**: Categorized error types and counts
+
+### JSON Report Structure
+
+The raw JSON report contains the same data in a structured format suitable for programmatic access:
+
+```json
+{
+  "GeneralStatistics": {
+    "blurb_id": "GeneralStatistics",
+    "Records processed": 10000,
+    "Records successfully transformed": 9850,
+    "Records with errors": 150
+  },
+  "MappedLocations": {
+    "blurb_id": "MappedLocations",
+    "Main Library": 5000,
+    "Branch Library": 3000,
+    "Special Collections": 1850
+  }
+}
+```
+
+Each top-level key represents a report section. The `blurb_id` field links to the translated section title, while all other keys are measure names with their counts.
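+
+For example, a small script can walk every section of a raw report and print its measures. This is a minimal sketch; the file name is a placeholder, and the only assumption is the structure shown above (one object per section, with `blurb_id` stored alongside the counts):
+
+```python
+# Sketch: list every section and measure in a raw JSON report.
+# The path is a placeholder; adjust it to your iteration and task name.
+import json
+
+with open("reports/.raw/raw_report_20260124_transform_bibs.json") as f:
+    report = json.load(f)
+
+for section, measures in report.items():
+    print(f"== {section} ==")
+    for measure, count in measures.items():
+        if measure == "blurb_id":  # metadata key, not a count
+            continue
+        print(f"  {measure}: {count}")
+```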
+
+## File Locations
+
+Migration reports are stored in the iteration's `reports` folder:
+
+```
+iterations/
+└── <iteration_identifier>/
+    ├── reports/
+    │   ├── report_<date>_<task_name>.md            # Markdown report
+    │   ├── data_issues_log_<date>_<task_name>.tsv  # Detailed issues
+    │   ├── log_<task_name>_<date>_<time>.log       # Execution log
+    │   └── .raw/
+    │       └── raw_report_<date>_<task_name>.json  # JSON report
+    ├── results/
+    └── source_data/
+```
+
+```{tip}
+The `.raw` folder is created automatically and contains raw JSON reports. This folder can be used as input for reporting scripts or data quality dashboards.
+```
+
+## Task-Specific Reports
+
+### Transformation Tasks
+
+Transformation tasks convert legacy data into FOLIO format. Their reports focus on mapping statistics and data validation.
+
+#### BibsTransformer
+Transforms MARC bibliographic records to FOLIO Instance records.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Record counts, files processed |
+| RecordStatus | Breakdown by MARC Leader/05 status |
+| RecourceTypeMapping | Instance type mappings from the 336 field |
+| InstanceFormat | Format mappings |
+| MappedIdentifierTypes | Identifier type usage |
+| MappedContributorTypes | Contributor type mappings |
+| PrecedingSuccedingTitles | Linked title relationships |
+
+#### HoldingsMarcTransformer
+Transforms MFHD (MARC Holdings) records to FOLIO Holdings records.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Record counts |
+| HoldingsTypeMapping | Holdings type assignments |
+| LocationMapping | Location code mappings |
+| CallNumberTypeMapping | Call number type usage |
+
+#### HoldingsCsvTransformer
+Transforms CSV/TSV holdings data to FOLIO Holdings records.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Record counts, merging statistics |
+| LocationMapping | Location mappings |
+| HoldingsTypeMapping | Holdings type assignments |
+
+#### ItemsTransformer
+Transforms legacy item data to FOLIO Item records.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Record counts |
+| MaterialTypeMapping | Material type mappings |
+| LoanTypeMapping | Loan type mappings |
+| ItemStatusMapping | Status mappings |
+| LocationMapping | Effective location mappings |
+
+#### UserTransformer
+Transforms patron/user data to FOLIO User records.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Record counts |
+| PatronGroupMapping | Patron group assignments |
+| AddressTypeMapping | Address type mappings |
+| DepartmentMapping | Department mappings |
+
+#### OrganizationTransformer
+Transforms vendor/organization data for acquisitions.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Record counts |
+| OrganizationTypeMapping | Organization type assignments |
+
+#### OrdersTransformer
+Transforms purchase order data.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Order and order line counts |
+| OrderTypeMapping | Order type assignments |
+| AcquisitionMethodMapping | Acquisition method mappings |
+
+#### ManualFeeFinesTransformer
+Transforms fee/fine data.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Record counts |
+| FeeFineTypeMapping | Fee/fine type mappings |
+
+### Loading Tasks
+
+#### BatchPoster
+Posts transformed records to FOLIO via batch APIs.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Records processed, posted, failed |
+| Details | Error message breakdowns |
+
+```{attention}
+If `rerun_failed_records` is enabled, the BatchPoster report will show statistics for both the initial run and the retry run.
+```
+
+### Circulation Migration Tasks
+
+These tasks migrate active circulation transactions.
+
+#### LoansMigrator
+Migrates open loans from the legacy system.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Loans processed, checked out, failed |
+| DiscardedLoans | Reasons for discarded loans |
+
+#### RequestsMigrator
+Migrates open hold/recall requests.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Requests processed, created, failed |
+| DiscardedLoans | Reasons for discarded requests |
+
+#### ReservesMigrator
+Migrates course reserve relationships.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Reserves processed, created, failed |
+| DiscardedReserves | Reasons for discarded reserves |
+
+#### CoursesMigrator
+Migrates course and course listing data.
+
+| Report Section | Description |
+|----------------|-------------|
+| GeneralStatistics | Courses and listings processed |
+
+## Additional Output Files
+
+### Data Issues Log
+
+In addition to the migration report, transformation tasks generate a **data issues log** as a TSV file:
+
+```
+reports/data_issues_log_<date>_<task_name>.tsv
+```
+
+This file contains per-record issues, with columns for:
+
+- Record identifier (row number or legacy ID)
+- Issue type
+- Issue description
+- Affected field
+
+Use this file to create targeted data cleaning task lists.
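+
+To turn the log into a prioritized cleaning list, you can tally how often each issue type occurs. The sketch below assumes a tab-separated file with the columns listed above (identifier, issue type, description, affected field) and no header row; the file name is a placeholder, and you may need to adjust the column index if your log differs:
+
+```python
+# Sketch: count issue types in a data issues log.
+# Assumes a tab-separated layout with the issue type in the second column.
+import csv
+from collections import Counter
+
+issue_counts = Counter()
+with open("reports/data_issues_log_20260124_transform_bibs.tsv", newline="") as f:
+    for row in csv.reader(f, delimiter="\t"):
+        if len(row) > 1:
+            issue_counts[row[1]] += 1  # column 1 assumed to hold the issue type
+
+for issue_type, count in issue_counts.most_common():
+    print(f"{count:>8}  {issue_type}")
+```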
+
+### Mapping Reports
+
+Transformation tasks that use mapping files append a **mapping report** to the markdown report. This section shows:
+
+- Total records processed
+- FOLIO fields that were mapped and their frequency
+- Legacy fields that were mapped and their frequency
+
+This helps identify:
+
+- Unmapped legacy fields that might contain valuable data
+- FOLIO fields that were never populated
+
+## Using Reports for Data Cleaning
+
+### Recommended Workflow
+
+1. **Run an initial transformation** on a sample or full dataset
+2. **Review the migration report** for high-level statistics
+3. **Check the data issues log** for specific record problems
+4. **Prioritize fixes** based on frequency counts in the report
+5. **Clean source data** or adjust mapping files
+6. **Re-run the transformation** and compare reports (see the comparison sketch after this list)
+7. **Iterate** until data quality meets requirements
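+
+Comparing reports between iterations is easiest with the raw JSON files. Here is a minimal sketch, assuming two iterations with the default report locations; the iteration names and file names are placeholders for your own layout:
+
+```python
+# Sketch: compare GeneralStatistics between two migration iterations.
+# Paths and iteration names are placeholders.
+import json
+
+def load_stats(path):
+    with open(path) as f:
+        return json.load(f).get("GeneralStatistics", {})
+
+before = load_stats("iterations/test_run_1/reports/.raw/raw_report_20260110_transform_bibs.json")
+after = load_stats("iterations/test_run_2/reports/.raw/raw_report_20260124_transform_bibs.json")
+
+for measure in sorted(set(before) | set(after)):
+    if measure == "blurb_id":
+        continue
+    print(f"{measure}: {before.get(measure, 0)} -> {after.get(measure, 0)}")
+```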
+
+### Common Statistics to Watch
+
+| Statistic | What to Look For |
+|-----------|------------------|
+| Failed records | Should be 0 or a very low percentage |
+| Unmapped values | May indicate missing reference data mappings |
+| Default fallback usage | High counts may indicate mapping gaps |
+| Validation errors | Records that don't meet FOLIO schema requirements |
+
+### Automating Report Analysis
+
+The raw JSON reports enable automated analysis. Example use cases:
+
+- **Trend tracking**: Compare statistics across migration iterations
+- **Quality gates**: Fail CI/CD pipelines if error rates exceed thresholds
+- **Dashboards**: Aggregate statistics for project status reporting
+- **Alerting**: Notify the team when specific error types appear
+
+```python
+# Example: Reading a raw JSON report
+import json
+
+with open("reports/.raw/raw_report_20260124_transform_bibs.json") as f:
+    report = json.load(f)
+
+total = report["GeneralStatistics"].get("Records processed", 0)
+failed = report["GeneralStatistics"].get("Records with errors", 0)
+success_rate = ((total - failed) / total) * 100 if total > 0 else 0
+
+print(f"Success rate: {success_rate:.2f}%")
+```
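+
+Building on the example above, a quality gate for a CI pipeline only needs to exit with a non-zero status when the error rate crosses a threshold. This is a minimal sketch; the 1% threshold and the report path are assumptions to adapt to your project:
+
+```python
+# Sketch: fail a pipeline step if the error rate exceeds a threshold.
+import json
+import sys
+
+MAX_ERROR_RATE = 1.0  # percent; assumed project-specific threshold
+
+with open("reports/.raw/raw_report_20260124_transform_bibs.json") as f:
+    stats = json.load(f)["GeneralStatistics"]
+
+total = stats.get("Records processed", 0)
+failed = stats.get("Records with errors", 0)
+error_rate = (failed / total) * 100 if total > 0 else 0
+
+if error_rate > MAX_ERROR_RATE:
+    print(f"Error rate {error_rate:.2f}% exceeds {MAX_ERROR_RATE}%")
+    sys.exit(1)
+print(f"Error rate {error_rate:.2f}% is within the threshold")
+```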
diff --git a/pyproject.toml b/pyproject.toml
index ec505413..a87c5e5a 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "folio_migration_tools"
-version = "1.10.0"
+version = "1.10.1"
 description = "A tool allowing you to migrate data from legacy ILS:s (Library systems) into FOLIO LSP"
 authors = [
     {name = "Theodor Tolstoy", email = "github.teddes@tolstoy.se"},
diff --git a/src/folio_migration_tools/migration_report.py b/src/folio_migration_tools/migration_report.py
index d3dc0a3b..c4d0ddde 100644
--- a/src/folio_migration_tools/migration_report.py
+++ b/src/folio_migration_tools/migration_report.py
@@ -50,7 +50,7 @@ def add_general_statistics(self, measure_to_add: str):
         """
         self.add("GeneralStatistics", measure_to_add)
 
-    def _write_json_report(self, report_file):
+    def write_json_report(self, report_file):
         """Writes the raw migration report data to a JSON file.
 
         Args:
diff --git a/src/folio_migration_tools/migration_tasks/batch_poster.py b/src/folio_migration_tools/migration_tasks/batch_poster.py
index 09f35dce..8b88ca13 100644
--- a/src/folio_migration_tools/migration_tasks/batch_poster.py
+++ b/src/folio_migration_tools/migration_tasks/batch_poster.py
@@ -901,6 +901,8 @@ def wrap_up(self):
             report_file,
             self.start_datetime,
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.migration_report.write_json_report(raw_report_file)
         self.clean_out_empty_logs()
 
     def rerun_run(self):
diff --git a/src/folio_migration_tools/migration_tasks/bibs_transformer.py b/src/folio_migration_tools/migration_tasks/bibs_transformer.py
index 2823a5bb..ebff3e65 100644
--- a/src/folio_migration_tools/migration_tasks/bibs_transformer.py
+++ b/src/folio_migration_tools/migration_tasks/bibs_transformer.py
@@ -163,6 +163,8 @@ def wrap_up(self):
             self.mapper.mapped_folio_fields,
             self.mapper.mapped_legacy_fields,
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.mapper.migration_report.write_json_report(raw_report_file)
         logging.info(
             "Done. Transformation report written to %s",
diff --git a/src/folio_migration_tools/migration_tasks/courses_migrator.py b/src/folio_migration_tools/migration_tasks/courses_migrator.py
index 40780bc1..7fb533a4 100644
--- a/src/folio_migration_tools/migration_tasks/courses_migrator.py
+++ b/src/folio_migration_tools/migration_tasks/courses_migrator.py
@@ -178,6 +178,8 @@ def wrap_up(self):
         self.mapper.migration_report.write_migration_report(
             i18n.t("Courses migration report"), report_file, self.mapper.start_datetime
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.mapper.migration_report.write_json_report(raw_report_file)
         self.clean_out_empty_logs()
diff --git a/src/folio_migration_tools/migration_tasks/holdings_csv_transformer.py b/src/folio_migration_tools/migration_tasks/holdings_csv_transformer.py
index a455e7ba..e6163559 100644
--- a/src/folio_migration_tools/migration_tasks/holdings_csv_transformer.py
+++ b/src/folio_migration_tools/migration_tasks/holdings_csv_transformer.py
@@ -368,6 +368,8 @@ def wrap_up(self):
             self.mapper.mapped_folio_fields,
             self.mapper.mapped_legacy_fields,
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.mapper.migration_report.write_json_report(raw_report_file)
         logging.info("All done!")
         self.clean_out_empty_logs()
diff --git a/src/folio_migration_tools/migration_tasks/holdings_marc_transformer.py b/src/folio_migration_tools/migration_tasks/holdings_marc_transformer.py
index 8d615abc..bb8ce5a6 100644
--- a/src/folio_migration_tools/migration_tasks/holdings_marc_transformer.py
+++ b/src/folio_migration_tools/migration_tasks/holdings_marc_transformer.py
@@ -360,6 +360,8 @@ def wrap_up(self):
             self.mapper.mapped_folio_fields,
             self.mapper.mapped_legacy_fields,
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.mapper.migration_report.write_json_report(raw_report_file)
         logging.info(
             "Done. Transformation report written to %s",
diff --git a/src/folio_migration_tools/migration_tasks/items_transformer.py b/src/folio_migration_tools/migration_tasks/items_transformer.py
index e6563829..af8a2992 100644
--- a/src/folio_migration_tools/migration_tasks/items_transformer.py
+++ b/src/folio_migration_tools/migration_tasks/items_transformer.py
@@ -481,5 +481,7 @@ def wrap_up(self):
             self.mapper.mapped_folio_fields,
             self.mapper.mapped_legacy_fields,
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.mapper.migration_report.write_json_report(raw_report_file)
         self.clean_out_empty_logs()
         logging.info("All done!")
diff --git a/src/folio_migration_tools/migration_tasks/loans_migrator.py b/src/folio_migration_tools/migration_tasks/loans_migrator.py
index 8e395b59..669e2c42 100644
--- a/src/folio_migration_tools/migration_tasks/loans_migrator.py
+++ b/src/folio_migration_tools/migration_tasks/loans_migrator.py
@@ -314,6 +314,8 @@ def wrap_up(self):
         self.migration_report.write_migration_report(
             i18n_t("Loans migration report"), report_file, self.start_datetime
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.migration_report.write_json_report(raw_report_file)
         self.clean_out_empty_logs()
 
     def write_failed_loans_to_file(self):
diff --git a/src/folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py b/src/folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py
index e9037bd6..86432608 100644
--- a/src/folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py
+++ b/src/folio_migration_tools/migration_tasks/manual_fee_fines_transformer.py
@@ -183,5 +183,7 @@ def wrap_up(self):
             self.mapper.mapped_folio_fields,
             self.mapper.mapped_legacy_fields,
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.mapper.migration_report.write_json_report(raw_report_file)
         self.clean_out_empty_logs()
diff --git a/src/folio_migration_tools/migration_tasks/orders_transformer.py b/src/folio_migration_tools/migration_tasks/orders_transformer.py
index 8425fc11..971ee20c 100644
--- a/src/folio_migration_tools/migration_tasks/orders_transformer.py
+++ b/src/folio_migration_tools/migration_tasks/orders_transformer.py
@@ -328,6 +328,8 @@ def wrap_up(self):
             self.mapper.mapped_folio_fields,
             self.mapper.mapped_legacy_fields,
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.mapper.migration_report.write_json_report(raw_report_file)
         logging.info("All done!")
 
     def merge_into_orders_with_embedded_pols(self, folio_rec, results_file):
diff --git a/src/folio_migration_tools/migration_tasks/organization_transformer.py b/src/folio_migration_tools/migration_tasks/organization_transformer.py
index 4c100240..78db6490 100644
--- a/src/folio_migration_tools/migration_tasks/organization_transformer.py
+++ b/src/folio_migration_tools/migration_tasks/organization_transformer.py
@@ -299,6 +299,8 @@ def wrap_up(self):
         self.mapper.save_id_map_file(
             self.folder_structure.organizations_id_map_path, self.organizations_id_map
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.mapper.migration_report.write_json_report(raw_report_file)
         self.clean_out_empty_logs()
         logging.info("All done!")
diff --git a/src/folio_migration_tools/migration_tasks/requests_migrator.py b/src/folio_migration_tools/migration_tasks/requests_migrator.py
index 30bfb49c..78ced046 100644
--- a/src/folio_migration_tools/migration_tasks/requests_migrator.py
+++ b/src/folio_migration_tools/migration_tasks/requests_migrator.py
@@ -236,6 +236,8 @@ def wrap_up(self):
         self.migration_report.write_migration_report(
             i18n_t("Requests migration report"), report_file, self.start_datetime
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.migration_report.write_json_report(raw_report_file)
         self.clean_out_empty_logs()
 
     def write_failed_request_to_file(self):
diff --git a/src/folio_migration_tools/migration_tasks/reserves_migrator.py b/src/folio_migration_tools/migration_tasks/reserves_migrator.py
index f07ed1cc..eddff524 100644
--- a/src/folio_migration_tools/migration_tasks/reserves_migrator.py
+++ b/src/folio_migration_tools/migration_tasks/reserves_migrator.py
@@ -126,6 +126,8 @@ def wrap_up(self):
         self.migration_report.write_migration_report(
             i18n_t("Reserves migration report"), report_file, self.start_datetime
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.migration_report.write_json_report(raw_report_file)
         self.clean_out_empty_logs()
 
     def write_failed_reserves_to_file(self):
diff --git a/src/folio_migration_tools/migration_tasks/user_transformer.py b/src/folio_migration_tools/migration_tasks/user_transformer.py
index 472de177..faea2614 100644
--- a/src/folio_migration_tools/migration_tasks/user_transformer.py
+++ b/src/folio_migration_tools/migration_tasks/user_transformer.py
@@ -265,6 +265,8 @@ def wrap_up(self):
             self.mapper.mapped_folio_fields,
             self.mapper.mapped_legacy_fields,
         )
+        with open(self.folder_structure.migration_reports_raw_file, "w") as raw_report_file:
+            self.mapper.migration_report.write_json_report(raw_report_file)
         logging.info("All done!")
         self.clean_out_empty_logs()
diff --git a/tests/test_bibs_transformer.py b/tests/test_bibs_transformer.py
index 1e42a941..6ea841fd 100644
--- a/tests/test_bibs_transformer.py
+++ b/tests/test_bibs_transformer.py
@@ -80,6 +80,42 @@ def test_wrap_up(mock_clean_out_empty_logs, mock_folder_structure, mock_check_so
     mock_clean_out_empty_logs.assert_called_once()
 
 
+@patch(
+    'folio_migration_tools.migration_tasks.bibs_transformer.BibsTransformer'
+    '.clean_out_empty_logs'
+)
+def test_wrap_up_writes_json_report(
+    mock_clean_out_empty_logs, mock_folder_structure, mock_check_source_files, tmp_path
+):
+    """Test that wrap_up writes both markdown and JSON reports."""
+    library_config = get_mocked_library_config()
+    folio_client = mocked_folio_client()
+    transformer = BibsTransformer(TASK_CONFIG, library_config, folio_client)
+    transformer.processor = MagicMock()
+
+    # Set up real temp files for reports
+    md_report_path = tmp_path / "report.md"
+    json_report_path = tmp_path / "raw_report.json"
+    transformer.folder_structure.migration_reports_file = md_report_path
+    transformer.folder_structure.migration_reports_raw_file = json_report_path
+
+    # Add some data to the migration report
+    transformer.mapper.migration_report.add("GeneralStatistics", "Records processed", 100)
+
+    transformer.wrap_up()
+
+    # Verify both files were created
+    assert md_report_path.exists(), "Markdown report file should be created"
+    assert json_report_path.exists(), "JSON report file should be created"
+
+    # Verify JSON report contains valid JSON with our data
+    import json
+    with open(json_report_path) as f:
+        json_data = json.load(f)
+    assert "GeneralStatistics" in json_data
+    assert json_data["GeneralStatistics"]["Records processed"] == 100
+
+
 def test_different_ils_flavours(mock_folder_structure, mock_check_source_files):
     for ils_flavour in IlsFlavour:
         task_config = TASK_CONFIG.model_copy(update={"ils_flavour": ils_flavour})
diff --git a/tests/test_infrastructure/mocked_classes.py b/tests/test_infrastructure/mocked_classes.py
index e52a8c37..6c2c4bad 100644
--- a/tests/test_infrastructure/mocked_classes.py
+++ b/tests/test_infrastructure/mocked_classes.py
@@ -421,6 +421,7 @@ def get_mocked_folder_structure():
     mock_fs.legacy_records_folder = Path("source_files")
     mock_fs.logs_folder = Path("logs")
     mock_fs.migration_reports_file = Path("/dev/null")
+    mock_fs.migration_reports_raw_file = Path("/dev/null")
     mock_fs.transformation_extra_data_path = Path("transformation_extra_data")
     mock_fs.transformation_log_path = Path("/dev/null")
     mock_fs.data_issue_file_path = Path("/dev/null")
diff --git a/tests/test_json_report_writing.py b/tests/test_json_report_writing.py
new file mode 100644
index 00000000..d2c2943a
--- /dev/null
+++ b/tests/test_json_report_writing.py
@@ -0,0 +1,148 @@
+"""Tests for JSON report writing across migration tasks.
+
+This module tests that all migration tasks correctly write both
+markdown and raw JSON reports during wrap_up.
+"""
+
+import json
+from io import StringIO
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from folio_migration_tools.migration_report import MigrationReport
+
+
+class TestJsonReportWriting:
+    """Test that migration tasks write JSON reports correctly."""
+
+    def test_write_json_report_creates_valid_json(self):
+        """Test that write_json_report produces valid JSON output."""
+        report = MigrationReport()
+        report.add("GeneralStatistics", "Records processed", 100)
+        report.add("GeneralStatistics", "Records failed", 5)
+        report.add("LocationMapping", "Main Library", 50)
+
+        output = StringIO()
+        report.write_json_report(output)
+
+        output.seek(0)
+        json_data = json.load(output)
+
+        assert "GeneralStatistics" in json_data
+        assert json_data["GeneralStatistics"]["Records processed"] == 100
+        assert json_data["GeneralStatistics"]["Records failed"] == 5
+        assert "LocationMapping" in json_data
+        assert json_data["LocationMapping"]["Main Library"] == 50
+
+    def test_json_report_file_created(self, tmp_path):
+        """Test that JSON report file is created on disk."""
+        report = MigrationReport()
+        report.add("GeneralStatistics", "Test measure", 42)
+
+        json_path = tmp_path / "raw_report.json"
+        with open(json_path, "w") as f:
+            report.write_json_report(f)
+
+        assert json_path.exists()
+
+        with open(json_path) as f:
+            data = json.load(f)
+        assert data["GeneralStatistics"]["Test measure"] == 42
+
+    def test_json_report_includes_blurb_id(self):
+        """Test that blurb_id is included in JSON output when using add()."""
+        report = MigrationReport()
+        report.add("TestSection", "some_measure", 1)
+
+        output = StringIO()
+        report.write_json_report(output)
+
+        output.seek(0)
+        data = json.load(output)
+
+        assert data["TestSection"]["blurb_id"] == "TestSection"
+
+    def test_json_report_set_does_not_include_blurb_id(self):
+        """Test that set() does not automatically add blurb_id."""
+        report = MigrationReport()
+        report.set("TestSection", "some_measure", 1)
+
+        output = StringIO()
+        report.write_json_report(output)
+
+        output.seek(0)
+        data = json.load(output)
+
+        # set() doesn't add blurb_id automatically
+        assert "blurb_id" not in data["TestSection"]
+
+    def test_folder_structure_has_raw_file_path(self, tmp_path):
+        """Test that FolderStructure defines migration_reports_raw_file."""
+        from folio_migration_tools.folder_structure import FolderStructure
+        from folio_uuid.folio_namespaces import FOLIONamespaces
+
+        # Create the required folder structure
+        base_folder = tmp_path / "migration"
+        base_folder.mkdir()
+        (base_folder / "mapping_files").mkdir()
+        (base_folder / "iterations").mkdir()
+        (base_folder / ".gitignore").write_text("results/\n")
+
+        fs = FolderStructure(
+            base_folder,
+            FOLIONamespaces.instances,
+            "test_task",
+            "iteration_1",
+            False
+        )
+        fs.setup_migration_file_structure()
+
+        assert hasattr(fs, 'migration_reports_raw_file')
+        assert 'raw_report' in str(fs.migration_reports_raw_file)
+        assert '.json' in str(fs.migration_reports_raw_file)
+
+
+class TestMigrationReportIntegration:
+    """Integration tests for migration report with file I/O."""
+
+    def test_both_reports_written_to_same_folder_structure(self, tmp_path):
+        """Test that markdown and JSON reports can be written to related paths."""
+        report = MigrationReport()
+        report.add("GeneralStatistics", "Test", 1)
+
+        reports_folder = tmp_path / "reports"
+        reports_folder.mkdir()
+        raw_folder = reports_folder / ".raw"
+        raw_folder.mkdir()
+
+        md_path = reports_folder / "report_test.md"
+        json_path = raw_folder / "raw_report_test.json"
+
+        # Write both reports
+        with patch(
+            "folio_migration_tools.migration_report.i18n.t", side_effect=lambda x, **kw: x
+        ), patch("folio_migration_tools.migration_report.i18n_t", side_effect=lambda x: x):
+            from datetime import datetime, timezone
+            with open(md_path, "w") as md_file:
+                report.write_migration_report(
+                    "Test Report",
+                    md_file,
+                    datetime.now(timezone.utc)
+                )
+
+        with open(json_path, "w") as json_file:
+            report.write_json_report(json_file)
+
+        # Both files should exist
+        assert md_path.exists()
+        assert json_path.exists()
+
+        # JSON should be valid
+        with open(json_path) as f:
+            data = json.load(f)
+        assert "GeneralStatistics" in data
+
+        # Markdown should have content
+        assert md_path.stat().st_size > 0
diff --git a/tests/test_migration_report.py b/tests/test_migration_report.py
index 4d3a5811..d1e4924e 100644
--- a/tests/test_migration_report.py
+++ b/tests/test_migration_report.py
@@ -1,8 +1,225 @@
+import io
+import json
+import logging
+from datetime import datetime, timezone
+from unittest.mock import patch
+
 from dateutil import parser
 
+from folio_migration_tools.migration_report import MigrationReport, as_str
+
 
 def test_time_diff():
     start = parser.parse("2022-06-29T20:21:22")
     end = parser.parse("2022-06-30T21:22:23")
     nice_diff = str(end - start)
     assert nice_diff == "1 day, 1:01:01"
+
+
+class TestMigrationReport:
+    """Tests for the MigrationReport class."""
+
+    def test_init_creates_empty_report(self):
+        """Test that a new MigrationReport has empty report and stats dicts."""
+        report = MigrationReport()
+        assert report.report == {}
+        assert report.stats == {}
+
+    def test_add_creates_new_section(self):
+        """Test that add() creates a new section if it doesn't exist."""
+        report = MigrationReport()
+        report.add("TestSection", "test_measure", 5)
+
+        assert "TestSection" in report.report
+        assert report.report["TestSection"]["blurb_id"] == "TestSection"
+        assert report.report["TestSection"]["test_measure"] == 5
+
+    def test_add_increments_existing_measure(self):
+        """Test that add() increments an existing measure."""
+        report = MigrationReport()
+        report.add("TestSection", "test_measure", 5)
+        report.add("TestSection", "test_measure", 3)
+
+        assert report.report["TestSection"]["test_measure"] == 8
+
+    def test_add_defaults_to_increment_by_one(self):
+        """Test that add() defaults to incrementing by 1."""
+        report = MigrationReport()
+        report.add("TestSection", "test_measure")
+        report.add("TestSection", "test_measure")
+
+        assert report.report["TestSection"]["test_measure"] == 2
+
+    def test_add_multiple_measures_same_section(self):
+        """Test that multiple measures can be added to the same section."""
+        report = MigrationReport()
+        report.add("TestSection", "measure_a", 10)
+        report.add("TestSection", "measure_b", 20)
+
+        assert report.report["TestSection"]["measure_a"] == 10
+        assert report.report["TestSection"]["measure_b"] == 20
+
+    def test_set_creates_new_section(self):
+        """Test that set() creates a new section if it doesn't exist."""
+        report = MigrationReport()
+        report.set("TestSection", "test_measure", 42)
+
+        assert "TestSection" in report.report
+        assert report.report["TestSection"]["test_measure"] == 42
+
+    def test_set_overwrites_existing_value(self):
+        """Test that set() overwrites rather than increments."""
+        report = MigrationReport()
+        report.set("TestSection", "test_measure", 10)
+        report.set("TestSection", "test_measure", 5)
+
+        assert report.report["TestSection"]["test_measure"] == 5
+
+    def test_add_general_statistics_shortcut(self):
+        """Test that add_general_statistics adds to GeneralStatistics section."""
+        report = MigrationReport()
+        report.add_general_statistics("Records processed")
+        report.add_general_statistics("Records processed")
+
+        assert "GeneralStatistics" in report.report
+        assert report.report["GeneralStatistics"]["Records processed"] == 2
+
+    def test_write_json_report_empty(self):
+        """Test that write_json_report writes valid JSON for empty report."""
+        report = MigrationReport()
+        output = io.StringIO()
+
+        report.write_json_report(output)
+
+        output.seek(0)
+        result = json.load(output)
+        assert result == {}
+
+    def test_write_json_report_with_data(self):
+        """Test that write_json_report writes all report data as valid JSON."""
+        report = MigrationReport()
+        report.add("GeneralStatistics", "Records processed", 100)
+        report.add("GeneralStatistics", "Records failed", 5)
+        report.add("LocationMapping", "Main Library", 50)
+        report.add("LocationMapping", "Branch Library", 45)
+
+        output = io.StringIO()
+        report.write_json_report(output)
+
+        output.seek(0)
+        result = json.load(output)
+
+        assert "GeneralStatistics" in result
+        assert result["GeneralStatistics"]["Records processed"] == 100
+        assert result["GeneralStatistics"]["Records failed"] == 5
+        assert result["GeneralStatistics"]["blurb_id"] == "GeneralStatistics"
+
+        assert "LocationMapping" in result
+        assert result["LocationMapping"]["Main Library"] == 50
+        assert result["LocationMapping"]["Branch Library"] == 45
+
+    def test_write_json_report_is_indented(self):
+        """Test that write_json_report produces indented JSON."""
+        report = MigrationReport()
+        report.add("TestSection", "measure", 1)
+
+        output = io.StringIO()
+        report.write_json_report(output)
+
+        output.seek(0)
+        content = output.read()
+
+        # Check for indentation (2 spaces as specified in the code)
+        assert "\n  " in content
+
+    def test_write_json_report_preserves_set_values(self):
+        """Test that values set with set() are preserved in JSON output."""
+        report = MigrationReport()
+        report.set("GeneralStatistics", "Total records", 1000)
+
+        output = io.StringIO()
+        report.write_json_report(output)
+
+        output.seek(0)
+        result = json.load(output)
+
+        assert result["GeneralStatistics"]["Total records"] == 1000
+
+    def test_write_migration_report_writes_header(self):
+        """Test that write_migration_report writes the report header and timings."""
+        # Mock i18n functions to return predictable values
+        with patch(
+            "folio_migration_tools.migration_report.i18n.t", side_effect=lambda x, **kw: x
+        ), patch("folio_migration_tools.migration_report.i18n_t", side_effect=lambda x: x):
+            report = MigrationReport()
+            report.add("GeneralStatistics", "Records processed", 100)
+
+            output = io.StringIO()
+            start_time = datetime(2024, 1, 15, 10, 0, 0, tzinfo=timezone.utc)
+
+            report.write_migration_report("Test Report", output, start_time)
+
+            output.seek(0)
+            content = output.read()
+
+            # Check header is present
+            assert "# Test Report" in content
+            # Check timings section
+            assert "Timings" in content
+            assert "2024-01-15T10:00:00" in content
+
+    def test_write_migration_report_includes_sections(self):
+        """Test that write_migration_report includes all report sections."""
+        with patch(
+            "folio_migration_tools.migration_report.i18n.t", side_effect=lambda x, **kw: x
+        ), patch("folio_migration_tools.migration_report.i18n_t", side_effect=lambda x: x):
+            report = MigrationReport()
+            report.add("GeneralStatistics", "Records processed", 100)
+            report.add("LocationMapping", "Main Library", 50)
+
+            output = io.StringIO()
+            start_time = datetime(2024, 1, 15, 10, 0, 0, tzinfo=timezone.utc)
+
+            report.write_migration_report("Test Report", output, start_time)
+
+            output.seek(0)
+            content = output.read()
+
+            # Check sections are present
+            assert "Records processed" in content
+            assert "100" in content
+            assert "Main Library" in content
+            assert "50" in content
+
+    def test_log_me_logs_report_sections(self, caplog):
+        """Test that log_me logs all report sections."""
+        report = MigrationReport()
+        report.add("GeneralStatistics", "Records processed", 100)
+        report.add("GeneralStatistics", "Records failed", 5)
+
+        with caplog.at_level(logging.INFO):
+            report.log_me()
+
+        assert "GeneralStatistics" in caplog.text
+        assert "Records processed" in caplog.text
+        assert "100" in caplog.text
+
+
+class TestAsStr:
+    """Tests for the as_str helper function."""
+
+    def test_as_str_with_string(self):
+        """Test as_str with a string input."""
+        result = as_str("test")
+        assert result == ("test", "")
+
+    def test_as_str_with_number(self):
+        """Test as_str with a numeric input."""
+        result = as_str(42)
+        assert result == ("42", "")
+
+    def test_as_str_with_none(self):
+        """Test as_str with None input."""
+        result = as_str(None)
+        assert result == ("None", "")