Skip to content

Commit ca9fb8c

Browse files
committed
refactor: remove commented code
1 parent ace1ff2 commit ca9fb8c

1 file changed

Lines changed: 0 additions & 45 deletions

File tree

server/workers/orcid/src/orcid_service.py

Lines changed: 0 additions & 45 deletions
Original file line number | Diff line number | Diff line change
@@ -345,50 +345,6 @@ def get_unversioned_doi(doi_str):
345345

346346
return base_metadata
347347

348-
def _explode_merged_dois(self, base_metadata: pd.DataFrame) -> pd.DataFrame:
    """
    Explode the 'merged_dois' field to create separate rows for each DOI variant.

    If base_metadata contains a 'merged_dois' column, every DOI found in a cell
    (DOIs inside a string are separated by '; ') produces its own row whose 'doi'
    column holds that DOI after remove_doi_prefix() has been applied. Rows whose
    'merged_dois' cell yields no DOI keep their regular 'doi' value, also passed
    through remove_doi_prefix(). Without a 'merged_dois' column only the regular
    'doi' column is processed.

    The input frame is not modified; a new DataFrame is returned.

    Parameters:
    - base_metadata: DataFrame with BASE metadata, potentially containing a
      'merged_dois' column whose cells are strings, list-likes of strings or NaN

    Returns:
    - DataFrame where each row has a single DOI in the 'doi' column. When
      'merged_dois' is present the index is renumbered from 0 (explode is
      called with ignore_index=True) and the temporary helper column is dropped.
    """

    def split_merged_dois(value):
        """Return the cleaned, non-empty DOIs held in one 'merged_dois' cell."""
        if pd.api.types.is_list_like(value):
            # pd.notna() on a list returns an array whose truth value is
            # ambiguous, so list-valued cells are inspected element by element.
            chunks = [str(item) for item in value if pd.notna(item)]
        elif pd.notna(value):
            chunks = [str(value)]
        else:
            return []
        # Split on '; ' (with the space) rather than a bare ';' because a DOI
        # itself may legally contain a semicolon.
        return [
            remove_doi_prefix(doi.strip())
            for chunk in chunks
            for doi in chunk.split('; ')
            if doi.strip()
        ]

    if 'merged_dois' not in base_metadata.columns:
        # No merged_dois column, process the regular doi column only.
        return base_metadata.assign(doi=base_metadata['doi'].apply(remove_doi_prefix))

    # assign() builds a new frame, so the temporary column never leaks into the
    # caller's DataFrame. Rows with an empty list survive explode() as a single
    # row holding NaN in 'merged_dois_list'.
    exploded = base_metadata.assign(
        merged_dois_list=base_metadata['merged_dois'].apply(split_merged_dois)
    ).explode('merged_dois_list', ignore_index=True)

    # Rows that received a DOI from merged_dois use it as their 'doi' ...
    has_merged_doi = exploded['merged_dois_list'].notna() & (exploded['merged_dois_list'] != '')
    exploded.loc[has_merged_doi, 'doi'] = exploded.loc[has_merged_doi, 'merged_dois_list']

    # ... all other rows fall back to their regular, prefix-stripped 'doi'.
    exploded.loc[~has_merged_doi, 'doi'] = exploded.loc[~has_merged_doi, 'doi'].apply(remove_doi_prefix)

    # Drop the temporary column.
    return exploded.drop(columns=['merged_dois_list'])
391-
392348
def enrich_metadata_with_base(self, params: Dict[str, str], metadata: pd.DataFrame) -> pd.DataFrame:
393349
self.logger.debug(f"Enriching metadata with base for ORCID {params.get('orcid')}")
394350

@@ -437,7 +393,6 @@ def enrich_metadata_with_base(self, params: Dict[str, str], metadata: pd.DataFra
437393

438394
base_metadata = base_metadata.reindex(columns=required_fields)
439395

440-
#base_metadata = self._explode_merged_dois(base_metadata)
441396
base_metadata['merged_dois'] = base_metadata['merged_dois'].apply(lambda x: x[0] if isinstance(x, list) and len(x) > 0 else x)
442397
base_metadata['merged_dois'] = base_metadata['merged_dois'].apply(lambda x: x.split(';') if isinstance(x, str) else [])
443398
base_metadata['merged_dois'] = base_metadata['merged_dois'].apply(lambda x: [x.strip() for x in x] if isinstance(x, list) else x)

0 commit comments

Comments
 (0)