3838import jakarta .json .JsonObjectBuilder ;
3939import jakarta .persistence .EntityManager ;
4040import jakarta .persistence .PersistenceContext ;
41+ import jakarta .persistence .Query ;
4142
4243import org .apache .solr .client .solrj .SolrQuery ;
4344import org .apache .solr .client .solrj .SolrServerException ;
@@ -471,42 +472,100 @@ public void indexDatasetBatchInNewTransaction(List<Long> datasetIds, final int[]
471472 // Process files for this dataset
472473 Map <Long , List <String >> fileDownloadersMap = roleAssigneeSvc .findAssigneesWithDownloadPermissionOnDatasetFiles (dataset .getId ());
473474 List <DatasetVersion > versions = versionsToReIndexPermissionsFor (dataset );
475+ final List <Long > changedFileIds = new ArrayList <>();
476+ if (versions .size ()>1 ) {
477+ Long releasedVersionId = versions .get (versions .get (0 ).isReleased () ? 0 : 1 ).getId ();
478+ Long draftVersionId = versions .get (versions .get (0 ).isReleased () ? 1 : 0 ).getId ();
479+
480+ populateChangedFileIds (
481+ releasedVersionId ,
482+ draftVersionId ,
483+ changedFileIds
484+ );
485+ }
474486 for (DatasetVersion version : versions ) {
475- processDatasetVersionFiles (version , fileDownloadersMap , fileCounter , fileQueryMin , versions .size ()>1 );
487+ processDatasetVersionFiles (version , fileDownloadersMap , fileCounter , fileQueryMin , ( versions .size ()>1 && version . isDraft ()) ? changedFileIds : null );
476488 }
477489 }
478490 }
479491 }
480492
481493 @ TransactionAttribute (TransactionAttributeType .REQUIRES_NEW )
482494 public void indexDatasetFilesInNewTransaction (List <DatasetVersion > versions , Map <Long , List <String >> fileDownloadersMap , final int [] fileCounter , int fileQueryMin ) {
495+ final List <Long > changedFileIds = new ArrayList <>();
496+ if (versions .size ()>1 ) {
497+ Long releasedVersionId = versions .get (versions .get (0 ).isReleased () ? 0 : 1 ).getId ();
498+ Long draftVersionId = versions .get (versions .get (0 ).isReleased () ? 1 : 0 ).getId ();
499+
500+ populateChangedFileIds (
501+ releasedVersionId ,
502+ draftVersionId ,
503+ changedFileIds
504+ );
505+ }
483506 for (DatasetVersion version : versions ) {
484507 // The version object is detached, but its fileMetadatas collection is already loaded.
485508 // We only need its ID and state, which are available.
486- processDatasetVersionFiles (version , fileDownloadersMap , fileCounter , fileQueryMin , versions .size ()>1 );
509+ processDatasetVersionFiles (version , fileDownloadersMap , fileCounter , fileQueryMin , (versions .size ()>1 && version .isDraft ()) ? changedFileIds : null );
510+ }
511+ }
512+
513+ /**
514+ * Retrieves the IDs of file metadatas that have changed between the released version
515+ * and the draft version of a dataset.
516+ *
517+ * @param releasedVersionId the ID of the released dataset version
518+ * @param draftVersionId the ID of the draft dataset version
519+ * @param changedFileMetadataIds the list to populate with changed file metadata IDs
520+ */
521+ protected void populateChangedFileIds (Long releasedVersionId , Long draftVersionId , List <Long > changedFileIds ) {
522+ Query query = em .createNamedQuery ("FileMetadata.getDatafilesWithChangedMetadata" , Long .class );
523+ query .setParameter (1 , releasedVersionId );
524+ query .setParameter (2 , draftVersionId );
525+
526+ /*
527+ * When the query was configured to return Long, it was returning Integer.
528+ * The query has been changed to return Integer now. The code here is robust
529+ * if that changes in the future.
530+ */
531+ List <Object > queryResults = query .getResultList ();
532+ for (Object result : queryResults ) {
533+ if (result != null ) {
534+ // Ensure we're adding Long objects to the list
535+ if (result instanceof Integer intResult ) {
536+ logger .finest ("Converted Integer result to Long: " + result );
537+ changedFileIds .add (Long .valueOf (intResult ));
538+ } else if (result instanceof Long longResult ) {
539+ // Already a Long, add directly
540+ logger .finest ("Added existing Long to list: " + result );
541+ changedFileIds .add (longResult );
542+ } else {
543+ // If it's not a Long, convert it to one via String
544+ try {
545+ changedFileIds .add (Long .valueOf (result .toString ()));
546+ logger .finest ("Converted non-Long result to Long: " + result + " of type " + result .getClass ().getName ());
547+ } catch (NumberFormatException e ) {
548+ logger .warning ("Could not convert query result to Long: " + result );
549+ }
550+ }
551+ }
487552 }
488553 }
489554
490555 private void processDatasetVersionFiles (DatasetVersion version , Map <Long , List <String >> fileDownloadersMap ,
491- final int [] fileCounter , int fileQueryMin , boolean isReleased ) {
556+ final int [] fileCounter , int fileQueryMin , List < Long > changedFileIds ) {
492557 List <String > cachedPerms = searchPermissionsService .findDatasetVersionPerms (version );
493558
494559 String solrIdEnd = getDatasetOrDataFileSolrEnding (version .getVersionState ());
495560 Long versionId = version .getId ();
496561 List <DataFileProxy > filesToReindexAsBatch = new ArrayList <>();
497562
498563 // If the version is draft and there is a released version,
499- // we only need perm docs for the files with filemetadata changes == those with _draft solr docs already
500- Set <Long > fileIdsToReindex = null ;
501- if (version .getVersionState ().equals (DatasetVersion .VersionState .DRAFT ) && isReleased ) {
502- fileIdsToReindex = getFileIdsWithSolrDocs (versionId );
503- logger .fine ("Found " + fileIdsToReindex .size () + " files with draft Solr docs for version " + versionId );
504- }
564+ // we only need perm docs for the files with filemetadata changes == those in changedFileIds
505565
506566 // Process files in batches of 100
507567 int batchSize = 100 ;
508568
509- final Set <Long > finalFileIdsToReindex = fileIdsToReindex ;
510569 if (dataFileService .findCountByDatasetVersionId (version .getId ()).intValue () > fileQueryMin ) {
511570 // For large datasets, use a more efficient SQL query
512571 // ToDo - only get the ones in changedFileIds
@@ -515,7 +574,7 @@ private void processDatasetVersionFiles(DatasetVersion version, Map<Long, List<S
515574 // Process files in batches to avoid memory issues
516575 fileStream .forEach (fileInfo -> {
517576 // Only add files that need reindexing
518- if (finalFileIdsToReindex == null || finalFileIdsToReindex .contains (fileInfo .getFileId ())) {
577+ if (changedFileIds == null || changedFileIds .contains (fileInfo .getFileId ())) {
519578 filesToReindexAsBatch .add (fileInfo );
520579 fileCounter [0 ]++;
521580 if (filesToReindexAsBatch .size () >= batchSize ) {
@@ -530,7 +589,7 @@ private void processDatasetVersionFiles(DatasetVersion version, Map<Long, List<S
530589 for (FileMetadata fmd : version .getFileMetadatas ()) {
531590 // Only add files that need reindexing
532591 DataFileProxy fileProxy = new DataFileProxy (fmd );
533- if (finalFileIdsToReindex == null || finalFileIdsToReindex .contains (fileProxy .getFileId ())) {
592+ if (changedFileIds == null || changedFileIds .contains (fileProxy .getFileId ())) {
534593 filesToReindexAsBatch .add (fileProxy );
535594 fileCounter [0 ]++;
536595 if (filesToReindexAsBatch .size () >= batchSize ) {
@@ -584,55 +643,6 @@ private List<DatasetVersion> versionsToReIndexPermissionsFor(Dataset dataset) {
584643 return versionsToReindexPermissionsFor ;
585644 }
586645
587- /**
588- * Queries Solr to find file IDs that have draft documents for the given dataset version.
589- * This is used to optimize permission reindexing by only processing files that have
590- * metadata changes in the draft version.
591- *
592- * @param datasetVersionId The ID of the dataset version
593- * @return A set of file IDs that have Solr documents associated with this version
594- */
595- private Set <Long > getFileIdsWithSolrDocs (Long datasetVersionId ) {
596- Set <Long > fileIds = new HashSet <>();
597-
598- try {
599- SolrQuery solrQuery = new SolrQuery ();
600-
601- // Query for files in this specific version with draft suffix
602- solrQuery .setQuery ("*:*" );
603- solrQuery .addFilterQuery (SearchFields .TYPE + ":" + SearchConstants .FILES );
604- solrQuery .addFilterQuery (SearchFields .DATASET_VERSION_ID + ":" + datasetVersionId );
605-
606- // Only return the entity ID field
607- solrQuery .setFields (SearchFields .ENTITY_ID );
608-
609- // We want all matching documents
610- solrQuery .setRows (Integer .MAX_VALUE );
611-
612- logger .fine ("Solr query to find draft files: " + solrQuery );
613-
614- QueryResponse queryResponse = solrClientService .getSolrClient ().query (solrQuery );
615- SolrDocumentList docs = queryResponse .getResults ();
616-
617- for (SolrDocument doc : docs ) {
618- Long entityId = (Long ) doc .getFieldValue (SearchFields .ENTITY_ID );
619- if (entityId != null ) {
620- fileIds .add (entityId );
621- }
622- }
623-
624- logger .fine ("Found " + fileIds .size () + " files with draft Solr docs for version " + datasetVersionId );
625-
626- } catch (SolrServerException | IOException ex ) {
627- logger .log (Level .WARNING , "Error querying Solr for draft file IDs for version " + datasetVersionId +
628- ". Will reindex all files as fallback." , ex );
629- // Return null to indicate we should process all files
630- return null ;
631- }
632-
633- return fileIds ;
634- }
635-
636646 public IndexResponse deleteMultipleSolrIds (List <String > solrIdsToDelete ) {
637647 if (solrIdsToDelete .isEmpty ()) {
638648 return new IndexResponse ("nothing to delete" );
0 commit comments