@@ -593,6 +593,84 @@ def _get_merged_prs(es_input):
593
593
594
594
return merged_prs_list
595
595
596
+ def _get_cross_references (es_input , index ):
597
+ # Get all CrossReferencedEvent items and their referenced issues and pull requests
598
+ es_query = {
599
+ "size" : 0 ,
600
+ "track_total_hits" : True ,
601
+ "query" : {
602
+ "bool" : {
603
+ "must" : {
604
+ "term" : {
605
+ "event_type" : "CrossReferencedEvent"
606
+ }
607
+ }
608
+ }
609
+ },
610
+ "aggs" : {
611
+ "composite_issue_url" : {
612
+ "composite" : {
613
+ "sources" : [{
614
+ "issue_url" : {
615
+ "terms" : {
616
+ "field" : "issue_url"
617
+ }
618
+ }
619
+ }],
620
+ "size" : 1000
621
+ },
622
+ "aggs" : {
623
+ "references_urls" : {
624
+ "terms" : {
625
+ "field" : "reference_source_url" ,
626
+ "size" : 10000
627
+ }
628
+ }
629
+ }
630
+ }
631
+ }
632
+ }
633
+
634
+ buckets = []
635
+ while True :
636
+ cross_references = es_input .search (index = index , body = es_query )
637
+ buckets += cross_references ['aggregations' ]['composite_issue_url' ]['buckets' ]
638
+ after_key = cross_references ['aggregations' ]['composite_issue_url' ].get ('after_key' , None )
639
+ if not after_key :
640
+ break
641
+ es_query ['aggs' ]['composite_issue_url' ]['composite' ]['after' ] = after_key
642
+
643
+ reference_dict = {}
644
+ for item in buckets :
645
+ issue_url = item ['key' ]['issue_url' ]
646
+ references = [ref ['key' ] for ref in item ['references_urls' ]['buckets' ]]
647
+
648
+ # Update reference dictionary
649
+ if issue_url not in reference_dict .keys ():
650
+ reference_dict [issue_url ] = references
651
+ else :
652
+ prev_references = reference_dict [issue_url ]
653
+ prev_references .append (references )
654
+ reference_dict [issue_url ] = list (set (prev_references ))
655
+
656
+ # Adding list entries from reversed references
657
+ for issue_url in reference_dict .keys ():
658
+ reference_list = reference_dict [issue_url ]
659
+ if not reference_list :
660
+ continue
661
+ for ref in reference_list :
662
+ try :
663
+ ref_entry_list = reference_dict [ref ]
664
+ except KeyError :
665
+ continue
666
+ if ref_entry_list :
667
+ ref_entry_list .append (issue_url )
668
+ else :
669
+ ref_entry_list = [issue_url ]
670
+ reference_dict [ref ] = list (set (ref_entry_list ))
671
+
672
+ return reference_dict
673
+
596
674
data_source = enrich_backend .__class__ .__name__ .split ("Enrich" )[0 ].lower ()
597
675
log_prefix = "[{}] Cross reference analysis" .format (data_source )
598
676
logger .info ("{} starting study {}" .format (log_prefix , anonymize_url (self .elastic .index_url )))
@@ -605,64 +683,7 @@ def _get_merged_prs(es_input):
605
683
logger .info ("{} Retrieving the merged PRs from MergeEvents" .format (log_prefix ))
606
684
merged_prs = _get_merged_prs (es_in )
607
685
608
- # Get all CrossReferencedEvent items and their referenced issues and pull requests
609
- es_query = {
610
- "size" : 0 ,
611
- "query" : {
612
- "bool" : {
613
- "must" : {
614
- "term" : {
615
- "event_type" : "CrossReferencedEvent"
616
- }
617
- }
618
- }
619
- },
620
- "aggs" : {
621
- "issue_url" : {
622
- "terms" : {
623
- "field" : "issue_url" ,
624
- "size" : 30000
625
- },
626
- "aggs" : {
627
- "uniq_gender" : {
628
- "terms" : {"field" : "reference_source_url" }
629
- }
630
- }
631
- }
632
- }
633
- }
634
-
635
- cross_references = es_in .search (index = in_index , body = es_query )
636
- buckets = cross_references ['aggregations' ]['issue_url' ]['buckets' ]
637
-
638
- reference_dict = {}
639
- for item in buckets :
640
- issue_url = item ['key' ]
641
- references = [ref ['key' ] for ref in item ['uniq_gender' ]['buckets' ]]
642
-
643
- # Update reference dictionary
644
- if issue_url not in reference_dict .keys ():
645
- reference_dict [issue_url ] = references
646
- else :
647
- prev_references = reference_dict [issue_url ]
648
- prev_references .append (references )
649
- reference_dict [issue_url ] = list (set (prev_references ))
650
-
651
- # Adding list entries from reversed references
652
- for issue_url in reference_dict .keys ():
653
- reference_list = reference_dict [issue_url ]
654
- if not reference_list :
655
- continue
656
- for ref in reference_list :
657
- try :
658
- ref_entry_list = reference_dict [ref ]
659
- except KeyError :
660
- continue
661
- if ref_entry_list :
662
- ref_entry_list .append (issue_url )
663
- else :
664
- ref_entry_list = [issue_url ]
665
- reference_dict [ref ] = list (set (ref_entry_list ))
686
+ reference_dict = _get_cross_references (es_in , in_index )
666
687
667
688
# Updated affected issues and pull requests
668
689
painless_code = """
0 commit comments