@@ -641,60 +641,8 @@ def finalise(self, show_progress=False):
641641 def create_index (self ):
642642 """Create an index to support efficient region queries."""
643643
644- root = zarr .open_group (store = self .path , mode = "r+" )
645-
646- contig = root ["variant_contig" ]
647- pos = root ["variant_position" ]
648- length = root ["variant_length" ]
649-
650- assert contig .cdata_shape == pos .cdata_shape
651-
652- index = []
653-
654- logger .info ("Creating region index" )
655- for v_chunk in range (pos .cdata_shape [0 ]):
656- c = contig .blocks [v_chunk ]
657- p = pos .blocks [v_chunk ]
658- e = p + length .blocks [v_chunk ] - 1
659-
660- # create a row for each contig in the chunk
661- d = np .diff (c , append = - 1 )
662- c_start_idx = 0
663- for c_end_idx in np .nonzero (d )[0 ]:
664- assert c [c_start_idx ] == c [c_end_idx ]
665- index .append (
666- (
667- v_chunk , # chunk index
668- c [c_start_idx ], # contig ID
669- p [c_start_idx ], # start
670- p [c_end_idx ], # end
671- np .max (e [c_start_idx : c_end_idx + 1 ]), # max end
672- c_end_idx - c_start_idx + 1 , # num records
673- )
674- )
675- c_start_idx = c_end_idx + 1
676-
677- index = np .array (index , dtype = pos .dtype )
678- kwargs = {}
679- if not zarr_utils .zarr_v3 ():
680- kwargs ["dimension_separator" ] = self .metadata .dimension_separator
681- array = root .array (
682- "region_index" ,
683- data = index ,
684- shape = index .shape ,
685- chunks = index .shape ,
686- dtype = index .dtype ,
687- compressor = numcodecs .Blosc ("zstd" , clevel = 9 , shuffle = 0 ),
688- fill_value = None ,
689- ** kwargs ,
690- )
691- array .attrs ["_ARRAY_DIMENSIONS" ] = [
692- "region_index_values" ,
693- "region_index_fields" ,
694- ]
695-
696- logger .info ("Consolidating Zarr metadata" )
697- zarr .consolidate_metadata (self .path )
644+ indexer = VcfZarrIndexer (self .path )
645+ indexer .create_index ()
698646
699647 ######################
700648 # encode_all_partitions
0 commit comments