@@ -641,60 +641,8 @@ def finalise(self, show_progress=False):
def create_index(self):
    """Create an index to support efficient region queries.

    The work is delegated to a ``VcfZarrIndexer`` constructed on
    ``self.path``: this method only builds the indexer and invokes its
    ``create_index()``. Nothing is returned.
    """
    # NOTE(review): presumably the indexer now owns the steps this method
    # used to perform inline (writing the "region_index" array and
    # consolidating the Zarr metadata) -- confirm against VcfZarrIndexer.
    indexer = VcfZarrIndexer(self.path)
    indexer.create_index()
698646
699647    ###################### 
700648    # encode_all_partitions 
0 commit comments