@@ -150,7 +150,7 @@ def get_checksum_type(self, bucket_name: str) -> str:
         return policy_response.result['ChecksumType']
 
     def put_objects(self, put_objects: List[HelperPutObject], bucket: str, max_threads: int = 5,
-                    calculate_checksum: bool = False) -> str:
+                    calculate_checksum: bool = False, job_name: str = None) -> str:
         """
         Puts a list of objects to a Black Pearl bucket.
 
@@ -168,6 +168,8 @@ def put_objects(self, put_objects: List[HelperPutObject], bucket: str, max_threa
         if the client and BP checksums do not match. Note that calculating the checksum is processor intensive, and
         it also requires two reads of the object (first to calculate checksum, and secondly to send the data). The
         type of checksum calculated is determined by the data policy associated with the bucket.
+        job_name : str
+            The name to give the BP put job.
         """
         # If calculating checksum, then determine the checksum type from the data policy
         checksum_type = None
@@ -181,7 +183,7 @@ def put_objects(self, put_objects: List[HelperPutObject], bucket: str, max_threa
             put_objects_map[entry.object_name] = entry
 
         bulk_put = self.client.put_bulk_job_spectra_s3(
-            PutBulkJobSpectraS3Request(bucket_name=bucket, object_list=ds3_put_objects))
+            PutBulkJobSpectraS3Request(bucket_name=bucket, object_list=ds3_put_objects, name=job_name))
 
         job_id = bulk_put.result['JobId']
 
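A minimal usage sketch for the new parameter. The `job_name` keyword and the `HelperPutObject` constructor come from this diff; the `ds3.ds3Helpers` module path, the `Helper` wrapper, `createClientFromEnv()`, and the bucket/object names are assumptions for illustration:

```python
import os

from ds3 import ds3
from ds3.ds3Helpers import Helper, HelperPutObject  # module path assumed

client = ds3.createClientFromEnv()  # endpoint and credentials read from the DS3_* environment variables
helpers = Helper(client=client)

file_path = "movies/movie.mov"
put_object = HelperPutObject(object_name="movie.mov", file_path=file_path,
                             size=os.path.getsize(file_path))

# job_name defaults to None, so existing callers keep their current (unnamed) behavior
job_id = helpers.put_objects([put_object], bucket="my-bucket", job_name="nightly-archive")
print(f"started BP put job {job_id}")
```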
@@ -244,7 +246,8 @@ def put_blob(self, bucket: str, put_object: HelperPutObject, length: int, offset
         stream.close()
 
     def put_all_objects_in_directory(self, source_dir: str, bucket: str, objects_per_bp_job: int = 1000,
-                                     max_threads: int = 5, calculate_checksum: bool = False) -> List[str]:
+                                     max_threads: int = 5, calculate_checksum: bool = False,
+                                     job_name: str = None) -> List[str]:
         """
         Puts all files and subdirectories to a Black Pearl bucket.
 
@@ -267,6 +270,8 @@ def put_all_objects_in_directory(self, source_dir: str, bucket: str, objects_per
         and BP checksums do not match. Note that calculating the checksum is processor intensive, and it also
         requires two reads of the object (first to calculate checksum, and secondly to send the data). The type of
         checksum calculated is determined by the data policy associated with the bucket.
+        job_name : str
+            The name to give the BP put jobs. All BP jobs that are created will have the same name.
         """
         obj_list: List[HelperPutObject] = list()
         job_list: List[str] = list()
@@ -277,8 +282,8 @@ def put_all_objects_in_directory(self, source_dir: str, bucket: str, objects_per
                 size = os.path.getsize(obj_path)
                 obj_list.append(HelperPutObject(object_name=obj_name, file_path=obj_path, size=size))
                 if len(obj_list) >= objects_per_bp_job:
-                    job_list.append(self.put_objects(
-                        obj_list, bucket, max_threads=max_threads, calculate_checksum=calculate_checksum))
+                    job_list.append(self.put_objects(obj_list, bucket, max_threads=max_threads,
+                                                     calculate_checksum=calculate_checksum, job_name=job_name))
                     obj_list = []
 
             for name in dirs:
@@ -287,17 +292,18 @@ def put_all_objects_in_directory(self, source_dir: str, bucket: str, objects_per
                     path.join(path.normpath(path.relpath(path=dir_path, start=source_dir)), ""))
                 obj_list.append(HelperPutObject(object_name=dir_name, file_path=dir_path, size=0))
                 if len(obj_list) >= objects_per_bp_job:
-                    job_list.append(self.put_objects(
-                        obj_list, bucket, max_threads=max_threads, calculate_checksum=calculate_checksum))
+                    job_list.append(self.put_objects(obj_list, bucket, max_threads=max_threads,
+                                                     calculate_checksum=calculate_checksum, job_name=job_name))
                     obj_list = []
 
         if len(obj_list) > 0:
             job_list.append(self.put_objects(
-                obj_list, bucket, max_threads=max_threads, calculate_checksum=calculate_checksum))
+                obj_list, bucket, max_threads=max_threads, calculate_checksum=calculate_checksum, job_name=job_name))
 
         return job_list
 
-    def get_objects(self, get_objects: List[HelperGetObject], bucket: str, max_threads: int = 5) -> str:
+    def get_objects(self, get_objects: List[HelperGetObject], bucket: str, max_threads: int = 5,
+                    job_name: str = None) -> str:
         """
         Retrieves a list of objects from a Black Pearl bucket.
 
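Before moving on to the get side, a sketch of the directory helper. Per the docstring above, every batched BP job it creates carries the one supplied name; the `ds3.ds3Helpers` module path, `Helper`, `createClientFromEnv()`, and the paths here are assumptions:

```python
from ds3 import ds3
from ds3.ds3Helpers import Helper  # module path assumed

helpers = Helper(client=ds3.createClientFromEnv())

# Each batch of up to objects_per_bp_job files becomes its own BP job;
# per the docstring, every job created here shares the same name.
job_ids = helpers.put_all_objects_in_directory(source_dir="/data/ingest", bucket="my-bucket",
                                               objects_per_bp_job=500, job_name="ingest-backfill")
print(f"created {len(job_ids)} named put job(s)")
```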
@@ -309,6 +315,8 @@ def get_objects(self, get_objects: List[HelperGetObject], bucket: str, max_threa
             The name of the bucket where the objects are being retrieved from.
         max_threads : int
             The number of concurrent objects being transferred at once (default 5).
+        job_name : str
+            The name to give the BP get job.
         """
         ds3_get_objects: List[Ds3GetObject] = []
         get_objects_map: Dict[str, HelperGetObject] = dict()
@@ -317,7 +325,8 @@ def get_objects(self, get_objects: List[HelperGetObject], bucket: str, max_threa
             get_objects_map[entry.object_name] = entry
 
         bulk_get = self.client.get_bulk_job_spectra_s3(GetBulkJobSpectraS3Request(bucket_name=bucket,
-                                                                                  object_list=ds3_get_objects))
+                                                                                  object_list=ds3_get_objects,
+                                                                                  name=job_name))
 
         job_id = bulk_get.result['JobId']
 
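The retrieval counterpart, using the `HelperGetObject` constructor shown later in this diff; the client and wrapper setup is assumed as before:

```python
from ds3 import ds3
from ds3.ds3Helpers import Helper, HelperGetObject  # module path assumed

helpers = Helper(client=ds3.createClientFromEnv())

# Restore a single object under a recognizable BP job name
get_object = HelperGetObject(object_name="movie.mov", destination_path="restores/movie.mov")
job_id = helpers.get_objects([get_object], bucket="my-bucket", job_name="restore-movies")
print(f"started BP get job {job_id}")
```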
@@ -369,7 +378,7 @@ def get_blob(self, bucket: str, get_object: HelperGetObject, offset: int, job_id
         stream.close()
 
     def get_all_files_in_bucket(self, destination_dir: str, bucket: str, objects_per_bp_job: int = 1000,
-                                max_threads: int = 5) -> List[str]:
+                                max_threads: int = 5, job_name: str = None) -> List[str]:
         """
         Retrieves all objects from a Black Pearl bucket.
 
@@ -385,6 +394,8 @@ def get_all_files_in_bucket(self, destination_dir: str, bucket: str, objects_per
             This determines how many objects to bundle per BP job.
         max_threads : int
             The number of concurrent objects being transferred at once (default 5).
+        job_name : str
+            The name to give the BP get jobs. All BP jobs that are created will have the same name.
         """
         truncated: str = 'true'
         marker = ""
@@ -423,7 +434,8 @@ def get_all_files_in_bucket(self, destination_dir: str, bucket: str, objects_per
                 get_objects.append(HelperGetObject(object_name=object_name, destination_path=object_destination))
 
             if len(get_objects) > 0:
-                job_id = self.get_objects(get_objects=get_objects, bucket=bucket, max_threads=max_threads)
+                job_id = self.get_objects(get_objects=get_objects, bucket=bucket, max_threads=max_threads,
+                                          job_name=job_name)
                 job_ids.append(job_id)
 
             truncated = list_bucket.result['IsTruncated']
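Finally, a sketch of the whole-bucket restore. As with the directory put helper, every paged BP get job shares the single supplied name; setup names and paths are assumptions as in the earlier sketches:

```python
from ds3 import ds3
from ds3.ds3Helpers import Helper  # module path assumed

helpers = Helper(client=ds3.createClientFromEnv())

# The bucket listing is paged, so several BP get jobs may be created;
# all of them carry the one name passed here.
job_ids = helpers.get_all_files_in_bucket(destination_dir="/restores", bucket="my-bucket",
                                          job_name="full-bucket-restore")
print(f"created {len(job_ids)} named get job(s)")
```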