
Commit b7d262d

OTHER: added job naming to helpers (#55)
1 parent c26b12e commit b7d262d

File tree

2 files changed: +50 −16 lines

ds3/ds3Helpers.py

Lines changed: 24 additions & 12 deletions
@@ -150,7 +150,7 @@ def get_checksum_type(self, bucket_name: str) -> str:
         return policy_response.result['ChecksumType']

     def put_objects(self, put_objects: List[HelperPutObject], bucket: str, max_threads: int = 5,
-                    calculate_checksum: bool = False) -> str:
+                    calculate_checksum: bool = False, job_name: str = None) -> str:
         """
         Puts a list of objects to a Black Pearl bucket.

@@ -168,6 +168,8 @@ def put_objects(self, put_objects: List[HelperPutObject], bucket: str, max_threa
             if the client and BP checksums do not match. Note that calculating the checksum is processor intensive, and
             it also requires two reads of the object (first to calculate checksum, and secondly to send the data). The
             type of checksum calculated is determined by the data policy associated with the bucket.
+        job_name : str
+            The name to give the BP put job.
         """
         # If calculating checksum, then determine the checksum type from the data policy
         checksum_type = None
@@ -181,7 +183,7 @@ def put_objects(self, put_objects: List[HelperPutObject], bucket: str, max_threa
             put_objects_map[entry.object_name] = entry

         bulk_put = self.client.put_bulk_job_spectra_s3(
-            PutBulkJobSpectraS3Request(bucket_name=bucket, object_list=ds3_put_objects))
+            PutBulkJobSpectraS3Request(bucket_name=bucket, object_list=ds3_put_objects, name=job_name))

         job_id = bulk_put.result['JobId']

@@ -244,7 +246,8 @@ def put_blob(self, bucket: str, put_object: HelperPutObject, length: int, offset
         stream.close()

     def put_all_objects_in_directory(self, source_dir: str, bucket: str, objects_per_bp_job: int = 1000,
-                                     max_threads: int = 5, calculate_checksum: bool = False) -> List[str]:
+                                     max_threads: int = 5, calculate_checksum: bool = False,
+                                     job_name: str = None) -> List[str]:
         """
         Puts all files and subdirectories to a Black Pearl bucket.

@@ -267,6 +270,8 @@ def put_all_objects_in_directory(self, source_dir: str, bucket: str, objects_per
             and BP checksums do not match. Note that calculating the checksum is processor intensive, and it also
             requires two reads of the object (first to calculate checksum, and secondly to send the data). The type of
             checksum calculated is determined by the data policy associated with the bucket.
+        job_name : str
+            The name to give the BP put jobs. All BP jobs that are created will have the same name.
         """
         obj_list: List[HelperPutObject] = list()
         job_list: List[str] = list()
@@ -277,8 +282,8 @@ def put_all_objects_in_directory(self, source_dir: str, bucket: str, objects_per
                 size = os.path.getsize(obj_path)
                 obj_list.append(HelperPutObject(object_name=obj_name, file_path=obj_path, size=size))
                 if len(obj_list) >= objects_per_bp_job:
-                    job_list.append(self.put_objects(
-                        obj_list, bucket, max_threads=max_threads, calculate_checksum=calculate_checksum))
+                    job_list.append(self.put_objects(obj_list, bucket, max_threads=max_threads,
+                                                     calculate_checksum=calculate_checksum, job_name=job_name))
                     obj_list = []

             for name in dirs:
@@ -287,17 +292,18 @@ def put_all_objects_in_directory(self, source_dir: str, bucket: str, objects_per
                     path.join(path.normpath(path.relpath(path=dir_path, start=source_dir)), ""))
                 obj_list.append(HelperPutObject(object_name=dir_name, file_path=dir_path, size=0))
                 if len(obj_list) >= objects_per_bp_job:
-                    job_list.append(self.put_objects(
-                        obj_list, bucket, max_threads=max_threads, calculate_checksum=calculate_checksum))
+                    job_list.append(self.put_objects(obj_list, bucket, max_threads=max_threads,
+                                                     calculate_checksum=calculate_checksum, job_name=job_name))
                     obj_list = []

         if len(obj_list) > 0:
             job_list.append(self.put_objects(
-                obj_list, bucket, max_threads=max_threads, calculate_checksum=calculate_checksum))
+                obj_list, bucket, max_threads=max_threads, calculate_checksum=calculate_checksum, job_name=job_name))

         return job_list

-    def get_objects(self, get_objects: List[HelperGetObject], bucket: str, max_threads: int = 5) -> str:
+    def get_objects(self, get_objects: List[HelperGetObject], bucket: str, max_threads: int = 5,
+                    job_name: str = None) -> str:
         """
         Retrieves a list of objects from a Black Pearl bucket.

@@ -309,6 +315,8 @@ def get_objects(self, get_objects: List[HelperGetObject], bucket: str, max_threa
             The name of the bucket where the objects are being retrieved from.
         max_threads : int
             The number of concurrent objects being transferred at once (default 5).
+        job_name : str
+            The name to give the BP get job.
         """
         ds3_get_objects: List[Ds3GetObject] = []
         get_objects_map: Dict[str, HelperGetObject] = dict()
@@ -317,7 +325,8 @@ def get_objects(self, get_objects: List[HelperGetObject], bucket: str, max_threa
             get_objects_map[entry.object_name] = entry

         bulk_get = self.client.get_bulk_job_spectra_s3(GetBulkJobSpectraS3Request(bucket_name=bucket,
-                                                                                  object_list=ds3_get_objects))
+                                                                                  object_list=ds3_get_objects,
+                                                                                  name=job_name))

         job_id = bulk_get.result['JobId']

@@ -369,7 +378,7 @@ def get_blob(self, bucket: str, get_object: HelperGetObject, offset: int, job_id
         stream.close()

     def get_all_files_in_bucket(self, destination_dir: str, bucket: str, objects_per_bp_job: int = 1000,
-                                max_threads: int = 5) -> List[str]:
+                                max_threads: int = 5, job_name: str = None) -> List[str]:
         """
         Retrieves all objects from a Black Pearl bucket.

@@ -385,6 +394,8 @@ def get_all_files_in_bucket(self, destination_dir: str, bucket: str, objects_per
             This determines how many objects to bundle per BP job.
         max_threads : int
             The number of concurrent objects being transferred at once (default 5).
+        job_name : str
+            The name to give the BP get jobs. All BP jobs that are created will have the same name.
         """
         truncated: str = 'true'
         marker = ""
@@ -423,7 +434,8 @@ def get_all_files_in_bucket(self, destination_dir: str, bucket: str, objects_per
             get_objects.append(HelperGetObject(object_name=object_name, destination_path=object_destination))

         if len(get_objects) > 0:
-            job_id = self.get_objects(get_objects=get_objects, bucket=bucket, max_threads=max_threads)
+            job_id = self.get_objects(get_objects=get_objects, bucket=bucket, max_threads=max_threads,
+                                      job_name=job_name)
             job_ids.append(job_id)

         truncated = list_bucket.result['IsTruncated']
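The directory-level helpers accept the same parameter, and, per the docstrings above, every BP job created by a single call shares that one name. Names are therefore not unique handles; the returned job IDs are. Continuing the hypothetical sketch:

# All BP jobs spawned by each call below carry the same name
put_job_ids = helpers.put_all_objects_in_directory(source_dir="/data/to-archive",
                                                   bucket="my-bucket",
                                                   job_name="directory-archive")

get_job_ids = helpers.get_all_files_in_bucket(destination_dir="/data/restored",
                                              bucket="my-bucket",
                                              job_name="directory-restore")

# A job's assigned name can be confirmed by fetching it, as the tests below do
for job_id in put_job_ids + get_job_ids:
    job = client.get_job_spectra_s3(ds3.GetJobSpectraS3Request(job_id=job_id))
    print(job.result['Name'])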

tests/helpersTests.py

Lines changed: 26 additions & 4 deletions
@@ -108,11 +108,12 @@ def test_put_and_get_objects(self):
                                                  include_dirs=False)

         # create the BP helper and perform the put all objects call
+        job_name = "python test job"
         client = ds3.createClientFromEnv()
         client.put_bucket_spectra_s3(ds3.PutBucketSpectraS3Request(name=bucket))

         helpers = ds3Helpers.Helper(client=client)
-        job_id = helpers.put_objects(bucket=bucket, put_objects=put_objects)
+        job_id = helpers.put_objects(bucket=bucket, put_objects=put_objects, job_name=job_name)
         self.assertNotEqual(job_id, "", "job id was returned")

         # verify all the files and directories are on the BP
@@ -123,6 +124,10 @@ def test_put_and_get_objects(self):
             head_obj = client.head_object(ds3.HeadObjectRequest(bucket_name=bucket, object_name=put_object.object_name))
             self.assertNotEqual(head_obj.result, "DOESNTEXIST")

+        # verify that the job was created with the desired name
+        get_job = client.get_job_spectra_s3(ds3.GetJobSpectraS3Request(job_id=job_id))
+        self.assertEqual(get_job.result['Name'], job_name)
+
         # retrieve the files from the BP
         destination = tempfile.TemporaryDirectory(prefix="ds3-python3-sdk-dst-")
         get_objects: List[ds3Helpers.HelperGetObject] = []
@@ -134,7 +139,7 @@ def test_put_and_get_objects(self):
             object_name_to_source[put_object.object_name] = put_object.file_path

         # perform the get objects call
-        job_id = helpers.get_objects(bucket=bucket, get_objects=get_objects)
+        job_id = helpers.get_objects(bucket=bucket, get_objects=get_objects, job_name=job_name)
         self.assertNotEqual(job_id, "", "job id was returned")

         for get_object in get_objects:
@@ -147,13 +152,18 @@ def test_put_and_get_objects(self):
             original_file.close()
             retrieved_file.close()

+        # verify that the job was created with the desired name
+        get_job = client.get_job_spectra_s3(ds3.GetJobSpectraS3Request(job_id=job_id))
+        self.assertEqual(get_job.result['Name'], job_name)
+
         # cleanup
         source.cleanup()
         destination.cleanup()
         client.delete_bucket_spectra_s3(ds3.DeleteBucketSpectraS3Request(bucket_name=bucket, force=True))

     def test_put_and_get_all_objects_in_directory(self):
         bucket = f'ds3-python3-sdk-test-{uuid.uuid1()}'
+        job_name = "python test job"

         # create temporary directory with some files and subdirectories
         source = tempfile.TemporaryDirectory(prefix="ds3-python3-sdk-src-")
@@ -173,19 +183,26 @@ def test_put_and_get_all_objects_in_directory(self):
         client.put_bucket(ds3.PutBucketRequest(bucket_name=bucket))

         helpers = ds3Helpers.Helper(client=client)
-        job_ids = helpers.put_all_objects_in_directory(source_dir=source.name, bucket=bucket, objects_per_bp_job=10)
+        job_ids = helpers.put_all_objects_in_directory(source_dir=source.name, bucket=bucket, objects_per_bp_job=10,
+                                                       job_name=job_name)
         self.assertGreaterEqual(len(job_ids), 1, "received at least one job id")

         # verify all the files and directories are on the BP
         for put_object in put_objects:
             head_obj = client.head_object(ds3.HeadObjectRequest(bucket_name=bucket, object_name=put_object.object_name))
             self.assertNotEqual(head_obj.result, "DOESNTEXIST")

+        # verify that all the jobs were created with the desired name
+        for job_id in job_ids:
+            get_job = client.get_job_spectra_s3(ds3.GetJobSpectraS3Request(job_id=job_id))
+            self.assertEqual(get_job.result['Name'], job_name)
+
         # retrieve the objects from the BP
         destination = tempfile.TemporaryDirectory(prefix="ds3-python3-sdk-dst-")
         job_ids = helpers.get_all_files_in_bucket(destination_dir=destination.name,
                                                   bucket=bucket,
-                                                  objects_per_bp_job=10)
+                                                  objects_per_bp_job=10,
+                                                  job_name=job_name)

         self.assertGreaterEqual(len(job_ids), 2, "multiple job ids returned")

@@ -199,6 +216,11 @@ def test_put_and_get_all_objects_in_directory(self):
             self.assertTrue(os.path.isfile(obj_destination), f'expected path to be file: {obj_destination}')
             self.assertEqual(put_object.size, os.path.getsize(obj_destination), 'file size')

+        # verify that all the jobs were created with the desired name
+        for job_id in job_ids:
+            get_job = client.get_job_spectra_s3(ds3.GetJobSpectraS3Request(job_id=job_id))
+            self.assertEqual(get_job.result['Name'], job_name)
+
         # cleanup
         source.cleanup()
         destination.cleanup()
