From f08e8b3f36246121c85e52f8e9eae30592c4236c Mon Sep 17 00:00:00 2001
From: Abhishek Kumar
Date: Sun, 25 Aug 2024 19:29:26 +0530
Subject: [PATCH 1/3] Add 701.image-captioning benchmark and its data to the benchmarks-data submodule

Signed-off-by: Abhishek Kumar
---
 benchmarks-data | 2 +-
 .../701.image-captioning/config.json | 6 ++
 .../700.image/701.image-captioning/input.py | 40 +++++++++++
 .../701.image-captioning/python/function.py | 67 +++++++++++++++++++
 .../python/requirements.txt | 3 +
 5 files changed, 117 insertions(+), 1 deletion(-)
 create mode 100644 benchmarks/700.image/701.image-captioning/config.json
 create mode 100644 benchmarks/700.image/701.image-captioning/input.py
 create mode 100644 benchmarks/700.image/701.image-captioning/python/function.py
 create mode 100644 benchmarks/700.image/701.image-captioning/python/requirements.txt

diff --git a/benchmarks-data b/benchmarks-data
index 6a17a460..f407c248 160000
--- a/benchmarks-data
+++ b/benchmarks-data
@@ -1 +1 @@
-Subproject commit 6a17a460f289e166abb47ea6298fb939e80e8beb
+Subproject commit f407c24814f623f77dcb535d882c241909ae7588
diff --git a/benchmarks/700.image/701.image-captioning/config.json b/benchmarks/700.image/701.image-captioning/config.json
new file mode 100644
index 00000000..a9c11904
--- /dev/null
+++ b/benchmarks/700.image/701.image-captioning/config.json
@@ -0,0 +1,6 @@
+{
+    "timeout": 60,
+    "memory": 256,
+    "languages": ["python"]
+}
+
diff --git a/benchmarks/700.image/701.image-captioning/input.py b/benchmarks/700.image/701.image-captioning/input.py
new file mode 100644
index 00000000..d371deac
--- /dev/null
+++ b/benchmarks/700.image/701.image-captioning/input.py
@@ -0,0 +1,40 @@
+import glob
+import os
+
+def buckets_count():
+    return (1, 1)
+
+'''
+    Generate test, small, and large workloads for the image-captioning benchmark.
+
+    :param data_dir: Directory where benchmark data is placed
+    :param size: Workload size
+    :param benchmarks_bucket: Storage container for the benchmark
+    :param input_paths: List of input paths
+    :param output_paths: List of output paths
+    :param upload_func: Upload function taking three params (bucket_idx, key, filepath)
+'''
+def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func):
+    input_files = glob.glob(os.path.join(data_dir, '*.jpg')) + glob.glob(os.path.join(data_dir, '*.png')) + glob.glob(os.path.join(data_dir, '*.jpeg'))
+
+    if not input_files:
+        raise ValueError("No input files found in the provided directory.")
+
+    for file in input_files:
+        img = os.path.relpath(file, data_dir)
+        upload_func(0, img, file)
+
+    input_config = {
+        'object': {
+            'key': img,
+            'width': 200,
+            'height': 200
+        },
+        'bucket': {
+            'bucket': benchmarks_bucket,
+            'input': input_paths[0],
+            'output': output_paths[0]
+        }
+    }
+
+    return input_config
diff --git a/benchmarks/700.image/701.image-captioning/python/function.py b/benchmarks/700.image/701.image-captioning/python/function.py
new file mode 100644
index 00000000..89d28fd7
--- /dev/null
+++ b/benchmarks/700.image/701.image-captioning/python/function.py
@@ -0,0 +1,67 @@
+import datetime
+import io
+import os
+from urllib.parse import unquote_plus
+from PIL import Image
+import torch
+from transformers import VisionEncoderDecoderModel, ViTImageProcessor, AutoTokenizer
+from . import storage
+
+# Load the pre-trained ViT-GPT2 model
+# Model URL: https://huggingface.co/nlpconnect/vit-gpt2-image-captioning
+# License: Apache 2.0 License (https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md)
+model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
+
+model.eval()
+
+client = storage.storage.get_instance()
+
+def generate_caption(image_bytes):
+    image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
+    pixel_values = image_processor(images=image, return_tensors="pt").pixel_values
+
+    with torch.no_grad():
+        generated_ids = model.generate(pixel_values, max_length=16, num_beams=4)
+        generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
+
+    return generated_text
+
+def handler(event):
+    bucket = event.get('bucket').get('bucket')
+    input_prefix = event.get('bucket').get('input')
+    output_prefix = event.get('bucket').get('output')
+    key = unquote_plus(event.get('object').get('key'))
+
+    download_begin = datetime.datetime.now()
+    img = client.download_stream(bucket, os.path.join(input_prefix, key))
+    download_end = datetime.datetime.now()
+
+    process_begin = datetime.datetime.now()
+    caption = generate_caption(img)
+    process_end = datetime.datetime.now()
+
+    upload_begin = datetime.datetime.now()
+    caption_file_name = os.path.splitext(key)[0] + '.txt'
+    caption_file_path = os.path.join(output_prefix, caption_file_name)
+    client.upload_stream(bucket, caption_file_path, io.BytesIO(caption.encode('utf-8')))
+    upload_end = datetime.datetime.now()
+
+    download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1)
+    upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1)
+    process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
+
+    return {
+        'result': {
+            'bucket': bucket,
+            'key': caption_file_path
+        },
+        'measurement': {
+            'download_time': download_time,
+            'download_size': len(img),
+            'upload_time': upload_time,
+            'upload_size': len(caption.encode('utf-8')),
+            'compute_time': process_time
+        }
+    }
diff --git a/benchmarks/700.image/701.image-captioning/python/requirements.txt b/benchmarks/700.image/701.image-captioning/python/requirements.txt
new file mode 100644
index 00000000..8ddcfdf7
--- /dev/null
+++ b/benchmarks/700.image/701.image-captioning/python/requirements.txt
@@ -0,0 +1,3 @@
+transformers==4.44.2
+torch==2.4.0
+pillow==10.4.0

From 412b1b9b2b3a7dca8d68e06c907d01f94609e7c6 Mon Sep 17 00:00:00 2001
From: Abhishek Kumar
Date: Mon, 26 Aug 2024 16:55:47 +0530
Subject: [PATCH 2/3] Move benchmark to 400.inference/421.image-captioning

Signed-off-by: Abhishek Kumar
---
 .../421.image-captioning}/config.json | 0
 .../421.image-captioning}/input.py | 0
 .../421.image-captioning}/python/function.py | 0
 .../421.image-captioning}/python/requirements.txt | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename benchmarks/{700.image/701.image-captioning => 400.inference/421.image-captioning}/config.json (100%)
 rename benchmarks/{700.image/701.image-captioning => 400.inference/421.image-captioning}/input.py (100%)
 rename benchmarks/{700.image/701.image-captioning => 400.inference/421.image-captioning}/python/function.py (100%)
 rename benchmarks/{700.image/701.image-captioning => 400.inference/421.image-captioning}/python/requirements.txt (100%)

diff --git a/benchmarks/700.image/701.image-captioning/config.json b/benchmarks/400.inference/421.image-captioning/config.json
similarity index 100%
rename from benchmarks/700.image/701.image-captioning/config.json
rename to benchmarks/400.inference/421.image-captioning/config.json
diff --git a/benchmarks/700.image/701.image-captioning/input.py b/benchmarks/400.inference/421.image-captioning/input.py
similarity index 100%
rename from benchmarks/700.image/701.image-captioning/input.py
rename to benchmarks/400.inference/421.image-captioning/input.py
diff --git a/benchmarks/700.image/701.image-captioning/python/function.py b/benchmarks/400.inference/421.image-captioning/python/function.py
similarity index 100%
rename from benchmarks/700.image/701.image-captioning/python/function.py
rename to benchmarks/400.inference/421.image-captioning/python/function.py
diff --git a/benchmarks/700.image/701.image-captioning/python/requirements.txt b/benchmarks/400.inference/421.image-captioning/python/requirements.txt
similarity index 100%
rename from benchmarks/700.image/701.image-captioning/python/requirements.txt
rename to benchmarks/400.inference/421.image-captioning/python/requirements.txt

From 2c2f62b5e499789600c0553d907d49bf0325137f Mon Sep 17 00:00:00 2001
From: Abhishek Kumar
Date: Wed, 28 Aug 2024 15:38:24 +0530
Subject: [PATCH 3/3] Return caption directly instead of uploading it

Signed-off-by: Abhishek Kumar
---
 .../400.inference/421.image-captioning/input.py | 4 +++-
 .../421.image-captioning/python/function.py | 15 +--------------
 2 files changed, 4 insertions(+), 15 deletions(-)

diff --git a/benchmarks/400.inference/421.image-captioning/input.py b/benchmarks/400.inference/421.image-captioning/input.py
index d371deac..0aa63175 100644
--- a/benchmarks/400.inference/421.image-captioning/input.py
+++ b/benchmarks/400.inference/421.image-captioning/input.py
@@ -15,7 +15,9 @@ def buckets_count():
     :param upload_func: Upload function taking three params (bucket_idx, key, filepath)
 '''
 def generate_input(data_dir, size, benchmarks_bucket, input_paths, output_paths, upload_func):
-    input_files = glob.glob(os.path.join(data_dir, '*.jpg')) + glob.glob(os.path.join(data_dir, '*.png')) + glob.glob(os.path.join(data_dir, '*.jpeg'))
+    input_files = []
+    for ext in ['*.jpg', '*.jpeg', '*.png']:
+        input_files.extend(glob.glob(os.path.join(data_dir, ext)))
 
     if not input_files:
         raise ValueError("No input files found in the provided directory.")
diff --git a/benchmarks/400.inference/421.image-captioning/python/function.py b/benchmarks/400.inference/421.image-captioning/python/function.py
index 89d28fd7..b9ee4934 100644
--- a/benchmarks/400.inference/421.image-captioning/python/function.py
+++ b/benchmarks/400.inference/421.image-captioning/python/function.py
@@ -8,8 +8,6 @@
 from . import storage
 
 # Load the pre-trained ViT-GPT2 model
-# Model URL: https://huggingface.co/nlpconnect/vit-gpt2-image-captioning
-# License: Apache 2.0 License (https://huggingface.co/datasets/choosealicense/licenses/blob/main/markdown/apache-2.0.md)
 model = VisionEncoderDecoderModel.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 image_processor = ViTImageProcessor.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
 tokenizer = AutoTokenizer.from_pretrained("nlpconnect/vit-gpt2-image-captioning")
@@ -31,7 +29,6 @@ def generate_caption(image_bytes):
 def handler(event):
     bucket = event.get('bucket').get('bucket')
     input_prefix = event.get('bucket').get('input')
-    output_prefix = event.get('bucket').get('output')
     key = unquote_plus(event.get('object').get('key'))
 
     download_begin = datetime.datetime.now()
@@ -42,26 +39,16 @@ def handler(event):
     caption = generate_caption(img)
     process_end = datetime.datetime.now()
 
-    upload_begin = datetime.datetime.now()
-    caption_file_name = os.path.splitext(key)[0] + '.txt'
-    caption_file_path = os.path.join(output_prefix, caption_file_name)
-    client.upload_stream(bucket, caption_file_path, io.BytesIO(caption.encode('utf-8')))
-    upload_end = datetime.datetime.now()
-
     download_time = (download_end - download_begin) / datetime.timedelta(microseconds=1)
-    upload_time = (upload_end - upload_begin) / datetime.timedelta(microseconds=1)
     process_time = (process_end - process_begin) / datetime.timedelta(microseconds=1)
 
     return {
         'result': {
-            'bucket': bucket,
-            'key': caption_file_path
+            'caption': caption,
         },
         'measurement': {
             'download_time': download_time,
             'download_size': len(img),
-            'upload_time': upload_time,
-            'upload_size': len(caption.encode('utf-8')),
             'compute_time': process_time
         }
     }
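
Note for reviewers: after PATCH 3/3 the handler no longer writes anything to the
output prefix; it returns the caption inside the result payload. The event it
consumes is the input_config dictionary built by generate_input() in input.py.
A minimal sketch of such an event; the bucket name, prefixes, and object key
below are hypothetical placeholders, not values from this patch series:

    # Hypothetical event mirroring input_config from input.py.
    event = {
        'object': {
            'key': 'sample.jpg',   # placeholder object key
            'width': 200,          # carried in the config, unused by handler()
            'height': 200
        },
        'bucket': {
            'bucket': 'benchmarks-bucket',  # placeholder bucket name
            'input': 'input',               # placeholder input prefix
            'output': 'output'              # still generated, unused after PATCH 3/3
        }
    }
    # handler(event) downloads <input prefix>/sample.jpg through the storage
    # wrapper, captions it, and returns
    # {'result': {'caption': ...}, 'measurement': {...}}.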
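The captioning pipeline itself can be smoke-tested outside the serverless
harness. Below is a self-contained sketch under the pinned requirements
(transformers 4.44.2, torch 2.4.0, pillow 10.4.0); the local image path is an
assumption, and the checkpoint is fetched from the Hugging Face hub on first
run:

    import io

    import torch
    from PIL import Image
    from transformers import AutoTokenizer, VisionEncoderDecoderModel, ViTImageProcessor

    # Same checkpoint function.py pins (Apache 2.0, per the PATCH 1/3 comments).
    MODEL_ID = "nlpconnect/vit-gpt2-image-captioning"

    model = VisionEncoderDecoderModel.from_pretrained(MODEL_ID)
    image_processor = ViTImageProcessor.from_pretrained(MODEL_ID)
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model.eval()

    def caption(image_bytes):
        # Mirrors generate_caption() in function.py: decode the image,
        # preprocess, then beam-search decode up to 16 tokens.
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        pixel_values = image_processor(images=image, return_tensors="pt").pixel_values
        with torch.no_grad():
            generated_ids = model.generate(pixel_values, max_length=16, num_beams=4)
        return tokenizer.decode(generated_ids[0], skip_special_tokens=True)

    if __name__ == "__main__":
        with open("sample.jpg", "rb") as f:  # hypothetical local test image
            print(caption(f.read()))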