From facfd7c2b42c2c05ed057c7d0a01d30957fcda8a Mon Sep 17 00:00:00 2001 From: Pronay Sarker Date: Wed, 18 Mar 2026 03:35:21 +0600 Subject: [PATCH 1/3] Benchmarking script for faster-whisper on mobile device --- faster-whisper-benchmark/README.md | 49 ++++++++ faster-whisper-benchmark/main.py | 106 ++++++++++++++++++ faster-whisper-benchmark/metrics/___init__.py | 1 + faster-whisper-benchmark/metrics/extractor.py | 81 +++++++++++++ faster-whisper-benchmark/models/__init__.py | 1 + .../models/whisper_wrappers.py | 32 ++++++ faster-whisper-benchmark/utils/__init__.py | 3 + faster-whisper-benchmark/utils/benchmark.py | 56 +++++++++ 8 files changed, 329 insertions(+) create mode 100644 faster-whisper-benchmark/README.md create mode 100644 faster-whisper-benchmark/main.py create mode 100644 faster-whisper-benchmark/metrics/___init__.py create mode 100644 faster-whisper-benchmark/metrics/extractor.py create mode 100644 faster-whisper-benchmark/models/__init__.py create mode 100644 faster-whisper-benchmark/models/whisper_wrappers.py create mode 100644 faster-whisper-benchmark/utils/__init__.py create mode 100644 faster-whisper-benchmark/utils/benchmark.py diff --git a/faster-whisper-benchmark/README.md b/faster-whisper-benchmark/README.md new file mode 100644 index 00000000..620e7999 --- /dev/null +++ b/faster-whisper-benchmark/README.md @@ -0,0 +1,49 @@ +# Faster Whisper Benchmark + +This repository provides benchmarking utilities for the [faster-whisper](https://github.com/SYSTRAN/faster-whisper) model, profiling both encoder and decoder performance on various devices. It supports logging to Weights & Biases (wandb) and outputs results in a pandas table for easy analysis. + +## Features +- Benchmarks Whisper encoder and decoder separately +- Logs detailed metrics (latency, memory, utilization, etc.) 
+- Supports multiple devices and model variants +- Outputs results as a pandas DataFrame +- Optional logging to wandb + +## Setup + +You must set up Qualcomm AI Hub and set up wandb if you want to log online. + +**Important:** You must use PyTorch version 2.8.0. See [PyTorch previous versions](https://pytorch.org/get-started/previous-versions/). +Otherwise, decoder profiling will fail. + + +## Usage + +Run the main benchmark script: + +```bash +python main.py [--model_id MODEL_ID] [--batch_size N] [--decoder_len N] [--tokens N] [--feature_length N] [--wandb_project NAME] [--wandb_mode online|offline|disabled] +``` + +### Example + +```bash +python main.py --model_id openai/whisper-small --batch_size 1 --feature_length 3000 --wandb_mode online +``` + +## Output +- Prints a summary table of encoder and decoder metrics using pandas +- Logs metrics to wandb if enabled +- Shows total estimated latency and combined memory usage + +## File Structure +- `main.py` — Main benchmarking script +- `scripts/setup_env.py` — Environment and dependency setup +- `models/whisper_wrappers.py` — Model wrapper classes +- `utils/benchmark.py` — Torch utility functions +- `metrics/extractor.py` — Metric extraction and logging + +## Customization +- Edit `devices_list` in `main.py` to benchmark on different devices +- Adjust arguments to benchmark different model sizes or input shapes + diff --git a/faster-whisper-benchmark/main.py b/faster-whisper-benchmark/main.py new file mode 100644 index 00000000..787a7e35 --- /dev/null +++ b/faster-whisper-benchmark/main.py @@ -0,0 +1,106 @@ +import torch +import qai_hub as hub +from transformers import WhisperForConditionalGeneration +import wandb +import argparse +import pandas as pd + +from models import WhisperEncoderWrapper, WhisperDecoderStepWrapper +from utils import get_traced_model, get_traced_model_multi, profile_model +from metrics.extractor import extract_metrics_from_profile + +parser = argparse.ArgumentParser('Faster Whisper 
Benchmark') + +parser.add_argument('--model_id', type=str, default="openai/whisper-small", help='Model ID to benchmark') +parser.add_argument('--batch_size', type=int, default=1, help='Batch size for benchmarking') +parser.add_argument('--decoder_len', type=int, default=16, help='Decoder input length for benchmarking') +parser.add_argument('--tokens', type=int, default=120, help='Estimated number of output tokens to calculate total latency') +parser.add_argument('--feature_length', type=int, default=3000, help='Feature length to benchmark') + +parser.add_argument('--wandb_project', type=str, default="faster-whisper-benchmark", help='Weights & Biases project name') +parser.add_argument('--wandb_mode', type=str, choices=['online', 'offline', 'disabled'], default="online", help='Weights & Biases mode (online/offline/disabled)') + +args, _ = parser.parse_known_args() + +# Keep the devices you want to run the benchmark on +devices_list = [ + # "Google Pixel 3a", + # "Samsung Galaxy S24 (Family)", + "Samsung Galaxy S25 Ultra" +] + +def main(): + + for device in devices_list: + + if args.wandb_mode != 'disabled': + wandb.init( + project = args.wandb_project, + name = f"Model: {args.model_id}, Device: {device},", + mode = args.wandb_mode, + config = vars(args) + ) + + device = hub.Device(device) + base_model = WhisperForConditionalGeneration.from_pretrained(args.model_id).eval().cpu() + encoder_model = WhisperEncoderWrapper(base_model).eval().cpu() + decoder_model = WhisperDecoderStepWrapper(base_model).eval().cpu() + + encoder_shape = (args.batch_size, 80, args.feature_length) + print(f"Benchmarking feature shape: {encoder_shape}") + + # Perform benchmarking for encoder + encoder_profile = profile_model( + get_traced_model(encoder_shape, encoder_model), + device, + {"input_features": encoder_shape}, + ) + + encoder_metrics = extract_metrics_from_profile(encoder_profile) + + with torch.no_grad(): + dummy_features = torch.rand(encoder_shape, dtype=torch.float32) + encoder_hidden = 
encoder_model(dummy_features) + encoder_hidden_shape = tuple(encoder_hidden.shape) + + + decoder_input = torch.ones((args.batch_size, args.decoder_len), dtype=torch.int32) + + # Perform benchmarking for decoder + decoder_profile = profile_model( + get_traced_model_multi((decoder_input, encoder_hidden), decoder_model), + device, + { + "decoder_input_ids": (tuple(decoder_input.shape), "int32"), + "encoder_hidden_states": (encoder_hidden_shape, "float32"), + }, + ) + + decoder_metrics = extract_metrics_from_profile(decoder_profile) + + enc_ms = encoder_metrics.get("estimated_inference_time_ms") + dec_ms = decoder_metrics.get("estimated_inference_time_ms") + est_total_ms = None + if enc_ms is not None and dec_ms is not None: + est_total_ms = enc_ms + args.tokens * dec_ms + + + # Combine total memory usage (sum encoder and decoder peak memory) + encoder_mem = encoder_metrics.get("estimated_inference_peak_memory", 0) + decoder_mem = decoder_metrics.get("estimated_inference_peak_memory", 0) + total_memory_mb = encoder_mem + decoder_mem + + all_metrics = { + "estimated_total_latency_ms": est_total_ms, + "total_memory_mb": total_memory_mb, + "encoder": encoder_metrics, + "decoder": decoder_metrics, + } + + if args.wandb_mode != 'disabled': + wandb.log(all_metrics) + wandb.finish() + + +if __name__ == "__main__": + main() diff --git a/faster-whisper-benchmark/metrics/___init__.py b/faster-whisper-benchmark/metrics/___init__.py new file mode 100644 index 00000000..72b4a8b0 --- /dev/null +++ b/faster-whisper-benchmark/metrics/___init__.py @@ -0,0 +1 @@ +from .extractor import extract_metrics_from_profile \ No newline at end of file diff --git a/faster-whisper-benchmark/metrics/extractor.py b/faster-whisper-benchmark/metrics/extractor.py new file mode 100644 index 00000000..611e02a1 --- /dev/null +++ b/faster-whisper-benchmark/metrics/extractor.py @@ -0,0 +1,81 @@ +import numpy as np + + +def us_to_ms(x): + return x / 1e3 + +def bytes_to_mb(x): + return x / (1024 ** 2) + +def 
extract_metrics_from_profile(profile: dict): + exec_sum = profile.get("execution_summary", {}) + exec_detail = profile.get("execution_detail", []) + + # ── End-to-End Performance ──────────────────────────────────── + times = np.array(exec_sum.get("all_inference_times", [])) + first_load = exec_sum.get("first_load_time", 0) + warm_load = exec_sum.get("warm_load_time", 0) + + metrics = { + "estimated_inference_time_ms": round(us_to_ms(exec_sum.get("estimated_inference_time", 0)), 4), + "mean_latency_ms": round(us_to_ms(times.mean()), 4) if len(times) else None, + "min_latency_ms": round(us_to_ms(times.min()), 4) if len(times) else None, + "max_latency_ms": round(us_to_ms(times.max()), 4) if len(times) else None, + "std_dev_ms": round(us_to_ms(times.std()), 4) if len(times) else None, + "coeff_of_variation": round((times.std() / times.mean()) * 100, 4) if len(times) else None, + "throughput_fps": round(1000 / us_to_ms(times.mean()), 4) if len(times) else None, + "cold_start_ms": round(us_to_ms(first_load), 4), + "warm_start_ms": round(us_to_ms(warm_load), 4), + "speedup_cold_warm": round(first_load / warm_load, 4) if warm_load else None, + } + + # ── Memory Footprint ────────────────────────────────────────── + inf_mem = exec_sum.get("estimated_inference_peak_memory", 0) + cold_mem = exec_sum.get("first_load_peak_memory", 0) + warm_mem = exec_sum.get("warm_load_peak_memory", 0) + + metrics.update({ + "estimated_inference_peak_memory": round(bytes_to_mb(inf_mem), 4), + "cold_start_peak_mb": round(bytes_to_mb(cold_mem), 4), + "warm_start_peak_mb": round(bytes_to_mb(warm_mem), 4), + "memory_reduction_cold_warm_pct": round((1 - warm_mem / cold_mem) * 100, 4) if cold_mem else None, + "memory_reduction_warm_inf_pct": round((1 - inf_mem / warm_mem) * 100, 4) if warm_mem else None, + "memory_efficiency_ratio": round(inf_mem / cold_mem, 4) if cold_mem else None, + }) + + # ── Accelerator Utilization ─────────────────────────────────── + if exec_detail: + total_time = 
sum(op.get("execution_time", 0) for op in exec_detail) + total_op_count = len(exec_detail) + zero_op_count = sum(1 for op in exec_detail if op.get("execution_time", 0) == 0) + nonzero_op_count = total_op_count - zero_op_count + + unit_times = {} + for op in exec_detail: + unit = op.get("compute_unit", "UNKNOWN") + unit_times[unit] = unit_times.get(unit, 0) + op.get("execution_time", 0) + + metrics.update({ + "total_op_count": total_op_count, + "nonzero_op_count": nonzero_op_count, + "zero_op_count": zero_op_count, + "zero_op_percentage": round(zero_op_count / total_op_count * 100, 4) if total_op_count else 0.0, + "avg_op_time_ms": round(us_to_ms(total_time / nonzero_op_count), 4) if nonzero_op_count else 0.0, + "total_op_time_ms": round(us_to_ms(total_time), 4), + "dominant_compute_unit": max(unit_times, key=unit_times.get) if unit_times else "N/A", + "cpu_utilization_percentage": round(unit_times.get("CPU", 0.0) / total_time * 100, 4) if total_time else 0.0, + "gpu_utilization_percentage": round(unit_times.get("GPU", 0.0) / total_time * 100, 4) if total_time else 0.0, + "npu_utilization_percentage": round(unit_times.get("NPU", 0.0) / total_time * 100, 4) if total_time else 0.0, + }) + + # ── Performance Bottlenecks ─────────────────────────────── + top_ops = sorted(exec_detail, key=lambda op: op.get("execution_time", 0), reverse=True)[:15] + top_ops_total = sum(op.get("execution_time", 0) for op in top_ops) + + metrics.update({ + "top15_ops_time_ms": round(us_to_ms(top_ops_total), 4), + "top15_ops_pct_of_total": round(top_ops_total / total_time * 100, 4) if total_time else 0.0, + "effective_op_time_ratio": round(top_ops_total / total_time, 4) if total_time else 0.0, + }) + + return {k: v for k, v in metrics.items() if v is not None} diff --git a/faster-whisper-benchmark/models/__init__.py b/faster-whisper-benchmark/models/__init__.py new file mode 100644 index 00000000..08641b53 --- /dev/null +++ b/faster-whisper-benchmark/models/__init__.py @@ -0,0 +1 @@ +from 
.whisper_wrappers import WhisperEncoderWrapper, WhisperDecoderStepWrapper \ No newline at end of file diff --git a/faster-whisper-benchmark/models/whisper_wrappers.py b/faster-whisper-benchmark/models/whisper_wrappers.py new file mode 100644 index 00000000..61a7b14b --- /dev/null +++ b/faster-whisper-benchmark/models/whisper_wrappers.py @@ -0,0 +1,32 @@ +import torch +import torch.nn as nn +from transformers import WhisperForConditionalGeneration + +class WhisperEncoderWrapper(nn.Module): + def __init__(self, model: WhisperForConditionalGeneration) -> None: + super().__init__() + self.encoder = model.model.encoder + + def forward(self, input_features: torch.Tensor) -> torch.Tensor: + return self.encoder(input_features=input_features).last_hidden_state + +class WhisperDecoderStepWrapper(nn.Module): + def __init__(self, model: WhisperForConditionalGeneration) -> None: + super().__init__() + self.decoder = model.model.decoder + self.proj_out = model.proj_out + + def forward( + self, + decoder_input_ids: torch.Tensor, + encoder_hidden_states: torch.Tensor, + ) -> torch.Tensor: + token_ids = decoder_input_ids.to(torch.int32) + out = self.decoder( + input_ids=token_ids, + encoder_hidden_states=encoder_hidden_states, + use_cache=False, + return_dict=True, + ) + logits = self.proj_out(out.last_hidden_state) + return logits[:, -1, :] diff --git a/faster-whisper-benchmark/utils/__init__.py b/faster-whisper-benchmark/utils/__init__.py new file mode 100644 index 00000000..349aa929 --- /dev/null +++ b/faster-whisper-benchmark/utils/__init__.py @@ -0,0 +1,3 @@ +from .benchmark import ( + profile_model, get_exported_model_multi, get_traced_model, get_traced_model_multi, +) diff --git a/faster-whisper-benchmark/utils/benchmark.py b/faster-whisper-benchmark/utils/benchmark.py new file mode 100644 index 00000000..08911871 --- /dev/null +++ b/faster-whisper-benchmark/utils/benchmark.py @@ -0,0 +1,56 @@ +import torch +import qai_hub as hub + +def run_compile(traced_model, device, 
input_specs_or_shape): + if isinstance(input_specs_or_shape, dict): + input_specs = input_specs_or_shape + else: + input_specs = dict(image=input_specs_or_shape) + + compile_job = hub.submit_compile_job( + model=traced_model, + device=device, + input_specs=input_specs, + ) + + assert isinstance(compile_job, hub.CompileJob) + return compile_job + + +def run_profile(compiled_job, device): + profile_job = hub.submit_profile_job( + model=compiled_job.get_target_model(), + device=device, + name=compiled_job.name + "_profiling", + ) + + assert isinstance(profile_job, hub.ProfileJob) + return profile_job + + +def get_traced_model(input_shape, model, dtype=torch.float32): + example_input = torch.rand(input_shape, dtype=dtype) + with torch.no_grad(): + traced_model = torch.jit.trace(model, example_input) + return traced_model + + +def get_traced_model_multi(example_inputs, model): + with torch.no_grad(): + exported = torch.export.export(model, example_inputs) + tmp_path = "/tmp/decoder_exported.pt2" + torch.export.save(exported, tmp_path) + return tmp_path + +def profile_model(traced_model: torch.jit.ScriptModule, device: hub.Device, input_specs: dict) -> dict: + compiled_model = run_compile(traced_model, device, input_specs) + profiled_model = run_profile(compiled_model, device) + return profiled_model.download_profile() + +def get_exported_model_multi(example_inputs, model): + """Use torch.export for decoder (avoids aten::diff ONNX issue).""" + with torch.no_grad(): + exported = torch.export.export(model, example_inputs) + tmp_path = "/tmp/decoder_exported.pt2" + torch.export.save(exported, tmp_path) + return tmp_path From 180c14351b4370e4cdd90a0f0e437a66f9b109b1 Mon Sep 17 00:00:00 2001 From: Pronay Sarker Date: Wed, 18 Mar 2026 03:51:53 +0600 Subject: [PATCH 2/3] Added notebook for faster-whisper --- .../Faster_whisper_run_colab.ipynb | 399 ++++++++++++++++++ 1 file changed, 399 insertions(+) create mode 100644 faster-whisper-benchmark/Faster_whisper_run_colab.ipynb 
diff --git a/faster-whisper-benchmark/Faster_whisper_run_colab.ipynb b/faster-whisper-benchmark/Faster_whisper_run_colab.ipynb new file mode 100644 index 00000000..984a8145 --- /dev/null +++ b/faster-whisper-benchmark/Faster_whisper_run_colab.ipynb @@ -0,0 +1,399 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "provenance": [], + "gpuType": "T4" + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + }, + "language_info": { + "name": "python" + }, + "accelerator": "GPU" + }, + "cells": [ + { + "cell_type": "markdown", + "source": [ + "Must use PyTorch version 2.8.0.\n", + "See [PyTorch previous versions](https://pytorch.org/get-started/previous-versions/)." + ], + "metadata": { + "id": "1N2nvyOZTHKc" + } + }, + { + "cell_type": "code", + "source": [ + "# install with this command, without this version for pytorch, profiling for decoder will fail\n", + "!pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu129" + ], + "metadata": { + "id": "2SmibWiXeTZw" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "code", + "source": [ + "import torch\n", + "print(torch.__version__)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "-9kZdot5XJSy", + "outputId": "41569e1b-fba1-4e40-b8ab-a7b4595183e9" + }, + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "2.8.0+cu129\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!git clone https://gitlab.com/itsPronay/faster-whisper-benchmark.git" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "uv-RDVMp9eze", + "outputId": "5c6bd2dd-c52e-4f53-ab93-6a6db9163eb4" + }, + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Cloning into 'faster-whisper-benchmark'...\n", + "remote: Enumerating objects: 23, done.\u001b[K\n", + "remote: 
Counting objects: 100% (3/3), done.\u001b[K\n", + "remote: Compressing objects: 100% (3/3), done.\u001b[K\n", + "remote: Total 23 (delta 1), reused 0 (delta 0), pack-reused 20 (from 1)\u001b[K\n", + "Receiving objects: 100% (23/23), 8.63 KiB | 4.32 MiB/s, done.\n", + "Resolving deltas: 100% (3/3), done.\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip3 install qai-hub\n", + "!qai-hub configure --api_token API_TOKEN" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "sYUjzSLE-4xg", + "outputId": "a30e0ec3-5c19-464b-97cd-68f3bdec9ed1" + }, + "execution_count": 3, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting qai-hub\n", + " Downloading qai_hub-0.46.0-py3-none-any.whl.metadata (2.6 kB)\n", + "Collecting backoff>=2.2 (from qai-hub)\n", + " Downloading backoff-2.2.1-py3-none-any.whl.metadata (14 kB)\n", + "Requirement already satisfied: deprecation in /usr/local/lib/python3.12/dist-packages (from qai-hub) (2.1.0)\n", + "Requirement already satisfied: h5py<4,>=2.10.0 in /usr/local/lib/python3.12/dist-packages (from qai-hub) (3.16.0)\n", + "Requirement already satisfied: numpy<3,>=1.22.0 in /usr/local/lib/python3.12/dist-packages (from qai-hub) (2.0.2)\n", + "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.12/dist-packages (from qai-hub) (26.0)\n", + "Requirement already satisfied: prettytable>=3.9.0 in /usr/local/lib/python3.12/dist-packages (from qai-hub) (3.17.0)\n", + "Requirement already satisfied: protobuf<=6.31.1,>=3.20 in /usr/local/lib/python3.12/dist-packages (from qai-hub) (5.29.6)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.12/dist-packages (from qai-hub) (2.32.4)\n", + "Requirement already satisfied: requests-toolbelt in /usr/local/lib/python3.12/dist-packages (from qai-hub) (1.0.0)\n", + "Collecting s3transfer<0.14,>=0.10.3 (from qai-hub)\n", + " Downloading s3transfer-0.13.1-py3-none-any.whl.metadata 
(1.7 kB)\n", + "Collecting semver>=3.0 (from qai-hub)\n", + " Downloading semver-3.0.4-py3-none-any.whl.metadata (6.8 kB)\n", + "Requirement already satisfied: tqdm in /usr/local/lib/python3.12/dist-packages (from qai-hub) (4.67.3)\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /usr/local/lib/python3.12/dist-packages (from qai-hub) (4.15.0)\n", + "Requirement already satisfied: wcwidth in /usr/local/lib/python3.12/dist-packages (from prettytable>=3.9.0->qai-hub) (0.6.0)\n", + "Collecting botocore<2.0a.0,>=1.37.4 (from s3transfer<0.14,>=0.10.3->qai-hub)\n", + " Downloading botocore-1.42.70-py3-none-any.whl.metadata (5.9 kB)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests->qai-hub) (3.4.5)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests->qai-hub) (3.11)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests->qai-hub) (2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests->qai-hub) (2026.2.25)\n", + "Collecting jmespath<2.0.0,>=0.7.1 (from botocore<2.0a.0,>=1.37.4->s3transfer<0.14,>=0.10.3->qai-hub)\n", + " Downloading jmespath-1.1.0-py3-none-any.whl.metadata (7.6 kB)\n", + "Requirement already satisfied: python-dateutil<3.0.0,>=2.1 in /usr/local/lib/python3.12/dist-packages (from botocore<2.0a.0,>=1.37.4->s3transfer<0.14,>=0.10.3->qai-hub) (2.9.0.post0)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.12/dist-packages (from python-dateutil<3.0.0,>=2.1->botocore<2.0a.0,>=1.37.4->s3transfer<0.14,>=0.10.3->qai-hub) (1.17.0)\n", + "Downloading qai_hub-0.46.0-py3-none-any.whl (117 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m117.7/117.7 kB\u001b[0m \u001b[31m13.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + 
"\u001b[?25hDownloading backoff-2.2.1-py3-none-any.whl (15 kB)\n", + "Downloading s3transfer-0.13.1-py3-none-any.whl (85 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m85.3/85.3 kB\u001b[0m \u001b[31m11.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading semver-3.0.4-py3-none-any.whl (17 kB)\n", + "Downloading botocore-1.42.70-py3-none-any.whl (14.7 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m14.7/14.7 MB\u001b[0m \u001b[31m110.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading jmespath-1.1.0-py3-none-any.whl (20 kB)\n", + "Installing collected packages: semver, jmespath, backoff, botocore, s3transfer, qai-hub\n", + "Successfully installed backoff-2.2.1 botocore-1.42.70 jmespath-1.1.0 qai-hub-0.46.0 s3transfer-0.13.1 semver-3.0.4\n", + "2026-03-17 20:00:10.328 - INFO - Enabling verbose logging.\n", + "qai-hub configuration saved to /root/.qai_hub/client.ini\n", + "==================== /root/.qai_hub/client.ini ====================\n", + "[api]\n", + "api_token = [REDACTED]\n", + "api_url = https://workbench.aihub.qualcomm.com\n", + "web_url = https://workbench.aihub.qualcomm.com\n", + "verbose = True\n", + "\n", + "\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install wandb\n", + "import wandb\n", + "wandb.login()" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "LisAo0UHOzBP", + "outputId": "dd09cee0-7b55-4ec6-ecda-388794d309e9" + }, + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: wandb in /usr/local/lib/python3.12/dist-packages (0.25.1)\n", + "Requirement already satisfied: click>=8.0.1 in /usr/local/lib/python3.12/dist-packages (from wandb) (8.3.1)\n", + "Requirement already satisfied: gitpython!=3.1.29,>=1.0.0 in 
/usr/local/lib/python3.12/dist-packages (from wandb) (3.1.46)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.12/dist-packages (from wandb) (26.0)\n", + "Requirement already satisfied: platformdirs in /usr/local/lib/python3.12/dist-packages (from wandb) (4.9.4)\n", + "Requirement already satisfied: protobuf!=5.28.0,!=5.29.0,<7,>4.21.0 in /usr/local/lib/python3.12/dist-packages (from wandb) (5.29.6)\n", + "Requirement already satisfied: pydantic<3 in /usr/local/lib/python3.12/dist-packages (from wandb) (2.12.3)\n", + "Requirement already satisfied: pyyaml in /usr/local/lib/python3.12/dist-packages (from wandb) (6.0.3)\n", + "Requirement already satisfied: requests<3,>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from wandb) (2.32.4)\n", + "Requirement already satisfied: sentry-sdk>=2.0.0 in /usr/local/lib/python3.12/dist-packages (from wandb) (2.54.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.8 in /usr/local/lib/python3.12/dist-packages (from wandb) (4.15.0)\n", + "Requirement already satisfied: gitdb<5,>=4.0.1 in /usr/local/lib/python3.12/dist-packages (from gitpython!=3.1.29,>=1.0.0->wandb) (4.0.12)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.12/dist-packages (from pydantic<3->wandb) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.41.4 in /usr/local/lib/python3.12/dist-packages (from pydantic<3->wandb) (2.41.4)\n", + "Requirement already satisfied: typing-inspection>=0.4.2 in /usr/local/lib/python3.12/dist-packages (from pydantic<3->wandb) (0.4.2)\n", + "Requirement already satisfied: charset_normalizer<4,>=2 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.0.0->wandb) (3.4.5)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.0.0->wandb) (3.11)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.0.0->wandb) 
(2.5.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.12/dist-packages (from requests<3,>=2.0.0->wandb) (2026.2.25)\n", + "Requirement already satisfied: smmap<6,>=3.0.1 in /usr/local/lib/python3.12/dist-packages (from gitdb<5,>=4.0.1->gitpython!=3.1.29,>=1.0.0->wandb) (5.0.3)\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.12/dist-packages/notebook/notebookapp.py:191: SyntaxWarning: invalid escape sequence '\\/'\n", + " | |_| | '_ \\/ _` / _` | _/ -_)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: (1) Create a W&B account\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: (2) Use an existing W&B account\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: (3) Don't visualize my results\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Enter your choice:" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " 2\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: You chose 'Use an existing W&B account'\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Logging into https://api.wandb.ai. 
(Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Create a new API key at: https://wandb.ai/authorize?ref=models\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Store your API key securely and do not share it.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Paste your API key and hit enter:" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + " ··········\n" + ] + }, + { + "output_type": "stream", + "name": "stderr", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: No netrc file found, creating one.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mitspronay\u001b[0m (\u001b[33mitspronay-southeast-university\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "True" + ] + }, + "metadata": {}, + "execution_count": 4 + } + ] + }, + { + "cell_type": "code", + "source": [ + "!python /content/faster-whisper-benchmark/main.py" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "YWgxUb679rxt", + "outputId": "03626181-65d1-45c0-e724-6cc99848e9a8" + }, + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[34m\u001b[1mwandb\u001b[0m: [wandb.login()] Loaded credentials for https://api.wandb.ai from /root/.netrc.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Currently logged in as: \u001b[33mitspronay\u001b[0m (\u001b[33mitspronay-southeast-university\u001b[0m) to \u001b[32mhttps://api.wandb.ai\u001b[0m. 
Use \u001b[1m`wandb login --relogin`\u001b[0m to force relogin\n", + "\u001b]11;?\u0007\u001b[c\u001b]11;?\u0007\u001b[c\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⢿\u001b[0m Waiting for wandb.init()...\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m setting up run 5gna6n0d (0.2s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣽\u001b[0m setting up run 5gna6n0d (0.2s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣾\u001b[0m setting up run 5gna6n0d (0.2s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣷\u001b[0m setting up run 5gna6n0d (0.2s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣯\u001b[0m setting up run 5gna6n0d (0.2s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣟\u001b[0m setting up run 5gna6n0d (0.7s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⡿\u001b[0m setting up run 5gna6n0d (0.7s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⢿\u001b[0m setting up run 5gna6n0d (0.7s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m setting up run 5gna6n0d (0.7s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Tracking run with wandb version 0.25.1\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run data is saved locally in \u001b[35m\u001b[1m/content/wandb/run-20260317_213138-5gna6n0d\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run \u001b[1m`wandb offline`\u001b[0m to turn off syncing.\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Syncing run \u001b[33mModel: openai/whisper-small, Device: Samsung Galaxy S24 (Family),\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ View project at \u001b[34m\u001b[4mhttps://wandb.ai/itspronay-southeast-university/faster-whisper\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run at \u001b[34m\u001b[4mhttps://wandb.ai/itspronay-southeast-university/faster-whisper/runs/5gna6n0d\u001b[0m\n", + "Warning: You are sending unauthenticated requests to the HF Hub. 
Please set a HF_TOKEN to enable higher rate limits and faster downloads.\n", + "Loading weights: 100% 479/479 [00:00<00:00, 771.04it/s, Materializing param=model.encoder.layers.11.self_attn_layer_norm.weight]\n", + "Benchmarking feature shape: (1, 80, 3000)\n", + "`loss_type=None` was set in the config but it is unrecognized. Using the default loss: `ForCausalLMLoss`.\n", + "/usr/local/lib/python3.12/dist-packages/transformers/models/whisper/modeling_whisper.py:634: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " if input_features.shape[-1] != expected_seq_length:\n", + "/usr/local/lib/python3.12/dist-packages/transformers/integrations/sdpa_attention.py:77: TracerWarning: Converting a tensor to a Python boolean might cause the trace to be incorrect. We can't record the data flow of Python values, so this value will be treated as a constant in the future. This means that the trace might not generalize to other inputs!\n", + " is_causal = query.shape[2] > 1 and attention_mask is None and is_causal\n", + "Uploading tmpyb0t6rfi.pt\n", + "100% 337M/337M [00:05<00:00, 61.7MB/s]\n", + "Scheduled compile job (jg93l13lg) successfully. To see the status and results:\n", + " https://workbench.aihub.qualcomm.com/jobs/jg93l13lg/\n", + "\n", + "Waiting for compile job (jg93l13lg) completion. Type Ctrl+C to stop waiting at any time.\n", + " ✅ SUCCESS \u0007\n", + "Scheduled profile job (jgdrx9rep) successfully. To see the status and results:\n", + " https://workbench.aihub.qualcomm.com/jobs/jgdrx9rep/\n", + "\n", + "Waiting for profile job (jgdrx9rep) completion. 
Type Ctrl+C to stop waiting at any time.\n", + " ✅ SUCCESS \u0007\n", + "Uploading decoder_exported.pt2\n", + "100% 592M/592M [00:09<00:00, 67.2MB/s]\n", + "Scheduled compile job (jg93l13wg) successfully. To see the status and results:\n", + " https://workbench.aihub.qualcomm.com/jobs/jg93l13wg/\n", + "\n", + "Waiting for compile job (jg93l13wg) completion. Type Ctrl+C to stop waiting at any time.\n", + " ✅ SUCCESS \u0007\n", + "Scheduled profile job (jp27k67r5) successfully. To see the status and results:\n", + " https://workbench.aihub.qualcomm.com/jobs/jp27k67r5/\n", + "\n", + "Waiting for profile job (jp27k67r5) completion. Type Ctrl+C to stop waiting at any time.\n", + " ✅ SUCCESS \u0007\n", + "\n", + "Summary Table:\n", + " stage ... effective_op_time_ratio\n", + "0 encoder ... 0.4757\n", + "1 decoder ... 0.3432\n", + "\n", + "[2 rows x 30 columns]\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⢿\u001b[0m updating run metadata (0.0s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m updating run metadata (0.0s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣽\u001b[0m updating run metadata (0.0s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣾\u001b[0m updating run metadata (0.0s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣷\u001b[0m Finishing up...\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣯\u001b[0m Finishing up...\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣟\u001b[0m Finishing up...\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⡿\u001b[0m Finishing up...\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⢿\u001b[0m Finishing up...\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m uploading wandb-summary.json 1.8KB/1.8KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m uploading config.yaml 2.1KB/2.1KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m uploading 
output.log 2.6KB/2.6KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣽\u001b[0m uploading wandb-summary.json 1.8KB/1.8KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣽\u001b[0m uploading config.yaml 2.1KB/2.1KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣽\u001b[0m uploading output.log 2.6KB/2.6KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣾\u001b[0m uploading wandb-summary.json 1.8KB/1.8KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣾\u001b[0m uploading config.yaml 2.1KB/2.1KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣾\u001b[0m uploading output.log 2.6KB/2.6KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣷\u001b[0m uploading wandb-summary.json 1.8KB/1.8KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣷\u001b[0m uploading config.yaml 2.1KB/2.1KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣷\u001b[0m uploading output.log 2.6KB/2.6KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣯\u001b[0m uploading wandb-summary.json 1.8KB/1.8KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣯\u001b[0m uploading config.yaml 2.1KB/2.1KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣯\u001b[0m uploading output.log 2.6KB/2.6KB (0.3s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣟\u001b[0m uploading wandb-summary.json 1.8KB/1.8KB (0.8s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣟\u001b[0m uploading config.yaml 2.1KB/2.1KB (0.8s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⡿\u001b[0m uploading wandb-summary.json 1.8KB/1.8KB (0.8s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⡿\u001b[0m uploading config.yaml 2.1KB/2.1KB (0.8s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⢿\u001b[0m uploading wandb-summary.json 1.8KB/1.8KB (0.8s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 
\u001b[38;5;178m⢿\u001b[0m uploading config.yaml 2.1KB/2.1KB (0.8s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m uploading wandb-summary.json 1.8KB/1.8KB (0.8s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣻\u001b[0m uploading config.yaml 2.1KB/2.1KB (0.8s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣽\u001b[0m uploading wandb-summary.json 1.8KB/1.8KB (0.8s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣽\u001b[0m uploading config.yaml 2.1KB/2.1KB (0.8s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣾\u001b[0m uploading history steps 0-0, summary, console lines 31-38 (0.0s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣷\u001b[0m uploading history steps 0-0, summary, console lines 31-38 (0.0s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣯\u001b[0m uploading history steps 0-0, summary, console lines 31-38 (0.0s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⣟\u001b[0m uploading history steps 0-0, summary, console lines 31-38 (0.0s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[38;5;178m⡿\u001b[0m uploading history steps 0-0, summary, console lines 31-38 (0.0s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run history:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: estimated_total_latency_ms ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: total_memory_mb ▁\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Run summary:\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: estimated_total_latency_ms 2018.523\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: total_memory_mb 805.2383\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: \n", + "\u001b[34m\u001b[1mwandb\u001b[0m: 🚀 View run \u001b[33mModel: openai/whisper-small, Device: Samsung Galaxy S24 (Family),\u001b[0m at: \u001b[34m\u001b[4mhttps://wandb.ai/itspronay-southeast-university/faster-whisper/runs/5gna6n0d\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: ⭐️ 
View project at: \u001b[34m\u001b[4mhttps://wandb.ai/itspronay-southeast-university/faster-whisper\u001b[0m\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)\n", + "\u001b[34m\u001b[1mwandb\u001b[0m: Find logs at: \u001b[35m\u001b[1m./wandb/run-20260317_213138-5gna6n0d/logs\u001b[0m\n" + ] + } + ] + } + ] +} \ No newline at end of file From ed545507ff165aa453dc18c031bcd9ed94b95b7b Mon Sep 17 00:00:00 2001 From: Pronay Sarker Date: Wed, 18 Mar 2026 17:17:47 +0600 Subject: [PATCH 3/3] Update faster-whisper-benchmark/Faster_whisper_run_colab.ipynb Co-authored-by: Biplab Dutta --- faster-whisper-benchmark/Faster_whisper_run_colab.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/faster-whisper-benchmark/Faster_whisper_run_colab.ipynb b/faster-whisper-benchmark/Faster_whisper_run_colab.ipynb index 984a8145..672042ce 100644 --- a/faster-whisper-benchmark/Faster_whisper_run_colab.ipynb +++ b/faster-whisper-benchmark/Faster_whisper_run_colab.ipynb @@ -20,7 +20,7 @@ "cell_type": "markdown", "source": [ "Must use pytorch version 2.8.0\n", - "https://pytorch.org/get-started/previous-versions/[link text](https://)" +"[https://pytorch.org/get-started/previous-versions/](https://pytorch.org/get-started/previous-versions/)" ], "metadata": { "id": "1N2nvyOZTHKc"