Skip to content

Commit

Permalink
Revert "ASR STT inference"
Browse files (browse the repository at this point in the history)
This reverts commit d97496c.
  • Loading branch information
PatrickCmd committed Apr 13, 2024
1 parent d97496c commit 1633483
Show file tree
Hide file tree
Showing 23 changed files with 11 additions and 333 deletions.
5 changes: 0 additions & 5 deletions .flake8

This file was deleted.

3 changes: 0 additions & 3 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,3 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

# jobs
jobs/
7 changes: 0 additions & 7 deletions .isort.cfg

This file was deleted.

6 changes: 3 additions & 3 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,9 @@ FROM runpod/base:0.4.0-cuda11.8.0


# --- Optional: System dependencies ---
COPY builder/setup.sh /setup.sh
RUN /bin/bash /setup.sh && \
rm /setup.sh
# COPY builder/setup.sh /setup.sh
# RUN /bin/bash /setup.sh && \
# rm /setup.sh


# Python dependencies
Expand Down
12 changes: 0 additions & 12 deletions Makefile

This file was deleted.

5 changes: 0 additions & 5 deletions bin/build

This file was deleted.

5 changes: 0 additions & 5 deletions bin/push

This file was deleted.

3 changes: 0 additions & 3 deletions bin/zip

This file was deleted.

9 changes: 0 additions & 9 deletions builder/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,3 @@
# To learn more, see https://pip.pypa.io/en/stable/reference/requirements-file-format/

runpod==1.6.2
transformers
kenlm
pyctcdecode
torch
requests
python-dotenv
librosa
Werkzeug
google-cloud-storage
14 changes: 4 additions & 10 deletions builder/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,20 +10,14 @@ apt-get update && apt-get upgrade -y # Update System
# - openssh-server: for ssh access and web terminal
apt-get install -y --no-install-recommends software-properties-common curl git openssh-server

# Install Python 3.11
# Install Python 3.10
add-apt-repository ppa:deadsnakes/ppa -y
apt-get update && apt-get install -y --no-install-recommends python3.11 python3.11-dev python3.11-distutils
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
apt-get install build-essential cmake libboost-system-dev libboost-thread-dev \
libboost-program-options-dev libboost-test-dev libeigen3-dev zlib1g-dev libbz2-dev \
liblzma-dev -y
apt-get update && apt-get install -y --no-install-recommends python3.10 python3.10-dev python3.10-distutils
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1

# Install pip for Python 3.11
# Install pip for Python 3.10
curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
python3 get-pip.py

# Clean up, remove unnecessary packages and help reduce image size
apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*

gcloud compute scp /Users/patrickcmd/Projects/sunbirdai/sunbird-asr-stt-transcribe-inference.zip sb-asr-stt-inference-instance:~ --zone "us-west1-b" --project "sb-gcp-project-01"
gcloud compute ssh --zone "us-west1-b" "sb-asr-stt-inference-instance" --project "sb-gcp-project-01"
Binary file removed content/SEMA-PILOT-2023-11-11T080749-8.wav
Binary file not shown.
Binary file removed content/SEMA1-2022-11-04T120932-3.wav
Binary file not shown.
Binary file removed content/SIMBA 10.1.mp3
Binary file not shown.
Binary file removed content/SIMBA_10.1.mp3
Binary file not shown.
19 changes: 0 additions & 19 deletions pyproject.toml

This file was deleted.

3 changes: 0 additions & 3 deletions requirements-dev.txt

This file was deleted.

Empty file removed src/__init__.py
Empty file.
32 changes: 0 additions & 32 deletions src/config.py

This file was deleted.

67 changes: 4 additions & 63 deletions src/handler.py
Original file line number Diff line number Diff line change
@@ -1,77 +1,18 @@
""" Example handler file. """

import os
import sys
import time

import runpod
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download

from config import sb_lm_config # noqa F401
from utils import KenLM, get_audio_file, load_model_and_processor, transcribe

load_dotenv()


current_directory = os.path.dirname(os.path.realpath(__file__))
sys.path.append(current_directory)

# If your handler runs inference on a model, load the model here.
# You will want models to be loaded into memory before starting serverless.


def main(target_lang, adapter, audio_file):
model_id = "facebook/mms-1b-all"
target_lang = target_lang
adapter = adapter
audio_file = audio_file
ngram_type = "eng_5gram" # Specify the desired ngram type (e.g., "5gram", "3gram", "mixed_5gram", "mixed_3gram")

lm_file_name = f"{target_lang}_{ngram_type}.bin"
lm_file_subfolder = "language_model"

try:
lm_file = hf_hub_download(
repo_id="Sunbird/sunbird-mms",
filename=lm_file_name,
subfolder=lm_file_subfolder,
)
except Exception as e:
print(f"Error downloading language model file: {e}")
return

model, processor = load_model_and_processor(model_id, target_lang, adapter)
kenlm = KenLM(processor.tokenizer, lm_file)

transcription_with_lm = transcribe(audio_file, model, processor, kenlm)
# transcription_without_lm = transcribe(audio_file, model, processor)

return transcription_with_lm # transcription_without_lm


def handler(job):
"""Handler function that will be used to process jobs."""
try:
job_input = job["input"]

target_lang = job_input.get("target_lang", "lug")
adapter = job_input.get("adapter", "lug")
audio_file = get_audio_file(job_input.get("audio_file"))

start_time = time.time()
""" Handler function that will be used to process jobs. """
job_input = job['input']

transcription_with_lm = main(target_lang, adapter, audio_file)
response = {"audio_transcription": transcription_with_lm[0]}
end_time = time.time()
execution_time = end_time - start_time
print(
f"Audio transcription execution time: {execution_time:.4f} seconds / {execution_time / 60:.4f} minutes"
)
except Exception as e:
response = {"Error": str(e)}
name = job_input.get('name', 'World')

return response
return f"Hello, {name}!"


runpod.serverless.start({"handler": handler})
103 changes: 0 additions & 103 deletions src/utils.py

This file was deleted.

Empty file removed test_inference/__init__.py
Empty file.
43 changes: 0 additions & 43 deletions test_inference/test_runpod_inference.py

This file was deleted.

Loading

0 comments on commit 1633483

Please sign in to comment.