Revert "ASR STT inference"

This reverts commit d97496c.
SunbirdAI · Apr 13, 2024 · 1633483
1 parent d97496c
commit 1633483
Show file tree

Hide file tree

Showing 23 changed files with 11 additions and 333 deletions.
diff --git a/.flake8 b/.flake8
diff --git a/.gitignore b/.gitignore
@@ -158,6 +158,3 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
-
-# jobs
-jobs/
diff --git a/.isort.cfg b/.isort.cfg
diff --git a/Dockerfile b/Dockerfile
@@ -8,9 +8,9 @@ FROM runpod/base:0.4.0-cuda11.8.0
 
 
 # --- Optional: System dependencies ---
-COPY builder/setup.sh /setup.sh
-RUN /bin/bash /setup.sh && \
-    rm /setup.sh
+# COPY builder/setup.sh /setup.sh
+# RUN /bin/bash /setup.sh && \
+#     rm /setup.sh
 
 
 # Python dependencies

diff --git a/Makefile b/Makefile
diff --git a/bin/build b/bin/build
diff --git a/bin/push b/bin/push
diff --git a/bin/zip b/bin/zip
diff --git a/builder/requirements.txt b/builder/requirements.txt
@@ -6,12 +6,3 @@
 # To learn more, see https://pip.pypa.io/en/stable/reference/requirements-file-format/
 
 runpod==1.6.2
-transformers
-kenlm
-pyctcdecode
-torch
-requests
-python-dotenv
-librosa
-Werkzeug
-google-cloud-storage
diff --git a/builder/setup.sh b/builder/setup.sh
@@ -10,20 +10,14 @@ apt-get update && apt-get upgrade -y # Update System
 # - openssh-server: for ssh access and web terminal
 apt-get install -y --no-install-recommends software-properties-common curl git openssh-server
 
-# Install Python 3.11
+# Install Python 3.10
 add-apt-repository ppa:deadsnakes/ppa -y
-apt-get update && apt-get install -y --no-install-recommends python3.11 python3.11-dev python3.11-distutils
-update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.11 1
-apt-get install build-essential cmake libboost-system-dev libboost-thread-dev \
-        libboost-program-options-dev libboost-test-dev libeigen3-dev zlib1g-dev libbz2-dev \
-        liblzma-dev -y
+apt-get update && apt-get install -y --no-install-recommends python3.10 python3.10-dev python3.10-distutils
+update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.10 1
 
-# Install pip for Python 3.11
+# Install pip for Python 3.10
 curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py
 python3 get-pip.py
 
 # Clean up, remove unnecessary packages and help reduce image size
 apt-get autoremove -y && apt-get clean -y && rm -rf /var/lib/apt/lists/*
-
-gcloud compute scp /Users/patrickcmd/Projects/sunbirdai/sunbird-asr-stt-transcribe-inference.zip sb-asr-stt-inference-instance:~ --zone "us-west1-b" --project "sb-gcp-project-01"
-gcloud compute ssh --zone "us-west1-b" "sb-asr-stt-inference-instance" --project "sb-gcp-project-01"
diff --git a/content/SEMA-PILOT-2023-11-11T080749-8.wav b/content/SEMA-PILOT-2023-11-11T080749-8.wav
diff --git a/content/SEMA1-2022-11-04T120932-3.wav b/content/SEMA1-2022-11-04T120932-3.wav
diff --git a/content/SIMBA 10.1.mp3 b/content/SIMBA 10.1.mp3
diff --git a/content/SIMBA_10.1.mp3 b/content/SIMBA_10.1.mp3
diff --git a/pyproject.toml b/pyproject.toml
diff --git a/requirements-dev.txt b/requirements-dev.txt
diff --git a/src/__init__.py b/src/__init__.py
diff --git a/src/config.py b/src/config.py
diff --git a/src/handler.py b/src/handler.py
@@ -1,77 +1,18 @@
 """ Example handler file. """
 
-import os
-import sys
-import time
-
 import runpod
-from dotenv import load_dotenv
-from huggingface_hub import hf_hub_download
-
-from config import sb_lm_config  # noqa F401
-from utils import KenLM, get_audio_file, load_model_and_processor, transcribe
-
-load_dotenv()
-
-
-current_directory = os.path.dirname(os.path.realpath(__file__))
-sys.path.append(current_directory)
 
 # If your handler runs inference on a model, load the model here.
 # You will want models to be loaded into memory before starting serverless.
 
 
-def main(target_lang, adapter, audio_file):
-    model_id = "facebook/mms-1b-all"
-    target_lang = target_lang
-    adapter = adapter
-    audio_file = audio_file
-    ngram_type = "eng_5gram"  # Specify the desired ngram type (e.g., "5gram", "3gram", "mixed_5gram", "mixed_3gram")
-
-    lm_file_name = f"{target_lang}_{ngram_type}.bin"
-    lm_file_subfolder = "language_model"
-
-    try:
-        lm_file = hf_hub_download(
-            repo_id="Sunbird/sunbird-mms",
-            filename=lm_file_name,
-            subfolder=lm_file_subfolder,
-        )
-    except Exception as e:
-        print(f"Error downloading language model file: {e}")
-        return
-
-    model, processor = load_model_and_processor(model_id, target_lang, adapter)
-    kenlm = KenLM(processor.tokenizer, lm_file)
-
-    transcription_with_lm = transcribe(audio_file, model, processor, kenlm)
-    # transcription_without_lm = transcribe(audio_file, model, processor)
-
-    return transcription_with_lm  # transcription_without_lm
-
-
 def handler(job):
-    """Handler function that will be used to process jobs."""
-    try:
-        job_input = job["input"]
-
-        target_lang = job_input.get("target_lang", "lug")
-        adapter = job_input.get("adapter", "lug")
-        audio_file = get_audio_file(job_input.get("audio_file"))
-
-        start_time = time.time()
+    """ Handler function that will be used to process jobs. """
+    job_input = job['input']
 
-        transcription_with_lm = main(target_lang, adapter, audio_file)
-        response = {"audio_transcription": transcription_with_lm[0]}
-        end_time = time.time()
-        execution_time = end_time - start_time
-        print(
-            f"Audio transcription execution time: {execution_time:.4f} seconds / {execution_time / 60:.4f} minutes"
-        )
-    except Exception as e:
-        response = {"Error": str(e)}
+    name = job_input.get('name', 'World')
 
-    return response
+    return f"Hello, {name}!"
 
 
 runpod.serverless.start({"handler": handler})
diff --git a/src/utils.py b/src/utils.py
diff --git a/test_inference/__init__.py b/test_inference/__init__.py
diff --git a/test_inference/test_runpod_inference.py b/test_inference/test_runpod_inference.py
-Original file line number
+Diff line change
@@ -158,6 +158,3 @@ cython_debug/
     #  and can be added to the global gitignore or merged into this file.  For a more nuclear
     #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
     #.idea/
-    # jobs
-    jobs/