Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions server/tests/test_transcriptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import pytest
import os
from unittest.mock import MagicMock, patch
from transcribe import TranscriptionService

@pytest.fixture
def mock_firebase():
    """
    Replace the Firestore handles used by the ``transcribe`` module with mocks.

    While the fixture is active, ``transcribe.db`` is a ``MagicMock`` whose
    ``collection().document().collection().document()`` chain resolves to a
    mocked file reference, and that reference's ``collection().document()``
    chain resolves to a mocked AI-text document.
    ``transcribe.firestore.SERVER_TIMESTAMP`` is replaced by the string
    ``'mock_timestamp'``.

    Yields:
        dict: The mocks under the keys ``'db'``, ``'file_ref'`` and
        ``'ai_text_doc'`` so tests can assert on the calls made to them.
    """
    fake_db = MagicMock()
    fake_file_ref = MagicMock()
    fake_ai_text_doc = MagicMock()

    # Dedicated mocks for the two write methods the tests assert on.
    fake_file_ref.update = MagicMock()
    fake_ai_text_doc.set = MagicMock()

    # db.collection(...).document(...) -> per-user document
    user_doc = fake_db.collection.return_value.document.return_value
    # user_doc.collection(...).document(...) -> file reference
    user_doc.collection.return_value.document.return_value = fake_file_ref
    # file_ref.collection(...).document(...) -> AI text document
    fake_file_ref.collection.return_value.document.return_value = fake_ai_text_doc

    handles = {
        'db': fake_db,
        'file_ref': fake_file_ref,
        'ai_text_doc': fake_ai_text_doc
    }

    # Both patches stay active until the test using the fixture has finished.
    with patch('transcribe.db', fake_db):
        with patch('transcribe.firestore.SERVER_TIMESTAMP', 'mock_timestamp'):
            yield handles

class TestTranscriptionService:
    """Tests for TranscriptionService construction, validation and Firestore writes."""

    def test_initialization_default(self):
        """
        Test that TranscriptionService falls back to its default method
        """
        service = TranscriptionService()
        # Bound methods compare equal when they wrap the same function and instance.
        assert service.transcription_method == service._default_transcription

    def test_initialization_custom_method(self):
        """
        Test that TranscriptionService stores the custom transcription method it is given
        """
        def custom_transcribe(file_path):
            return f"Custom transcription of {file_path}"

        service = TranscriptionService(transcription_method=custom_transcribe)
        # "is not None" would also pass if the custom callable were ignored.
        assert service.transcription_method is custom_transcribe

    def test_transcribe_successful(self, tmp_path, mock_firebase):
        """
        Test that a valid file is transcribed and the result is written to Firestore
        """
        test_file = tmp_path / "test_media.mp3"
        test_file.write_text("Dummy media content")

        mock_ai_text_doc = mock_firebase['ai_text_doc']
        mock_file_ref = mock_firebase['file_ref']

        service = TranscriptionService()
        result = service.transcribe(
            file_path=str(test_file),
            user_id="user123",
            file_id="file456",
            file_type="audio"
        )

        # Verify the transcription document was written exactly once
        mock_ai_text_doc.set.assert_called_once()
        mock_set_call = mock_ai_text_doc.set.call_args[0][0]

        assert 'text_id' in result
        assert 'transcription' in result
        assert result['file_type'] == 'audio'
        assert 'Placeholder transcription for test_media.mp3' in result['transcription']
        assert mock_set_call['text'].startswith('Placeholder transcription for')
        assert mock_set_call['user_id'] == 'user123'
        assert mock_set_call['file_type'] == 'audio'

        # The parent file document must point at the newly created text
        mock_file_ref.update.assert_called_once_with({"text_id": result["text_id"]})

    def test_transcribe_invalid_file_type(self, tmp_path):
        """
        Test that an invalid file type raises a ValueError
        """
        service = TranscriptionService()

        with pytest.raises(ValueError, match="Invalid file type"):
            service.transcribe(
                file_path=str(tmp_path / "test.mp3"),
                user_id="user123",
                file_id="file456",
                file_type="invalid_type"
            )

    def test_transcribe_file_not_found(self, tmp_path):
        """
        Test that a non-existent file raises a FileNotFoundError
        """
        service = TranscriptionService()

        # A path inside the fresh tmp_path is guaranteed not to exist,
        # unlike a hard-coded absolute path on the host machine.
        missing_file = tmp_path / "nonexistent" / "file.mp3"

        with pytest.raises(FileNotFoundError):
            service.transcribe(
                file_path=str(missing_file),
                user_id="user123",
                file_id="file456",
                file_type="audio"
            )

    def test_transcribe_custom_method(self, tmp_path, mock_firebase):
        """
        Test that a custom transcription method produces the stored transcription
        """
        def custom_transcribe(file_path):
            return f"Custom transcription of {os.path.basename(file_path)}"

        service = TranscriptionService(transcription_method=custom_transcribe)

        test_file = tmp_path / "test_media.mp3"
        test_file.write_text("Dummy media content")

        result = service.transcribe(
            file_path=str(test_file),
            user_id="user123",
            file_id="file456",
            file_type="audio"
        )

        mock_file_ref = mock_firebase['file_ref']
        mock_file_ref.update.assert_called_once_with({"text_id": result["text_id"]})
        assert result['transcription'] == 'Custom transcription of test_media.mp3'
89 changes: 89 additions & 0 deletions server/transcribe.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import os
import uuid
from datetime import datetime

from firebase import db
from firebase_admin import firestore

class TranscriptionService:
    """Transcribe uploaded files and store the resulting text in Firestore."""

    # File types accepted by ``transcribe``; each one selects the
    # "<file_type>_files" subcollection under the user's upload document.
    SUPPORTED_FILE_TYPES = ("audio", "video", "text")

    def __init__(self, transcription_method=None):
        """
        Initialize the transcription service with an optional transcription method.

        Args:
            transcription_method (callable, optional): A function that takes a
                file path and returns the transcription text. When omitted,
                a placeholder implementation is used.
        """
        self.transcription_method = transcription_method or self._default_transcription

    def _default_transcription(self, file_path):
        """
        Default transcription method that returns a placeholder transcription.

        Args:
            file_path (str): Path to the file to be transcribed

        Returns:
            str: A placeholder transcription naming the file's basename
        """
        return f"Placeholder transcription for {os.path.basename(file_path)}"

    @staticmethod
    def _generate_text_id():
        """
        Build a document ID for a new transcription.

        The timestamp keeps IDs human-readable; the random suffix keeps two
        transcriptions created within the same second from sharing an ID
        (which would make the second ``set`` overwrite the first document).

        Returns:
            str: An ID of the form ``text_<YYYYmmdd_HHMMSS>_<8 hex chars>``
        """
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        return f"text_{timestamp}_{uuid.uuid4().hex[:8]}"

    def transcribe(self, file_path: str, user_id: str, file_id: str, file_type: str):
        """
        Transcribe a file and store the transcription in Firestore.

        Args:
            file_path (str): Path to the file
            user_id (str): ID of the user who owns the file
            file_id (str): Unique identifier for the file
            file_type (str): Type of file ('audio', 'video', 'text')

        Returns:
            dict: ``text_id``, ``transcription`` and ``file_type`` of the
            stored transcription

        Raises:
            ValueError: If ``file_type`` is not a supported type
            FileNotFoundError: If ``file_path`` does not exist
            RuntimeError: If the transcription or any Firestore write fails;
                the original exception is attached as ``__cause__``
        """
        # Validate file type
        if file_type not in self.SUPPORTED_FILE_TYPES:
            raise ValueError(f"Invalid file type: {file_type}")

        # Check file exists
        if not os.path.exists(file_path):
            raise FileNotFoundError(f"File not found: {file_path}")

        try:
            # Perform transcription
            transcription_text = self.transcription_method(file_path)

            # Generate a collision-resistant text_id
            text_id = self._generate_text_id()

            # Reference to the file document
            collection_name = f"{file_type}_files"
            file_ref = (
                db.collection("uploads")
                .document(user_id)
                .collection(collection_name)
                .document(file_id)
            )

            # Document in the file's "ai_texts" subcollection
            ai_text_ref = file_ref.collection("ai_texts").document(text_id)

            # Store transcription
            ai_text_ref.set({
                "text": transcription_text,
                "user_id": user_id,
                "created_at": firestore.SERVER_TIMESTAMP,
                "conversation_history": [],
                "last_accessed": firestore.SERVER_TIMESTAMP,
                "file_type": file_type,
                "original_file_path": file_path
            })

            # Update file document with text_id
            file_ref.update({"text_id": text_id})

            return {
                "text_id": text_id,
                "transcription": transcription_text,
                "file_type": file_type
            }

        except Exception as e:
            # Chain explicitly so the underlying failure stays visible in tracebacks.
            raise RuntimeError(f"Transcription failed: {str(e)}") from e