Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
114 changes: 114 additions & 0 deletions Backend/AnalysisWorker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
# Backend/AnalysisWorker.py
from PySide6.QtCore import QObject, Signal
from PySide6.QtGui import QImage
import time

from Analysis.SentimentAnalysis import run_sentiment_summary
from Analysis.WordCloud import WordCloudAnalyzer

class AnalysisWorker(QObject):
    """
    Threaded worker that runs the full analysis pipeline (sentiment summary +
    word cloud) over a list of sentences.

    Emits a human-readable status line and an integer percentage for the
    splash screen, then delivers the rendered results as QImage objects via
    dedicated signals.  ``finished`` is emitted exactly once on every exit
    path (success, cancellation, or error), and the reported percentage is
    guaranteed never to move backwards.
    """

    progress_updated = Signal(str)      # human-readable status line
    progress_percentage = Signal(int)   # 0-100, monotonically non-decreasing
    finished = Signal()                 # emitted exactly once per run()
    sentiment_ready = Signal(QImage)    # rendered sentiment summary image
    wordcloud_ready = Signal(QImage)    # rendered word-cloud image

    # Percentage budget per pipeline stage (sums to 100).  Explicit weights
    # replace the original stage/total_stages arithmetic, which let the bar
    # jump backwards between the sentiment stage (up to 70%) and the start
    # of the word-cloud stage (restarting at ~63%).
    _PREP_WEIGHT = 2
    _SENTIMENT_WEIGHT = 45
    _WORDCLOUD_WEIGHT = 45
    _FINALIZE_WEIGHT = 8

    def __init__(self, sentences: list[str], sentiment_size: tuple = (1600, 520),
                 wordcloud_size: tuple = (2800, 1680), max_words: int = 200):
        """
        :param sentences: sentences to analyse (``None`` is treated as empty).
        :param sentiment_size: (width, height) of the sentiment summary image.
        :param wordcloud_size: (width, height) of the word-cloud image.
        :param max_words: maximum number of words rendered in the word cloud.
        """
        super().__init__()
        self.sentences = sentences or []
        self.sent_w, self.sent_h = sentiment_size
        self.wc_w, self.wc_h = wordcloud_size
        self.max_words = max_words
        self._cancelled = False
        self._last_pct = 0  # highest percentage emitted so far

    def cancel(self):
        """Request cooperative cancellation; checked between work units."""
        self._cancelled = True

    def _emit_pct(self, pct: float) -> None:
        """Emit *pct* clamped to [previous value, 100] so the bar is monotonic."""
        value = max(self._last_pct, min(int(pct), 100))
        self._last_pct = value
        self.progress_percentage.emit(value)

    def _check_cancelled(self) -> bool:
        """Report cancellation to the UI; return True when run() should stop."""
        if self._cancelled:
            self.progress_updated.emit("Analysis cancelled.")
            return True
        return False

    def run(self) -> None:
        """Run the whole pipeline; intended to be invoked on a worker thread."""
        try:
            # Stage 1: preparation ------------------------------------------
            self.progress_updated.emit("Preparing sentences for analysis...")
            self._emit_pct(self._PREP_WEIGHT)

            sentences = self.sentences
            n = len(sentences)
            if self._check_cancelled():
                return

            # Stage 2: sentiment --------------------------------------------
            self.progress_updated.emit("Running sentiment analysis...")
            base = self._last_pct
            if n == 0:
                self._emit_pct(base + 1)
            else:
                # run_sentiment_summary is not incremental, so tick the bar
                # in micro-batches before the blocking call to keep the
                # splash screen responsive.
                batch = max(1, n // 20)
                processed = 0
                for i in range(0, n, batch):
                    if self._check_cancelled():
                        return
                    time.sleep(0.01)  # brief yield so queued signals repaint
                    processed += min(batch, n - i)
                    frac = processed / n
                    self._emit_pct(base + frac * self._SENTIMENT_WEIGHT)

            sentiment_img = run_sentiment_summary(sentences, width=self.sent_w, height=self.sent_h)
            self.sentiment_ready.emit(sentiment_img)

            # Stage 3: word cloud -------------------------------------------
            self.progress_updated.emit("Generating word cloud...")
            wc_base = self._last_pct
            # generate_wordcloud blocks; show a few ticks beforehand so the
            # bar does not appear frozen (leave headroom below the stage cap
            # for the post-generation update).
            for tick in range(1, 4):
                if self._check_cancelled():
                    return
                time.sleep(0.05)
                self._emit_pct(wc_base + tick * (self._WORDCLOUD_WEIGHT - 5) / 3)

            wc_img = WordCloudAnalyzer(max_words=self.max_words).generate_wordcloud(
                sentences, width=self.wc_w, height=self.wc_h)
            self.wordcloud_ready.emit(wc_img)
            self._emit_pct(self._PREP_WEIGHT + self._SENTIMENT_WEIGHT + self._WORDCLOUD_WEIGHT)

            # Stage 4: finalizing -------------------------------------------
            self.progress_updated.emit("Finalizing results...")
            time.sleep(0.05)
            self._emit_pct(100)
            self.progress_updated.emit("Analysis complete.")

        except Exception as e:
            # Best-effort error reporting: never let a reporting failure
            # escape the worker thread.
            try:
                self.progress_updated.emit(f"Analysis error: {str(e)}")
            except Exception:
                pass
        finally:
            # Exactly one finished emission on every exit path (success,
            # cancellation return, or exception).
            self.finished.emit()
37 changes: 30 additions & 7 deletions Backend/ScrapeComments.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,30 +31,52 @@ def __init__(self, video_details: Dict[str, List[str]]) -> None:
def run(self) -> None:
"""
Executes the comment fetching process.
Shows video title / channel name instead of raw IDs when available.
"""
try:
total_videos = sum(len(v_list) for v_list in self.video_details.values())
processed_count = 0

self.progress_updated.emit("Starting comment scrape...")
self.progress_percentage.emit(0)

# helper to get title from DB
def _get_title(vid, ch):
    """Return the stored title for *vid*, falling back to the raw video id.

    ``ch`` is accepted but unused here.  Best-effort: any DB error is
    swallowed and the raw id is returned instead.
    """
    try:
        # NOTE(review): assumes VIDEO rows are dict-like with a "title"
        # column -- confirm against the DB layer's fetch() contract.
        rows = self.fetcher.db.fetch("VIDEO", where="video_id=?", params=(vid,))
        if rows:
            # Empty/None titles also fall back to the raw id.
            return rows[0].get("title") or vid
    except Exception:
        pass
    return vid

def _get_channel_name(ch):
    """Return the stored channel name for *ch*, falling back to ``str(ch)``.

    Best-effort: any DB error is swallowed and the stringified id is
    returned instead.
    """
    try:
        # NOTE(review): assumes CHANNEL rows are dict-like with a
        # "channel_name" column -- confirm against the DB layer.
        rows = self.fetcher.db.fetch("CHANNEL", where="channel_id=?", params=(ch,))
        if rows:
            # Empty/None names also fall back to the stringified id.
            return rows[0].get("channel_name") or str(ch)
    except Exception:
        pass
    return str(ch)

for channel_id, video_id_list in self.video_details.items():
channel_name = _get_channel_name(channel_id)
for video_id in video_id_list:
self.progress_updated.emit(f"Fetching comments for {video_id}...")

video_title = _get_title(video_id, channel_id)
self.progress_updated.emit(f"Fetching comments for: \"{video_title}\" (channel: {channel_name})")

# Perform fetch
result = self.fetcher._fetch(video_id, channel_id)

processed_count += 1
percentage = int((processed_count / total_videos) * 100)
self.progress_percentage.emit(percentage)

if result.get("filepath"):
count = result.get("comment_count", 0)
self.progress_updated.emit(f"Saved {count} comments for {video_id}")
self.progress_updated.emit(f"Saved {count} comments for \"{video_title}\"")
else:
self.progress_updated.emit(f"Skipped: {video_id} ({result.get('remarks')})")
self.progress_updated.emit(f"Skipped: \"{video_title}\" ({result.get('remarks')})")

self.progress_updated.emit("Comment scraping completed!")
self.progress_percentage.emit(100)
Expand All @@ -66,6 +88,7 @@ def run(self) -> None:
self.finished.emit()



class CommentFetcher:
"""
A class to fetch YouTube video comments with threads using yt-dlp.
Expand Down
32 changes: 25 additions & 7 deletions Backend/ScrapeTranscription.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,31 +32,49 @@ def __init__(self, video_details: dict[str, list], languages: list = ["en"]) ->
def run(self) -> None:
"""
Executes the transcript fetching process.
Shows human-friendly names (video title) in progress messages when available.
"""
try:
total_videos = sum(len(v_list) for v_list in self.video_details.values())
processed_count = 0

self.progress_updated.emit("Starting transcript scrape...")
self.progress_percentage.emit(0)

language_option = ["en"]

# helper to get title from DB
def _get_title(vid, ch):
    """Return the stored title for *vid*, falling back to the raw video id.

    ``ch`` is accepted but unused here.  Best-effort: any DB error is
    swallowed and the raw id is returned instead.
    """
    try:
        # NOTE(review): assumes VIDEO rows are dict-like with a "title"
        # column -- confirm against the DB layer's fetch() contract.
        rows = self.fetcher.db.fetch("VIDEO", where="video_id=?", params=(vid,))
        if rows:
            # Empty/None titles also fall back to the raw id.
            return rows[0].get("title") or vid
    except Exception:
        pass
    return vid

for channel_id, video_id_list in self.video_details.items():
# try get channel name
try:
ch_rows = self.fetcher.db.fetch("CHANNEL", where="channel_id=?", params=(channel_id,))
channel_name = ch_rows[0].get("channel_name") if ch_rows else str(channel_id)
except Exception:
channel_name = str(channel_id)

for video_id in video_id_list:
self.progress_updated.emit(f"Fetching transcript for {video_id}...")

video_title = _get_title(video_id, channel_id)
self.progress_updated.emit(f"Fetching transcript for: \"{video_title}\"")
# Perform fetch
result = self.fetcher._fetch(video_id, channel_id, language_option)

processed_count += 1
percentage = int((processed_count / total_videos) * 100)
self.progress_percentage.emit(percentage)

if result.get("filepath"):
self.progress_updated.emit(f"Saved: {video_id}")
self.progress_updated.emit(f"Saved: \"{video_title}\"")
else:
self.progress_updated.emit(f"Skipped: {video_id} ({result.get('remarks')})")
self.progress_updated.emit(f"Skipped: \"{video_title}\" ({result.get('remarks')})")

self.progress_updated.emit("Transcript scraping completed!")
self.progress_percentage.emit(100)
Expand Down
Loading
Loading