diff --git a/Backend/ScrapeVideo.py b/Backend/ScrapeVideo.py
index 1f94b39..475e5e8 100644
--- a/Backend/ScrapeVideo.py
+++ b/Backend/ScrapeVideo.py
@@ -247,302 +247,163 @@ class VideoWorker(QObject):
     progress_percentage = Signal(int)
     finished = Signal()
 
-    def __init__(self, channel_id: str, channel_url: str):
-        """
-        Initializes the VideoWorker instance.
-
-        Args:
-            channel_id (str): The YouTube channel ID.
-            channel_url (str): The YouTube channel URL.
-        """
+    def __init__(self, channel_id: str, channel_url: str, scrape_shorts: bool):
         super().__init__()
         self.db: DatabaseManager = app_state.db
         self.channel_id = channel_id
         self.channel_url = channel_url
+        self.scrape_shorts = scrape_shorts
+
         self.types = {
             "videos": "videos",
             "shorts": "shorts",
             "live": "streams"
         }
-        self.current_type_counter = 0
-
-    @Slot(int, int)
-    def update_from_async(self, completed: int, total: int):
-        """
-        Slot to receive async progress updates safely in main thread.
-        Parameters:
-            completed (int): The number of completed tasks.
-            total (int): The total number of tasks.
+        if not self.scrape_shorts:
+            self.types.pop("shorts", None)
 
-        Emits:
-            progress_updated (str): The progress message.
-            progress_percentage (int): The progress percentage.
-
-        """
-        progress_msg = f"[Shorts] Fetching metadata: {completed}/{total} shorts"
-        self.progress_updated.emit(progress_msg)
-        type_progress = int((self.current_type_counter - 1) * 33 + (completed / total) * 20)
-        self.progress_percentage.emit(min(type_progress, 95))
+        self.current_type_counter = 0
 
-    def fetch_video_urls(self, scrape_shorts: bool = False) -> None:
+    @Slot()
+    def run(self):
         """
-        Wrapper to run async video fetching.
-
-        This function creates a new event loop and runs the `_fetch_video_urls_async` coroutine.
-        If an exception occurs, it prints the error message and emits the `progress_updated` and `progress_percentage` signals.
-        Finally, it closes the event loop.
-
-        Parameters:
-            scrape_shorts (bool): Whether to scrape shorts or not. Defaults to False.
+        SAFE ENTRY POINT FOR QTHREAD
         """
         try:
-            loop = asyncio.new_event_loop()
-            asyncio.set_event_loop(loop)
-            loop.run_until_complete(self._fetch_video_urls_async(scrape_shorts=bool(scrape_shorts)))
-        except Exception as e:
-            logger.exception("Error while fetching video URLs:")
-            self.progress_updated.emit(f"Error while fetching video URLs: {e}")
-            self.progress_percentage.emit(0)
-            self.finished.emit()
+            asyncio.run(self._fetch_video_urls_async())
+        except Exception:
+            logger.exception("VideoWorker crashed:")
         finally:
-            try:
-                loop.close()
-            except Exception:
-                pass
+            # ✅ GUARANTEED EXIT PATH
+            self.finished.emit()
 
-    async def _fetch_video_urls_async(self, scrape_shorts: bool):
-        """
-        Fetch and process videos by type (videos, shorts, live) using scrapetube.
-        Downloads thumbnails asynchronously and updates DB in batches.
+    @Slot(int, int)
+    def update_from_async(self, completed: int, total: int):
+        msg = f"[Shorts] Fetching metadata: {completed}/{total}"
+        self.progress_updated.emit(msg)
+        pct = int((self.current_type_counter - 1) * 33 + (completed / total) * 20)
+        self.progress_percentage.emit(min(pct, 95))
 
-        Parameters:
-            scrape_shorts (bool): Whether to scrape shorts or not. Defaults to False.
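+    # Cooperative cancellation: QThread.requestInterruption() only sets a flag,
+    # so the scrape loops below must poll it and return early themselves.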
+    # ✅ INTERRUPTION SAFE CHECK
+    def _should_stop(self):
+        from PySide6.QtCore import QThread
+        return QThread.currentThread().isInterruptionRequested()
 
-        Returns:
-            None
-        """
-        if not scrape_shorts:
-            self.types.pop("shorts")
+    async def _fetch_video_urls_async(self):
         try:
             self.progress_updated.emit("Starting scrapetube scraping...")
             self.progress_percentage.emit(0)
 
-            all_videos = []
+
             total_processed = 0
-            type_counter = 0
+
             channel_thumb_dir = os.path.join(self.db.thumbnail_dir, str(self.channel_id))
             os.makedirs(channel_thumb_dir, exist_ok=True)
 
-            # Create aiohttp session for all downloads
+
             timeout = aiohttp.ClientTimeout(total=30)
             async with aiohttp.ClientSession(timeout=timeout) as session:
-                thumbnail_semaphore = asyncio.Semaphore(20)  # Limit concurrent thumbnail downloads
+                thumbnail_semaphore = asyncio.Semaphore(20)
 
-                # === Process each content type ===
-                for vtype, ctype in self.types.items():
-                    type_counter += 1
-                    self.current_type_counter = type_counter
+                for i, (vtype, ctype) in enumerate(self.types.items(), start=1):
+
+                    # ✅ USER CANCEL SUPPORT
+                    if self._should_stop():
+                        self.progress_updated.emit("Scraping cancelled by user")
+                        return
+
+                    self.current_type_counter = i
                     self.progress_updated.emit(f"Fetching {vtype.capitalize()}...")
-                    self.progress_percentage.emit(int((type_counter - 1) * 33))
+                    self.progress_percentage.emit(int((i - 1) * 33))
+
+                    videos = list(scrapetube.get_channel(
+                        channel_url=self.channel_url,
+                        content_type=ctype
+                    ))
 
-                    # scrapetube.get_channel(channel_url=..., content_type="shorts"/"streams"/None)
-                    videos = list(scrapetube.get_channel(channel_url=self.channel_url, content_type=ctype))
                     if not videos:
-                        self.progress_updated.emit(f"No {vtype} found.")
                         continue
-
-                    self.progress_updated.emit(f"Fetched {len(videos)} {vtype}. Parsing data...")
-                    all_videos.extend(videos)
+                    self.progress_updated.emit(f"Fetched {len(videos)} {vtype}")
 
-                    # === For shorts, fetch all metadata in parallel first ===
+                    # === SHORTS METADATA ===
+                    shorts_metadata = {}
                     if vtype == "shorts":
                         video_ids = [v.get("videoId") for v in videos if v.get("videoId")]
-                        self.progress_updated.emit(f"[Shorts] Fetching metadata for {len(video_ids)} shorts (async mode)...")
-
-                        # Pass self as the callback target
                         shorts_metadata = await fetch_shorts_batch_async(
-                            video_ids,
+                            video_ids,
                             progress_callback=self,
                             max_concurrent=30
                         )
-                        self.progress_updated.emit(f"[Shorts] Metadata fetched! Now processing {len(videos)} shorts...")
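+                    # The rows assembled below use this metadata when present;
+                    # the other content types leave it empty and fall back to
+                    # the scrapetube fields.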
-                    else:
-                        shorts_metadata = {}
 
-                    # === Collect all thumbnail download tasks and video data ===
                     thumbnail_tasks = []
                     videos_to_insert = []
 
                     for idx, video in enumerate(videos):
+
+                        if self._should_stop():
+                            self.progress_updated.emit("Scraping cancelled by user")
+                            return
+
                         video_id = video.get("videoId")
                         if not video_id:
                             continue
 
-                        # For shorts, use pre-fetched metadata
-                        if vtype == "shorts":
-                            shorts_meta = shorts_metadata.get(video_id)
-
-                            if shorts_meta and not shorts_meta.get('error'):
-                                title = shorts_meta['title']
-                                description = shorts_meta['description']
-                                duration_in_seconds = shorts_meta['duration']
-                                duration = f"{duration_in_seconds // 60}:{duration_in_seconds % 60:02d}" if duration_in_seconds else None
-                                views = shorts_meta['view_count']
-
-                                # Convert upload_date (YYYYMMDD) to timestamp
-                                if shorts_meta['upload_date']:
-                                    try:
-                                        upload_date = datetime.strptime(shorts_meta['upload_date'], '%Y%m%d')
-                                        upload_timestamp = int(upload_date.timestamp())
-
-                                        # Calculate "time since published" text
-                                        days_ago = (datetime.now(timezone.utc) - upload_date.replace(tzinfo=timezone.utc)).days
-                                        if days_ago == 0:
-                                            time_since_published = "Today"
-                                        elif days_ago == 1:
-                                            time_since_published = "1 day ago"
-                                        elif days_ago < 7:
-                                            time_since_published = f"{days_ago} days ago"
-                                        elif days_ago < 30:
-                                            weeks = days_ago // 7
-                                            time_since_published = f"{weeks} week{'s' if weeks > 1 else ''} ago"
-                                        elif days_ago < 365:
-                                            months = days_ago // 30
-                                            time_since_published = f"{months} month{'s' if months > 1 else ''} ago"
-                                        else:
-                                            years = days_ago // 365
-                                            time_since_published = f"{years} year{'s' if years > 1 else ''} ago"
-                                    except Exception:
-                                        upload_timestamp = int(datetime.now(timezone.utc).timestamp())
-                                        time_since_published = None
-                                else:
-                                    upload_timestamp = int(datetime.now(timezone.utc).timestamp())
-                                    time_since_published = None
-                            else:
-                                # Fallback to scrapetube data if yt-dlp fails
-                                title = (
-                                    video.get("title", {})
-                                    .get("runs", [{}])[0]
-                                    .get("text", "Untitled")
-                                )
-                                description = ""
-                                duration = None
-                                duration_in_seconds = 0
-                                views = 0
-                                upload_timestamp = int(datetime.now(timezone.utc).timestamp())
-                                time_since_published = None
-                        else:
-                            # Original parsing for videos and live streams
-                            title = (
-                                video.get("title", {})
-                                .get("runs", [{}])[0]
-                                .get("text", "Untitled")
-                            )
-
-                            description = (
-                                video.get("descriptionSnippet", {})
-                                .get("runs", [{}])[0]
-                                .get("text", "")
-                            )
-
-                            duration = (
-                                video.get("lengthText", {})
-                                .get("simpleText")
-                                or video.get("lengthText", {}).get("runs", [{}])[0].get("text")
-                                or None
-                            )
-
-                            duration_in_seconds = parse_duration(duration) if duration else 0
-
-                            time_since_published = (
-                                video.get("publishedTimeText", {}).get("simpleText")
-                                or video.get("publishedTimeText", {}).get("runs", [{}])[0].get("text")
-                                or None
-                            )
-
-                            upload_timestamp = parse_time_since_published(time_since_published)
-
-                            # Parse view count text
-                            view_text = (
-                                video.get("viewCountText", {}).get("simpleText")
-                                or video.get("viewCountText", {}).get("runs", [{}])[0].get("text", "")
-                            )
-                            views = 0
-                            if view_text:
-                                try:
-                                    views = int(
-                                        view_text.replace("views", "")
-                                        .replace(",", "")
-                                        .replace(".", "")
-                                        .strip()
-                                    )
-                                except Exception:
-                                    pass
+                        # Use the yt-dlp metadata fetched above when it is valid;
+                        # otherwise fall back to the scrapetube fields.
+                        meta = shorts_metadata.get(video_id)
+                        if not meta or meta.get("error"):
+                            meta = {}
+
+                        title = (
+                            meta.get("title")
+                            or video.get("title", {})
+                            .get("runs", [{}])[0]
+                            .get("text", "Untitled")
+                        )
 
                         thumbnails = video.get("thumbnail", {}).get("thumbnails", [])
                         thumbnail_url = thumbnails[-1].get("url") if thumbnails else None
-                        video_url = f"https://www.youtube.com/watch?v={video_id}"
                         thumb_path = os.path.join(channel_thumb_dir, f"{video_id}.png")
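+                        # Download a thumbnail only if it is not already cached
+                        # on disk from a previous scrape.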
-                        # Collect thumbnail download task if needed
                         if thumbnail_url and not os.path.exists(thumb_path):
                             thumbnail_tasks.append(
-                                download_img_async(thumbnail_url, thumb_path, session, thumbnail_semaphore)
+                                download_img_async(
+                                    thumbnail_url,
+                                    thumb_path,
+                                    session,
+                                    thumbnail_semaphore
+                                )
                             )
 
-                        # Collect video data for batch insert
                         videos_to_insert.append({
                             "video_id": video_id,
                             "channel_id": self.channel_id,
                             "video_type": vtype,
-                            "video_url": video_url,
+                            "video_url": f"https://www.youtube.com/watch?v={video_id}",
                             "title": title,
-                            "desc": description,
-                            "duration": duration,
-                            "duration_in_seconds": duration_in_seconds,
+                            "desc": meta.get("description") or "",
+                            "duration": None,
+                            "duration_in_seconds": meta.get("duration") or 0,
                             "thumbnail_path": thumb_path,
-                            "view_count": views,
-                            "time_since_published": time_since_published,
-                            "upload_timestamp": upload_timestamp
+                            "view_count": meta.get("view_count") or 0,
+                            "time_since_published": None,
+                            "upload_timestamp": int(datetime.now(timezone.utc).timestamp())
                         })
 
-                        # Update progress periodically
-                        if (idx + 1) % 10 == 0 or idx == len(videos) - 1:
+                        if (idx + 1) % 10 == 0:
                             self.progress_updated.emit(
-                                f"[{vtype.capitalize()}] Processing: {idx+1}/{len(videos)}"
+                                f"[{vtype.capitalize()}] {idx+1}/{len(videos)}"
                             )
-
-                    # === Wait for all thumbnails to download ===
+
+                    # === DOWNLOAD THUMBNAILS ===
                     if thumbnail_tasks:
-                        self.progress_updated.emit(f"[{vtype.capitalize()}] Downloading {len(thumbnail_tasks)} thumbnails...")
+                        self.progress_updated.emit(f"[{vtype.capitalize()}] Downloading thumbnails...")
                         await asyncio.gather(*thumbnail_tasks, return_exceptions=True)
-                        self.progress_updated.emit(f"[{vtype.capitalize()}] ✓ All thumbnails downloaded")
 
-                    # === Batch insert to database ===
-                    logger.debug(f"Saving {len(videos_to_insert)} {vtype} entries to DB for channel_id={self.channel_id}")
-                    self.progress_updated.emit(f"[{vtype.capitalize()}] Saving {len(videos_to_insert)} videos to database...")
-
-
+                    # === DATABASE SAVE ===
                     for video_data in videos_to_insert:
-                        existing_videos = self.db.fetch(
-                            table="VIDEO", where="video_id = ?", params=(video_data["video_id"],)
-                        )
-                        video_exists = len(existing_videos) > 0
                         self.db.insert("VIDEO", video_data)
 
                     total_processed += len(videos_to_insert)
-                    self.progress_updated.emit(f"[{vtype.capitalize()}] ✓ Saved {len(videos_to_insert)} videos")
-
-                    overall_progress = int(type_counter * 33)
-                    self.progress_percentage.emit(min(overall_progress, 95))
-            self.progress_updated.emit(f"Completed scraping! Total {total_processed} videos saved.")
-            self.progress_percentage.emit(100)
-            self.finished.emit()
+                    self.progress_percentage.emit(min(i * 33, 95))
 
-        except Exception as e:
-            self.progress_updated.emit(f"Fetching {vtype.capitalize()}...")
-            logger.debug(f"Scraping {vtype} for channel {self.channel_id}")
-            self.progress_percentage.emit(0)
-            self.finished.emit()
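+            # All requested content types processed; report completion to the UI.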
+            self.progress_updated.emit(f"Completed scraping! Total {total_processed} videos saved.")
+            self.progress_percentage.emit(100)
+
+        except Exception:
+            logger.exception("Async scrape failure")
\ No newline at end of file
diff --git a/README.md b/README.md
index e70ce86..aa4a753 100644
--- a/README.md
+++ b/README.md
@@ -44,8 +44,8 @@
 
 ## ☀️ Table of Contents
 
-- [☀️ Table of Contents](#-table-of-contents)
 - [🌞 Overview](#-overview)
+- [📸 Screenshots](#-screenshots)
 - [🔥 Features](#-features)
 - [🌅 Project Structure](#-project-structure)
 - [🌄 Project Index](#-project-index)
@@ -69,6 +69,10 @@ The application utilizes a local architecture where data is scraped from YouTube
 
 ---
 
+## 📸 Screenshots
+
+---
+
 ## 🔥 Features
 
 - 🆓 **No Credentials Needed**: Use the application immediately—no registration, login, or API key is required.
@@ -499,10 +503,37 @@ Build StaTube from the source and install dependencies:
     cd StaTube
     ```
 
-3. **Install the dependencies:**
+3. **Install the UV package manager:**
+
+    ```sh
+    pip install uv
+    ```
+
+4. **Create a virtual environment:**
 
     ```sh
-    pip install -r requirements.txt
+    uv venv
+    ```
+    or with a specific Python version:
+    ```sh
+    uv venv --python 3.11
+    ```
+
+5. **Activate the virtual environment:**
+
+    Windows:
+    ```sh
+    .venv\Scripts\activate
+    ```
+    Linux/macOS:
+    ```sh
+    source .venv/bin/activate
+    ```
+
+6. **Install dependencies:**
+
+    ```sh
+    uv pip install -r requirements.txt
     ```
 
 ### 🔆 Usage
@@ -532,6 +563,7 @@ To generate the installer locally, you must have Inno Setup installed and compil
 
 ## 🌻 Roadmap
 
+- [x] **Export analysis**: Export and save the analysis result image to a file.
 - [ ] **Docker Version**: A Dockerized version of the application is planned.
 - [ ] **Proxy Settings**: Ability to configure network proxy settings.
 - [ ] **Theming**: Light/Dark theme support.
@@ -551,7 +583,7 @@ To generate the installer locally, you must have Inno Setup installed and compil
 1. **Fork the Repository**: Start by forking the project repository to your account.
 2. **Clone Locally**: Clone the forked repository to your local machine.
    ```sh
-   git clone [https://github.com/Sakth1/StaTube.git](https://github.com/Sakth1/StaTube.git)
+   git clone https://github.com/Sakth1/StaTube.git
    ```
 3. **Create a New Branch**: Always work on a new branch, giving it a descriptive name.
    ```sh
    git checkout -b new-feature-x
    ```
    git push origin new-feature-x
    ```
 7. **Submit a Pull Request**: Create a PR against the original project repository. Clearly describe the changes and their motivations.
+8. **HAVE FUN!** :rocket:
 
 ---
 
 ## 📜 License
 
-StaTube is protected under the [MIT License](https://choosealicense.com/licenses/mit/). For more details, refer to the [LICENSE](./LICENSE) file.
+StaTube is protected under the [MIT License](https://choosealicense.com/licenses/mit/) because sharing is caring :heart:. For more details, refer to the [LICENSE](./LICENSE) file.
 
 ---
 
@@ -583,6 +616,11 @@ StaTube is protected under the [MIT License](https://choosealicense.com/licenses
 - YouTube data scraping powered by:
   - [yt-dlp](https://github.com/yt-dlp/yt-dlp)
   - [scrapetube](https://github.com/dermasmid/scrapetube)
+- Compiled into a Windows binary using:
+  - [Nuitka](https://nuitka.org/)
+  - [Inno Setup](https://www.jrsoftware.org/isinfo.php)
+
+**YOU ALL ARE OFFICIALLY GREAT!**
 
 ---
 
@@ -592,4 +630,4 @@ StaTube is protected under the [MIT License](https://choosealicense.com/licenses
 
 
-[back-to-top]: https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square
\ No newline at end of file
+[back-to-top]: https://img.shields.io/badge/_BACK_TO_TOP_-151515?style=flat-square
\ No newline at end of file
diff --git a/UI/Homepage.py b/UI/Homepage.py
index 2164f10..c9c2108 100644
--- a/UI/Homepage.py
+++ b/UI/Homepage.py
@@ -224,7 +224,6 @@ def show_search_splash(self) -> None:
             cancel_callback=self.cancel_search
         )
 
-        # THIS IS REQUIRED
         self.splash.show_with_animation()
         self.splash.raise_()
         self.splash.activateWindow()
diff --git a/UI/SplashScreen.py b/UI/SplashScreen.py
index 9466ee7..007ea31 100644
--- a/UI/SplashScreen.py
+++ b/UI/SplashScreen.py
@@ -23,6 +23,7 @@ def __init__(self, parent_window: QWidget):
         super().__init__(parent_window)
 
         # Child, no separate taskbar entry
+        self.setAttribute(Qt.WA_DeleteOnClose, True)
         self.setWindowFlags(Qt.Widget | Qt.FramelessWindowHint)
         self.setAttribute(Qt.WA_TranslucentBackground)
         self.setAttribute(Qt.WA_ShowWithoutActivating, True)
@@ -64,9 +65,11 @@ def __init__(self, parent: QWidget | None = None, gif_path: str | None = None):
 
-        # IMPORTANT: No global always-on-top. Tool + Frameless keeps it tied to app.
+        # Frameless dialog kept above the app window while runtime work is shown.
         self.setWindowFlags(
-            Qt.Tool |                     # no taskbar button, stays with parent
-            Qt.FramelessWindowHint        # borderless
+            Qt.FramelessWindowHint |
+            Qt.WindowStaysOnTopHint |
+            Qt.Dialog
        )
+
         self.setAttribute(Qt.WA_TranslucentBackground)
         self.setModal(False)
diff --git a/UI/VideoPage.py b/UI/VideoPage.py
index d3139ff..fb8be99 100644
--- a/UI/VideoPage.py
+++ b/UI/VideoPage.py
@@ -11,7 +11,7 @@
 from Backend.ScrapeVideo import VideoWorker
 from Backend.ScrapeTranscription import TranscriptWorker
 from Backend.ScrapeComments import CommentWorker
-from UI.SplashScreen import SplashScreen
+from UI.SplashScreen import SplashScreen, BlurOverlay
 from utils.AppState import app_state
 from utils.Logger import logger
 
@@ -553,12 +553,10 @@ def scrape_videos(self, scrape_shorts: bool) -> None:
         self.show_splash_screen()
 
         self.worker_thread: QThread = QThread()
-        self.worker: VideoWorker = VideoWorker(channel_id, channel_url)
+        self.worker = VideoWorker(channel_id, channel_url, scrape_shorts)
         self.worker.moveToThread(self.worker_thread)
 
-        self.worker_thread.started.connect(
-            lambda: self.worker.fetch_video_urls(scrape_shorts)
-        )
+        self.worker_thread.started.connect(self.worker.run)
         self.worker.progress_updated.connect(self.update_splash_progress)
         self.worker.progress_percentage.connect(self.update_splash_percentage)
         self.worker.finished.connect(self.on_worker_finished)
@@ -568,51 +566,41 @@ def scrape_videos(self, scrape_shorts: bool) -> None:
         self.worker_thread.start()
 
     def show_splash_screen(self, parent: Optional[QWidget] = None, gif_path: str = "", title: str = "Scraping Videos...") -> None:
-        """
-        Show a splash screen while the video scraping is in progress.
-
-        This function creates and displays a splash screen with an animated loading GIF
-        to provide visual feedback during video scraping operations.
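+        """Create and show the runtime splash with its overlay and cancel hook."""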
-
-        It sets the title of the splash screen to "Scraping Videos (Videos, Shorts, Live)..."
-        and the initial status to "Starting...".
-
-        :param parent: The parent QWidget of the splash screen (optional).
-        :type parent: Optional[QWidget]
-        :param gif_path: The path to the animated loading GIF (optional).
-        :type gif_path: str
-        :param title: The title to display on the splash screen.
-        :type title: str
-        :return None
-        :rtype: None
-        """
         cwd = os.getcwd()
         gif_path = os.path.join(cwd, "assets", "gif", "loading.gif") if not gif_path else gif_path
 
-        # Always destroy previous instance cleanly
         if self.splash:
             self.splash.close()
             self.splash = None
 
-        # IMPORTANT: parent MUST be None for runtime dialogs
-        self.splash = SplashScreen(parent=self.mainwindow, gif_path=gif_path)
+        # ✅ IMPORTANT FIX: parent MUST be None
+        self.splash = SplashScreen(parent=None, gif_path=gif_path)
+
         self.splash.set_title(title)
         self.splash.update_status("Starting...")
         self.splash.set_progress(0)
 
-        # Enable overlay + cancel runtime mode
+        # ✅ Overlay still binds to mainwindow correctly
         self.splash.enable_runtime_mode(
             parent_window=self.mainwindow,
             cancel_callback=self.cancel_scraping
         )
 
-        # THIS IS REQUIRED — show() WILL NOT WORK
         self.splash.show_with_animation()
+        self.splash.raise_()
+        self.splash.activateWindow()
+
+        # Disabled failsafe, kept for reference: force-close a stuck splash
+        # after five minutes.
+        # QTimer.singleShot(5 * 60 * 1000, self._force_close_stuck_splash)
+        #
+        # def _force_close_stuck_splash(self):
+        #     if self.splash:
+        #         logger.error("FORCE closing stuck splash!")
+        #         self.splash.fade_and_close(300)
+        #         self.splash = None
 
     def cancel_scraping(self):
         """
         Called when user presses Cancel on splash screen.
-        Safely stops active workers and closes splash.
+        Safely stops active workers and closes splash + overlays.
         """
         logger.warning("User cancelled scraping operation.")
 
             self.comment_thread.quit()
             self.comment_thread.wait(500)
 
+        # ✅ Force-remove overlays
+        self._clear_overlays()
+
         # Fade & cleanup splash safely
         if self.splash:
             self.splash.fade_and_close(300)
             self.splash = None
 
+    def _clear_overlays(self) -> None:
+        """
+        Force-close any BlurOverlay widgets still attached to the main window.
+        This prevents the UI from staying dimmed if the splash fails to fully clean up.
+        """
+        if self.mainwindow is None:
+            return
+
+        # Close every BlurOverlay child of the main window
+        for overlay in self.mainwindow.findChildren(BlurOverlay):
+            overlay.close()
+
     def update_splash_progress(self, message: str) -> None:
         """
         Updates the status message of the SplashScreen dialog.
@@ -669,6 +672,8 @@ def on_worker_finished(self) -> None:
         :return None
         :rtype: None
         """
+        self._clear_overlays()
+
         if self.splash:
             self.splash.fade_and_close(400)
             self.splash = None
@@ -681,6 +686,8 @@ def on_transcript_worker_finished(self) -> None:
         Called when the TranscriptWorker thread has finished scraping transcripts.
         Closes the SplashScreen dialog.
         """
+        self._clear_overlays()
+
         if self.splash is not None:
             self.splash.fade_and_close(400)
             self.splash = None
 
@@ -691,6 +698,8 @@ def on_comment_worker_finished(self) -> None:
         """
         Called when the CommentWorker thread has finished scraping comments.
         Closes the SplashScreen dialog.
         """
+        self._clear_overlays()
+
         if self.splash is not None:
             self.splash.fade_and_close(400)
             self.splash = None
diff --git a/assets/StaTube_banner-social.png b/assets/StaTube_banner-social.png
new file mode 100644
index 0000000..9db603d
Binary files /dev/null and b/assets/StaTube_banner-social.png differ