From 0ec4563720c53b132f1bbf77471b3b7f1c1e0639 Mon Sep 17 00:00:00 2001 From: Shuduo Sang Date: Sat, 15 Nov 2025 02:24:49 -0500 Subject: [PATCH] feat: suppport pdf --- Cargo.toml | 2 + src/book_manager.rs | 92 ++++- src/lib.rs | 2 + src/main_app.rs | 215 +++++++++- src/panic_handler.rs | 43 +- src/pdf_handler.rs | 495 +++++++++++++++++++++++ src/preferences.rs | 68 ++++ src/search.rs | 20 +- src/widget/comments_viewer.rs | 89 ++-- src/widget/mod.rs | 1 + src/widget/navigation_panel/book_list.rs | 15 + src/widget/navigation_panel/mod.rs | 18 + src/widget/progress_dialog.rs | 129 ++++++ tests/svg_snapshots.rs | 5 +- 14 files changed, 1127 insertions(+), 67 deletions(-) create mode 100644 src/pdf_handler.rs create mode 100644 src/preferences.rs create mode 100644 src/widget/progress_dialog.rs diff --git a/Cargo.toml b/Cargo.toml index 3d77d1b..7408f4f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -57,6 +57,8 @@ tui-textarea = "0.7" # ansi-to-tui = "7.0.0" vt100 = "0.15" codepage-437 = "0.1.0" +oxidize-pdf = "1.6" +html-escape = "0.2" # Vendored ratatui-image dependencies icy_sixel = "0.1.1" base64 = "0.21.2" diff --git a/src/book_manager.rs b/src/book_manager.rs index 49ce021..af3d887 100644 --- a/src/book_manager.rs +++ b/src/book_manager.rs @@ -1,5 +1,6 @@ +use crate::pdf_handler::PdfDocument; use epub::doc::EpubDoc; -use log::{error, info}; +use log::{error, info, warn}; use std::io::BufReader; use std::path::Path; @@ -45,7 +46,11 @@ impl BookManager { let entry = entry.ok()?; let path = entry.path(); let extension = path.extension()?.to_str()?; - if extension == "epub" || extension == "html" || extension == "htm" { + if extension == "epub" + || extension == "html" + || extension == "htm" + || extension == "pdf" + { let path_str = path.to_str()?.to_string(); let display_name = Self::extract_display_name(&path_str); Some(BookInfo { @@ -94,6 +99,8 @@ impl BookManager { if self.is_html_file(path) { // For HTML files, create a fake EPUB self.create_fake_epub_from_html(path) + } else if self.is_pdf_file(path) { + self.create_fake_epub_from_pdf(path) } else { info!("Attempting to load EPUB file: {path}"); match EpubDoc::new(path) { @@ -356,4 +363,85 @@ impl BookManager { None => false, } } + + pub fn is_pdf_file(&self, path: &str) -> bool { + let path = Path::new(path); + match path.extension().and_then(|ext| ext.to_str()) { + Some(ext) => ext == "pdf", + None => false, + } + } + + fn create_fake_epub_from_pdf( + &self, + path: &str, + ) -> Result>, String> { + info!("Creating fake EPUB from PDF: {path}"); + + match PdfDocument::load(path) { + Ok(pdf_doc) => { + let page_count = pdf_doc.page_count(); + info!("PDF loaded with {page_count} pages"); + + let filename = Path::new(path) + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("PDF Document"); + + let title = filename.replace(".pdf", "").replace(".PDF", ""); + + let text_content = match pdf_doc.extract_text() { + Ok(text) => text, + Err(e) => { + warn!("Failed to extract text from PDF: {e}"); + format!( + "PDF Document\n\nFile: {}\nPages: {}\n\nCould not extract text from this PDF.", + title, page_count + ) + } + }; + + self.create_fake_epub_from_pdf_parts(path, page_count, text_content) + } + Err(e) => { + error!("Failed to load PDF: {e}"); + Err(format!("Failed to load PDF: {e}")) + } + } + } + + pub fn create_fake_epub_from_pdf_parts( + &self, + path: &str, + page_count: usize, + text_content: String, + ) -> Result>, String> { + let filename = Path::new(path) + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("PDF Document"); + + let title = filename.replace(".pdf", "").replace(".PDF", ""); + + let html_content = if text_content.trim().is_empty() { + format!( + r#"

{}

+

PDF with {} pages

+

This PDF appears to have no extractable text content.

"#, + title, page_count + ) + } else { + format!( + r#"

{}

+

PDF with {} pages

+
+
{}
"#, + title, + page_count, + html_escape::encode_text(&text_content) + ) + }; + + self.create_minimal_epub_from_html(&html_content, path) + } } diff --git a/src/lib.rs b/src/lib.rs index b033b42..48137b5 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -6,6 +6,7 @@ pub mod comments; pub use inputs::event_source; pub mod components; pub mod images; +pub mod pdf_handler; // Vendored ratatui-image pub mod vendored; pub use vendored::ratatui_image; @@ -24,6 +25,7 @@ pub use widget::reading_history; pub use widget::text_reader as markdown_text_reader; pub mod panic_handler; pub mod parsing; +pub mod preferences; pub mod search; pub mod search_engine; pub mod system_command; diff --git a/src/main_app.rs b/src/main_app.rs index 34a887a..c955bf0 100644 --- a/src/main_app.rs +++ b/src/main_app.rs @@ -32,7 +32,7 @@ pub enum ChapterDirection { } use std::io::BufReader; -use std::sync::{Arc, Mutex}; +use std::sync::{Arc, Mutex, mpsc}; use std::time::{Duration, Instant}; use anyhow::Result; @@ -65,6 +65,13 @@ impl EpubBook { } } +struct PdfLoadSuccess { + book_index: usize, + path: String, + page_count: usize, + text_content: String, +} + #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum AppAction { Quit, @@ -94,6 +101,12 @@ pub struct App { comments_viewer: Option, notifications: NotificationManager, help_bar_area: Rect, + pdf_load_receiver: mpsc::Receiver>, + pdf_load_sender: mpsc::Sender>, + deferred_book_path: Option, + deferred_book_index: Option, + progress_dialog: Arc>, + should_update_content: bool, } pub trait VimNavMotions { @@ -140,6 +153,16 @@ impl App { Self::new_with_config(None, Some("bookmarks.json"), true) } + fn setup_pdf_progress_callback(&mut self) { + let progress_dialog = self.progress_dialog.clone(); + crate::pdf_handler::set_pdf_progress_callback(move |message: String, progress: u16| { + if let Ok(mut dialog) = progress_dialog.lock() { + dialog.set_message(message); + dialog.set_progress(progress); + } + }); + } + /// Helper method to check if focus is on a main panel (not a popup) fn is_main_panel(&self, panel: MainPanel) -> bool { match self.focused_panel { @@ -298,6 +321,8 @@ impl App { Rect::new(0, 0, 80, 24) }; + let (pdf_load_sender, pdf_load_receiver) = mpsc::channel(); + let mut app = Self { book_manager, navigation_panel, @@ -322,6 +347,14 @@ impl App { comments_viewer: None, notifications: NotificationManager::new(), help_bar_area: Rect::default(), + pdf_load_receiver, + pdf_load_sender, + deferred_book_path: None, + deferred_book_index: None, + progress_dialog: Arc::new(Mutex::new( + crate::widget::progress_dialog::ProgressDialog::new("Loading..."), + )), + should_update_content: false, }; if auto_load_recent @@ -340,6 +373,8 @@ impl App { app.focused_panel = FocusedPanel::Popup(PopupWindow::Help); } + app.setup_pdf_progress_callback(); + app } @@ -376,12 +411,29 @@ impl App { pub fn open_book_for_reading(&mut self, book_index: usize) -> Result<()> { if let Some(book_info) = self.book_manager.get_book_info(book_index) { let path = book_info.path.clone(); + let filename = book_info.display_name.clone(); + + // Check if this is a PDF - if so, defer loading to show progress + let is_pdf = self.book_manager.is_pdf_file(&path); - self.save_bookmark_with_throttle(true); - self.load_epub(&path, false)?; + if is_pdf { + // Show progress dialog for PDF + if let Ok(mut dialog) = self.progress_dialog.lock() { + dialog.set_message(format!("Loading {}...", filename)); + dialog.set_progress(0); + dialog.show(); + } + // Defer PDF loading to allow progress updates + self.deferred_book_index = Some(book_index); + self.deferred_book_path = Some(path); + } else { + // Load EPUB immediately (they're usually faster) + self.save_bookmark_with_throttle(true); + self.load_epub(&path, false)?; - self.navigation_panel.current_book_index = Some(book_index); - self.focused_panel = FocusedPanel::Main(MainPanel::Content); + self.navigation_panel.current_book_index = Some(book_index); + self.focused_panel = FocusedPanel::Main(MainPanel::Content); + } Ok(()) } else { @@ -1782,6 +1834,11 @@ impl App { comments_viewer.render(f, f.area()); } } + + // Render progress dialog if visible + if let Ok(dialog) = self.progress_dialog.lock() { + dialog.render(f, f.area()); + } } fn render_default_content(&self, f: &mut ratatui::Frame, area: Rect, content: &str) { @@ -2387,8 +2444,10 @@ impl App { } } CommentsViewerAction::DeleteSelectedComment => { - if let Some(entry) = - self.comments_viewer.as_ref().and_then(|v| v.selected_comment().cloned()) + if let Some(entry) = self + .comments_viewer + .as_ref() + .and_then(|v| v.selected_comment().cloned()) { let mut delete_success = false; let comments = self.text_reader.get_comments(); @@ -2908,6 +2967,148 @@ pub fn run_app_with_event_source( if first_render { needs_redraw = true; first_render = false; + + // Load deferred book (auto-loaded recent book) after first render + if let Some(path) = app.deferred_book_path.take() { + if app.book_manager.contains_book(&path) { + if let Err(e) = app.open_book_for_reading_by_path(&path) { + error!("Failed to auto-load most recent book: {e}"); + app.show_error(format!("Failed to auto-load recent book: {e}")); + } + } + } + } + + // Load deferred PDF/book that was selected by user (after initial render to show progress) + // Spawn loading in background thread to keep UI responsive + if let Some(book_index) = app.deferred_book_index.take() { + if let Some(path) = app.deferred_book_path.take() { + debug!("Spawning PDF loader for {}", path); + let sender = app.pdf_load_sender.clone(); + std::thread::spawn(move || { + crate::panic_handler::with_panic_exit_suppressed(|| { + // Give the UI a moment to render the progress dialog + std::thread::sleep(std::time::Duration::from_millis(50)); + + let result = std::panic::catch_unwind(|| { + match crate::pdf_handler::PdfDocument::load(&path) { + Ok(pdf_doc) => { + let page_count = pdf_doc.page_count(); + let text_content = match pdf_doc.extract_text() { + Ok(text) => text, + Err(e) => { + warn!("Failed to extract text from PDF: {e}"); + let filename = std::path::Path::new(&path) + .file_name() + .and_then(|name| name.to_str()) + .unwrap_or("PDF Document"); + let title = + filename.replace(".pdf", "").replace(".PDF", ""); + format!( + "PDF Document\n\nFile: {}\nPages: {}\n\nCould not extract text from this PDF.", + title, page_count + ) + } + }; + + Ok(PdfLoadSuccess { + book_index, + path, + page_count, + text_content, + }) + } + Err(e) => Err(format!("Failed to load PDF: {e}")), + } + }) + .unwrap_or_else(|panic_payload| { + let message = if let Some(msg) = panic_payload.downcast_ref::<&str>() { + msg.to_string() + } else if let Some(msg) = panic_payload.downcast_ref::() { + msg.clone() + } else { + "Unknown panic".to_string() + }; + Err(format!("PDF loader panicked: {message}")) + }); + + if let Err(send_err) = sender.send(result) { + warn!("Failed to send PDF load result: {send_err}"); + } + }); + }); + } + } + + // Check if PDF loading completed + if let Ok(result) = app.pdf_load_receiver.try_recv() { + match result { + Ok(success) => { + debug!("PDF loaded successfully: {}", success.path); + app.save_bookmark_with_throttle(true); + + match app.book_manager.create_fake_epub_from_pdf_parts( + &success.path, + success.page_count, + success.text_content, + ) { + Ok(doc) => { + // Create EpubBook - this is fast, just wraps the doc + let current_book = EpubBook::new(success.path.clone(), doc); + + // Set navigation index and focus FIRST - this lets the UI respond to key input immediately + app.navigation_panel.current_book_index = Some(success.book_index); + app.focused_panel = FocusedPanel::Main(MainPanel::Content); + + // NOW set the current book - this makes it available for update_content() + app.current_book = Some(current_book); + + // Update content immediately so the PDF is displayed + // This may block briefly for HTML parsing, but content will be visible + debug!("Updating content for loaded PDF"); + app.update_content(); + debug!("Content update completed"); + + needs_redraw = true; + } + Err(e) => { + error!("Failed to build EPUB from PDF: {e}"); + app.show_error(format!("Failed to build EPUB from PDF: {e}")); + } + } + } + Err(e) => { + error!("{e}"); + app.show_error(e); + } + } + // Hide progress dialog when done + if let Ok(mut dialog) = app.progress_dialog.lock() { + dialog.hide(); + dialog.set_progress(100); + } + } + + if let Ok(mut dialog) = app.progress_dialog.lock() { + if dialog.take_dirty() { + needs_redraw = true; + } + } + + // Update content if deferred (e.g., after PDF loading completes) + // This happens AFTER processing all input events, so keyboard input can be processed while this runs + if app.should_update_content { + app.should_update_content = false; + let content_update_start = std::time::Instant::now(); + app.update_content(); + let content_update_duration = content_update_start.elapsed(); + if content_update_duration.as_millis() > 100 { + debug!( + "Content update (likely large PDF) took {}ms", + content_update_duration.as_millis() + ); + } + needs_redraw = true; } if last_tick.elapsed() >= tick_rate { diff --git a/src/panic_handler.rs b/src/panic_handler.rs index 1a7400e..2e5d22d 100644 --- a/src/panic_handler.rs +++ b/src/panic_handler.rs @@ -3,22 +3,61 @@ use crossterm::{ execute, terminal::{LeaveAlternateScreen, disable_raw_mode}, }; +use log::error; +use std::cell::Cell; use std::io::{self, Write}; use std::panic; +thread_local! { + static SUPPRESS_EXIT: Cell = Cell::new(false); +} + pub fn initialize_panic_handler() { better_panic::install(); let default_hook = panic::take_hook(); panic::set_hook(Box::new(move |panic_info| { - restore_terminal(); + let suppress = SUPPRESS_EXIT.with(|flag| flag.get()); + if suppress { + if let Some(msg) = panic_info.payload().downcast_ref::<&str>() { + error!("Suppressed panic: {}", msg); + } else if let Some(msg) = panic_info.payload().downcast_ref::() { + error!("Suppressed panic: {}", msg); + } else { + error!("Suppressed panic with unknown payload"); + } + // Do not restore terminal or exit; allow catch_unwind to handle it. + return; + } + restore_terminal(); default_hook(panic_info); - std::process::exit(1); })); } +pub fn with_panic_exit_suppressed(f: F) -> T +where + F: FnOnce() -> T, +{ + struct ExitGuard(bool); + impl Drop for ExitGuard { + fn drop(&mut self) { + SUPPRESS_EXIT.with(|flag| flag.set(self.0)); + } + } + + let previous = SUPPRESS_EXIT.with(|flag| { + let prev = flag.get(); + flag.set(true); + prev + }); + let guard = ExitGuard(previous); + let result = f(); + drop(guard); + result +} + /// Restore terminal to a clean state /// /// Specifically handles: diff --git a/src/pdf_handler.rs b/src/pdf_handler.rs new file mode 100644 index 0000000..9a1b95f --- /dev/null +++ b/src/pdf_handler.rs @@ -0,0 +1,495 @@ +use anyhow::{Context, Result}; +use log::{debug, error, info, warn}; +use oxidize_pdf::parser::{PdfDocument as OxidizePdfDocument, PdfReader}; +use std::path::Path; +use std::sync::{Arc, Mutex, OnceLock}; +use std::{any::Any, panic::AssertUnwindSafe, process::Command}; + +#[cfg(unix)] +fn suppress_stderr(f: F) -> R +where + F: FnOnce() -> R, +{ + unsafe { + let null_fd = libc::open(b"/dev/null\0".as_ptr() as *const i8, libc::O_WRONLY); + if null_fd < 0 { + return f(); + } + let old_stderr = libc::dup(libc::STDERR_FILENO); + if old_stderr < 0 { + libc::close(null_fd); + return f(); + } + + struct StderrGuard(i32); + impl Drop for StderrGuard { + fn drop(&mut self) { + unsafe { + libc::dup2(self.0, libc::STDERR_FILENO); + libc::close(self.0); + } + } + } + + libc::dup2(null_fd, libc::STDERR_FILENO); + libc::close(null_fd); + let guard = StderrGuard(old_stderr); + + let result = std::panic::catch_unwind(AssertUnwindSafe(f)); + drop(guard); + + match result { + Ok(value) => value, + Err(payload) => std::panic::resume_unwind(payload), + } + } +} + +#[cfg(not(unix))] +fn suppress_stderr(f: F) -> R +where + F: FnOnce() -> R, +{ + f() +} + +static PDF_PROGRESS_CALLBACK: OnceLock>>>> = + OnceLock::new(); + +fn get_callback_arc() -> Arc>>> { + PDF_PROGRESS_CALLBACK + .get_or_init(|| Arc::new(Mutex::new(None))) + .clone() +} + +pub fn set_pdf_progress_callback(callback: F) { + let callbacks = get_callback_arc(); + if let Ok(mut cbs) = callbacks.lock() { + *cbs = Some(Box::new(callback)); + } +} + +pub fn clear_pdf_progress_callback() { + let callbacks = get_callback_arc(); + if let Ok(mut cbs) = callbacks.lock() { + *cbs = None; + } +} + +fn emit_pdf_progress(message: &str, progress: u16) { + let callbacks = get_callback_arc(); + if let Ok(cbs) = callbacks.lock() { + if let Some(ref cb) = *cbs { + cb(message.to_string(), progress); + } + } +} + +fn describe_panic(payload: Box) -> String { + if let Some(msg) = payload.downcast_ref::<&str>() { + msg.to_string() + } else if let Some(msg) = payload.downcast_ref::() { + msg.clone() + } else { + "unknown panic payload".to_string() + } +} + +pub struct PdfDocument { + page_count: usize, + file_size: u64, + path: String, +} + +pub type ProgressCallback = Box; + +impl PdfDocument { + pub fn load(path: &str) -> Result { + Self::load_with_progress(path, Box::new(|_| {})) + } + + pub fn load_with_progress(path: &str, _progress: ProgressCallback) -> Result { + info!("Loading PDF from path: {path}"); + + emit_pdf_progress("Reading PDF metadata...", 10); + std::thread::sleep(std::time::Duration::from_millis(150)); + + let metadata = std::fs::metadata(path).context("Failed to read PDF file metadata")?; + let file_size = metadata.len(); + + emit_pdf_progress("Parsing PDF structure...", 30); + std::thread::sleep(std::time::Duration::from_millis(150)); + + let page_count = crate::panic_handler::with_panic_exit_suppressed(|| { + std::panic::catch_unwind(AssertUnwindSafe(|| { + suppress_stderr(|| Self::get_page_count(path)) + })) + }); + match page_count { + Ok(Ok(page_count)) => { + emit_pdf_progress(&format!("Found {page_count} pages"), 60); + std::thread::sleep(std::time::Duration::from_millis(150)); + info!( + "PdfDocument::load succeeded for {path}: pages={page_count}, size={file_size} bytes" + ); + Ok(PdfDocument { + page_count, + file_size, + path: path.to_string(), + }) + } + Ok(Err(e)) => { + warn!("Could not read PDF page count for {path}: {e}. Using default fallback."); + emit_pdf_progress("Could not determine page count, using default", 60); + std::thread::sleep(std::time::Duration::from_millis(150)); + Ok(PdfDocument { + page_count: 1, + file_size, + path: path.to_string(), + }) + } + Err(payload) => { + let message = describe_panic(payload); + error!("PdfDocument::load panicked while counting pages for {path}: {message}"); + emit_pdf_progress("Could not determine page count due to parser error", 60); + std::thread::sleep(std::time::Duration::from_millis(150)); + Ok(PdfDocument { + page_count: 1, + file_size, + path: path.to_string(), + }) + } + } + } + + pub fn page_count(&self) -> usize { + self.page_count + } + + pub fn file_size(&self) -> u64 { + self.file_size + } + + pub fn path(&self) -> &str { + &self.path + } + + /// Extract text content from PDF using oxidize-pdf with CJK support and panic protection + pub fn extract_text(&self) -> Result { + self.extract_text_with_progress(Box::new(|_| {})) + } + + pub fn extract_text_with_progress(&self, _progress: ProgressCallback) -> Result { + let path_owned = self.path.clone(); + + emit_pdf_progress("Extracting text from PDF...", 70); + std::thread::sleep(std::time::Duration::from_millis(150)); + + // Skip subprocess extraction - it's unreliable and often hangs + // Go directly to inline text extraction which is more straightforward + // (Subprocess extraction was causing 70% stall) + debug!("Skipping subprocess extraction, using inline method directly"); + + // Fallback to inline processing + let handle = std::thread::spawn(move || { + crate::panic_handler::with_panic_exit_suppressed(|| { + suppress_stderr(|| { + std::panic::catch_unwind(AssertUnwindSafe(|| { + match PdfReader::open(&path_owned) { + Ok(reader) => { + let pdf_doc = OxidizePdfDocument::new(reader); + + match pdf_doc.extract_text() { + Ok(text_pages) => { + let mut full_text = String::new(); + + for page in text_pages.iter() { + if !page.text.is_empty() { + full_text.push_str(&page.text); + full_text.push_str("\n--- Page Break ---\n"); + } + } + + if full_text.is_empty() { + None + } else { + Some(full_text) + } + } + Err(e) => { + warn!("Failed to extract text from PDF: {}", e); + None + } + } + } + Err(e) => { + warn!("Failed to open PDF file: {}", e); + None + } + } + })) + }) + }) + }); + + emit_pdf_progress("Processing extracted content...", 85); + std::thread::sleep(std::time::Duration::from_millis(150)); + + match handle.join() { + Ok(Ok(Some(text))) => { + emit_pdf_progress("PDF loaded successfully", 100); + std::thread::sleep(std::time::Duration::from_millis(150)); + let char_count = text.chars().count(); + info!( + "PdfDocument::extract_text succeeded for {} with {} bytes ({} chars)", + self.path, + text.len(), + char_count + ); + Ok(text) + } + Ok(Ok(None)) => { + emit_pdf_progress("PDF loaded (no text content)", 100); + std::thread::sleep(std::time::Duration::from_millis(150)); + warn!( + "PdfDocument::extract_text returned no text for {}. Falling back to summary message.", + self.path + ); + self.get_fallback_message() + } + Ok(Err(payload)) => { + let message = describe_panic(payload); + error!( + "PDF text extraction panicked for {} with message: {}", + self.path, message + ); + emit_pdf_progress("PDF parser crashed during extraction", 100); + std::thread::sleep(std::time::Duration::from_millis(150)); + self.get_fallback_message() + } + Err(_) => { + error!("PDF text extraction thread panicked (possible stack overflow)"); + emit_pdf_progress("PDF loaded with errors", 100); + std::thread::sleep(std::time::Duration::from_millis(150)); + self.get_fallback_message() + } + } + } + + /// Extract text using subprocess (safer from SIGABRT crashes) + fn extract_text_subprocess(&self) -> Result { + use std::process::Stdio as StdioType; + + let mut child = Command::new(std::env::current_exe()?) + .arg("--debug-pdf") + .arg(&self.path) + .stdout(StdioType::piped()) + .stderr(StdioType::piped()) + .spawn() + .context("Failed to spawn PDF parser subprocess")?; + + // Wait with a 120-second timeout for PDF text extraction + let timeout = std::time::Duration::from_secs(120); + let start = std::time::Instant::now(); + + loop { + match child.try_wait() { + Ok(Some(status)) => { + // Process finished + if !status.success() { + return Err(anyhow::anyhow!( + "PDF parser subprocess failed with status: {:?}", + status + )); + } + + let output = child + .wait_with_output() + .context("Failed to read subprocess output")?; + let stdout = String::from_utf8_lossy(&output.stdout); + + // Extract text from the preview section of the debug output + let mut text_content = String::new(); + let mut in_preview = false; + + for line in stdout.lines() { + if line.contains("--- Text preview") { + in_preview = true; + continue; + } + if line.contains("--- end preview ---") { + break; + } + if in_preview && !line.is_empty() { + text_content.push_str(line); + text_content.push('\n'); + } + } + + if !text_content.is_empty() { + return Ok(text_content); + } else { + return Err(anyhow::anyhow!("No text extracted from subprocess")); + } + } + Ok(None) => { + // Process still running + if start.elapsed() > timeout { + let _ = child.kill(); + error!( + "PDF text extraction subprocess timed out (120s) for: {}", + self.path + ); + return Err(anyhow::anyhow!( + "PDF text extraction timeout (possible infinite loop or corrupted structure)" + )); + } + std::thread::sleep(std::time::Duration::from_millis(100)); + } + Err(e) => { + return Err(anyhow::anyhow!("Failed to check subprocess status: {}", e)); + } + } + } + } + + fn get_fallback_message(&self) -> Result { + let size_mb = self.file_size as f64 / (1024.0 * 1024.0); + let filename = Path::new(&self.path) + .file_name() + .and_then(|n| n.to_str()) + .unwrap_or("Unknown"); + + Ok(format!( + "# PDF Document\n\n\ + **File:** {}\n\n\ + **Pages:** {}\n\n\ + **Size:** {:.1} MB\n\n\ + ---\n\n\ + **Note:** This PDF does not contain extractable text.\n\n\ + This typically means:\n\n\ + • The PDF is a **scanned image** (photograph of pages)\n\n\ + • Text is embedded in a way that this reader cannot extract\n\n\ + • The PDF uses non-standard or proprietary encoding\n\n\ + To read this content, you would need:\n\n\ + • An OCR tool to convert images to text\n\n\ + • The original source document\n\n\ + • Or a dedicated PDF reader application", + filename, self.page_count, size_mb + )) + } + + /// Get page count by parsing the PDF with subprocess protection against crashes + fn get_page_count(path: &str) -> Result { + // Check if we're already in debug mode to avoid subprocess recursion + if std::env::var("BOOKOKRAT_DEBUG_PDF_MODE").is_ok() { + return Self::get_page_count_inline(path); + } + + // Use subprocess to isolate potential crashes from oxidize-pdf + Self::get_page_count_subprocess(path).or_else(|_| Self::get_page_count_inline(path)) + } + + /// Inline page count parsing without subprocess (used in debug mode) + fn get_page_count_inline(path: &str) -> Result { + let path_owned = path.to_string(); + + let handle = std::thread::spawn(move || { + crate::panic_handler::with_panic_exit_suppressed(|| { + suppress_stderr(|| match PdfReader::open(&path_owned) { + Ok(reader) => { + let pdf_doc = OxidizePdfDocument::new(reader); + + match pdf_doc.extract_text() { + Ok(text_pages) => Ok(text_pages.len()), + Err(e) => { + warn!("Could not determine page count: {}", e); + Err(anyhow::anyhow!("Failed to parse PDF: {}", e)) + } + } + } + Err(e) => { + warn!("Could not open PDF file: {}", e); + Err(anyhow::anyhow!("Failed to open PDF: {}", e)) + } + }) + }) + }); + + match handle.join() { + Ok(result) => result, + Err(_) => { + error!("PDF parsing thread panicked (possible stack overflow or recursion limit)"); + Err(anyhow::anyhow!( + "PDF parsing failed: possible stack overflow in malformed PDF" + )) + } + } + } + + /// Get page count using a subprocess (safer from SIGABRT crashes) + fn get_page_count_subprocess(path: &str) -> Result { + use std::process::Stdio as StdioType; + + let mut child = Command::new(std::env::current_exe()?) + .arg("--debug-pdf") + .arg(path) + .stdout(StdioType::piped()) + .stderr(StdioType::piped()) + .spawn() + .context("Failed to spawn PDF parser subprocess")?; + + // Wait with a 60-second timeout for PDF page count parsing + let timeout = std::time::Duration::from_secs(60); + let start = std::time::Instant::now(); + + loop { + match child.try_wait() { + Ok(Some(status)) => { + // Process finished, read output before we lose access to stdout + if !status.success() { + return Err(anyhow::anyhow!( + "PDF parser subprocess failed with status: {:?}", + status + )); + } + + // We can still read from stdout since we have pipes + let output = child + .wait_with_output() + .context("Failed to read subprocess output")?; + let stdout = String::from_utf8_lossy(&output.stdout); + + // Parse the output to find page count + for line in stdout.lines() { + if line.contains("Reported page count:") { + if let Some(count_str) = line.split(':').nth(1) { + if let Ok(count) = count_str.trim().parse::() { + return Ok(count); + } + } + } + } + + return Err(anyhow::anyhow!( + "Could not parse page count from subprocess output" + )); + } + Ok(None) => { + // Process still running + if start.elapsed() > timeout { + let _ = child.kill(); + error!("PDF page count subprocess timed out (60s) for: {}", path); + return Err(anyhow::anyhow!( + "PDF page count parsing timeout (possible infinite loop or corrupted structure)" + )); + } + std::thread::sleep(std::time::Duration::from_millis(100)); + } + Err(e) => { + return Err(anyhow::anyhow!("Failed to check subprocess status: {}", e)); + } + } + } + } +} diff --git a/src/preferences.rs b/src/preferences.rs new file mode 100644 index 0000000..af04bdf --- /dev/null +++ b/src/preferences.rs @@ -0,0 +1,68 @@ +use serde::{Deserialize, Serialize}; +use std::fs; +use std::path::Path; + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct Preferences { + pub summary_language: String, + + #[serde(skip)] + file_path: Option, +} + +impl Preferences { + pub fn ephemeral() -> Self { + Self { + summary_language: "English".to_string(), + file_path: None, + } + } + + pub fn with_file(file_path: &str) -> Self { + Self { + summary_language: "English".to_string(), + file_path: Some(file_path.to_string()), + } + } + + pub fn load_or_ephemeral(file_path: Option<&str>) -> Self { + match file_path { + Some(path) => Self::load_from_file(path).unwrap_or_else(|e| { + log::error!("Failed to load preferences from {path}: {e}"); + Self::with_file(path) + }), + None => Self::ephemeral(), + } + } + + pub fn load_from_file(file_path: &str) -> anyhow::Result { + let path = Path::new(file_path); + if path.exists() { + let content = fs::read_to_string(path)?; + + match serde_json::from_str::(&content) { + Ok(mut prefs) => { + prefs.file_path = Some(file_path.to_string()); + Ok(prefs) + } + Err(e) => { + log::error!("Failed to parse preferences file: {e}"); + Err(anyhow::anyhow!("Failed to parse preferences: {}", e)) + } + } + } else { + Ok(Self::with_file(file_path)) + } + } + + pub fn save(&self) -> anyhow::Result<()> { + match &self.file_path { + Some(path) => { + let content = serde_json::to_string_pretty(self)?; + fs::write(path, content)?; + Ok(()) + } + None => Ok(()), + } + } +} diff --git a/src/search.rs b/src/search.rs index 67ef32d..d40ae66 100644 --- a/src/search.rs +++ b/src/search.rs @@ -179,13 +179,23 @@ pub fn find_matches_in_text(query: &str, items: &[String]) -> Vec { let item_lower = item.to_lowercase(); // Find all occurrences of the query in this item + // Use character-based indexing to handle multi-byte Unicode characters + let item_chars: Vec = item_lower.chars().collect(); + let query_chars: Vec = query_lower.chars().collect(); + let query_len = query_chars.len(); + let mut highlight_ranges = Vec::new(); - let mut search_start = 0; - while let Some(pos) = item_lower[search_start..].find(&query_lower) { - let actual_pos = search_start + pos; - highlight_ranges.push((actual_pos, actual_pos + query.len())); - search_start = actual_pos + 1; // Allow overlapping matches + for i in 0..=(item_chars.len().saturating_sub(query_len)) { + if &item_chars[i..i + query_len] == query_chars.as_slice() { + // Convert character indices back to byte indices for highlighting + let byte_start = item_chars[0..i].iter().collect::().len(); + let byte_end = item_chars[0..i + query_len] + .iter() + .collect::() + .len(); + highlight_ranges.push((byte_start, byte_end)); + } } if !highlight_ranges.is_empty() { diff --git a/src/widget/comments_viewer.rs b/src/widget/comments_viewer.rs index b622486..1fa7bba 100644 --- a/src/widget/comments_viewer.rs +++ b/src/widget/comments_viewer.rs @@ -2,7 +2,7 @@ use crate::comments::{BookComments, Comment}; use crate::inputs::KeySeq; use crate::main_app::VimNavMotions; use crate::markdown::Inline; -use crate::search::{find_matches_in_text, SearchMode, SearchState, SearchablePanel}; +use crate::search::{SearchMode, SearchState, SearchablePanel, find_matches_in_text}; use crate::table_of_contents::TocItem; use crate::theme::OCEANIC_NEXT; use epub::doc::EpubDoc; @@ -12,9 +12,7 @@ use ratatui::{ prelude::Stylize, style::{Color, Modifier, Style}, text::{Line, Span}, - widgets::{ - Block, Borders, Clear, Paragraph, Scrollbar, ScrollbarOrientation, ScrollbarState, - }, + widgets::{Block, Borders, Clear, Paragraph, Scrollbar, ScrollbarOrientation, ScrollbarState}, }; use std::collections::{HashMap, HashSet}; use std::io::BufReader; @@ -247,7 +245,8 @@ impl CommentsViewer { .map(|area| area.height as usize) .filter(|h| *h > 0) .unwrap_or(5); - let target = (self.selected_chapter_index + page).min(self.chapters.len().saturating_sub(1)); + let target = + (self.selected_chapter_index + page).min(self.chapters.len().saturating_sub(1)); self.select_chapter(target); } @@ -279,11 +278,8 @@ impl CommentsViewer { for entry in self.rendered_entries.iter_mut() { let show_chapter_header = last_chapter_href.as_ref() != Some(&entry.chapter_href); - let entry_height = Self::calculate_entry_height_for_width( - entry, - content_width, - show_chapter_header, - ); + let entry_height = + Self::calculate_entry_height_for_width(entry, content_width, show_chapter_header); entry.render_start_line = current_line; entry.render_end_line = current_line + entry_height; current_line = entry.render_end_line; @@ -291,9 +287,7 @@ impl CommentsViewer { } self.total_rendered_lines = current_line; - let max_scroll = self - .total_rendered_lines - .saturating_sub(content_height); + let max_scroll = self.total_rendered_lines.saturating_sub(content_height); self.scroll_offset = self.scroll_offset.min(max_scroll); } @@ -346,7 +340,12 @@ impl CommentsViewer { let mut chapters = Vec::new(); let mut href_to_index = HashMap::new(); let mut seen_hrefs = HashSet::new(); - Self::flatten_toc_items(toc_items, &mut chapters, &mut href_to_index, &mut seen_hrefs); + Self::flatten_toc_items( + toc_items, + &mut chapters, + &mut href_to_index, + &mut seen_hrefs, + ); let mut unmatched_counts: HashMap = HashMap::new(); for entry in entries { @@ -355,7 +354,9 @@ impl CommentsViewer { chapter.comment_count += 1; } } else { - *unmatched_counts.entry(entry.chapter_href.clone()).or_default() += 1; + *unmatched_counts + .entry(entry.chapter_href.clone()) + .or_default() += 1; } } @@ -392,29 +393,18 @@ impl CommentsViewer { normalized.rsplit('/').next().unwrap_or(normalized) } - fn initial_chapter_index( - current_href: Option<&str>, - chapters: &[ChapterDisplay], - ) -> usize { + fn initial_chapter_index(current_href: Option<&str>, chapters: &[ChapterDisplay]) -> usize { if let Some(target) = current_href { let normalized_target = Self::normalize_href(target); if let Some(idx) = chapters.iter().position(|chapter| { - chapter - .href - .as_deref() - .map(Self::normalize_href) - == Some(normalized_target) + chapter.href.as_deref().map(Self::normalize_href) == Some(normalized_target) }) { return idx; } let target_basename = Self::chapter_basename(target); if let Some(idx) = chapters.iter().position(|chapter| { - chapter - .href - .as_deref() - .map(Self::chapter_basename) - == Some(target_basename) + chapter.href.as_deref().map(Self::chapter_basename) == Some(target_basename) }) { return idx; } @@ -778,7 +768,8 @@ impl CommentsViewer { ]) .bg(background.bg.unwrap_or(OCEANIC_NEXT.base_00)); - let paragraph = Paragraph::new(vec![line]).style(Style::default().bg(OCEANIC_NEXT.base_00)); + let paragraph = + Paragraph::new(vec![line]).style(Style::default().bg(OCEANIC_NEXT.base_00)); f.render_widget(paragraph, area); return; } @@ -794,11 +785,8 @@ impl CommentsViewer { if self.selected_chapter_index < self.chapter_scroll_offset { self.chapter_scroll_offset = self.selected_chapter_index; - } else if self.selected_chapter_index - >= self.chapter_scroll_offset + visible_height - { - self.chapter_scroll_offset = - self.selected_chapter_index + 1 - visible_height; + } else if self.selected_chapter_index >= self.chapter_scroll_offset + visible_height { + self.chapter_scroll_offset = self.selected_chapter_index + 1 - visible_height; } let max_title_width = area.width.saturating_sub(4) as usize; @@ -812,11 +800,7 @@ impl CommentsViewer { .take(visible_height) { let is_selected = idx == self.selected_chapter_index; - let mut title = format!( - "{}{}", - " ".repeat(chapter.depth.min(4)), - chapter.title - ); + let mut title = format!("{}{}", " ".repeat(chapter.depth.min(4)), chapter.title); if title.len() > max_title_width { title = Self::truncate_with_ellipsis(&title, max_title_width); } @@ -889,7 +873,14 @@ impl CommentsViewer { for (idx, entry) in self.rendered_entries.iter().enumerate() { let is_selected = self.selected_index == idx; let show_header = idx == 0; - self.render_entry(entry, is_selected, idx, show_header, content_width, &mut lines); + self.render_entry( + entry, + is_selected, + idx, + show_header, + content_width, + &mut lines, + ); } let paragraph = Paragraph::new(lines).scroll((self.scroll_offset as u16, 0)); @@ -1036,13 +1027,12 @@ impl CommentsViewer { for content_line in entry.comment.content.lines() { for wrapped in Self::wrap_text(content_line, comment_width) { let mut spans = vec![Span::raw(comment_prefix)]; - let highlighted = if self.search_state.active - && self.search_state.is_match(entry_index) - { - self.create_highlighted_text(&wrapped, entry_index, comment_style) - } else { - vec![Span::styled(wrapped, comment_style)] - }; + let highlighted = + if self.search_state.active && self.search_state.is_match(entry_index) { + self.create_highlighted_text(&wrapped, entry_index, comment_style) + } else { + vec![Span::styled(wrapped, comment_style)] + }; spans.extend(highlighted); lines.push(Line::from(spans)); } @@ -1145,8 +1135,7 @@ impl CommentsViewer { { self.focus = ViewerFocus::Chapters; let relative_y = y.saturating_sub(chapter_area.y); - let target_index = - self.chapter_scroll_offset + relative_y as usize; + let target_index = self.chapter_scroll_offset + relative_y as usize; if !self.global_search_mode && target_index < self.chapters.len() { self.select_chapter(target_index); } diff --git a/src/widget/mod.rs b/src/widget/mod.rs index a6d0caf..b101f5b 100644 --- a/src/widget/mod.rs +++ b/src/widget/mod.rs @@ -3,5 +3,6 @@ pub mod book_stat; pub mod comments_viewer; pub mod help_popup; pub mod navigation_panel; +pub mod progress_dialog; pub mod reading_history; pub mod text_reader; diff --git a/src/widget/navigation_panel/book_list.rs b/src/widget/navigation_panel/book_list.rs index 16d336a..f5b138f 100644 --- a/src/widget/navigation_panel/book_list.rs +++ b/src/widget/navigation_panel/book_list.rs @@ -407,6 +407,21 @@ impl SearchablePanel for BookList { .map(|book| book.display_name.clone()) .collect() } + + // Note: These are placeholder methods for search input modes + // They exist in the upstream version but we use query string mode for now +} + +impl BookList { + pub fn handle_search_char(&mut self, _c: char) { + // Stub: character-by-character input for book list search not implemented + // This is called when in InputMode, but we use traditional query string mode + } + + pub fn handle_search_backspace(&mut self) { + // Stub: backspace handling for book list search not implemented + // This is called when in InputMode, but we use traditional query string mode + } } #[cfg(test)] diff --git a/src/widget/navigation_panel/mod.rs b/src/widget/navigation_panel/mod.rs index 003ba85..b7d1496 100644 --- a/src/widget/navigation_panel/mod.rs +++ b/src/widget/navigation_panel/mod.rs @@ -160,6 +160,24 @@ impl NavigationPanel { } } + pub fn handle_search_char(&mut self, c: char) { + match self.mode { + NavigationMode::BookSelection => self.book_list.handle_search_char(c), + NavigationMode::TableOfContents => { + // TOC search doesn't support character-by-character input for now + } + } + } + + pub fn handle_search_backspace(&mut self) { + match self.mode { + NavigationMode::BookSelection => self.book_list.handle_search_backspace(), + NavigationMode::TableOfContents => { + // TOC search doesn't support backspace for now + } + } + } + /// Get the currently selected index based on the mode pub fn get_selected_action(&self) -> SelectedActionOwned { match self.mode { diff --git a/src/widget/progress_dialog.rs b/src/widget/progress_dialog.rs new file mode 100644 index 0000000..742815b --- /dev/null +++ b/src/widget/progress_dialog.rs @@ -0,0 +1,129 @@ +use ratatui::{ + Frame, + layout::{Alignment, Constraint, Direction, Layout, Rect}, + style::{Color, Style}, + text::{Line, Span}, + widgets::{Block, Borders, Clear, Gauge, Paragraph}, +}; + +#[derive(Debug, Clone)] +pub struct ProgressDialog { + pub title: String, + pub message: String, + pub progress: u16, // 0-100 + pub visible: bool, + dirty: bool, +} + +impl ProgressDialog { + pub fn new(title: impl Into) -> Self { + Self { + title: title.into(), + message: String::new(), + progress: 0, + visible: false, + dirty: false, + } + } + + pub fn set_message(&mut self, message: impl Into) { + self.message = message.into(); + self.dirty = true; + } + + pub fn set_progress(&mut self, progress: u16) { + self.progress = progress.min(100); + self.dirty = true; + } + + pub fn show(&mut self) { + self.visible = true; + self.dirty = true; + } + + pub fn hide(&mut self) { + self.visible = false; + self.dirty = true; + } + + pub fn take_dirty(&mut self) -> bool { + if self.dirty { + self.dirty = false; + true + } else { + false + } + } + + pub fn render(&self, f: &mut Frame, area: Rect) { + if !self.visible { + return; + } + + // Calculate centered dialog area (60% width, 40% height) + let dialog_area = self.centered_rect(60, 40, area); + + // Clear the area first to remove any background text + f.render_widget(Clear, dialog_area); + + // Create the dialog block with title + let block = Block::default() + .title(self.title.as_str()) + .title_alignment(Alignment::Center) + .borders(Borders::ALL) + .border_style(Style::default().fg(Color::White)) + .style(Style::default().bg(Color::Rgb(64, 64, 64))); + + // Inner area for content + let inner = block.inner(dialog_area); + + // Create layout for message and progress bar + let chunks = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Length(3), + Constraint::Length(3), + Constraint::Min(0), + ]) + .split(inner); + + // Render message + let message_paragraph = Paragraph::new(Line::from(Span::styled( + self.message.as_str(), + Style::default().fg(Color::White), + ))) + .alignment(Alignment::Center); + + // Render progress bar + let gauge = Gauge::default() + .block(Block::default().borders(Borders::NONE)) + .gauge_style(Style::default().fg(Color::Green).bg(Color::Rgb(64, 64, 64))) + .ratio(self.progress as f64 / 100.0) + .label(format!("{}%", self.progress)); + + // Render all widgets + f.render_widget(block, dialog_area); + f.render_widget(message_paragraph, chunks[0]); + f.render_widget(gauge, chunks[1]); + } + + fn centered_rect(&self, percent_x: u16, percent_y: u16, r: Rect) -> Rect { + let popup_layout = Layout::default() + .direction(Direction::Vertical) + .constraints([ + Constraint::Percentage((100 - percent_y) / 2), + Constraint::Percentage(percent_y), + Constraint::Percentage((100 - percent_y) / 2), + ]) + .split(r); + + Layout::default() + .direction(Direction::Horizontal) + .constraints([ + Constraint::Percentage((100 - percent_x) / 2), + Constraint::Percentage(percent_x), + Constraint::Percentage((100 - percent_x) / 2), + ]) + .split(popup_layout[1])[1] + } +} diff --git a/tests/svg_snapshots.rs b/tests/svg_snapshots.rs index 603d722..265f3a8 100644 --- a/tests/svg_snapshots.rs +++ b/tests/svg_snapshots.rs @@ -155,7 +155,10 @@ fn seed_sample_comments(app: &mut App) { updated_at: base_time + chrono::Duration::minutes(5), }); - if app.navigate_chapter_relative(ChapterDirection::Next).is_ok() { + if app + .navigate_chapter_relative(ChapterDirection::Next) + .is_ok() + { if let Some(chapter_b) = app.testing_current_chapter_file() { app.testing_add_comment(Comment { chapter_href: chapter_b.clone(),