From c7b7969a2d4521dec45ddf2f7d4f44b9e991d0ef Mon Sep 17 00:00:00 2001 From: gennyble Date: Sun, 23 Nov 2025 20:39:42 -0600 Subject: [PATCH 1/2] ignore .envrc --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index ea8c4bf..46b5d68 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ /target +.envrc From 033866dcdbaba98b10a64f3c764d66bcf18579f7 Mon Sep 17 00:00:00 2001 From: gennyble Date: Sun, 23 Nov 2025 20:39:54 -0600 Subject: [PATCH 2/2] Parse verses --- bible_notes.md | 14 +++-- src/bible.rs | 148 +++++++++++++++++++++++++++++++++++++++++++------ src/lib.rs | 61 +++++++++----------- src/main.rs | 11 +++- 4 files changed, 174 insertions(+), 60 deletions(-) diff --git a/bible_notes.md b/bible_notes.md index d1afe7c..2c4e365 100644 --- a/bible_notes.md +++ b/bible_notes.md @@ -38,11 +38,17 @@ by a blank line. Most verses start at the beginning of a text block and are fully contained, but! It seems that this is not consistent. They might: -- Start the text block and extend the entire block. -- Start the text block, but another verse *(or more!)* start within +1. Start the text block and extend the entire block. +2. Start the text block, but another verse *(or more!)* start within the same block.[^1] -- Start the text block, but extend into another text block. +3. Start the text block, but extend into another text block. [^1]: I am not well bible-studied enough to know if this is the same verse containing the other, or if the verse starts when the -next begins. \ No newline at end of file +next begins. + +#### examples of the 2nd condition + +- The First Book of Moses: Called Genesis 2:4 and 2:5 (start line 191 in bible.txt) +- The First Book of Moses: Called Genesis 2:11 and 2:12 (line 215) +- The First Book of Moses: Called Genesis 3.1, 3.2, 3.3 (line 259) \ No newline at end of file diff --git a/src/bible.rs b/src/bible.rs index faa4dd8..fc360e5 100644 --- a/src/bible.rs +++ b/src/bible.rs @@ -53,39 +53,122 @@ impl<'t> Testament<'t> { pub struct Book<'b> { pub title: &'b str, //TODO: parse verses into the [Verse] struct, but it's a little difficult - pub verses: &'b str, + pub verses: Vec>, } impl<'b> Book<'b> { fn parse(raw: &'b str) -> Result { match raw.split_once(BOOK_TITLE_DELIMITER) { None => Err(BiblicalError::BookMissingTitle), - Some((title, content)) => Ok(Book { - title: title.trim(), - verses: content, - }), + Some((title, content)) => { + let title = title.trim(); + + let mut verses = vec![]; + let mut last_verse = (0, 0); + let mut wrk = content.trim(); + loop { + let (verse, remaining) = match Verse::parse(wrk) { + Ok(ok) => ok, + Err(e) => { + eprintln!("failed to parse verse: {e}\nbook: {title}\nlast verse: {last_verse:?}"); + panic!() + } + }; + last_verse = (verse.chapter, verse.verse); + + verses.push(verse); + if remaining.is_empty() { + break; + } else { + wrk = remaining; + } + } + + return Ok(Book { title, verses }); + } } } } -pub struct Verse { +pub struct Verse<'v> { pub chapter: usize, pub verse: usize, - pub content: String, + pub content: &'v str, } -fn verse_number(raw: &str) -> Option<(usize, usize)> { - match raw.find(' ') { - None => None, - Some(idx) => match &raw[..idx].split_once(':') { - None => None, - Some((chapter, verse)) => { - let chapter: usize = chapter.trim().parse().unwrap(); - let verse: usize = verse.trim().parse().unwrap(); +impl<'v> Verse<'v> { + fn parse(book: &'v str) -> Result<(Self, &'v str), BiblicalError> { + // the first colon should always be the verse number + let colon_idx = match book.find(':') { + None => return Err(BiblicalError::VerseExtraText), + // Start of line, not a chapter marker + Some(0) => return Err(BiblicalError::VerseNoChapter), + Some(idx) => idx, + }; + + // there should not be extra text in the verse before the chapter + let chapter_raw = &book[..colon_idx]; + let next_white = book.find(|c: char| c.is_whitespace()).unwrap(); + let verse_raw = &book[colon_idx + 1..next_white]; + + // loop until we find a colon with a non-white character on each side + // that is the next chapter marker + + let end_idx = { + let mut curr_idx = next_white; + + loop { + let next_colon = match book[curr_idx..].find(':') { + None => break book.len(), + Some(n) if n + curr_idx == book.len() - 1 => { + // At the end of the book + break book.len() - 1; + } + Some(n) => curr_idx + n, + }; + + let mut colon_three = book[next_colon - 1..next_colon + 2].chars(); + let before_numeric = if let Some(ch) = colon_three.next() { + ch.is_numeric() + } else { + unreachable!(); + }; + + let after_numeric = if let Some(ch) = colon_three.skip(1).next() { + ch.is_numeric() + } else { + unreachable!() + }; + + if before_numeric && after_numeric { + // We've found the next verse marker, locate where it starts + let start_next_marker = book[..next_colon] + .char_indices() + .rfind(|(_, ch)| ch.is_whitespace()) + .unwrap() + .0; - Some((chapter, verse)) + break start_next_marker + 1; + } else { + curr_idx = next_colon + 1; + } } - }, + }; + + let content = &book[next_white..end_idx]; + let remaining = &book[end_idx..]; + + let chapter = usize::from_str_radix(chapter_raw, 10).unwrap(); + let verse = usize::from_str_radix(verse_raw, 10).unwrap(); + + Ok(( + Self { + chapter, + verse, + content, + }, + remaining, + )) } } @@ -99,4 +182,35 @@ pub enum BiblicalError { TestamentMissingTitle, #[error("book missing title!")] BookMissingTitle, + #[error("extra text left not belonging to a verse!")] + VerseExtraText, + #[error("verse missing a chapter number!")] + VerseNoChapter, +} + +#[cfg(test)] +mod test { + use crate::bible::Verse; + + #[test] + fn lol() { + let text = "\ +1:13 And the Egyptians made the children of Israel to serve with +rigour: 1:14 And they made their lives bitter with hard bondage, in +morter, and in brick, and in all manner of service in the field: all +their service, wherein they made them serve, was with rigour."; + + let (v, r) = Verse::parse(text).unwrap(); + assert_eq!(1, v.chapter); + assert_eq!(13, v.verse); + assert_eq!(&text[4..4 + 69], v.content); + + let (v2, r) = Verse::parse(r).unwrap(); + assert_eq!(1, v2.chapter); + assert_eq!(14, v2.verse); + + let (v2, r) = Verse::parse(r).unwrap(); + assert_eq!(1, v2.chapter); + assert_eq!(14, v2.verse); + } } diff --git a/src/lib.rs b/src/lib.rs index d087629..8ce3d99 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -5,12 +5,15 @@ use bible::{Bible, Book, Testament}; use lazy_regex::regex; use unicode_segmentation::UnicodeSegmentation; +use crate::bible::Verse; + pub mod bible; #[derive(Copy, Clone)] pub struct WordMap { testament: &'static Testament<'static>, book: &'static Book<'static>, + verse: &'static Verse<'static>, } static BIBLE: OnceLock = OnceLock::new(); @@ -76,7 +79,7 @@ pub fn what_words_are_in_the_bible(input: &str) -> Vec { pub struct WhereWasWord { pub testament: &'static str, pub book: &'static str, - pub section: String, + pub verse: &'static Verse<'static>, } pub fn where_in_the_bible(input: &str) -> Option { @@ -87,6 +90,7 @@ pub fn where_in_the_bible(input: &str) -> Option { .collect::>(); let matching = matching_words(&words); + // We only show the !where for the first match, I guess let (word, first) = match matching .first() .and_then(|(word, places)| places.first().map(|wm| (*word, wm))) @@ -95,31 +99,10 @@ pub fn where_in_the_bible(input: &str) -> Option { Some(place) => place, }; - let threshold = 80 * 5; - let bible = first.book.verses.to_lowercase(); - let index = bible.find(word.as_str())?; - let mut min_index = index; - let mut max_index = index; - - while min_index > 0 && min_index > index - threshold { - if bible[min_index..=index].contains("\r\n\r\n") { - break; - } else { - min_index -= 1; - } - } - while max_index < bible.len() && max_index < index + threshold { - if bible[index..=max_index].contains("\r\n\r\n") { - break; - } else { - max_index += 1; - } - } - Some(WhereWasWord { testament: first.testament.title, book: first.book.title, - section: bible[min_index..=max_index].to_string(), + verse: first.verse, }) } @@ -141,19 +124,25 @@ pub fn get_bible_map() -> &'static AHashMap> { .chain(bible.new.books.iter().map(|book| (&bible.new, book))); for (testament, book) in books { - let set = book - .verses - .to_lowercase() - .unicode_words() - .map(<_>::to_owned) - .collect::>(); - - for k in set { - let wordmap = WordMap { testament, book }; - - map.entry(k) - .and_modify(|vec| vec.push(wordmap)) - .or_insert(vec![wordmap]); + for verse in &book.verses { + let set = verse + .content + .to_lowercase() + .unicode_words() + .map(<_>::to_owned) + .collect::>(); + + for k in set { + let wordmap = WordMap { + testament, + book, + verse, + }; + + map.entry(k) + .and_modify(|vec| vec.push(wordmap)) + .or_insert(vec![wordmap]); + } } } diff --git a/src/main.rs b/src/main.rs index 839a0e8..bc75e8a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -15,7 +15,7 @@ struct Handler; impl EventHandler for Handler { async fn message(&self, ctx: Context, msg: Message) { let content = msg.content.to_lowercase(); - println!("message received: {}", content); + if content.trim() == "!ping" { let res = msg.reply(&ctx.http, "Pong!").await; if let Err(why) = res { @@ -35,8 +35,13 @@ impl EventHandler for Handler { } else if let Some(pattern) = content.strip_prefix("!where ") { let wh = match where_in_the_bible(pattern) { None => String::from("Couldn't find that in the bible!"), - Some(WhereWasWord { book, section, .. }) => { - format!("book: **{book}**\n{}", section.trim()) + Some(WhereWasWord { book, verse, .. }) => { + format!( + "__{book} {}:{}__\n{}", + verse.chapter, + verse.verse, + verse.content.trim() + ) } };