Skip to content

Commit

Permalink
Updated the deserializer for parsing inline objects (#664)
Browse files Browse the repository at this point in the history
* LLMs can sometimes spit out: {"key": value, "key2": val...

This works when the object is parseable, but when it's not yet parseable
(i.e. while streaming),
this causes some bugs. We handle that case now.
  • Loading branch information
hellovai authored Jun 11, 2024
1 parent 0cb2583 commit 209ba67
Show file tree
Hide file tree
Showing 7 changed files with 285 additions and 7 deletions.
11 changes: 11 additions & 0 deletions engine/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions engine/baml-lib/jsonish/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ anyhow.workspace = true
either = "1.10.0"
test-log = "0.2.16"
regex.workspace = true
assert-json-diff = "2.0.2"
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ impl TypeCoercer for TypeValue {
scope = ctx.display_scope(),
current = value.map(|v| v.r#type()).unwrap_or("<null>".into())
);
log::trace!(
"content: {}",
value
.map(|v| v.to_string())
.unwrap_or_else(|| "<null>".into())
);

match self {
TypeValue::String => coerce_string(ctx, target, value),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -170,11 +170,40 @@ impl JsonParseState {
'\n' => {
return Some(idx);
}
' ' => {
// If after the space we have "//" or "/*" or the beginning of a key, we'll close the string
while let Some((_, c)) = next.next() {
match c {
' ' => {}
'\n' => {
return Some(idx);
}
'/' => match next.peek() {
Some((_, '/')) => {
return Some(idx);
}
Some((_, '*')) => {
return Some(idx);
}
_ => {
let _ = self.consume(c);
}
},
'"' => {
return Some(idx);
}
x => {
let _ = self.consume(x);
}
}
}
}
_ => {
let _ = self.consume(c);
}
}
} else {
// Don't include the comma
return Some(idx);
}
}
Expand Down
9 changes: 3 additions & 6 deletions engine/baml-lib/jsonish/src/tests/macros.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#[macro_use]
macro_rules! test_failing_deserializer {
($name:ident, $file_content:expr, $raw_string:expr, $target_type:expr) => {
#[test_log::test]
Expand All @@ -13,7 +12,6 @@ macro_rules! test_failing_deserializer {
};
}

#[macro_use]
macro_rules! test_deserializer {
($name:ident, $file_content:expr, $raw_string:expr, $target_type:expr, $($json:tt)+) => {
#[test_log::test]
Expand All @@ -38,12 +36,11 @@ macro_rules! test_deserializer {

let expected = serde_json::json!($($json)+);

assert_eq!(json_value, expected, "Expected: {:#}, got: {:#?}", expected, value);
assert_json_diff::assert_json_eq!(json_value, expected);
}
};
}

#[macro_use]
macro_rules! test_partial_deserializer {
($name:ident, $file_content:expr, $raw_string:expr, $target_type:expr, $($json:tt)+) => {
#[test_log::test]
Expand All @@ -61,13 +58,13 @@ macro_rules! test_partial_deserializer {
assert!(result.is_ok(), "Failed to parse: {:?}", result);

let value = result.unwrap();
println!("{}", value);
let value: BamlValue = value.into();
println!("{:#?}", value);
let json_value = json!(value);

let expected = serde_json::json!($($json)+);

assert_eq!(json_value, expected, "Expected: {:#}, got: {:#?}", expected, value);
assert_json_diff::assert_json_eq!(json_value, expected);
}
};
}
2 changes: 1 addition & 1 deletion engine/baml-lib/jsonish/src/tests/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ pub mod macros;
mod test_class;
mod test_enum;
mod test_lists;
mod test_partials;
mod test_unions;

use std::{
collections::{HashMap, HashSet},
env,
path::PathBuf,
};

Expand Down
234 changes: 234 additions & 0 deletions engine/baml-lib/jsonish/src/tests/test_partials.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,234 @@
use super::*;

const BAML_FILE: &str = r###"
class Score {
year int @description(#"
The year you're giving the score for.
"#)
score int @description(#"
1 to 100
"#)
}
class PopularityOverTime {
bookName string
scores Score[]
}
class WordCount {
bookName string
count int
}
class Ranking {
bookName string
score int @description(#"
1 to 100 of your own personal score of this book
"#)
}
class BookAnalysis {
bookNames string[] @description(#"
The list of book names provided
"#)
popularityOverTime PopularityOverTime[] @description(#"
Print the popularity of EACH BOOK over time.
Make sure you add datapoints up to the current year. Try to use a max of 10 datapoints to
represent the whole timeline for all books (so 10 handpicked years).
"#) @alias(popularityData)
popularityRankings Ranking[] @description(#"
A list of the book's popularity rankings over time.
The first element is the top ranking.
"#)
wordCounts WordCount[]
}
"###;

// Case 1: the full, well-formed response.
//
// The raw input is a complete JSON document wrapped in a ```json fence, with
// the `Score`, `Ranking` and `WordCount` entries written as single-line inline
// objects (`{"year": 1950, "score": 70}`). The expected value mirrors the input
// data, except that the aliased input key "popularityData" is reported under
// the schema field name "popularityOverTime".
//
// NOTE(review): the parsing step of `test_partial_deserializer!` is defined in
// tests/macros.rs; this call only supplies (test name, schema, raw text,
// target type, expected JSON).
test_partial_deserializer!(
test_partial_analysis_1,
BAML_FILE,
r#"
```json
{
"bookNames": [
"brave new world",
"the lord of the rings",
"three body problem",
"stormlight archive"
],
"popularityData": [
{
"bookName": "brave new world",
"scores": [
{"year": 1950, "score": 70},
{"year": 1960, "score": 75},
{"year": 1970, "score": 80},
{"year": 1980, "score": 85},
{"year": 1990, "score": 85},
{"year": 2000, "score": 90},
{"year": 2010, "score": 95},
{"year": 2020, "score": 97},
{"year": 2023, "score": 98}
]
},
{
"bookName": "the lord of the rings",
"scores": [
{"year": 1954, "score": 60},
{"year": 1960, "score": 75},
{"year": 1970, "score": 85},
{"year": 1980, "score": 90},
{"year": 1990, "score": 92},
{"year": 2000, "score": 95},
{"year": 2010, "score": 96},
{"year": 2020, "score": 98},
{"year": 2023, "score": 99}
]
},
{
"bookName": "three body problem",
"scores": [
{"year": 2008, "score": 50},
{"year": 2010, "score": 60},
{"year": 2015, "score": 70},
{"year": 2020, "score": 80},
{"year": 2023, "score": 85}
]
},
{
"bookName": "stormlight archive",
"scores": [
{"year": 2010, "score": 55},
{"year": 2014, "score": 65},
{"year": 2017, "score": 75},
{"year": 2020, "score": 80},
{"year": 2023, "score": 85}
]
}
],
"popularityRankings": [
{"bookName": "the lord of the rings", "score": 99},
{"bookName": "brave new world", "score": 97},
{"bookName": "stormlight archive", "score": 85},
{"bookName": "three body problem", "score": 85}
],
"wordCounts": [
{"bookName": "brave new world", "count": 64000},
{"bookName": "the lord of the rings", "count": 470000},
{"bookName": "three body problem", "count": 150000},
{"bookName": "stormlight archive", "count": 400000}
]
}
```
"#,
FieldType::Class("BookAnalysis".to_string()),
{
"bookNames": [
"brave new world",
"the lord of the rings",
"three body problem",
"stormlight archive"
],
"popularityOverTime": [
{
"bookName": "brave new world",
"scores": [
{"year": 1950, "score": 70},
{"year": 1960, "score": 75},
{"year": 1970, "score": 80},
{"year": 1980, "score": 85},
{"year": 1990, "score": 85},
{"year": 2000, "score": 90},
{"year": 2010, "score": 95},
{"year": 2020, "score": 97},
{"year": 2023, "score": 98}
]
},
{
"bookName": "the lord of the rings",
"scores": [
{"year": 1954, "score": 60},
{"year": 1960, "score": 75},
{"year": 1970, "score": 85},
{"year": 1980, "score": 90},
{"year": 1990, "score": 92},
{"year": 2000, "score": 95},
{"year": 2010, "score": 96},
{"year": 2020, "score": 98},
{"year": 2023, "score": 99}
]
},
{
"bookName": "three body problem",
"scores": [
{"year": 2008, "score": 50},
{"year": 2010, "score": 60},
{"year": 2015, "score": 70},
{"year": 2020, "score": 80},
{"year": 2023, "score": 85}
]
},
{
"bookName": "stormlight archive",
"scores": [
{"year": 2010, "score": 55},
{"year": 2014, "score": 65},
{"year": 2017, "score": 75},
{"year": 2020, "score": 80},
{"year": 2023, "score": 85}
]
}
],
"popularityRankings": [
{"bookName": "the lord of the rings", "score": 99},
{"bookName": "brave new world", "score": 97},
{"bookName": "stormlight archive", "score": 85},
{"bookName": "three body problem", "score": 85}
],
"wordCounts": [
{"bookName": "brave new world", "count": 64000},
{"bookName": "the lord of the rings", "count": 470000},
{"bookName": "three body problem", "count": 150000},
{"bookName": "stormlight archive", "count": 400000}
]
}
);

// Case 2: the same response cut off mid-stream.
//
// The raw input stops right after the first inline score object and its
// trailing comma: the ```json fence, the "scores" array, the enclosing
// popularity entry, the "popularityData" array and the root object are all
// left unclosed, and "popularityRankings" / "wordCounts" have not appeared yet.
//
// Expected result: the one completed score is kept, every open container is
// treated as closed at that point, and the two list fields that were never
// seen come back as empty arrays.
test_partial_deserializer!(
test_partial_analysis_2,
BAML_FILE,
r#"
```json
{
"bookNames": [
"brave new world",
"the lord of the rings",
"three body problem",
"stormlight archive"
],
"popularityData": [
{
"bookName": "brave new world",
"scores": [
{"year": 1950, "score": 70},
"#,
FieldType::Class("BookAnalysis".to_string()),
{
"bookNames": [
"brave new world",
"the lord of the rings",
"three body problem",
"stormlight archive"
],
"popularityOverTime": [
{
"bookName": "brave new world",
"scores": [
{"year": 1950, "score": 70}
]
}
],
"popularityRankings": [],
"wordCounts": []
}
);

0 comments on commit 209ba67

Please sign in to comment.